diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-31 12:19:55 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-31 12:19:55 -0700 |
| commit | 7ce4de1cdaf11c39b507008dfb5a4e59079d4e8a (patch) | |
| tree | af5af5d6d0d5df206a6bf654c840a005a052db10 /drivers/infiniband/core/device.c | |
| parent | 2c8c9aae4492f813b9b9ae95f0931945a693100e (diff) | |
| parent | ee235923d205c6de73bf5035f3cdcaee22f3291c (diff) | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
- Various minor code cleanups and fixes for hns, iser, cxgb4, hfi1,
rxe, erdma, mana_ib
- Prefetch supprot for rxe ODP
- Remove memory window support from hns as new device FW is no longer
support it
- Remove qib, it is very old and obsolete now, Cornelis wishes to
restructure the hfi1/qib shared layer
- Fix a race in destroying CQs where we can still end up with work
running because the work is cancled before the driver stops
triggering it
- Improve interaction with namespaces:
* Follow the devlink namespace for newly spawned RDMA devices
* Create iopoib net devces in the parent IB device's namespace
* Allow CAP_NET_RAW checks to pass in user namespaces
- A new flow control scheme for IB MADs to try and avoid queue
overflows in the network
- Fix 2G message sizes in bnxt_re
- Optimize mkey layout for mlx5 DMABUF
- New "DMA Handle" concept to allow controlling PCI TPH and steering
tags
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (71 commits)
RDMA/siw: Change maintainer email address
RDMA/mana_ib: add support of multiple ports
RDMA/mlx5: Refactor optional counters steering code
RDMA/mlx5: Add DMAH support for reg_user_mr/reg_user_dmabuf_mr
IB: Extend UVERBS_METHOD_REG_MR to get DMAH
RDMA/mlx5: Add DMAH object support
RDMA/core: Introduce a DMAH object and its alloc/free APIs
IB/core: Add UVERBS_METHOD_REG_MR on the MR object
net/mlx5: Add support for device steering tag
net/mlx5: Expose IFC bits for TPH
PCI/TPH: Expose pcie_tph_get_st_table_size()
RDMA/mlx5: Fix incorrect MKEY masking
RDMA/mlx5: Fix returned type from _mlx5r_umr_zap_mkey()
RDMA/mlx5: remove redundant check on err on return expression
RDMA/mana_ib: add additional port counters
RDMA/mana_ib: Fix DSCP value in modify QP
RDMA/efa: Add CQ with external memory support
RDMA/core: Add umem "is_contiguous" and "start_dma_addr" helpers
RDMA/uverbs: Add a common way to create CQ with umem
RDMA/mlx5: Optimize DMABUF mkey page size
...
Diffstat (limited to 'drivers/infiniband/core/device.c')
| -rw-r--r-- | drivers/infiniband/core/device.c | 47 |
1 files changed, 45 insertions, 2 deletions
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d4263385850a..3145cb34a1d2 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -145,6 +145,33 @@ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net) } EXPORT_SYMBOL(rdma_dev_access_netns); +/** + * rdma_dev_has_raw_cap() - Returns whether a specified rdma device has + * CAP_NET_RAW capability or not. + * + * @dev: Pointer to rdma device whose capability to be checked + * + * Returns true if a rdma device's owning user namespace has CAP_NET_RAW + * capability, otherwise false. When rdma subsystem is in legacy shared network, + * namespace mode, the default net namespace is considered. + */ +bool rdma_dev_has_raw_cap(const struct ib_device *dev) +{ + const struct net *net; + + /* Network namespace is the resource whose user namespace + * to be considered. When in shared mode, there is no reliable + * network namespace resource, so consider the default net namespace. + */ + if (ib_devices_shared_netns) + net = &init_net; + else + net = read_pnet(&dev->coredev.rdma_net); + + return ns_capable(net->user_ns, CAP_NET_RAW); +} +EXPORT_SYMBOL(rdma_dev_has_raw_cap); + /* * xarray has this behavior where it won't iterate over NULL values stored in * allocated arrays. So we need our own iterator to see all values stored in @@ -557,6 +584,8 @@ static void rdma_init_coredev(struct ib_core_device *coredev, /** * _ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate + * @net: network namespace device should be located in, namespace + * must stay valid until ib_register_device() is completed. * * Low-level drivers should use ib_alloc_device() to allocate &struct * ib_device. @size is the size of the structure to be allocated, @@ -564,7 +593,7 @@ static void rdma_init_coredev(struct ib_core_device *coredev, * ib_dealloc_device() must be used to free structures allocated with * ib_alloc_device(). */ -struct ib_device *_ib_alloc_device(size_t size) +struct ib_device *_ib_alloc_device(size_t size, struct net *net) { struct ib_device *device; unsigned int i; @@ -581,7 +610,15 @@ struct ib_device *_ib_alloc_device(size_t size) return NULL; } - rdma_init_coredev(&device->coredev, device, &init_net); + /* ib_devices_shared_netns can't change while we have active namespaces + * in the system which means either init_net is passed or the user has + * no idea what they are doing. + * + * To avoid breaking backward compatibility, when in shared mode, + * force to init the device in the init_net. + */ + net = ib_devices_shared_netns ? &init_net : net; + rdma_init_coredev(&device->coredev, device, net); INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->qp_open_list_lock); @@ -2671,6 +2708,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, add_sub_dev); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); + SET_DEVICE_OP(dev_ops, alloc_dmah); SET_DEVICE_OP(dev_ops, alloc_hw_device_stats); SET_DEVICE_OP(dev_ops, alloc_hw_port_stats); SET_DEVICE_OP(dev_ops, alloc_mr); @@ -2691,6 +2729,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_ah); SET_DEVICE_OP(dev_ops, create_counters); SET_DEVICE_OP(dev_ops, create_cq); + SET_DEVICE_OP(dev_ops, create_cq_umem); SET_DEVICE_OP(dev_ops, create_flow); SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); @@ -2698,6 +2737,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_user_ah); SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); + SET_DEVICE_OP(dev_ops, dealloc_dmah); SET_DEVICE_OP(dev_ops, dealloc_driver); SET_DEVICE_OP(dev_ops, dealloc_mw); SET_DEVICE_OP(dev_ops, dealloc_pd); @@ -2763,8 +2803,10 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, modify_srq); SET_DEVICE_OP(dev_ops, modify_wq); SET_DEVICE_OP(dev_ops, peek_cq); + SET_DEVICE_OP(dev_ops, pre_destroy_cq); SET_DEVICE_OP(dev_ops, poll_cq); SET_DEVICE_OP(dev_ops, port_groups); + SET_DEVICE_OP(dev_ops, post_destroy_cq); SET_DEVICE_OP(dev_ops, post_recv); SET_DEVICE_OP(dev_ops, post_send); SET_DEVICE_OP(dev_ops, post_srq_recv); @@ -2793,6 +2835,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_counters); SET_OBJ_SIZE(dev_ops, ib_cq); + SET_OBJ_SIZE(dev_ops, ib_dmah); SET_OBJ_SIZE(dev_ops, ib_mw); SET_OBJ_SIZE(dev_ops, ib_pd); SET_OBJ_SIZE(dev_ops, ib_qp); |
