From b80961a86b40372b7cfb3065439377f7e7550e59 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 26 Nov 2025 10:59:50 -0800 Subject: drm/xe/uapi: Add DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE which accepts a user pointer to populate the exec queue state so that a GPU hang can be replayed via a Mesa tool. v2: Update the value for HANG_REPLAY_STATE flag Cc: José Roberto de Souza Signed-off-by: Matthew Brost Signed-off-by: Carlos Santa Reviewed-by: Jonathan Cavitt Acked-by: José Roberto de Souza Acked-by: Rodrigo Vivi Link: https://patch.msgid.link/20251126185952.546277-8-matthew.brost@intel.com --- include/uapi/drm/xe_drm.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 47853659a705..37881b1eb6ba 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -210,8 +210,12 @@ struct drm_xe_ext_set_property { /** @pad: MBZ */ __u32 pad; - /** @value: property value */ - __u64 value; + union { + /** @value: property value */ + __u64 value; + /** @ptr: pointer to user value */ + __u64 ptr; + }; /** @reserved: Reserved */ __u64 reserved[2]; @@ -1292,6 +1296,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 +#define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 78d91ba6bd7968d4750dad57c62bf5225ddcb388 Mon Sep 17 00:00:00 2001 From: Sanjay Yadav Date: Thu, 4 Dec 2025 09:34:03 +0530 Subject: drm/xe/uapi: Add NO_COMPRESSION BO flag and query capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION to let userspace opt out of CCS compression on a per-BO basis. When set, the driver maps this to XE_BO_FLAG_NO_COMPRESSION, skips CCS metadata allocation/clearing, and rejects compressed PAT indices at vm_bind. This avoids extra memory ops and manual CCS state handling for buffers. To allow userspace to detect at runtime whether the kernel supports this feature, add DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT and expose it via query_config() on Xe2+ platforms. 
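As an illustration, a minimal userspace sketch of the intended flow (not part of this patch; fd setup and error handling are mostly omitted, and the helper name is an assumption) could look like this:

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <drm/xe_drm.h>

  /* Hypothetical helper: detect kernel support before using the flag. */
  static bool has_no_compression_hint(int fd)
  {
          struct drm_xe_device_query query = {
                  .query = DRM_XE_DEVICE_QUERY_CONFIG,
          };
          struct drm_xe_query_config *config;
          bool supported;

          /* First call with size == 0 returns the required buffer size. */
          if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
                  return false;

          config = calloc(1, query.size);
          query.data = (__u64)(uintptr_t)config;
          if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query)) {
                  free(config);
                  return false;
          }

          supported = config->info[DRM_XE_QUERY_CONFIG_FLAGS] &
                      DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT;
          free(config);
          return supported;
  }

A BO that should never be CCS-compressed would then be created with:

  struct drm_xe_gem_create create = {
          .size = size,           /* assumed caller-provided */
          .placement = placement, /* assumed caller-provided */
          .cpu_caching = DRM_XE_GEM_CPU_CACHING_WC,
          .flags = has_no_compression_hint(fd) ?
                   DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION : 0,
  };
  ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create);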
Mesa PR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38425 IGT PR: https://patchwork.freedesktop.org/patch/685180/ v2 - Changed error code from -EINVAL to -EOPNOTSUPP for unsupported flag usage on pre-Xe2 platforms - Fixed checkpatch warning in xe_vm.c - Fixed kernel-doc formatting in xe_drm.h v3 - Rebase - Updated commit title and description - Added UAPI for DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT and exposed it via query_config() v4 - Rebase v5 - Included Mesa PR and IGT PR in the commit description - Used xe_pat_index_get_comp_en() to extract the compression enable bit from the PAT index v6 - Added XE_IOCTL_DBG() checks for argument validation Suggested-by: Matthew Auld Suggested-by: José Roberto de Souza Acked-by: José Roberto de Souza Reviewed-by: Matthew Auld Signed-off-by: Sanjay Yadav Signed-off-by: Matthew Auld Link: https://patch.msgid.link/20251204040402.2692921-2-sanjay.kumar.yadav@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 15 +++++++++++++-- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_query.c | 3 +++ drivers/gpu/drm/xe/xe_vm.c | 4 ++++ include/uapi/drm/xe_drm.h | 16 ++++++++++++++++ 5 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index b67fd337ff19..6280e6a013ff 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -3178,7 +3178,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, args->flags & ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING | DRM_XE_GEM_CREATE_FLAG_SCANOUT | - DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM))) + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM | + DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION))) return -EINVAL; if (XE_IOCTL_DBG(xe, args->handle)) @@ -3200,6 +3201,12 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) bo_flags |= XE_BO_FLAG_SCANOUT; + if (args->flags & DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION) { + if (XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20)) + return -EOPNOTSUPP; + bo_flags |= XE_BO_FLAG_NO_COMPRESSION; + } + bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1); /* CCS formats need physical placement at a 64K alignment in VRAM. */ @@ -3521,8 +3528,12 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) * Compression implies coh_none, therefore we know for sure that WB * memory can't currently use compression, which is likely one of the * common cases. + * Additionally, userspace may explicitly request no compression via the + * DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION flag, which should also disable + * CCS usage.
*/ - if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB) + if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB || + bo->flags & XE_BO_FLAG_NO_COMPRESSION) return false; return true; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 911d5b90461a..8ab4474129c3 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -50,6 +50,7 @@ #define XE_BO_FLAG_GGTT3 BIT(23) #define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24) #define XE_BO_FLAG_FORCE_USER_VRAM BIT(25) +#define XE_BO_FLAG_NO_COMPRESSION BIT(26) /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index a7bf1fd6dd6a..6667403a8814 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -338,6 +338,9 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR; + if (GRAPHICS_VER(xe) >= 20) + config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= + DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT; config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY; config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 00ffd3f03983..c2012d20faa6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3501,6 +3501,10 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; + if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) && + xe_pat_index_get_comp_en(xe, pat_index))) + return -EINVAL; + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > xe_bo_size(bo) - range)) { diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 37881b1eb6ba..0d99bb0cd20a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -407,6 +407,9 @@ struct drm_xe_query_mem_regions { * has low latency hint support * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the * device has CPU address mirroring support + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT - Flag is set if the + * device supports the userspace hint %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION. + * This is exposed only on Xe2+. * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment * required by this device, typically SZ_4K or SZ_64K * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address @@ -425,6 +428,7 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) #define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_NO_COMPRESSION_HINT (1 << 3) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -795,6 +799,17 @@ struct drm_xe_device_query { * need to use VRAM for display surfaces, therefore the kernel requires * setting this flag for such objects, otherwise an error is thrown on * small-bar systems. + * - %DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION - Allows userspace to + * hint that compression (CCS) should be disabled for the buffer being + * created. This can avoid unnecessary memory operations and CCS state + * management. 
+ * On pre-Xe2 platforms, this flag is currently rejected as compression + * control is not supported via PAT index. On Xe2+ platforms, compression + * is controlled via PAT entries. If this flag is set, the driver will reject + * any VM bind that requests a PAT index enabling compression for this BO. + * Note: On dGPU platforms, there is currently no change in behavior with + * this flag, but future improvements may leverage it. The current benefit is + * primarily applicable to iGPU platforms. * * @cpu_caching supports the following values: * - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back @@ -841,6 +856,7 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (1 << 0) #define DRM_XE_GEM_CREATE_FLAG_SCANOUT (1 << 1) #define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (1 << 2) +#define DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION (1 << 3) /** * @flags: Flags, currently a mask of memory instances of where BO can * be placed -- cgit v1.2.3 From 16e076b036583702bb47554d3931b5e674dd9a8e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 1 Dec 2025 18:51:12 -0800 Subject: drm/xe/oa/uapi: Add gt_id to struct drm_xe_oa_unit gt_id was previously omitted from 'struct drm_xe_oa_unit' because it could be determined from the HW engines (hwes) attached to the OA unit. However, we now have OA units which don't have any hwes attached to them. Hence add gt_id to 'struct drm_xe_oa_unit' in order to provide this needed information to userspace. Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://patch.msgid.link/20251202025115.373546-3-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_query.c | 4 +++- include/uapi/drm/xe_drm.h | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 6667403a8814..75490683bad2 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -685,7 +685,9 @@ static int query_oa_units(struct xe_device *xe, du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | DRM_XE_OA_CAPS_OA_BUFFER_SIZE | DRM_XE_OA_CAPS_WAIT_NUM_REPORTS | - DRM_XE_OA_CAPS_OAM; + DRM_XE_OA_CAPS_OAM | + DRM_XE_OA_CAPS_OA_UNIT_GT_ID; + du->gt_id = u->gt->info.id; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { if (!xe_hw_engine_is_reserved(hwe) && diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 0d99bb0cd20a..876a076fa6c0 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1697,12 +1697,19 @@ struct drm_xe_oa_unit { #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) #define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) #define DRM_XE_OA_CAPS_OAM (1 << 4) +#define DRM_XE_OA_CAPS_OA_UNIT_GT_ID (1 << 5) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; + /** @gt_id: gt id for this OA unit */ + __u16 gt_id; + + /** @reserved1: MBZ */ + __u16 reserved1[3]; + /** @reserved: MBZ */ - __u64 reserved[4]; + __u64 reserved[3]; /** @num_engines: number of engines in @eci array */ __u64 num_engines; -- cgit v1.2.3 From 4d65215145de002defa985136093566a20fdb435 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 12 Sep 2025 13:21:09 -0400 Subject: drm/amdgpu: update VRAM types Update VRAM types.
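For context, userspace tools that print the memory type keep a mirror of this table keyed by drm_amdgpu_info_device::vram_type; a hedged sketch (not part of this patch) of the matching userspace side:

  /* Hypothetical mirror of the kernel's amdgpu_vram_names[] table; must
   * be kept in sync with the AMDGPU_VRAM_TYPE_* defines in amdgpu_drm.h.
   */
  static const char *vram_type_name(__u32 vram_type)
  {
          switch (vram_type) {
          case AMDGPU_VRAM_TYPE_HBM3E: return "HBM3E";
          case AMDGPU_VRAM_TYPE_HBM4:  return "HBM4";
          /* ... older types elided ... */
          default:                     return "UNKNOWN";
          }
  }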
Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++- include/uapi/drm/amdgpu_drm.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e08f58de4b17..926a3f09a776 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1050,7 +1050,8 @@ static const char * const amdgpu_vram_names[] = { "DDR5", "LPDDR4", "LPDDR5", - "HBM3E" + "HBM3E", + "HBM4" }; /** diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index f80aa4c9d88f..c705fbcad3e3 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1427,6 +1427,7 @@ struct drm_amdgpu_info_vbios { #define AMDGPU_VRAM_TYPE_LPDDR4 11 #define AMDGPU_VRAM_TYPE_LPDDR5 12 #define AMDGPU_VRAM_TYPE_HBM3E 13 +#define AMDGPU_VRAM_TYPE_HBM4 14 struct drm_amdgpu_info_device { /** PCI Device ID */ -- cgit v1.2.3 From c3cd568d31b6d41fc201b1d0506e4f6cab7e488a Mon Sep 17 00:00:00 2001 From: Timur Kristóf Date: Wed, 19 Nov 2025 10:25:43 +0100 Subject: drm/amdgpu/uapi: Clarify comment on AMDGPU_VM_PAGE_PRT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the context of the amdgpu uAPI, the PRT flag refers only to unmapped pages of a partially resident texture (a.k.a. sparse resource), not to the full resource. Virtual addresses marked with this flag behave as follows: - Reads return zero - Writes are discarded Signed-off-by: Timur Kristóf Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c705fbcad3e3..351c2fb2df90 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -883,7 +883,7 @@ struct drm_amdgpu_gem_list_handles_entry { #define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) /* executable mapping, new for VI */ #define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) -/* partially resident texture */ +/* unmapped page of partially resident textures */ #define AMDGPU_VM_PAGE_PRT (1 << 4) /* MTYPE flags use bit 5 to 8 */ #define AMDGPU_VM_MTYPE_MASK (0xf << 5) -- cgit v1.2.3 From ea78ec98265339997959eba3c9d764317614675a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:30 +0100 Subject: drm/panthor: Expose the selected coherency protocol to the UMD If we want to be able to skip CPU cache maintenance operations on CPU-cached mappings, the UMD needs to know the kind of coherency in place. Add a field to drm_panthor_gpu_info to do that. We can re-use a padding field for that since this object is write-only from the KMD perspective, and the UMD should just ignore it.
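A minimal UMD-side sketch of reading the new field (assumed usage, not part of this patch; fd setup and error handling omitted):

  struct drm_panthor_gpu_info info = {0};
  struct drm_panthor_dev_query query = {
          .type = DRM_PANTHOR_DEV_QUERY_GPU_INFO,
          .size = sizeof(info),
          .pointer = (__u64)(uintptr_t)&info,
  };

  if (!ioctl(fd, DRM_IOCTL_PANTHOR_DEV_QUERY, &query) &&
      info.selected_coherency != DRM_PANTHOR_GPU_COHERENCY_NONE) {
          /* The GPU snoops CPU caches: userland cache maintenance can
           * be skipped for CPU-cached, GPU-visible buffers.
           */
  }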
v2: - New commit v3: - Make coherency protocol a real enum, not a bitmask - Add BUILD_BUG_ON()s to make sure the values in panthor_regs.h and those exposed through the uAPI match v4: - Add Steve's R-b v5: - No changes v6: - No changes v7: - Fix kernel doc v8: - No changes Reviewed-by: Steven Price Reviewed-by: Karunika Choo Link: https://patch.msgid.link/20251208100841.730527-4-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_device.c | 10 +++++++- drivers/gpu/drm/panthor/panthor_gpu.c | 2 +- include/uapi/drm/panthor_drm.h | 39 +++++++++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 5 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 2979ee0e52c2..54fbb1aa07c5 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -28,6 +28,12 @@ static int panthor_gpu_coherency_init(struct panthor_device *ptdev) { + BUILD_BUG_ON(GPU_COHERENCY_NONE != DRM_PANTHOR_GPU_COHERENCY_NONE); + BUILD_BUG_ON(GPU_COHERENCY_ACE_LITE != DRM_PANTHOR_GPU_COHERENCY_ACE_LITE); + BUILD_BUG_ON(GPU_COHERENCY_ACE != DRM_PANTHOR_GPU_COHERENCY_ACE); + + /* Start with no coherency, and update it if the device is flagged coherent. */ + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_NONE; ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; if (!ptdev->coherent) @@ -37,8 +43,10 @@ static int panthor_gpu_coherency_init(struct panthor_device *ptdev) * ACE protocol has never been supported for command stream frontend GPUs. */ if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) & - GPU_COHERENCY_PROT_BIT(ACE_LITE))) + GPU_COHERENCY_PROT_BIT(ACE_LITE))) { + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_ACE_LITE; return 0; + } drm_err(&ptdev->base, "Coherency not supported by the device"); return -ENOTSUPP; diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index ff5231269518..057e167468d0 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -51,7 +51,7 @@ struct panthor_gpu { static void panthor_gpu_coherency_set(struct panthor_device *ptdev) { gpu_write(ptdev, GPU_COHERENCY_PROTOCOL, - ptdev->coherent ? GPU_COHERENCY_ACE_LITE : GPU_COHERENCY_NONE); + ptdev->gpu_info.selected_coherency); } static void panthor_gpu_l2_config_set(struct panthor_device *ptdev) diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 467d365ed7ba..28cf9e878db6 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -245,6 +245,26 @@ enum drm_panthor_dev_query_type { DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, }; +/** + * enum drm_panthor_gpu_coherency: Type of GPU coherency + */ +enum drm_panthor_gpu_coherency { + /** + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE Lite coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_ACE_LITE = 0, + + /** + * @DRM_PANTHOR_GPU_COHERENCY_ACE: ACE coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_ACE = 1, + + /** + * @DRM_PANTHOR_GPU_COHERENCY_NONE: No coherency. + */ + DRM_PANTHOR_GPU_COHERENCY_NONE = 31, +}; + /** * struct drm_panthor_gpu_info - GPU information * @@ -301,7 +321,16 @@ struct drm_panthor_gpu_info { */ __u32 thread_max_barrier_size; - /** @coherency_features: Coherency features. */ + /** + * @coherency_features: Coherency features. + * + * Combination of drm_panthor_gpu_coherency flags. 
+ * + * Note that this is just the set of coherency protocols supported by the + * GPU, but the actual coherency in place depends on the SoC + * integration and is reflected by + * drm_panthor_gpu_info::selected_coherency. + */ __u32 coherency_features; /** @texture_features: Texture features. */ @@ -310,8 +339,12 @@ struct drm_panthor_gpu_info { /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ __u32 as_present; - /** @pad0: MBZ. */ - __u32 pad0; + /** + * @selected_coherency: Coherency selected for this device. + * + * One of drm_panthor_gpu_coherency. + */ + __u32 selected_coherency; /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ __u64 shader_present; -- cgit v1.2.3 From e06177ec7a36391c66216b55b7c112d5ba8c4cc1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:31 +0100 Subject: drm/panthor: Add a PANTHOR_BO_SYNC ioctl This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32). v2: - Change the flags so they better match the drm_gem_shmem_sync() semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (the semantics changes call for a new review) v6: - Drop ret initialization in panthor_ioctl_bo_sync() - Bail out early in panthor_ioctl_bo_sync() if ops.count is zero - Drop unused PANTHOR_BO_SYNC_OP_FLAGS definition v7: - Hand-roll the sync logic (was previously provided by gem_shmem) v8: - Collect R-b Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-5-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 41 ++++++++++++++++- drivers/gpu/drm/panthor/panthor_gem.c | 85 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/panthor/panthor_gem.h | 2 + include/uapi/drm/panthor_drm.h | 52 +++++++++++++++++++++ 4 files changed, 179 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 73d26e17e2a2..2a9f1feac57a 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -177,7 +177,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ - PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs)) + PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \ + PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size)) /** * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
@@ -1396,6 +1397,43 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev, return 0; } +static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_bo_sync *args = data; + struct drm_panthor_bo_sync_op *ops; + struct drm_gem_object *obj; + int ret; + + if (!args->ops.count) + return 0; + + ret = PANTHOR_UOBJ_GET_ARRAY(ops, &args->ops); + if (ret) + return ret; + + for (u32 i = 0; i < args->ops.count; i++) { + obj = drm_gem_object_lookup(file, ops[i].handle); + if (!obj) { + ret = -ENOENT; + goto err_ops; + } + + ret = panthor_gem_sync(obj, ops[i].type, ops[i].offset, + ops[i].size); + + drm_gem_object_put(obj); + + if (ret) + goto err_ops; + } + +err_ops: + kvfree(ops); + + return ret; +} + static int panthor_open(struct drm_device *ddev, struct drm_file *file) { @@ -1470,6 +1508,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW), PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW), }; static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 0de37733a2ef..69ee30603e0a 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -465,6 +465,91 @@ panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label) panthor_gem_bo_set_label(bo->obj, str); } +int +panthor_gem_sync(struct drm_gem_object *obj, u32 type, + u64 offset, u64 size) +{ + struct panthor_gem_object *bo = to_panthor_bo(obj); + struct drm_gem_shmem_object *shmem = &bo->base; + const struct drm_device *dev = shmem->base.dev; + struct sg_table *sgt; + struct scatterlist *sgl; + unsigned int count; + + /* Make sure the range is in bounds. */ + if (offset + size < offset || offset + size > shmem->base.size) + return -EINVAL; + + /* Disallow CPU-cache maintenance on imported buffers. */ + if (drm_gem_is_imported(&shmem->base)) + return -EINVAL; + + switch (type) { + case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: + case DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: + break; + + default: + return -EINVAL; + } + + /* Don't bother if it's WC-mapped */ + if (shmem->map_wc) + return 0; + + /* Nothing to do if the size is zero. */ + if (size == 0) + return 0; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + for_each_sgtable_dma_sg(sgt, sgl, count) { + if (size == 0) + break; + + dma_addr_t paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + if (len <= offset) { + offset -= len; + continue; + } + + paddr += offset; + len -= offset; + len = min_t(size_t, len, size); + size -= len; + offset = 0; + + /* It's unclear whether dma_sync_xxx() is the right API to do CPU + * cache maintenance given an IOMMU can register their own + * implementation doing more than just CPU cache flushes/invalidation, + * and what we really care about here is CPU caches only, but that's + * the best we have that is both arch-agnostic and does at least the + * CPU cache maintenance on a (paddr, len) tuple.
+ * + * Also, I wish we could do a single + * + * dma_sync_single_for_device(BIDIR) + * + * and get a flush+invalidate, but that's not how it's implemented + * in practice (at least on arm64), so we have to make it + * + * dma_sync_single_for_device(TO_DEVICE) + * dma_sync_single_for_cpu(FROM_DEVICE) + * + * for the flush+invalidate case. + */ + dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE); + if (type == DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE) + dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE); + } + + return 0; +} + #ifdef CONFIG_DEBUG_FS struct gem_size_totals { size_t size; diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index 262c77a4d3c1..22519c570b5a 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -148,6 +148,8 @@ panthor_gem_create_with_handle(struct drm_file *file, void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label); void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label); +int panthor_gem_sync(struct drm_gem_object *obj, + u32 type, u64 offset, u64 size); struct drm_gem_object * panthor_gem_prime_import(struct drm_device *dev, diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 28cf9e878db6..9f810305db6e 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id { * pgoff_t size. */ DRM_PANTHOR_SET_USER_MMIO_OFFSET, + + /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */ + DRM_PANTHOR_BO_SYNC, }; /** @@ -1073,6 +1076,53 @@ struct drm_panthor_set_user_mmio_offset { __u64 offset; }; +/** + * enum drm_panthor_bo_sync_op_type - BO sync type + */ +enum drm_panthor_bo_sync_op_type { + /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH: Flush CPU caches. */ + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH = 0, + + /** @DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: Flush and invalidate CPU caches. */ + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE = 1, +}; + +/** + * struct drm_panthor_bo_sync_op - BO map sync op + */ +struct drm_panthor_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u64 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u64 size; +}; + +/** + * struct drm_panthor_bo_sync - BO map sync request + */ +struct drm_panthor_bo_sync { + /** + * @ops: Array of struct drm_panthor_bo_sync_op sync operations. + */ + struct drm_panthor_obj_array ops; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1119,6 +1169,8 @@ enum { DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label), DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET = DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), + DRM_IOCTL_PANTHOR_BO_SYNC = + DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), }; #if defined(__cplusplus) -- cgit v1.2.3 From c146c82f862e9c7e602a908891c3adf992ef2beb Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:32 +0100 Subject: drm/panthor: Add an ioctl to query BO flags This is useful when importing BOs, so we can know about cacheability and flush the caches when needed. 
We can also know when the buffer comes from a different subsystem and take proper actions (avoid CPU mappings, or do kernel-based syncs instead of userland cache flushes). v2: - New commit v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-6-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 24 +++++++++++++++ include/uapi/drm/panthor_drm.h | 57 +++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 2a9f1feac57a..67d694d00ccb 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1434,6 +1434,29 @@ err_ops: return ret; } +static int panthor_ioctl_bo_query_info(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panthor_bo_query_info *args = data; + struct panthor_gem_object *bo; + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + bo = to_panthor_bo(obj); + args->pad = 0; + args->create_flags = bo->flags; + + args->extra_flags = 0; + if (drm_gem_is_imported(&bo->base.base)) + args->extra_flags |= DRM_PANTHOR_BO_IS_IMPORTED; + + drm_gem_object_put(obj); + return 0; +} + static int panthor_open(struct drm_device *ddev, struct drm_file *file) { @@ -1509,6 +1532,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW), PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW), PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW), + PANTHOR_IOCTL(BO_QUERY_INFO, bo_query_info, DRM_RENDER_ALLOW), }; static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 9f810305db6e..39d5ce815742 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -147,6 +147,13 @@ enum drm_panthor_ioctl_id { /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */ DRM_PANTHOR_BO_SYNC, + + /** + * @DRM_PANTHOR_BO_QUERY_INFO: Query information about a BO. + * + * This is useful for imported BOs. + */ + DRM_PANTHOR_BO_QUERY_INFO, }; /** @@ -1123,6 +1130,54 @@ struct drm_panthor_bo_sync { struct drm_panthor_obj_array ops; }; +/** + * enum drm_panthor_bo_extra_flags - Set of flags returned on a BO_QUERY_INFO request + * + * Those are flags reflecting BO properties that are not directly coming from the flags + * passed at creation time, or information on BOs that were imported from other drivers. + */ +enum drm_panthor_bo_extra_flags { + /** + * @DRM_PANTHOR_BO_IS_IMPORTED: BO has been imported from an external driver. + * + * Note that imported dma-buf handles are not flagged as imported if they + * were exported by panthor. Only buffers coming from other drivers + * (dma heaps, other GPUs, display controllers, V4L, ...) are. + * + * It's also important to note that all imported BOs are mapped cached and can't + * be considered IO-coherent even if the GPU is.
This means they require explicit + * syncs that must go through the DRM_PANTHOR_BO_SYNC ioctl (userland cache + * maintenance is not allowed in that case, because extra operations might be + * needed to make changes visible to the CPU/device, like buffer migration when the + * exporter is a GPU with its own VRAM). + */ + DRM_PANTHOR_BO_IS_IMPORTED = (1 << 0), +}; + +/** + * struct drm_panthor_bo_query_info - Query BO info + */ +struct drm_panthor_bo_query_info { + /** @handle: Handle of the buffer object to query flags on. */ + __u32 handle; + + /** + * @extra_flags: Combination of enum drm_panthor_bo_extra_flags flags. + */ + __u32 extra_flags; + + /** + * @create_flags: Flags passed at creation time. + * + * Combination of enum drm_panthor_bo_flags flags. + * Will be zero if the buffer comes from a different driver. + */ + __u32 create_flags; + + /** @pad: Will be zero on return. */ + __u32 pad; +}; + /** * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number * @__access: Access type. Must be R, W or RW. @@ -1171,6 +1226,8 @@ enum { DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), DRM_IOCTL_PANTHOR_BO_SYNC = DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync), + DRM_IOCTL_PANTHOR_BO_QUERY_INFO = + DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info), }; #if defined(__cplusplus) -- cgit v1.2.3 From cd2c9c3015e642e28e1b528c52c06a79f350d600 Mon Sep 17 00:00:00 2001 From: Loïc Molinari Date: Mon, 8 Dec 2025 11:08:33 +0100 Subject: drm/panthor: Add flag to map GEM object Write-Back Cacheable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. 
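A hedged usage sketch combining this flag with the BO_SYNC ioctl added earlier in the series (fd setup, error handling and the 4 KiB size are assumptions; not part of this patch):

  struct drm_panthor_bo_create create = {
          .size = 4096,
          .flags = DRM_PANTHOR_BO_WB_MMAP,
  };
  ioctl(fd, DRM_IOCTL_PANTHOR_BO_CREATE, &create);

  /* ... CPU writes through a cached mmap of create.handle ... */

  /* Flush the CPU caches before the GPU reads the buffer. */
  struct drm_panthor_bo_sync_op op = {
          .handle = create.handle,
          .type = DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH,
          .offset = 0,
          .size = 4096,
  };
  struct drm_panthor_bo_sync sync = {
          .ops = {
                  .stride = sizeof(op),
                  .count = 1,
                  .array = (__u64)(uintptr_t)&op,
          },
  };
  ioctl(fd, DRM_IOCTL_PANTHOR_BO_SYNC, &sync);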
v2: - Add more to the commit message - Tweak the doc - Make sure we sync the section of the BO pointing to the CS syncobj before we read its seqno v3: - Fix formatting/spelling issues v4: - Add Steve's R-b v5: - Drop Steve's R-b (changes in the ioctl semantics requiring new review) v6: - Fix the uAPI doc - Fix inverted logic in some comment v7: - No changes v8: - Collect R-b Signed-off-by: Loïc Molinari Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-7-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panthor/panthor_drv.c | 7 ++++++- drivers/gpu/drm/panthor/panthor_gem.c | 37 +++++++++++++++++++++++++++++++-- drivers/gpu/drm/panthor/panthor_sched.c | 18 ++++++++++++++-- include/uapi/drm/panthor_drm.h | 9 ++++++++ 4 files changed, 66 insertions(+), 5 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 67d694d00ccb..598c7ad6f2b6 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -902,7 +902,8 @@ static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, return panthor_vm_pool_destroy_vm(pfile->vms, args->id); } -#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP +#define PANTHOR_BO_FLAGS (DRM_PANTHOR_BO_NO_MMAP | \ + DRM_PANTHOR_BO_WB_MMAP) static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, struct drm_file *file) @@ -921,6 +922,10 @@ static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, goto out_dev_exit; } + if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) && + (args->flags & DRM_PANTHOR_BO_WB_MMAP)) { + ret = -EINVAL; + goto out_dev_exit; + } + if (args->exclusive_vm_id) { vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); if (!vm) { diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 69ee30603e0a..360d05abe891 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -77,6 +77,39 @@ static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) {} #endif +static bool +should_map_wc(struct panthor_gem_object *bo, struct panthor_vm *exclusive_vm) +{ + struct panthor_device *ptdev = container_of(bo->base.base.dev, struct panthor_device, base); + + /* We can't do uncached mappings if the device is coherent, + * because the zeroing done by the shmem layer at page allocation + * time happens on a cached mapping which isn't CPU-flushed (at least + * not on Arm64 where the flush is deferred to PTE setup time, and + * only done conditionally based on the mapping permissions). We can't + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush + * those, because they are NOPed if dev_is_dma_coherent() returns true. + * + * FIXME: Note that this problem is going to pop up again when we + * decide to support mapping buffers with the NO_MMAP flag as + * non-shareable (AKA buffers accessed only by the GPU), because we + * need the same CPU flush to happen after page allocation, otherwise + * there's a risk of data leak or late corruption caused by a dirty + * cacheline being evicted. At this point we'll need a way to force + * CPU cache maintenance regardless of whether the device is coherent + * or not. + */ + if (ptdev->coherent) + return false; + + /* Cached mappings are explicitly requested, so no write-combine.
*/ + if (bo->flags & DRM_PANTHOR_BO_WB_MMAP) + return false; + + /* The default is write-combine. */ + return true; +} + static void panthor_gem_free_object(struct drm_gem_object *obj) { struct panthor_gem_object *bo = to_panthor_bo(obj); @@ -163,6 +196,7 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, bo = to_panthor_bo(&obj->base); kbo->obj = &obj->base; bo->flags = bo_flags; + bo->base.map_wc = should_map_wc(bo, vm); bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); drm_gem_object_get(bo->exclusive_vm_root_gem); bo->base.base.resv = bo->exclusive_vm_root_gem->resv; @@ -363,7 +397,6 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = { */ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) { - struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); struct panthor_gem_object *obj; obj = kzalloc(sizeof(*obj), GFP_KERNEL); @@ -371,7 +404,6 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t return ERR_PTR(-ENOMEM); obj->base.base.funcs = &panthor_gem_funcs; - obj->base.map_wc = !ptdev->coherent; mutex_init(&obj->label.lock); panthor_gem_debugfs_bo_init(obj); @@ -406,6 +438,7 @@ panthor_gem_create_with_handle(struct drm_file *file, bo = to_panthor_bo(&shmem->base); bo->flags = flags; + bo->base.map_wc = should_map_wc(bo, exclusive_vm); if (exclusive_vm) { bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 33b9ef537e35..5abc5744e5ac 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -863,8 +863,11 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue struct iosys_map map; int ret; - if (queue->syncwait.kmap) - return queue->syncwait.kmap + queue->syncwait.offset; + if (queue->syncwait.kmap) { + bo = container_of(queue->syncwait.obj, + struct panthor_gem_object, base.base); + goto out_sync; + } bo = panthor_vm_get_bo_for_va(group->vm, queue->syncwait.gpu_va, @@ -881,6 +884,17 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) goto err_put_syncwait_obj; +out_sync: + /* Make sure the CPU caches are invalidated before the seqno is read. + * panthor_gem_sync() is a NOP if map_wc=true, so no need to check + * it here. + */ + panthor_gem_sync(&bo->base.base, + DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE, + queue->syncwait.offset, + queue->syncwait.sync64 ? + sizeof(struct panthor_syncobj_64b) : + sizeof(struct panthor_syncobj_32b)); + return queue->syncwait.kmap + queue->syncwait.offset; err_put_syncwait_obj: diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 39d5ce815742..e238c6264fa1 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state { enum drm_panthor_bo_flags { /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), + + /** + * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping. + * + * CPU map the buffer object in userspace by forcing the "Write-Back + * Cacheable" cacheability attribute. The mapping otherwise uses the + * "Non-Cacheable" attribute if the GPU is not IO coherent.
+ */ + DRM_PANTHOR_BO_WB_MMAP = (1 << 1), }; /** -- cgit v1.2.3 From 2396d65d94fc75d39f096b9777f9edc9c8e677c1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:36 +0100 Subject: drm/panfrost: Expose the selected coherency protocol to the UMD Will be needed if we want to skip CPU cache maintenance operations when the GPU can snoop CPU caches. v2: - New commit v3: - Fix the coherency values (enum instead of bitmask) v4: - Fix init/test on coherency_features v5: - No changes v6: - Collect R-b v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-10-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_device.h | 1 + drivers/gpu/drm/panfrost/panfrost_drv.c | 1 + drivers/gpu/drm/panfrost/panfrost_gpu.c | 26 +++++++++++++++++++++++--- drivers/gpu/drm/panfrost/panfrost_regs.h | 10 ++++++++-- include/uapi/drm/panfrost_drm.h | 7 +++++++ 5 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index e61c4329fd07..0f3992412205 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -79,6 +79,7 @@ struct panfrost_features { u32 thread_max_workgroup_sz; u32 thread_max_barrier_sz; u32 coherency_features; + u32 selected_coherency; u32 afbc_features; u32 texture_features[4]; u32 js_features[16]; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 199073cc7d3f..b2c3f6c81be0 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -95,6 +95,7 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15); PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups); PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc); + PANFROST_FEATURE(SELECTED_COHERENCY, selected_coherency); case DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP: ret = panfrost_ioctl_query_timestamp(pfdev, ¶m->value); diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 483d278eb154..7d555e63e21a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -159,8 +159,8 @@ static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev) pfdev->features.revision >= 0x2000) quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT; else if (panfrost_model_eq(pfdev, 0x6000) && - pfdev->features.coherency_features == COHERENCY_ACE) - quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) << + pfdev->features.coherency_features == BIT(COHERENCY_ACE)) + quirks |= (BIT(COHERENCY_ACE_LITE) | BIT(COHERENCY_ACE)) << JM_FORCE_COHERENCY_FEATURES_SHIFT; if (panfrost_has_hw_feature(pfdev, HW_FEATURE_IDVS_GROUP_SIZE)) @@ -263,7 +263,27 @@ static int panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS); pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE); pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE); - pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_COHERENCY_REG)) + pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); + else + pfdev->features.coherency_features = 
BIT(COHERENCY_ACE_LITE); + + BUILD_BUG_ON(COHERENCY_ACE_LITE != DRM_PANFROST_GPU_COHERENCY_ACE_LITE); + BUILD_BUG_ON(COHERENCY_ACE != DRM_PANFROST_GPU_COHERENCY_ACE); + BUILD_BUG_ON(COHERENCY_NONE != DRM_PANFROST_GPU_COHERENCY_NONE); + + if (!pfdev->coherent) { + pfdev->features.selected_coherency = COHERENCY_NONE; + } else if (pfdev->features.coherency_features & BIT(COHERENCY_ACE)) { + pfdev->features.selected_coherency = COHERENCY_ACE; + } else if (pfdev->features.coherency_features & BIT(COHERENCY_ACE_LITE)) { + pfdev->features.selected_coherency = COHERENCY_ACE_LITE; + } else { + drm_WARN(&pfdev->base, true, "No known coherency protocol supported"); + pfdev->features.selected_coherency = COHERENCY_NONE; + } + pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES); for (i = 0; i < 4; i++) pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index 2b8f1617b836..ee15f6bf6e6f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -102,9 +102,15 @@ #define GPU_L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ #define GPU_L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ +/* GPU_COHERENCY_FEATURES is a bitmask of BIT(COHERENCY_xxx) values encoding the + * set of supported coherency protocols. GPU_COHERENCY_ENABLE is passed a + * COHERENCY_xxx value. + */ #define GPU_COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */ -#define COHERENCY_ACE_LITE BIT(0) -#define COHERENCY_ACE BIT(1) +#define GPU_COHERENCY_ENABLE 0x304 /* (RW) Coherency protocol selection */ +#define COHERENCY_ACE_LITE 0 +#define COHERENCY_ACE 1 +#define COHERENCY_NONE 31 #define GPU_STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ #define GPU_STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 1956431bb391..0c59714ae42b 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -228,6 +228,13 @@ enum drm_panfrost_param { DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, DRM_PANFROST_PARAM_ALLOWED_JM_CTX_PRIORITIES, + DRM_PANFROST_PARAM_SELECTED_COHERENCY, +}; + +enum drm_panfrost_gpu_coherency { + DRM_PANFROST_GPU_COHERENCY_ACE_LITE = 0, + DRM_PANFROST_GPU_COHERENCY_ACE = 1, + DRM_PANFROST_GPU_COHERENCY_NONE = 31, }; struct drm_panfrost_get_param { -- cgit v1.2.3 From 7be45f5489769520aa9276137d0f1f543fb81286 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:37 +0100 Subject: drm/panfrost: Add a PANFROST_SYNC_BO ioctl This will be used by the UMD to synchronize CPU-cached mappings when the UMD can't do it directly (no usermode cache maintenance instruction on Arm32). 
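A hedged sketch of the resulting ioctl usage (bo_handle, bo_size and fd are assumptions; error handling omitted):

  /* Flush a range of a CPU-cached BO before handing it to the GPU. */
  struct drm_panfrost_bo_sync_op op = {
          .handle = bo_handle,
          .type = PANFROST_BO_SYNC_CPU_CACHE_FLUSH,
          .offset = 0,
          .size = bo_size,
  };
  struct drm_panfrost_sync_bo args = {
          .ops = (__u64)(uintptr_t)&op,
          .op_count = 1,
  };
  ioctl(fd, DRM_IOCTL_PANFROST_SYNC_BO, &args);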
v2: - Add more to the commit message - Change the flags to better match the drm_gem_shmem_sync semantics v3: - Add Steve's R-b v4: - No changes v5: - Drop Steve's R-b (semantics changes requiring a new review) v6: - Bail out early in panfrost_ioctl_sync_bo() if op_count is zero v7: - Hand-roll our own bo_sync() helper v8: - Collect R-b Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-11-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 51 ++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.c | 84 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 2 + include/uapi/drm/panfrost_drm.h | 45 ++++++++++++++++++ 4 files changed, 182 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index b2c3f6c81be0..450204fdbe45 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -580,6 +580,56 @@ static int panfrost_ioctl_jm_ctx_destroy(struct drm_device *dev, void *data, return panfrost_jm_ctx_destroy(file, args->handle); } +static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data, + struct drm_file *file) +{ + struct drm_panfrost_sync_bo *args = data; + struct drm_panfrost_bo_sync_op *ops; + struct drm_gem_object *obj; + int ret; + u32 i; + + if (args->pad) + return -EINVAL; + + if (!args->op_count) + return 0; + + ops = kvmalloc_array(args->op_count, sizeof(*ops), GFP_KERNEL); + if (!ops) { + DRM_DEBUG("Failed to allocate incoming BO sync ops array\n"); + return -ENOMEM; + } + + if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops, + args->op_count * sizeof(*ops))) { + DRM_DEBUG("Failed to copy in BO sync ops\n"); + ret = -EFAULT; + goto err_ops; + } + + for (i = 0; i < args->op_count; i++) { + obj = drm_gem_object_lookup(file, ops[i].handle); + if (!obj) { + ret = -ENOENT; + goto err_ops; + } + + ret = panfrost_gem_sync(obj, ops[i].type, + ops[i].offset, ops[i].size); + + drm_gem_object_put(obj); + + if (ret) + goto err_ops; + } + +err_ops: + kvfree(ops); + + return ret; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -649,6 +699,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(SET_LABEL_BO, set_label_bo, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW), + PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW), }; static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 02721863b6ae..62c9e3a6b0e9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -507,6 +507,90 @@ panfrost_gem_set_label(struct drm_gem_object *obj, const char *label) kfree_const(old_label); } +int +panfrost_gem_sync(struct drm_gem_object *obj, u32 type, u32 offset, u32 size) +{ + struct panfrost_gem_object *bo = to_panfrost_bo(obj); + struct drm_gem_shmem_object *shmem = &bo->base; + const struct drm_device *dev = shmem->base.dev; + struct sg_table *sgt; + struct scatterlist *sgl; + unsigned int count; + + /* Make sure the range is in bounds. */ + if (offset + size < offset || offset + size > shmem->base.size) + return -EINVAL; + + /* Disallow CPU-cache maintenance on imported buffers. 
*/ + if (drm_gem_is_imported(&shmem->base)) + return -EINVAL; + + switch (type) { + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH: + case PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE: + break; + + default: + return -EINVAL; + } + + /* Don't bother if it's WC-mapped */ + if (shmem->map_wc) + return 0; + + /* Nothing to do if the size is zero. */ + if (size == 0) + return 0; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + for_each_sgtable_dma_sg(sgt, sgl, count) { + if (size == 0) + break; + + dma_addr_t paddr = sg_dma_address(sgl); + size_t len = sg_dma_len(sgl); + + if (len <= offset) { + offset -= len; + continue; + } + + paddr += offset; + len -= offset; + len = min_t(size_t, len, size); + size -= len; + offset = 0; + + /* It's unclear whether dma_sync_xxx() is the right API to do CPU + * cache maintenance given an IOMMU can register their own + * implementation doing more than just CPU cache flushes/invalidation, + * and what we really care about here is CPU caches only, but that's + * the best we have that is both arch-agnostic and does at least the + * CPU cache maintenance on a (paddr, len) tuple. + * + * Also, I wish we could do a single + * + * dma_sync_single_for_device(BIDIR) + * + * and get a flush+invalidate, but that's not how it's implemented + * in practice (at least on arm64), so we have to make it + * + * dma_sync_single_for_device(TO_DEVICE) + * dma_sync_single_for_cpu(FROM_DEVICE) + * + * for the flush+invalidate case. + */ + dma_sync_single_for_device(dev->dev, paddr, len, DMA_TO_DEVICE); + if (type == PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE) + dma_sync_single_for_cpu(dev->dev, paddr, len, DMA_FROM_DEVICE); + } + + return 0; +} + void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label) { diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index c2470e8255ab..45e2aa846cc7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -153,6 +153,8 @@ int panfrost_gem_shrinker_init(struct drm_device *dev); void panfrost_gem_shrinker_cleanup(struct drm_device *dev); void panfrost_gem_set_label(struct drm_gem_object *obj, const char *label); +int panfrost_gem_sync(struct drm_gem_object *obj, u32 type, + u32 offset, u32 size); void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label); #ifdef CONFIG_DEBUG_FS diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 0c59714ae42b..e194e087a0c8 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -24,6 +24,7 @@ extern "C" { #define DRM_PANFROST_SET_LABEL_BO 0x09 #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b +#define DRM_PANFROST_SYNC_BO 0x0c #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -35,6 +36,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo) #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) +#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct
drm_panfrost_sync_bo) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -308,6 +310,49 @@ struct drm_panfrost_set_label_bo { __u64 label; }; +/* Valid op types to pass in the type field of drm_panfrost_bo_sync_op */ +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH 0 +#define PANFROST_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE 1 + +/** + * struct drm_panfrost_bo_sync_op - BO map sync op + */ +struct drm_panfrost_bo_sync_op { + /** @handle: Handle of the buffer object to sync. */ + __u32 handle; + + /** @type: Type of sync operation. */ + __u32 type; + + /** + * @offset: Offset into the BO at which the sync range starts. + * + * This will be rounded down to the nearest cache line as needed. + */ + __u32 offset; + + /** + * @size: Size of the range to sync + * + * @size + @offset will be rounded up to the nearest cache line as + * needed. + */ + __u32 size; +}; + +/** + * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps + */ +struct drm_panfrost_sync_bo { + /** Array of struct drm_panfrost_bo_sync_op */ + __u64 ops; + + /** Number of BO sync ops */ + __u32 op_count; + + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 -- cgit v1.2.3 From d17592e61fa8e3b2d58df7c4a24abc8ac58b8d3f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 8 Dec 2025 11:08:38 +0100 Subject: drm/panfrost: Add an ioctl to query BO flags This is useful when importing BOs, so we can know about cacheability and flush the caches when needed. v2: - New commit v3: - Add Steve's R-b v4: - No changes v5: - No changes v6: - No changes v7: - No changes v8: - No changes Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-12-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 33 +++++++++++++++++++++++++++++++++ include/uapi/drm/panfrost_drm.h | 19 +++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 450204fdbe45..d461ecf8829d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -630,6 +630,38 @@ err_ops: return ret; } +static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_panfrost_query_bo_info *args = data; + struct drm_gem_object *gem_obj; + struct panfrost_gem_object *bo; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + bo = to_panfrost_bo(gem_obj); + args->pad = 0; + args->create_flags = 0; + args->extra_flags = 0; + + if (drm_gem_is_imported(gem_obj)) { + args->extra_flags |= DRM_PANFROST_BO_IS_IMPORTED; + } else { + if (bo->noexec) + args->create_flags |= PANFROST_BO_NOEXEC; + + if (bo->is_heap) + args->create_flags |= PANFROST_BO_HEAP; + } + + drm_gem_object_put(gem_obj); + return 0; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -700,6 +732,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW), PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW), PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW), + PANFROST_IOCTL(QUERY_BO_INFO, query_bo_info, DRM_RENDER_ALLOW), }; static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev, diff --git
a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index e194e087a0c8..36ae48ea50d3 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -25,6 +25,7 @@ extern "C" { #define DRM_PANFROST_JM_CTX_CREATE 0x0a #define DRM_PANFROST_JM_CTX_DESTROY 0x0b #define DRM_PANFROST_SYNC_BO 0x0c +#define DRM_PANFROST_QUERY_BO_INFO 0x0d #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -37,6 +38,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create) #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy) #define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo) +#define DRM_IOCTL_PANFROST_QUERY_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_QUERY_BO_INFO, struct drm_panfrost_query_bo_info) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -353,6 +355,23 @@ struct drm_panfrost_sync_bo { __u32 pad; }; +/** BO comes from a different subsystem. */ +#define DRM_PANFROST_BO_IS_IMPORTED (1 << 0) + +struct drm_panfrost_query_bo_info { + /** Handle of the object being queried. */ + __u32 handle; + + /** Extra flags that are not coming from the BO_CREATE ioctl(). */ + __u32 extra_flags; + + /** Flags passed at creation time. */ + __u32 create_flags; + + /** Will be zero on return. */ + __u32 pad; +}; + /* Definitions for coredump decoding in user space */ #define PANFROSTDUMP_MAJOR 1 #define PANFROSTDUMP_MINOR 0 -- cgit v1.2.3 From 62eedf1ccba534b318ca85d3890bf0951b9e0f87 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:08:39 +0100 Subject: drm/panfrost: Add flag to map GEM object Write-Back Cacheable Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. 
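A minimal user-space sketch of the intended flow, combining this flag with the SYNC_BO ioctl added earlier in the series; the render-node fd is assumed to be open and error handling is elided:

#include <stdint.h>
#include <sys/ioctl.h>
#include "drm/panfrost_drm.h"

/* Create a CPU-cached BO, then flush CPU caches before the GPU reads it. */
static int create_and_flush_wb_bo(int fd, uint32_t size)
{
        struct drm_panfrost_create_bo create = {
                .size = size,
                .flags = PANFROST_BO_NOEXEC | PANFROST_BO_WB_MMAP,
        };

        if (ioctl(fd, DRM_IOCTL_PANFROST_CREATE_BO, &create))
                return -1;

        /* ... mmap() through create.offset and write the data here ... */

        struct drm_panfrost_bo_sync_op op = {
                .handle = create.handle,
                .type = PANFROST_BO_SYNC_CPU_CACHE_FLUSH,
                .offset = 0,
                .size = size,
        };
        struct drm_panfrost_sync_bo sync = {
                .ops = (uintptr_t)&op,
                .op_count = 1,
        };

        return ioctl(fd, DRM_IOCTL_PANFROST_SYNC_BO, &sync);
}

On IO-coherent GPUs the flush degenerates to a no-op in the kernel, so the same sequence is safe everywhere.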
v2: - Add more to the commit message v3: - No changes v4: - Fix the map_wc test in panfrost_ioctl_query_bo_info() v5: - Drop Steve's R-b (enough has changed to justify a new review) v6: - Collect R-b v7: - No changes v8: - Fix double drm_gem_object_funcs::export assignment Signed-off-by: Faith Ekstrand Reviewed-by: Steven Price Link: https://patch.msgid.link/20251208100841.730527-13-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon --- drivers/gpu/drm/panfrost/panfrost_drv.c | 10 ++++++++-- drivers/gpu/drm/panfrost/panfrost_gem.c | 32 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +++++ include/uapi/drm/panfrost_drm.h | 5 ++++- 4 files changed, 49 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index d461ecf8829d..34969179544c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -126,6 +126,10 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct return 0; } +#define PANFROST_BO_FLAGS (PANFROST_BO_NOEXEC | \ + PANFROST_BO_HEAP | \ + PANFROST_BO_WB_MMAP) + static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *file) { @@ -135,8 +139,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct panfrost_gem_mapping *mapping; int ret; - if (!args->size || args->pad || - (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) + if (!args->size || args->pad || (args->flags & ~PANFROST_BO_FLAGS)) return -EINVAL; /* Heaps should never be executable */ @@ -656,6 +659,9 @@ static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data, if (bo->is_heap) args->create_flags |= PANFROST_BO_HEAP; + + if (!bo->base.map_wc) + args->create_flags |= PANFROST_BO_WB_MMAP; } drm_gem_object_put(gem_obj); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 62c9e3a6b0e9..44985b515212 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -444,12 +444,42 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t return &obj->base.base; } +static bool +should_map_wc(struct panfrost_gem_object *bo) +{ + struct panfrost_device *pfdev = to_panfrost_device(bo->base.base.dev); + + /* We can't do uncached mappings if the device is coherent, + * because the zeroing done by the shmem layer at page allocation + * time happens on a cached mapping which isn't CPU-flushed (at least + * not on Arm64 where the flush is deferred to PTE setup time, and + * only done conditionally based on the mapping permissions). We can't + * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush + * those, because they are NOPed if dev_is_dma_coherent() returns true. + */ + if (pfdev->coherent) + return false; + + /* Cached mappings are explicitly requested, so no write-combine. */ + if (bo->wb_mmap) + return false; + + /* The default is write-combine. */ + return true; +} + struct panfrost_gem_object * panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) { struct drm_gem_shmem_object *shmem; struct panfrost_gem_object *bo; + /* The heap buffer is not supposed to be CPU-visible, so don't allow + WB_MMAP on those. 
+ */ + if ((flags & PANFROST_BO_HEAP) && (flags & PANFROST_BO_WB_MMAP)) + return ERR_PTR(-EINVAL); + /* Round up heap allocations to 2MB to keep fault handling simple */ if (flags & PANFROST_BO_HEAP) size = roundup(size, SZ_2M); @@ -461,6 +491,8 @@ panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) bo = to_panfrost_bo(&shmem->base); bo->noexec = !!(flags & PANFROST_BO_NOEXEC); bo->is_heap = !!(flags & PANFROST_BO_HEAP); + bo->wb_mmap = !!(flags & PANFROST_BO_WB_MMAP); + bo->base.map_wc = should_map_wc(bo); return bo; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 45e2aa846cc7..79d4377019e9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -98,6 +98,11 @@ struct panfrost_gem_object { bool noexec :1; bool is_heap :1; + /* On coherent devices, this reflects the creation flags, not the true + * cacheability attribute of the mapping. + */ + bool wb_mmap :1; + #ifdef CONFIG_DEBUG_FS struct panfrost_gem_debugfs debugfs; #endif diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index 36ae48ea50d3..50d5337f35ef 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -124,9 +124,12 @@ struct drm_panfrost_wait_bo { __s64 timeout_ns; }; -/* Valid flags to pass to drm_panfrost_create_bo */ +/* Valid flags to pass to drm_panfrost_create_bo. + * PANFROST_BO_WB_MMAP can't be set if PANFROST_BO_HEAP is. + */ #define PANFROST_BO_NOEXEC 1 #define PANFROST_BO_HEAP 2 +#define PANFROST_BO_WB_MMAP 4 /** * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. -- cgit v1.2.3 From d9ec63474648a258094704ce223c9249fa7bb279 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:50 -0800 Subject: drm/xe/multi_queue: Add user interface for multi queue support Multi Queue is a new mode of execution supported by the compute and blitter copy command streamers (CCS and BCS, respectively). It is an enhancement of the existing hardware architecture and leverages the same submission model. It enables support for efficient, parallel execution of multiple queues within a single context. All the queues of a group must use the same address space (VM). The new DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP exec queue property supports creating a multi queue group and adding queues to a queue group. All queues of a multi queue group share the same context. An exec queue create ioctl call with the above property specified with value DRM_XE_MULTI_GROUP_CREATE will create a new multi queue group with the queue being created as the primary queue (aka q0) of the group. To add secondary queues to the group, they need to be created with the above property, with the id of the primary queue as the value. The properties of the primary queue (like priority, timeslice) apply to the whole group, so these properties can't be set for secondary queues of a group. Once destroyed, the secondary queues of a multi queue group can't be replaced. However, new secondary queues can be dynamically added to the group, up to a total of 64 queues per group. Once the primary queue is destroyed, secondary queues can't be added to the queue group. v2: Remove group->lock, fix xe_exec_queue_group_add()/delete() function semantics, add additional comments, remove unused group->list_lock, add XE_BO_FLAG_GGTT_INVALIDATE for cgp bo, Assert LRC is valid, update uapi kernel doc. 
(Matt Brost) v3: Use XE_BO_FLAG_PINNED_LATE_RESTORE/USER_VRAM/GGTT_INVALIDATE flags for cgp bo (Matt) v4: Ensure queue is not a vm_bind queue; uapi change due to rebase Signed-off-by: Stuart Summers Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-21-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 197 ++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_exec_queue.h | 47 ++++++++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 26 ++++ include/uapi/drm/xe_drm.h | 10 ++ 4 files changed, 278 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 02b75652d497..f76ec277c5af 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -13,6 +13,7 @@ #include #include +#include "xe_bo.h" #include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" @@ -63,6 +64,33 @@ enum xe_exec_queue_sched_prop { static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, u64 extensions, int ext_number); +static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_lrc *lrc; + unsigned long idx; + + if (xe_exec_queue_is_multi_queue_secondary(q)) { + /* + * Put pairs with get from xe_exec_queue_lookup() call + * in xe_exec_queue_group_validate(). + */ + xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q)); + return; + } + + if (!group) + return; + + /* Primary queue cleanup */ + xa_for_each(&group->xa, idx, lrc) + xe_lrc_put(lrc); + + xa_destroy(&group->xa); + xe_bo_unpin_map_no_vm(group->cgp_bo); + kfree(group); +} + static void __xe_exec_queue_free(struct xe_exec_queue *q) { int i; @@ -73,6 +101,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); + + if (xe_exec_queue_is_multi_queue(q)) + xe_exec_queue_group_cleanup(q); + if (q->vm) xe_vm_put(q->vm); @@ -588,6 +620,150 @@ static int exec_queue_set_hang_replay_state(struct xe_device *xe, return 0; } +static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_tile *tile = gt_to_tile(q->gt); + struct xe_exec_queue_group *group; + struct xe_bo *bo; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return -ENOMEM; + + bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_FORCE_USER_VRAM | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_GGTT, false); + if (IS_ERR(bo)) { + drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n", + PTR_ERR(bo)); + kfree(group); + return PTR_ERR(bo); + } + + xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K); + + group->primary = q; + group->cgp_bo = bo; + xa_init_flags(&group->xa, XA_FLAGS_ALLOC1); + q->multi_queue.group = group; + + return 0; +} + +static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q) +{ + return q->gt->info.multi_queue_engine_class_mask & BIT(q->class); +} + +static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q, + u32 primary_id) +{ + struct xe_exec_queue_group *group; + struct xe_exec_queue *primary; + int ret; + + /* + * Get from below xe_exec_queue_lookup() pairs with put + * in xe_exec_queue_group_cleanup(). 
+ */ + primary = xe_exec_queue_lookup(q->vm->xef, primary_id); + if (XE_IOCTL_DBG(xe, !primary)) + return -ENOENT; + + if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) || + XE_IOCTL_DBG(xe, q->vm != primary->vm) || + XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) { + ret = -EINVAL; + goto put_primary; + } + + group = primary->multi_queue.group; + q->multi_queue.valid = true; + q->multi_queue.group = group; + + return 0; +put_primary: + xe_exec_queue_put(primary); + return ret; +} + +#define XE_MAX_GROUP_SIZE 64 +static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + u32 pos; + int err; + + xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); + + /* Primary queue holds a reference to LRCs of all secondary queues */ + err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]), + XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL); + if (XE_IOCTL_DBG(xe, err)) { + xe_lrc_put(q->lrc[0]); + + /* It is invalid if queue group limit is exceeded */ + if (err == -EBUSY) + err = -EINVAL; + + return err; + } + + q->multi_queue.pos = pos; + + return 0; +} + +static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_exec_queue_group *group = q->multi_queue.group; + struct xe_lrc *lrc; + + xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); + + lrc = xa_erase(&group->xa, q->multi_queue.pos); + xe_assert(xe, lrc); + xe_lrc_put(lrc); +} + +static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q))) + return -ENODEV; + + if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe))) + return -EOPNOTSUPP; + + if (XE_IOCTL_DBG(xe, !q->vm->xef)) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q))) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q))) + return -EINVAL; + + if (value & DRM_XE_MULTI_GROUP_CREATE) { + if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE)) + return -EINVAL; + + q->multi_queue.valid = true; + q->multi_queue.is_primary = true; + q->multi_queue.pos = 0; + return 0; + } + + /* While adding secondary queues, the upper 32 bits must be 0 */ + if (XE_IOCTL_DBG(xe, value & (~0ull << 32))) + return -EINVAL; + + return xe_exec_queue_group_validate(xe, q, value); +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -597,6 +773,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, [DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -618,7 +795,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE && - ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE)) + ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); @@ -667,6 
+845,12 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return exec_queue_user_extensions(xe, q, ext.next_extension, ++ext_number); + if (xe_exec_queue_is_multi_queue_primary(q)) { + err = xe_exec_queue_group_init(xe, q); + if (XE_IOCTL_DBG(xe, err)) + return err; + } + return 0; } @@ -821,12 +1005,18 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(q)) return PTR_ERR(q); + if (xe_exec_queue_is_multi_queue_secondary(q)) { + err = xe_exec_queue_group_add(xe, q); + if (XE_IOCTL_DBG(xe, err)) + goto put_exec_queue; + } + if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) - goto put_exec_queue; + goto delete_queue_group; } if (q->vm && q->hwe->hw_engine_group) { @@ -849,6 +1039,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, kill_exec_queue: xe_exec_queue_kill(q); +delete_queue_group: + if (xe_exec_queue_is_multi_queue_secondary(q)) + xe_exec_queue_group_delete(xe, q); put_exec_queue: xe_exec_queue_put(q); return err; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index fda4d4f9bda8..e6daa40003f2 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -66,6 +66,53 @@ static inline bool xe_exec_queue_uses_pxp(struct xe_exec_queue *q) return q->pxp.type; } +/** + * xe_exec_queue_is_multi_queue() - Whether an exec_queue is part of a queue group. + * @q: The exec_queue + * + * Return: True if the exec_queue is part of a queue group, false otherwise. + */ +static inline bool xe_exec_queue_is_multi_queue(struct xe_exec_queue *q) +{ + return q->multi_queue.valid; +} + +/** + * xe_exec_queue_is_multi_queue_primary() - Whether an exec_queue is primary queue + * of a multi queue group. + * @q: The exec_queue + * + * Return: True if @q is primary queue of a queue group, false otherwise. + */ +static inline bool xe_exec_queue_is_multi_queue_primary(struct xe_exec_queue *q) +{ + return q->multi_queue.is_primary; +} + +/** + * xe_exec_queue_is_multi_queue_secondary() - Whether an exec_queue is secondary queue + * of a multi queue group. + * @q: The exec_queue + * + * Return: True if @q is secondary queue of a queue group, false otherwise. + */ +static inline bool xe_exec_queue_is_multi_queue_secondary(struct xe_exec_queue *q) +{ + return xe_exec_queue_is_multi_queue(q) && !xe_exec_queue_is_multi_queue_primary(q); +} + +/** + * xe_exec_queue_multi_queue_primary() - Get multi queue group's primary queue + * @q: The exec_queue + * + * If @q belongs to a multi queue group, then the primary queue of the group will + * be returned. Otherwise, @q will be returned. + */ +static inline struct xe_exec_queue *xe_exec_queue_multi_queue_primary(struct xe_exec_queue *q) +{ + return xe_exec_queue_is_multi_queue(q) ? q->multi_queue.group->primary : q; +} + bool xe_exec_queue_is_lr(struct xe_exec_queue *q); bool xe_exec_queue_is_idle(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 3ba10632dcd6..29feafb42e0a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -32,6 +32,20 @@ enum xe_exec_queue_priority { XE_EXEC_QUEUE_PRIORITY_COUNT }; +/** + * struct xe_exec_queue_group - Execution multi queue group + * + * Contains multi queue group information. 
+ */ +struct xe_exec_queue_group { + /** @primary: Primary queue of this group */ + struct xe_exec_queue *primary; + /** @cgp_bo: BO for the Context Group Page */ + struct xe_bo *cgp_bo; + /** @xa: xarray to store LRCs */ + struct xarray xa; +}; + /** * struct xe_exec_queue - Execution queue * @@ -111,6 +125,18 @@ struct xe_exec_queue { struct xe_guc_exec_queue *guc; }; + /** @multi_queue: Multi queue information */ + struct { + /** @multi_queue.group: Queue group information */ + struct xe_exec_queue_group *group; + /** @multi_queue.pos: Position of queue within the multi-queue group */ + u8 pos; + /** @multi_queue.valid: Queue belongs to a multi queue group */ + u8 valid:1; + /** @multi_queue.is_primary: Is primary queue (Q0) of the group */ + u8 is_primary:1; + } multi_queue; + /** @sched_props: scheduling properties */ struct { /** @sched_props.timeslice_us: timeslice period in micro-seconds */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 876a076fa6c0..19a8ae856a17 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1272,6 +1272,14 @@ struct drm_xe_vm_bind { * Given that going into a power-saving state kills PXP HWDRM sessions, * runtime PM will be blocked while queues of this type are alive. * All PXP queues will be killed if a PXP invalidation event occurs. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP - Create a multi-queue group + * or add secondary queues to a multi-queue group. + * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_CREATE flag set, + * then a new multi-queue group is created with this queue as the primary queue + * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary + * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. + * All the other non-relevant bits of extension's 'value' field while adding the + * primary or the secondary queues of the group must be set to 0. * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class @@ -1313,6 +1321,8 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 +#define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 898a00f4b43311adfd4da1711ed2b72adc8c98a5 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:52 -0800 Subject: drm/xe/multi_queue: Add multi queue priority property Add support for queues of a multi queue group to set their priority within the queue group by adding property DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY. This is the only other property supported by secondary queues of a multi queue group, other than DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP. 
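As an illustration of how the two properties are meant to compose, here is a hedged user-space sketch; fd, vm_id, instance and primary_id are assumed to exist, and error handling is elided:

#include <stdint.h>
#include <sys/ioctl.h>
#include "drm/xe_drm.h"

/* Join an existing multi-queue group and request high priority by
 * chaining two set_property extensions at exec queue creation time.
 */
static uint32_t create_secondary_high_prio(int fd, uint32_t vm_id,
                                           struct drm_xe_engine_class_instance *instance,
                                           uint32_t primary_id)
{
        struct drm_xe_ext_set_property prio = {
                .base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
                .property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY,
                .value = 2, /* high; 1 (normal) is the default */
        };
        struct drm_xe_ext_set_property group = {
                .base.next_extension = (uintptr_t)&prio,
                .base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
                .property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
                .value = primary_id, /* lower 32 bits: primary's exec_queue_id */
        };
        struct drm_xe_exec_queue_create create = {
                .extensions = (uintptr_t)&group,
                .vm_id = vm_id,
                .width = 1,
                .num_placements = 1,
                .instances = (uintptr_t)instance,
        };

        ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
        return create.exec_queue_id;
}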
v2: Add kernel doc for enum xe_multi_queue_priority, Add assert for priority values, fix includes and declarations (Matt Brost) v3: update uapi kernel-doc (Matt Brost) v4: uapi change due to rebase Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-23-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_exec_queue.c | 17 ++++++++++++++++- drivers/gpu/drm/xe/xe_exec_queue_types.h | 16 ++++++++++++++++ drivers/gpu/drm/xe/xe_guc_submit.c | 1 + drivers/gpu/drm/xe/xe_lrc.c | 29 +++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_lrc.h | 3 +++ include/uapi/drm/xe_drm.h | 4 ++++ 6 files changed, 69 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index f76ec277c5af..aa46d154d04a 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -180,6 +180,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, INIT_LIST_HEAD(&q->multi_gt_link); INIT_LIST_HEAD(&q->hw_engine_group_link); INIT_LIST_HEAD(&q->pxp.link); + q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL; q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -764,6 +765,17 @@ static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue return xe_exec_queue_group_validate(xe, q, value); } +static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q, + u64 value) +{ + if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH)) + return -EINVAL; + + q->multi_queue.priority = value; + + return 0; +} + typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value); @@ -774,6 +786,8 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, [DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group, + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] = + exec_queue_set_multi_queue_priority, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -796,7 +810,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE && ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)) + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 06fb518b8533..46e5f4715a0d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -32,6 +32,20 @@ enum xe_exec_queue_priority { XE_EXEC_QUEUE_PRIORITY_COUNT }; +/** + * enum xe_multi_queue_priority - Multi Queue priority values + * + * The priority values of the queues within the multi queue group. 
+ */ +enum xe_multi_queue_priority { + /** @XE_MULTI_QUEUE_PRIORITY_LOW: Priority low */ + XE_MULTI_QUEUE_PRIORITY_LOW = 0, + /** @XE_MULTI_QUEUE_PRIORITY_NORMAL: Priority normal */ + XE_MULTI_QUEUE_PRIORITY_NORMAL, + /** @XE_MULTI_QUEUE_PRIORITY_HIGH: Priority high */ + XE_MULTI_QUEUE_PRIORITY_HIGH, +}; + /** * struct xe_exec_queue_group - Execution multi queue group * @@ -131,6 +145,8 @@ struct xe_exec_queue { struct { /** @multi_queue.group: Queue group information */ struct xe_exec_queue_group *group; + /** @multi_queue.priority: Queue priority within the multi-queue group */ + enum xe_multi_queue_priority priority; /** @multi_queue.pos: Position of queue within the multi-queue group */ u8 pos; /** @multi_queue.valid: Queue belongs to a multi queue group */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index bafe42393d22..7cca03d4296c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -640,6 +640,7 @@ static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc, return; } + xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority); xe_guc_exec_queue_group_cgp_update(xe, q); WRITE_ONCE(group->sync_pending, true); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index a05060f75e7e..70eae7d03a27 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -44,6 +44,11 @@ #define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_PRIORITY GENMASK_ULL(10, 9) +#define LRC_PRIORITY_LOW 0 +#define LRC_PRIORITY_NORMAL 1 +#define LRC_PRIORITY_HIGH 2 + /* * Layout of the LRC and associated data allocated as * lrc->bo: @@ -1399,6 +1404,30 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) return 0; } +static u8 xe_multi_queue_prio_to_lrc(struct xe_lrc *lrc, enum xe_multi_queue_priority priority) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + + xe_assert(xe, (priority >= XE_MULTI_QUEUE_PRIORITY_LOW && + priority <= XE_MULTI_QUEUE_PRIORITY_HIGH)); + + /* xe_multi_queue_priority is directly mapped to LRC priority values */ + return priority; +} + +/** + * xe_lrc_set_multi_queue_priority() - Set multi queue priority in LRC + * @lrc: Logical Ring Context + * @priority: Multi queue priority of the exec queue + * + * Convert @priority to LRC multi queue priority and update the @lrc descriptor + */ +void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority) +{ + lrc->desc &= ~LRC_PRIORITY; + lrc->desc |= FIELD_PREP(LRC_PRIORITY, xe_multi_queue_prio_to_lrc(lrc, priority)); +} + static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, void *replay_state, u32 ring_size, u16 msix_vec, diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index a32472b92242..8acf85273c1a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -13,6 +13,7 @@ struct drm_printer; struct xe_bb; struct xe_device; struct xe_exec_queue; +enum xe_multi_queue_priority; enum xe_engine_class; struct xe_gt; struct xe_hw_engine; @@ -135,6 +136,8 @@ void xe_lrc_dump_default(struct drm_printer *p, u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); +void xe_lrc_set_multi_queue_priority(struct xe_lrc *lrc, enum xe_multi_queue_priority priority); + struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); void xe_lrc_snapshot_print(struct 
xe_lrc_snapshot *snapshot, struct drm_printer *p); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19a8ae856a17..fd79d78de2e9 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1280,6 +1280,9 @@ struct drm_xe_vm_bind { * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. * All the other non-relevant bits of extension's 'value' field while adding the * primary or the secondary queues of the group must be set to 0. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue + * priority within the multi-queue group. Current valid priority values are 0–2 + * (default is 1), with higher values indicating higher priority. * * The example below shows how to use @drm_xe_exec_queue_create to create * a simple exec_queue (no parallel submission) of class @@ -1323,6 +1326,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 2a31ea17d5c69e51ea454485edd40e4aeff467c1 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:02:54 -0800 Subject: drm/xe/multi_queue: Add exec_queue set_property ioctl support This patch adds support for exec_queue set_property ioctl. It is derived from the original work which is part of https://patchwork.freedesktop.org/series/112188/ Currently only DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property can be dynamically set. v2: Check which property this ioctl supports and update the kernel-doc (Matt Brost) Signed-off-by: Matthew Brost Signed-off-by: Pallavi Mishra Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-25-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_exec_queue.c | 35 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_exec_queue.h | 2 ++ include/uapi/drm/xe_drm.h | 26 ++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1197f914ef77..7a498c8db7b1 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -207,6 +207,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_VM_QUERY_MEM_RANGE_ATTRS, xe_vm_query_vmas_attrs_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl, + DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index d0082eb45a4a..d738a9fea1e1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -790,6 +790,41 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { exec_queue_set_multi_queue_priority, }; +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_exec_queue_set_property *args = data; + struct xe_exec_queue *q; + int ret; + u32 idx; + + if (XE_IOCTL_DBG(xe, 
args->reserved[0] || args->reserved[1])) + return -EINVAL; + + if (XE_IOCTL_DBG(xe, args->property != + DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) + return -EINVAL; + + q = xe_exec_queue_lookup(xef, args->exec_queue_id); + if (XE_IOCTL_DBG(xe, !q)) + return -ENOENT; + + idx = array_index_nospec(args->property, + ARRAY_SIZE(exec_queue_set_property_funcs)); + ret = exec_queue_set_property_funcs[idx](xe, q, args->value); + if (XE_IOCTL_DBG(xe, ret)) + goto err_post_lookup; + + xe_exec_queue_put(q); + return 0; + + err_post_lookup: + xe_exec_queue_put(q); + return ret; +} + static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties) { u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) | diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index e6daa40003f2..ffcc1feb879e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -125,6 +125,8 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, + struct drm_file *file); enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe); void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index fd79d78de2e9..705081bf0d81 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -106,6 +106,7 @@ extern "C" { #define DRM_XE_OBSERVATION 0x0b #define DRM_XE_MADVISE 0x0c #define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e /* Must be kept compact -- no holes */ @@ -123,6 +124,7 @@ extern "C" { #define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) #define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise) #define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr) +#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) /** * DOC: Xe IOCTL Extensions @@ -2315,6 +2317,30 @@ struct drm_xe_vm_query_mem_range_attr { }; +/** + * struct drm_xe_exec_queue_set_property - exec queue set property + * + * Sets execution queue properties dynamically. + * Currently only %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY + * property can be dynamically set. + */ +struct drm_xe_exec_queue_set_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; + + /** @property: property to set */ + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 3131a43ecb346ae3b5287ee195779fc38c6fcd11 Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Wed, 10 Dec 2025 17:03:03 -0800 Subject: drm/xe/multi_queue: Support active group after primary is destroyed Add support to keep the group active after the primary queue is destroyed. Instead of killing the primary queue during exec_queue destroy ioctl, kill it when all the secondary queues of the group are killed. 
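A hedged user-space sketch of the resulting lifetime rules; fd and primary_id are assumed, and queue-creation boilerplate is elided:

#include <stdint.h>
#include <sys/ioctl.h>
#include "drm/xe_drm.h"

/* Destroy the primary of a group created with KEEP_ACTIVE. */
static void destroy_primary_keep_group(int fd, uint32_t primary_id)
{
        struct drm_xe_exec_queue_destroy destroy = {
                .exec_queue_id = primary_id,
        };

        /* The group was created with:
         *   .property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
         *   .value = DRM_XE_MULTI_GROUP_CREATE | DRM_XE_MULTI_GROUP_KEEP_ACTIVE,
         * and secondary queues were added afterwards.
         */
        ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &destroy);

        /* The secondaries keep executing; the shared context is killed
         * only once the last secondary queue of the group is destroyed.
         */
}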
Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251211010249.1647839-34-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_device.c | 7 +++- drivers/gpu/drm/xe/xe_exec_queue.c | 55 ++++++++++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_exec_queue.h | 2 ++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 4 +++ include/uapi/drm/xe_drm.h | 4 +++ 5 files changed, 69 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 7a498c8db7b1..24efb6a3e0ea 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -177,7 +177,12 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xa_for_each(&xef->exec_queue.xa, idx, q) { if (q->vm && q->hwe->hw_engine_group) xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); - xe_exec_queue_kill(q); + + if (xe_exec_queue_is_multi_queue_primary(q)) + xe_exec_queue_group_kill_put(q->multi_queue.group); + else + xe_exec_queue_kill(q); + xe_exec_queue_put(q); } xa_for_each(&xef->vm.xa, idx, vm) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index d337b7bc2b80..3f4840d135a0 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -418,6 +418,26 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); +static void xe_exec_queue_group_kill(struct kref *ref) +{ + struct xe_exec_queue_group *group = container_of(ref, struct xe_exec_queue_group, + kill_refcount); + xe_exec_queue_kill(group->primary); +} + +static inline void xe_exec_queue_group_kill_get(struct xe_exec_queue_group *group) +{ + kref_get(&group->kill_refcount); +} + +void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group) +{ + if (!group) + return; + + kref_put(&group->kill_refcount, xe_exec_queue_group_kill); +} + void xe_exec_queue_destroy(struct kref *ref) { struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); @@ -650,6 +670,7 @@ static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue * group->primary = q; group->cgp_bo = bo; INIT_LIST_HEAD(&group->list); + kref_init(&group->kill_refcount); xa_init_flags(&group->xa, XA_FLAGS_ALLOC1); mutex_init(&group->list_lock); q->multi_queue.group = group; @@ -725,6 +746,11 @@ static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q q->multi_queue.pos = pos; + if (group->primary->multi_queue.keep_active) { + xe_exec_queue_group_kill_get(group); + q->multi_queue.keep_active = true; + } + return 0; } @@ -738,6 +764,11 @@ static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queu lrc = xa_erase(&group->xa, q->multi_queue.pos); xe_assert(xe, lrc); xe_lrc_put(lrc); + + if (q->multi_queue.keep_active) { + xe_exec_queue_group_kill_put(group); + q->multi_queue.keep_active = false; + } } static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, @@ -759,12 +790,24 @@ static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue return -EINVAL; if (value & DRM_XE_MULTI_GROUP_CREATE) { - if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE)) + if (XE_IOCTL_DBG(xe, value & ~(DRM_XE_MULTI_GROUP_CREATE | + DRM_XE_MULTI_GROUP_KEEP_ACTIVE))) + return -EINVAL; + + /* + * KEEP_ACTIVE is not supported in preempt fence mode as in that mode, + * VM_DESTROY ioctl expects all exec queues of that 
VM are already killed. + */ + if (XE_IOCTL_DBG(xe, (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) && + xe_vm_in_preempt_fence_mode(q->vm))) return -EINVAL; q->multi_queue.valid = true; q->multi_queue.is_primary = true; q->multi_queue.pos = 0; + if (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) + q->multi_queue.keep_active = true; + return 0; } @@ -1312,6 +1355,11 @@ void xe_exec_queue_kill(struct xe_exec_queue *q) q->ops->kill(q); xe_vm_remove_compute_exec_queue(q->vm, q); + + if (!xe_exec_queue_is_multi_queue_primary(q) && q->multi_queue.keep_active) { + xe_exec_queue_group_kill_put(q->multi_queue.group); + q->multi_queue.keep_active = false; + } } int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, @@ -1338,7 +1386,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (q->vm && q->hwe->hw_engine_group) xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); - xe_exec_queue_kill(q); + if (xe_exec_queue_is_multi_queue_primary(q)) + xe_exec_queue_group_kill_put(q->multi_queue.group); + else + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index ffcc1feb879e..10abed98fb6b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -113,6 +113,8 @@ static inline struct xe_exec_queue *xe_exec_queue_multi_queue_primary(struct xe_ return xe_exec_queue_is_multi_queue(q) ? q->multi_queue.group->primary : q; } +void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group); + bool xe_exec_queue_is_lr(struct xe_exec_queue *q); bool xe_exec_queue_is_idle(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 5fc516b0bb77..67ea5eebf70b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -62,6 +62,8 @@ struct xe_exec_queue_group { struct list_head list; /** @list_lock: Secondary queue list lock */ struct mutex list_lock; + /** @kill_refcount: ref count to kill primary queue */ + struct kref kill_refcount; /** @sync_pending: CGP_SYNC_DONE g2h response pending */ bool sync_pending; /** @banned: Group banned */ @@ -161,6 +163,8 @@ struct xe_exec_queue { u8 valid:1; /** @multi_queue.is_primary: Is primary queue (Q0) of the group */ u8 is_primary:1; + /** @multi_queue.keep_active: Keep the group active after primary is destroyed */ + u8 keep_active:1; } multi_queue; /** @sched_props: scheduling properties */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 705081bf0d81..bd6154e3b728 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1280,6 +1280,9 @@ struct drm_xe_vm_bind { * then a new multi-queue group is created with this queue as the primary queue * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. + * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag + * set, then the multi-queue group is kept active after the primary queue is + * destroyed. * All the other non-relevant bits of extension's 'value' field while adding the * primary or the secondary queues of the group must be set to 0. 
* - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue @@ -1328,6 +1331,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) +#define DRM_XE_MULTI_GROUP_KEEP_ACTIVE (1ull << 62) #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From b07bac9bd708ec468cd1b8a5fe70ae2ac9b0a11c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 5 Dec 2025 23:47:17 +0000 Subject: drm/xe: Limit num_syncs to prevent oversized allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exec and vm_bind ioctl allow userspace to specify an arbitrary num_syncs value. Without bounds checking, a very large num_syncs can force an excessively large allocation, leading to kernel warnings from the page allocator as below. Introduce DRM_XE_MAX_SYNCS (set to 1024) and reject any request exceeding this limit. " ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1217 at mm/page_alloc.c:5124 __alloc_frozen_pages_noprof+0x2f8/0x2180 mm/page_alloc.c:5124 ... Call Trace: alloc_pages_mpol+0xe4/0x330 mm/mempolicy.c:2416 ___kmalloc_large_node+0xd8/0x110 mm/slub.c:4317 __kmalloc_large_node_noprof+0x18/0xe0 mm/slub.c:4348 __do_kmalloc_node mm/slub.c:4364 [inline] __kmalloc_noprof+0x3d4/0x4b0 mm/slub.c:4388 kmalloc_noprof include/linux/slab.h:909 [inline] kmalloc_array_noprof include/linux/slab.h:948 [inline] xe_exec_ioctl+0xa47/0x1e70 drivers/gpu/drm/xe/xe_exec.c:158 drm_ioctl_kernel+0x1f1/0x3e0 drivers/gpu/drm/drm_ioctl.c:797 drm_ioctl+0x5e7/0xc50 drivers/gpu/drm/drm_ioctl.c:894 xe_drm_ioctl+0x10b/0x170 drivers/gpu/drm/xe/xe_device.c:224 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:598 [inline] __se_sys_ioctl fs/ioctl.c:584 [inline] __x64_sys_ioctl+0x18b/0x210 fs/ioctl.c:584 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xbb/0x380 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... " v2: Add "Reported-by" and Cc stable kernels. v3: Change XE_MAX_SYNCS from 64 to 1024. (Matt & Ashutosh) v4: s/XE_MAX_SYNCS/DRM_XE_MAX_SYNCS/ (Matt) v5: Do the check at the top of the exec func. 
(Matt) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reported-by: Koen Koning Reported-by: Peter Senna Tschudin Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6450 Cc: # v6.12+ Cc: Matthew Brost Cc: Michal Mrozek Cc: Carl Zhang Cc: José Roberto de Souza Cc: Lionel Landwerlin Cc: Ivan Briano Cc: Thomas Hellström Cc: Ashutosh Dixit Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patch.msgid.link/20251205234715.2476561-5-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_exec.c | 3 ++- drivers/gpu/drm/xe/xe_vm.c | 3 +++ include/uapi/drm/xe_drm.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 4d81210e41f5..fd9480031750 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -132,7 +132,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]) || + XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) return -EINVAL; q = xe_exec_queue_lookup(xef, args->exec_queue_id); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index bd787aae4248..ca546666a5c9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3341,6 +3341,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; + if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) + return -EINVAL; + if (args->num_binds > 1) { u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index bd6154e3b728..c59587529986 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1504,6 +1504,7 @@ struct drm_xe_exec { /** @exec_queue_id: Exec queue ID for the batch buffer */ __u32 exec_queue_id; +#define DRM_XE_MAX_SYNCS 1024 /** @num_syncs: Amount of struct drm_xe_sync in array. */ __u32 num_syncs; -- cgit v1.2.3 From ab39e2a8f7aed72929bfc1d58eb5e8766f1d85db Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 5 Dec 2025 13:26:11 -0800 Subject: drm/xe/oa/uapi: Expose MERT OA unit A MERT OA unit is available in the SoC on some platforms. Add support for this OA unit and expose it to userspace. The MERT OA unit does not have any HW engines attached, but is otherwise similar to an OAM unit. 
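A hedged user-space sketch of how a tool could spot the new unit through the existing OA units query; fd is an assumed open xe render node, error handling is elided, and the walk follows the variable-size layout of the OA units query:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "drm/xe_drm.h"

static void find_mert_oa_unit(int fd)
{
        struct drm_xe_device_query query = {
                .query = DRM_XE_DEVICE_QUERY_OA_UNITS,
        };
        struct drm_xe_query_oa_units *qoa;
        struct drm_xe_oa_unit *u;
        uint32_t i;

        ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);   /* size probe */
        qoa = calloc(1, query.size);
        query.data = (uintptr_t)qoa;
        ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);

        u = (struct drm_xe_oa_unit *)&qoa->oa_units[0];
        for (i = 0; i < qoa->num_oa_units; i++) {
                if (u->oa_unit_type == DRM_XE_OA_UNIT_TYPE_MERT)
                        printf("MERT OA unit %u (no engines attached)\n",
                               u->oa_unit_id);
                /* Units are variable-sized: step over the engine array. */
                u = (struct drm_xe_oa_unit *)((char *)u + sizeof(*u) +
                                              u->num_engines * sizeof(u->eci[0]));
        }
        free(qoa);
}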
Signed-off-by: Lucas De Marchi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patch.msgid.link/20251205212613.826224-2-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 9 +++++++++ drivers/gpu/drm/xe/xe_oa.c | 37 +++++++++++++++++++++++++++++++++--- include/uapi/drm/xe_drm.h | 3 +++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 638ab3b99eb0..04a729e610aa 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -108,4 +108,13 @@ #define XE_OAM_SCMI_0_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SCMI_0_BASE) #define XE_OAM_SCMI_1_BASE_ADJ (MEDIA_GT_GSI_OFFSET + XE_OAM_SCMI_1_BASE) +#define OAMERT_CONTROL XE_REG(0x1453a0) +#define OAMERT_DEBUG XE_REG(0x1453a4) +#define OAMERT_STATUS XE_REG(0x1453a8) +#define OAMERT_HEAD_POINTER XE_REG(0x1453ac) +#define OAMERT_TAIL_POINTER XE_REG(0x1453b0) +#define OAMERT_BUFFER XE_REG(0x1453b4) +#define OAMERT_CONTEXT_CONTROL XE_REG(0x1453c8) +#define OAMERT_MMIO_TRG XE_REG(0x1453cc) + #endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 92aa25fc0422..d4e1585004e2 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1940,6 +1940,7 @@ static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; case DRM_XE_OA_UNIT_TYPE_OAM: case DRM_XE_OA_UNIT_TYPE_OAM_SAG: + case DRM_XE_OA_UNIT_TYPE_MERT: return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; default: return false; @@ -2227,6 +2228,8 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ + { .start = 0x145194, .end = 0x145194 }, /* SYS_MEM_LAT_MEASURE */ + { .start = 0x145340, .end = 0x14537C }, /* MERTSS_PES_0 - MERTSS_PES_7 */ {}, }; @@ -2518,7 +2521,12 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) - return 1; + /* + * The MERT OA unit belongs to the SoC, not a gt, so it should be accessed + * using xe_root_tile_mmio(). However, for all known platforms this is the + * same as accessing via xe_root_mmio_gt()->mmio. + */ + return xe_device_has_mert(gt_to_xe(gt)) ? 
2 : 1; else if (!IS_DGFX(gt_to_xe(gt))) return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ else @@ -2602,6 +2610,22 @@ static struct xe_oa_regs __oag_regs(void) }; } +static struct xe_oa_regs __oamert_regs(void) +{ + return (struct xe_oa_regs) { + .base = 0, + .oa_head_ptr = OAMERT_HEAD_POINTER, + .oa_tail_ptr = OAMERT_TAIL_POINTER, + .oa_buffer = OAMERT_BUFFER, + .oa_ctx_ctrl = OAMERT_CONTEXT_CONTROL, + .oa_ctrl = OAMERT_CONTROL, + .oa_debug = OAMERT_DEBUG, + .oa_status = OAMERT_STATUS, + .oa_mmio_trg = OAMERT_MMIO_TRG, + .oa_ctrl_counter_select_mask = OAM_CONTROL_COUNTER_SEL_MASK, + }; +} + static void __xe_oa_init_oa_units(struct xe_gt *gt) { const u32 oam_base_addr[] = { @@ -2615,8 +2639,15 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) struct xe_oa_unit *u = >->oa.oa_unit[i]; if (xe_gt_is_main_type(gt)) { - u->regs = __oag_regs(); - u->type = DRM_XE_OA_UNIT_TYPE_OAG; + if (!i) { + u->regs = __oag_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_OAG; + } else { + xe_gt_assert(gt, xe_device_has_mert(gt_to_xe(gt))); + xe_gt_assert(gt, gt == xe_root_mmio_gt(gt_to_xe(gt))); + u->regs = __oamert_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_MERT; + } } else { xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); u->regs = __oam_regs(oam_base_addr[i]); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c59587529986..726e481574fe 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1696,6 +1696,9 @@ enum drm_xe_oa_unit_type { /** @DRM_XE_OA_UNIT_TYPE_OAM_SAG: OAM_SAG OA unit */ DRM_XE_OA_UNIT_TYPE_OAM_SAG, + + /** @DRM_XE_OA_UNIT_TYPE_MERT: MERT OA unit */ + DRM_XE_OA_UNIT_TYPE_MERT, }; /** -- cgit v1.2.3 From 5c3c3e7b654df01a69d49551a08b7863c09546f6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 17 Dec 2025 14:24:03 +0100 Subject: drm/panthor: Fix kerneldoc in uAPI header Fix a typo in a kerneldoc header. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/dri-devel/20251216120049.3ed7e06e@canb.auug.org.au/ Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Fixes: ea78ec982653 ("drm/panthor: Expose the selected coherency protocol to the UMD") Signed-off-by: Steven Price Link: https://patch.msgid.link/20251217132403.3996014-1-boris.brezillon@collabora.com --- include/uapi/drm/panthor_drm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index e238c6264fa1..b401ac585d6a 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -350,7 +350,7 @@ struct drm_panthor_gpu_info { __u32 as_present; /** - * @select_coherency: Coherency selected for this device. + * @selected_coherency: Coherency selected for this device. * * One of drm_panthor_gpu_coherency. */ -- cgit v1.2.3 From 332070795bd96193756cb4446eddc3ec9ff6a0e8 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 17 Dec 2025 09:17:19 -0800 Subject: accel/amdxdna: Enable hardware context priority Newer firmware supports hardware context priority. Set the priority based on application input. 
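A hedged user-space sketch of requesting a priority through the QoS info at context creation; it assumes the existing amdxdna_qos_info/create_hwctx layout, and the remaining create fields (queue BO, tile count, ...) are elided:

#include <stdint.h>
#include <sys/ioctl.h>
#include "drm/amdxdna_accel.h"

/* Ask for a real-time hardware context. */
static int create_rt_hwctx(int fd)
{
        struct amdxdna_qos_info qos = {
                .priority = AMDXDNA_QOS_REALTIME_PRIORITY,
        };
        struct amdxdna_drm_create_hwctx create = {
                .qos_p = (uintptr_t)&qos,
                /* ... other required fields ... */
        };

        return ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_HWCTX, &create);
}

On firmware without preemption support the driver quietly falls back to PRIORITY_HIGH, as the fallback in aie2_get_context_priority() in the diff below shows.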
Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20251217171719.2139025-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_message.c | 23 ++++++++++++++++++++++- drivers/accel/amdxdna/aie2_msg_priv.h | 5 +++++ include/uapi/drm/amdxdna_accel.h | 8 ++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index e77a353cadc5..051f4ceaabae 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -205,6 +205,27 @@ static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id) return ret; } + +static u32 aie2_get_context_priority(struct amdxdna_dev_hdl *ndev, + struct amdxdna_hwctx *hwctx) +{ + if (!AIE2_FEATURE_ON(ndev, AIE2_PREEMPT)) + return PRIORITY_HIGH; + + switch (hwctx->qos.priority) { + case AMDXDNA_QOS_REALTIME_PRIORITY: + return PRIORITY_REALTIME; + case AMDXDNA_QOS_HIGH_PRIORITY: + return PRIORITY_HIGH; + case AMDXDNA_QOS_NORMAL_PRIORITY: + return PRIORITY_NORMAL; + case AMDXDNA_QOS_LOW_PRIORITY: + return PRIORITY_LOW; + default: + return PRIORITY_HIGH; + } +} + int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx) { DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT); @@ -221,7 +242,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwct req.num_unused_col = hwctx->num_unused_col; req.num_cq_pairs_requested = 1; req.pasid = hwctx->client->pasid; - req.context_priority = 2; + req.context_priority = aie2_get_context_priority(ndev, hwctx); ret = aie2_send_mgmt_msg_wait(ndev, &msg); if (ret) diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index cc912b7899ce..728ef56f7f0a 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -108,6 +108,11 @@ struct cq_pair { struct cq_info i2x_q; }; +#define PRIORITY_REALTIME 1 +#define PRIORITY_HIGH 2 +#define PRIORITY_NORMAL 3 +#define PRIORITY_LOW 4 + struct create_ctx_req { __u32 aie_type; __u8 start_col; diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index 62c917fd4f7b..9c44db2b3dcd 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -19,6 +19,14 @@ extern "C" { #define AMDXDNA_INVALID_BO_HANDLE 0 #define AMDXDNA_INVALID_FENCE_HANDLE 0 +/* + * Define hardware context priority + */ +#define AMDXDNA_QOS_REALTIME_PRIORITY 0x100 +#define AMDXDNA_QOS_HIGH_PRIORITY 0x180 +#define AMDXDNA_QOS_NORMAL_PRIORITY 0x200 +#define AMDXDNA_QOS_LOW_PRIORITY 0x280 + enum amdxdna_device_type { AMDXDNA_DEV_TYPE_UNKNOWN = -1, AMDXDNA_DEV_TYPE_KMQ, -- cgit v1.2.3 From dff547e137be2f36c6c4d77172a03a54a38230d3 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 19 Dec 2025 12:33:11 +0100 Subject: drm/xe/uapi: Extend the madvise functionality to support foreign pagemap placement for svm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use device file descriptors and regions to represent pagemaps on foreign or local devices. The underlying files are type-checked at madvise time, and references are kept on the drm_pagemap as long as there are madvises pointing to it. Extend the madvise preferred_location UAPI to support the region instance to identify the foreign placement. v2: - Improve UAPI documentation. (Matt Brost) - Sanitize preferred_mem_loc.region_instance madvise. 
(Matt Brost) - Clarify madvise drm_pagemap vs xe_pagemap refcounting. (Matt Brost) - Don't allow a foreign drm_pagemap madvise without a fast interconnect. v3: - Add a comment about reference-counting in xe_devmem_open() and remove the reference-count get-and-put. (Matt Brost) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251219113320.183860-16-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_device.c | 14 +++++++ drivers/gpu/drm/xe/xe_device.h | 2 + drivers/gpu/drm/xe/xe_svm.c | 75 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_svm.h | 7 ++++ drivers/gpu/drm/xe/xe_vm_madvise.c | 86 +++++++++++++++++++++++++++++++++----- include/uapi/drm/xe_drm.h | 18 ++++++-- 6 files changed, 188 insertions(+), 14 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8c12c12c27f6..951387d54295 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -378,6 +378,20 @@ static const struct file_operations xe_driver_fops = { .fop_flags = FOP_UNSIGNED_OFFSET, }; +/** + * xe_is_xe_file() - Is the file an xe device file? + * @file: The file. + * + * Checks whether the file is opened against + * an xe device. + * + * Return: %true if an xe file, %false if not. + */ +bool xe_is_xe_file(const struct file *file) +{ + return file->f_op == &xe_driver_fops; +} + static struct drm_driver driver = { /* Don't use MTRRs here; the Xserver or userspace app should * deal with them for Intel hardware. diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 6604b89330d5..3e72fa4609f8 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -200,6 +200,8 @@ void xe_file_put(struct xe_file *xef); int xe_is_injection_active(void); +bool xe_is_xe_file(const struct file *file); + /* * Occasionally it is seen that the G2H worker starts running after a delay of more than * a second even after being queued and activated by the Linux workqueue subsystem. This diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index df26203b25e2..0484044091cf 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1813,6 +1813,75 @@ int xe_pagemap_cache_create(struct xe_tile *tile) return 0; } +static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 region_instance) +{ + u32 tile_id = region_instance - 1; + struct xe_pagemap *xpagemap; + struct xe_vram_region *vr; + + if (tile_id >= xe->info.tile_count) + return ERR_PTR(-ENOENT); + + if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask)) + return ERR_PTR(-ENOENT); + + vr = xe_tile_to_vr(&xe->tiles[tile_id]); + + /* Returns a reference-counted embedded struct drm_pagemap */ + xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr); + if (IS_ERR(xpagemap)) + return ERR_CAST(xpagemap); + + return &xpagemap->dpagemap; +} + +/** + * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a + * (file_descriptor, region_instance) pair. + * @fd: An fd opened against an xe device. + * @region_instance: The region instance representing the device memory + * on the opened xe device. + * + * Opens a struct drm_pagemap pointer on the + * indicated device and region_instance. + * + * Return: A reference-counted struct drm_pagemap pointer on success, + * negative error pointer on failure. 
+ */
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+	struct drm_pagemap *dpagemap;
+	struct file *file;
+	struct drm_file *fpriv;
+	struct drm_device *drm;
+	int idx;
+
+	if (fd <= 0)
+		return ERR_PTR(-EINVAL);
+
+	file = fget(fd);
+	if (!file)
+		return ERR_PTR(-ENOENT);
+
+	if (!xe_is_xe_file(file)) {
+		dpagemap = ERR_PTR(-ENOENT);
+		goto out;
+	}
+
+	fpriv = file->private_data;
+	drm = fpriv->minor->dev;
+	if (!drm_dev_enter(drm, &idx)) {
+		dpagemap = ERR_PTR(-ENODEV);
+		goto out;
+	}
+
+	dpagemap = xe_devmem_open(to_xe_device(drm), region_instance);
+	drm_dev_exit(idx);
+out:
+	fput(file);
+	return dpagemap;
+}
+
 #else
 
 int xe_pagemap_shrinker_create(struct xe_device *xe)
@@ -1836,6 +1905,12 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
 {
 	return NULL;
 }
+
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+	return ERR_PTR(-ENOENT);
+}
+
 #endif
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index a003f571c82a..ec7c6751cc86 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -187,6 +187,8 @@ int xe_pagemap_shrinker_create(struct xe_device *xe);
 
 int xe_pagemap_cache_create(struct xe_tile *tile);
 
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance);
+
 #else
 #include
 #include "xe_vm.h"
@@ -378,6 +380,11 @@ static inline int xe_pagemap_cache_create(struct xe_tile *tile)
 	return 0;
 }
 
+static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+	return ERR_PTR(-ENOENT);
+}
+
 #define xe_svm_range_has_dma_mapping(...) false
 #endif /* CONFIG_DRM_XE_GPUSVM */
 
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index d6f47c8e146d..add9a6ca2390 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -22,6 +22,19 @@ struct xe_vmas_in_madvise_range {
 	bool has_svm_userptr_vmas;
 };
 
+/**
+ * struct xe_madvise_details - Argument to madvise_funcs
+ * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
+ *
+ * The madvise IOCTL handler may, in addition to the user-space
+ * args, have additional info to pass into the madvise_func that
+ * handles the madvise type. Use a struct xe_madvise_details
+ * for that and extend the struct as necessary.
+ */
+struct xe_madvise_details {
+	struct drm_pagemap *dpagemap;
+};
+
 static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
 {
 	u64 addr = madvise_range->addr;
@@ -74,7 +87,8 @@ static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_r
 
 static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
 				      struct xe_vma **vmas, int num_vmas,
-				      struct drm_xe_madvise *op)
+				      struct drm_xe_madvise *op,
+				      struct xe_madvise_details *details)
 {
 	int i;
 
@@ -96,14 +110,18 @@ static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
			 * is of no use and can be ignored.
*/ loc->migration_policy = op->preferred_mem_loc.migration_policy; + drm_pagemap_put(loc->dpagemap); loc->dpagemap = NULL; + if (details->dpagemap) + loc->dpagemap = drm_pagemap_get(details->dpagemap); } } } static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, - struct drm_xe_madvise *op) + struct drm_xe_madvise *op, + struct xe_madvise_details *details) { struct xe_bo *bo; int i; @@ -144,7 +162,8 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, - struct drm_xe_madvise *op) + struct drm_xe_madvise *op, + struct xe_madvise_details *details) { int i; @@ -162,7 +181,8 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, - struct drm_xe_madvise *op); + struct drm_xe_madvise *op, + struct xe_madvise_details *details); static const madvise_func madvise_funcs[] = { [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc, @@ -246,11 +266,12 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)) return false; - if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > - DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) + if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && + args->preferred_mem_loc.region_instance != 0)) return false; - if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad)) + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > + DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) return false; if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved)) @@ -296,6 +317,41 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv return true; } +static int xe_madvise_details_init(struct xe_vm *vm, const struct drm_xe_madvise *args, + struct xe_madvise_details *details) +{ + struct xe_device *xe = vm->xe; + + memset(details, 0, sizeof(*details)); + + if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) { + int fd = args->preferred_mem_loc.devmem_fd; + struct drm_pagemap *dpagemap; + + if (fd <= 0) + return 0; + + dpagemap = xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd, + args->preferred_mem_loc.region_instance); + if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap))) + return PTR_ERR(dpagemap); + + /* Don't allow a foreign placement without a fast interconnect! 
*/ + if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != vm->svm.peer.owner)) { + drm_pagemap_put(dpagemap); + return -ENOLINK; + } + details->dpagemap = dpagemap; + } + + return 0; +} + +static void xe_madvise_details_fini(struct xe_madvise_details *details) +{ + drm_pagemap_put(details->dpagemap); +} + static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, u32 atomic_val) { @@ -349,6 +405,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil struct drm_xe_madvise *args = data; struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, .range = args->range, }; + struct xe_madvise_details details; struct xe_vm *vm; struct drm_exec exec; int err, attr_type; @@ -373,13 +430,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil goto unlock_vm; } - err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + err = xe_madvise_details_init(vm, args, &details); if (err) goto unlock_vm; + err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + if (err) + goto madv_fini; + err = get_vmas(vm, &madvise_range); if (err || !madvise_range.num_vmas) - goto unlock_vm; + goto madv_fini; if (madvise_range.has_bo_vmas) { if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { @@ -387,7 +448,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil madvise_range.num_vmas, args->atomic.val)) { err = -EINVAL; - goto unlock_vm; + goto madv_fini; } } @@ -413,7 +474,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil } attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); - madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args); + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args, + &details); err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); @@ -425,6 +487,8 @@ err_fini: drm_exec_fini(&exec); kfree(madvise_range.vmas); madvise_range.vmas = NULL; +madv_fini: + xe_madvise_details_fini(&details); unlock_vm: up_write(&vm->lock); put_vm: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 726e481574fe..bb69f9b30c7d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -2123,7 +2123,13 @@ struct drm_xe_madvise { struct { #define DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE 0 #define DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM -1 - /** @preferred_mem_loc.devmem_fd: fd for preferred loc */ + /** + * @preferred_mem_loc.devmem_fd: + * Device file-descriptor of the device where the + * preferred memory is located, or one of the + * above special values. Please also see + * @preferred_mem_loc.region_instance below. + */ __u32 devmem_fd; #define DRM_XE_MIGRATE_ALL_PAGES 0 @@ -2131,8 +2137,14 @@ struct drm_xe_madvise { /** @preferred_mem_loc.migration_policy: Page migration policy */ __u16 migration_policy; - /** @preferred_mem_loc.pad : MBZ */ - __u16 pad; + /** + * @preferred_mem_loc.region_instance : Region instance. + * MBZ if @devmem_fd <= &DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE. + * Otherwise should point to the desired device + * VRAM instance of the device indicated by + * @preferred_mem_loc.devmem_fd. 
+ */ + __u16 region_instance; /** @preferred_mem_loc.reserved : Reserved */ __u64 reserved; -- cgit v1.2.3 From 44b69cf1d35cad4a846208e769b34a648fd637bb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Oct 2025 16:44:58 -0400 Subject: drm/amdgpu: Update AMDGPU_INFO_UQ_FW_AREAS query for compute Add a query for compute queues. Userspace can use this to query the size of the EOP buffers for compute user queues. Proposed userspace: https://gitlab.freedesktop.org/yogeshmohan/mesa/-/commits/userq_query Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 26 ++++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 8 ++++++++ 2 files changed, 34 insertions(+) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 6ee77f431d56..b02da84ab99d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -391,6 +391,24 @@ static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev, return ret; } +static int amdgpu_userq_metadata_info_compute(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_uq_metadata_compute *meta) +{ + int ret = -EOPNOTSUPP; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow = {}; + + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true); + meta->eop_size = shadow.eop_size; + meta->eop_alignment = shadow.eop_alignment; + ret = 0; + } + + return ret; +} + static int amdgpu_hw_ip_info(struct amdgpu_device *adev, struct drm_amdgpu_info *info, struct drm_amdgpu_info_hw_ip *result) @@ -1360,6 +1378,14 @@ out: if (ret) return ret; + ret = copy_to_user(out, &meta_info, + min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; + return 0; + case AMDGPU_HW_IP_COMPUTE: + ret = amdgpu_userq_metadata_info_compute(adev, info, &meta_info.compute); + if (ret) + return ret; + ret = copy_to_user(out, &meta_info, min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0; return 0; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 351c2fb2df90..138d9ae1aa48 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1630,9 +1630,17 @@ struct drm_amdgpu_info_uq_metadata_gfx { __u32 csa_alignment; }; +struct drm_amdgpu_info_uq_metadata_compute { + /* EOP size for gfx11 */ + __u32 eop_size; + /* EOP base virtual alignment for gfx11 */ + __u32 eop_alignment; +}; + struct drm_amdgpu_info_uq_metadata { union { struct drm_amdgpu_info_uq_metadata_gfx gfx; + struct drm_amdgpu_info_uq_metadata_compute compute; }; }; -- cgit v1.2.3 From 0030595c3e8b48b32a12b8354ce9dbe00efd632f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Oct 2025 16:47:02 -0400 Subject: drm/amdgpu: Update AMDGPU_INFO_UQ_FW_AREAS query for sdma Add a query for sdma queues. Userspace can use this to query the size of the CSA buffers for sdma user queues. 
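For example, a userspace sketch of the query follows. The
return_pointer/return_size/query plumbing is the standard
DRM_IOCTL_AMDGPU_INFO pattern and the sdma metadata struct comes from
this patch; how the target IP is selected (shown here via
query_hw_ip.type) is an assumption and may differ from the final uAPI:

  /* Hypothetical userspace sketch: fetch the SDMA user-queue CSA
   * size/alignment via AMDGPU_INFO_UQ_FW_AREAS.
   */
  #include <stdint.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <drm/amdgpu_drm.h>

  static int query_sdma_uq_csa(int fd, __u32 *size, __u32 *alignment)
  {
  	struct drm_amdgpu_info_uq_metadata meta;
  	struct drm_amdgpu_info req;

  	memset(&meta, 0, sizeof(meta));
  	memset(&req, 0, sizeof(req));
  	req.return_pointer = (uintptr_t)&meta;	/* kernel copies the metadata here */
  	req.return_size = sizeof(meta);
  	req.query = AMDGPU_INFO_UQ_FW_AREAS;
  	req.query_hw_ip.type = AMDGPU_HW_IP_DMA;	/* assumed IP selector */

  	if (ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &req) < 0)
  		return -1;

  	*size = meta.sdma.csa_size;		/* per-queue context save area */
  	*alignment = meta.sdma.csa_alignment;
  	return 0;
  }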
Proposed userspace: https://gitlab.freedesktop.org/yogeshmohan/mesa/-/commits/userq_query

Reviewed-by: Prike Liang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 26 ++++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  8 ++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/uapi/drm')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b02da84ab99d..36fdd1af9d6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -409,6 +409,24 @@ static int amdgpu_userq_metadata_info_compute(struct amdgpu_device *adev,
 	return ret;
 }
 
+static int amdgpu_userq_metadata_info_sdma(struct amdgpu_device *adev,
+					   struct drm_amdgpu_info *info,
+					   struct drm_amdgpu_info_uq_metadata_sdma *meta)
+{
+	int ret = -EOPNOTSUPP;
+
+	if (adev->sdma.get_csa_info) {
+		struct amdgpu_sdma_csa_info csa = {};
+
+		adev->sdma.get_csa_info(adev, &csa);
+		meta->csa_size = csa.size;
+		meta->csa_alignment = csa.alignment;
+		ret = 0;
+	}
+
+	return ret;
+}
+
 static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 			     struct drm_amdgpu_info *info,
 			     struct drm_amdgpu_info_hw_ip *result)
@@ -1386,6 +1404,14 @@ out:
 		if (ret)
 			return ret;
 
+		ret = copy_to_user(out, &meta_info,
+				   min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+		return 0;
+	case AMDGPU_HW_IP_DMA:
+		ret = amdgpu_userq_metadata_info_sdma(adev, info, &meta_info.sdma);
+		if (ret)
+			return ret;
+
 		ret = copy_to_user(out, &meta_info,
 				   min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
 		return 0;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 138d9ae1aa48..f902add31fc6 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1637,10 +1637,18 @@ struct drm_amdgpu_info_uq_metadata_compute {
 	__u32 eop_alignment;
 };
 
+struct drm_amdgpu_info_uq_metadata_sdma {
+	/* context save area size for sdma6 */
+	__u32 csa_size;
+	/* context save area base virtual alignment for sdma6 */
+	__u32 csa_alignment;
+};
+
 struct drm_amdgpu_info_uq_metadata {
 	union {
 		struct drm_amdgpu_info_uq_metadata_gfx gfx;
		struct drm_amdgpu_info_uq_metadata_compute compute;
+		struct drm_amdgpu_info_uq_metadata_sdma sdma;
 	};
 };
 
--
cgit v1.2.3


From caaed1dda7df9b4e21d439bb5e7750d4af4f1e78 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura
Date: Tue, 6 Jan 2026 11:10:50 -0800
Subject: Revert "drm/xe/multi_queue: Support active group after primary is
 destroyed"

This reverts commit 3131a43ecb346ae3b5287ee195779fc38c6fcd11.
There is no must-have requirement for this feature from Compute UMD.
Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20260106191051.2866538-5-niranjana.vishwanathapura@intel.com --- drivers/gpu/drm/xe/xe_device.c | 7 +--- drivers/gpu/drm/xe/xe_exec_queue.c | 55 ++------------------------------ drivers/gpu/drm/xe/xe_exec_queue.h | 2 -- drivers/gpu/drm/xe/xe_exec_queue_types.h | 4 --- include/uapi/drm/xe_drm.h | 4 --- 5 files changed, 3 insertions(+), 69 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index e101d290b2a6..f4741cbe4c45 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -180,12 +180,7 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xa_for_each(&xef->exec_queue.xa, idx, q) { if (q->vm && q->hwe->hw_engine_group) xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); - - if (xe_exec_queue_is_multi_queue_primary(q)) - xe_exec_queue_group_kill_put(q->multi_queue.group); - else - xe_exec_queue_kill(q); - + xe_exec_queue_kill(q); xe_exec_queue_put(q); } xa_for_each(&xef->vm.xa, idx, vm) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0b9e074b022f..529a40ca4002 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -467,26 +467,6 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); -static void xe_exec_queue_group_kill(struct kref *ref) -{ - struct xe_exec_queue_group *group = container_of(ref, struct xe_exec_queue_group, - kill_refcount); - xe_exec_queue_kill(group->primary); -} - -static inline void xe_exec_queue_group_kill_get(struct xe_exec_queue_group *group) -{ - kref_get(&group->kill_refcount); -} - -void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group) -{ - if (!group) - return; - - kref_put(&group->kill_refcount, xe_exec_queue_group_kill); -} - void xe_exec_queue_destroy(struct kref *ref) { struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); @@ -716,7 +696,6 @@ static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue * group->primary = q; group->cgp_bo = bo; INIT_LIST_HEAD(&group->list); - kref_init(&group->kill_refcount); xa_init_flags(&group->xa, XA_FLAGS_ALLOC1); mutex_init(&group->list_lock); q->multi_queue.group = group; @@ -792,11 +771,6 @@ static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q q->multi_queue.pos = pos; - if (group->primary->multi_queue.keep_active) { - xe_exec_queue_group_kill_get(group); - q->multi_queue.keep_active = true; - } - return 0; } @@ -810,11 +784,6 @@ static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queu lrc = xa_erase(&group->xa, q->multi_queue.pos); xe_assert(xe, lrc); xe_lrc_put(lrc); - - if (q->multi_queue.keep_active) { - xe_exec_queue_group_kill_put(group); - q->multi_queue.keep_active = false; - } } static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, @@ -836,24 +805,12 @@ static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue return -EINVAL; if (value & DRM_XE_MULTI_GROUP_CREATE) { - if (XE_IOCTL_DBG(xe, value & ~(DRM_XE_MULTI_GROUP_CREATE | - DRM_XE_MULTI_GROUP_KEEP_ACTIVE))) - return -EINVAL; - - /* - * KEEP_ACTIVE is not supported in preempt fence mode as in that mode, - * VM_DESTROY ioctl expects all exec queues of that VM are already killed. 
- */ - if (XE_IOCTL_DBG(xe, (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) && - xe_vm_in_preempt_fence_mode(q->vm))) + if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE)) return -EINVAL; q->multi_queue.valid = true; q->multi_queue.is_primary = true; q->multi_queue.pos = 0; - if (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) - q->multi_queue.keep_active = true; - return 0; } @@ -1419,11 +1376,6 @@ void xe_exec_queue_kill(struct xe_exec_queue *q) q->ops->kill(q); xe_vm_remove_compute_exec_queue(q->vm, q); - - if (!xe_exec_queue_is_multi_queue_primary(q) && q->multi_queue.keep_active) { - xe_exec_queue_group_kill_put(q->multi_queue.group); - q->multi_queue.keep_active = false; - } } int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, @@ -1450,10 +1402,7 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (q->vm && q->hwe->hw_engine_group) xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); - if (xe_exec_queue_is_multi_queue_primary(q)) - xe_exec_queue_group_kill_put(q->multi_queue.group); - else - xe_exec_queue_kill(q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index b5ad975d7e97..b1e51789128f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -113,8 +113,6 @@ static inline struct xe_exec_queue *xe_exec_queue_multi_queue_primary(struct xe_ return xe_exec_queue_is_multi_queue(q) ? q->multi_queue.group->primary : q; } -void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group); - bool xe_exec_queue_is_lr(struct xe_exec_queue *q); bool xe_exec_queue_is_idle(struct xe_exec_queue *q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 67ea5eebf70b..5fc516b0bb77 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -62,8 +62,6 @@ struct xe_exec_queue_group { struct list_head list; /** @list_lock: Secondary queue list lock */ struct mutex list_lock; - /** @kill_refcount: ref count to kill primary queue */ - struct kref kill_refcount; /** @sync_pending: CGP_SYNC_DONE g2h response pending */ bool sync_pending; /** @banned: Group banned */ @@ -163,8 +161,6 @@ struct xe_exec_queue { u8 valid:1; /** @multi_queue.is_primary: Is primary queue (Q0) of the group */ u8 is_primary:1; - /** @multi_queue.keep_active: Keep the group active after primary is destroyed */ - u8 keep_active:1; } multi_queue; /** @sched_props: scheduling properties */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index bb69f9b30c7d..077e66a682e2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1280,9 +1280,6 @@ struct drm_xe_vm_bind { * then a new multi-queue group is created with this queue as the primary queue * (Q0). Otherwise, the queue gets added to the multi-queue group whose primary * queue's exec_queue_id is specified in the lower 32 bits of the 'value' field. - * If the extension's 'value' field has %DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag - * set, then the multi-queue group is kept active after the primary queue is - * destroyed. * All the other non-relevant bits of extension's 'value' field while adding the * primary or the secondary queues of the group must be set to 0. 
* - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY - Set the queue @@ -1331,7 +1328,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP 4 #define DRM_XE_MULTI_GROUP_CREATE (1ull << 63) -#define DRM_XE_MULTI_GROUP_KEEP_ACTIVE (1ull << 62) #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY 5 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -- cgit v1.2.3 From 38feb171b3f92d77e8061fafb5ddfffc2c13b672 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 22 Oct 2025 23:24:40 -0700 Subject: accel/rocket: rocket_accel.h: fix kernel-doc warnings Fix all kernel-doc warnings in rocket_accel.h: Warning: include/uapi/drm/rocket_accel.h:35 Incorrect use of kernel-doc format: * Output: DMA address for the BO in the NPU address space. This address and 22 warnings like these: Warning: include/uapi/drm/rocket_accel.h:43 struct member 'size' not described in 'drm_rocket_create_bo' Warning: include/uapi/drm/rocket_accel.h:60 struct member 'handle' not described in 'drm_rocket_prep_bo' Warning: include/uapi/drm/rocket_accel.h:73 struct member 'handle' not described in 'drm_rocket_fini_bo' Warning: include/uapi/drm/rocket_accel.h:86 struct member 'regcmd' not described in 'drm_rocket_task' Warning: include/uapi/drm/rocket_accel.h:116 struct member 'tasks' not described in 'drm_rocket_job' Warning: include/uapi/drm/rocket_accel.h:135 struct member 'jobs' not described in 'drm_rocket_submit' Signed-off-by: Randy Dunlap Reviewed-by: Tomeu Vizoso Signed-off-by: Tomeu Vizoso Link: https://patch.msgid.link/20251023062440.4093661-1-rdunlap@infradead.org --- include/uapi/drm/rocket_accel.h | 98 +++++++++++++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 24 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h index 14b2e12b7c49..d0685e372b79 100644 --- a/include/uapi/drm/rocket_accel.h +++ b/include/uapi/drm/rocket_accel.h @@ -26,20 +26,27 @@ extern "C" { * */ struct drm_rocket_create_bo { - /** Input: Size of the requested BO. */ + /** + * @size: Input: Size of the requested BO. + */ __u32 size; - /** Output: GEM handle for the BO. */ + /** + * @handle: Output: GEM handle for the BO. + */ __u32 handle; /** - * Output: DMA address for the BO in the NPU address space. This address - * is private to the DRM fd and is valid for the lifetime of the GEM - * handle. + * @dma_address: Output: DMA address for the BO in the NPU address + * space. This address is private to the DRM fd and is valid for + * the lifetime of the GEM handle. */ __u64 dma_address; - /** Output: Offset into the drm node to use for subsequent mmap call. */ + /** + * @offset: Output: Offset into the drm node to use for subsequent + * mmap call. + */ __u64 offset; }; @@ -50,13 +57,19 @@ struct drm_rocket_create_bo { * synchronization. */ struct drm_rocket_prep_bo { - /** Input: GEM handle of the buffer object. */ + /** + * @handle: Input: GEM handle of the buffer object. + */ __u32 handle; - /** Reserved, must be zero. */ + /** + * @reserved: Reserved, must be zero. + */ __u32 reserved; - /** Input: Amount of time to wait for NPU jobs. */ + /** + * @timeout_ns: Input: Amount of time to wait for NPU jobs. + */ __s64 timeout_ns; }; @@ -66,10 +79,14 @@ struct drm_rocket_prep_bo { * Synchronize caches for NPU access. */ struct drm_rocket_fini_bo { - /** Input: GEM handle of the buffer object. 
*/
+	/**
+	 * @handle: Input: GEM handle of the buffer object.
+	 */
 	__u32 handle;
 
-	/** Reserved, must be zero. */
+	/**
+	 * @reserved: Reserved, must be zero.
+	 */
 	__u32 reserved;
 };
 
@@ -79,10 +96,15 @@ struct drm_rocket_task {
-	/** Input: DMA address to NPU mapping of register command buffer */
+	/**
+	 * @regcmd: Input: DMA address to NPU mapping of register command buffer
+	 */
 	__u32 regcmd;
 
-	/** Input: Number of commands in the register command buffer */
+	/**
+	 * @regcmd_count: Input: Number of commands in the register command
+	 * buffer
+	 */
 	__u32 regcmd_count;
 };
 
@@ -94,25 +116,44 @@
  * sequentially on the same core, to benefit from memory residency in SRAM.
  */
 struct drm_rocket_job {
-	/** Input: Pointer to an array of struct drm_rocket_task. */
+	/**
+	 * @tasks: Input: Pointer to an array of struct drm_rocket_task.
+	 */
 	__u64 tasks;
 
-	/** Input: Pointer to a u32 array of the BOs that are read by the job. */
+	/**
+	 * @in_bo_handles: Input: Pointer to a u32 array of the BOs that
+	 * are read by the job.
+	 */
 	__u64 in_bo_handles;
 
-	/** Input: Pointer to a u32 array of the BOs that are written to by the job. */
+	/**
+	 * @out_bo_handles: Input: Pointer to a u32 array of the BOs that
+	 * are written to by the job.
+	 */
 	__u64 out_bo_handles;
 
-	/** Input: Number of tasks passed in. */
+	/**
+	 * @task_count: Input: Number of tasks passed in.
+	 */
 	__u32 task_count;
 
-	/** Input: Size in bytes of the structs in the @tasks field. */
+	/**
+	 * @task_struct_size: Input: Size in bytes of the structs in the
+	 * @tasks field.
+	 */
 	__u32 task_struct_size;
 
-	/** Input: Number of input BO handles passed in (size is that times 4). */
+	/**
+	 * @in_bo_handle_count: Input: Number of input BO handles passed in
+	 * (size is that times 4).
+	 */
 	__u32 in_bo_handle_count;
 
-	/** Input: Number of output BO handles passed in (size is that times 4). */
+	/**
+	 * @out_bo_handle_count: Input: Number of output BO handles passed in
+	 * (size is that times 4).
+	 */
 	__u32 out_bo_handle_count;
 };
 
@@ -122,16 +163,25 @@ struct drm_rocket_job {
  * The kernel will schedule the execution of these jobs in dependency order.
  */
 struct drm_rocket_submit {
-	/** Input: Pointer to an array of struct drm_rocket_job. */
+	/**
+	 * @jobs: Input: Pointer to an array of struct drm_rocket_job.
+	 */
 	__u64 jobs;
 
-	/** Input: Number of jobs passed in. */
+	/**
+	 * @job_count: Input: Number of jobs passed in.
+	 */
 	__u32 job_count;
 
-	/** Input: Size in bytes of the structs in the @jobs field. */
+	/**
+	 * @job_struct_size: Input: Size in bytes of the structs in the
+	 * @jobs field.
+	 */
 	__u32 job_struct_size;
 
-	/** Reserved, must be zero. */
+	/**
+	 * @reserved: Reserved, must be zero.
+	 */
 	__u64 reserved;
 };
--
cgit v1.2.3


From 96e97a562d067a6d867862db79864cc66aae99c2 Mon Sep 17 00:00:00 2001
From: Christian König
Date: Tue, 2 Dec 2025 16:12:41 +0100
Subject: drm/amdgpu: Drop MMIO_REMAP domain bit and keep it internal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"AMDGPU_GEM_DOMAIN_MMIO_REMAP" - Never activated as UAPI, and it turned
out that this was too inflexible.

Allocate the MMIO_REMAP buffer object as a regular GEM BO and
explicitly move it into the fixed AMDGPU_PL_MMIO_REMAP placement at
the TTM level. This avoids relying on GEM domain bits for MMIO_REMAP,
keeps the placement purely internal, and makes the lifetime and pinning
of the global MMIO_REMAP BO explicit.
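Condensed, the allocation path added below in
amdgpu_ttm_alloc_mmio_remap_bo() amounts to the following sequence
(error handling and the skip checks are elided in this sketch; the
calls mirror the diff):

  	struct ttm_operation_ctx ctx = { false, false };
  	struct ttm_place place = { .mem_type = AMDGPU_PL_MMIO_REMAP };
  	struct ttm_placement placement = { .num_placement = 1, .placement = &place };
  	struct ttm_buffer_object *tbo = &adev->rmmio_remap.bo->tbo;
  	struct ttm_resource *res;

  	ttm_bo_mem_space(tbo, &placement, &res, &ctx);	/* pick the fixed placement */
  	ttm_resource_free(tbo, &tbo->resource);		/* drop the initial resource */
  	ttm_bo_assign_mem(tbo, res);			/* install MMIO_REMAP backing */
  	ttm_bo_pin(tbo);				/* never migrated or evicted */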
The BO is pinned in TTM so it cannot be migrated or evicted. The corresponding free path relies on normal DRM teardown ordering, where no further user ioctls can access the global BO once TTM teardown begins. v2 (Srini): - Updated patch title. - Drop use of AMDGPU_GEM_DOMAIN_MMIO_REMAP in amdgpu_ttm.c. The MMIO_REMAP domain bit is removed from UAPI, so keep the MMIO_REMAP BO allocation domain-less (bp.domain = 0) and rely on the TTM placement (AMDGPU_PL_MMIO_REMAP) for backing/pinning. - Keep fdinfo/mem-stats visibility for MMIO_REMAP by classifying BOs based on bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP, since the domain bit is removed. v3: Squash patches #1 & #3 Fixes: 056132483724 ("drm/amdgpu/uapi: Introduce AMDGPU_GEM_DOMAIN_MMIO_REMAP") Fixes: 2a7a794eb82c ("drm/amdgpu/ttm: Allocate/Free 4K MMIO_REMAP Singleton") Cc: Alex Deucher Cc: Christian König Cc: Leo Liu Cc: Ruijing Dong Cc: David (Ming Qiang) Wu Signed-off-by: Srinivasan Shanmugam Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 -- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 77 +++++++++++++++++++----------- include/uapi/drm/amdgpu_drm.h | 6 +-- 5 files changed, 60 insertions(+), 49 deletions(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 032971d0a3cc..ab899709c260 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -417,9 +417,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, /* always clear VRAM */ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; - if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP) - return -EINVAL; - /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b676310ce9ac..1fb956400696 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -153,14 +153,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } - if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) { - places[c].fpfn = 0; - places[c].lpfn = 0; - places[c].mem_type = AMDGPU_PL_MMIO_REMAP; - places[c].flags = 0; - c++; - } - if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -1546,8 +1538,17 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) */ uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo) { - uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK; + u32 domain; + /* + * MMIO_REMAP is internal now, so it no longer maps from a userspace + * domain bit. Keep fdinfo/mem-stats visibility by checking the actual + * TTM placement. 
+ */
+	if (bo->tbo.resource && bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP)
+		return AMDGPU_PL_MMIO_REMAP;
+
+	domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
 	if (!domain)
 		return TTM_PL_SYSTEM;
 
@@ -1566,8 +1567,6 @@ uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
 		return AMDGPU_PL_OA;
 	case AMDGPU_GEM_DOMAIN_DOORBELL:
 		return AMDGPU_PL_DOORBELL;
-	case AMDGPU_GEM_DOMAIN_MMIO_REMAP:
-		return AMDGPU_PL_MMIO_REMAP;
 	default:
 		return TTM_PL_SYSTEM;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 52c2d1731aab..912c9afaf9e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -168,8 +168,6 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
 		return AMDGPU_GEM_DOMAIN_OA;
 	case AMDGPU_PL_DOORBELL:
 		return AMDGPU_GEM_DOMAIN_DOORBELL;
-	case AMDGPU_PL_MMIO_REMAP:
-		return AMDGPU_GEM_DOMAIN_MMIO_REMAP;
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index cfbcce9c27c5..15d561e3d87f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1909,42 +1909,45 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO
+ * amdgpu_ttm_alloc_mmio_remap_bo - Allocate the singleton MMIO_REMAP BO
 * @adev: amdgpu device
 *
- * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the
+ * Allocates a global BO with backing AMDGPU_PL_MMIO_REMAP when the
 * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host
 * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
 * GEM object (amdgpu_bo_create).
 *
- * The BO is created as a normal GEM object via amdgpu_bo_create(), then
- * reserved and pinned at the TTM level (ttm_bo_pin()) so it can never be
- * migrated or evicted. No CPU mapping is established here.
- *
 * Return:
 * * 0 on success or intentional skip (feature not present/unsupported)
 * * negative errno on allocation failure
 */
-static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
+static int amdgpu_ttm_alloc_mmio_remap_bo(struct amdgpu_device *adev)
 {
+	struct ttm_operation_ctx ctx = { false, false };
+	struct ttm_placement placement;
+	struct ttm_buffer_object *tbo;
+	struct ttm_place placements;
 	struct amdgpu_bo_param bp;
+	struct ttm_resource *tmp;
 	int r;
 
 	/* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */
 	if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE)
 		return 0;
 
+	/*
+	 * Allocate a BO first and then move it to AMDGPU_PL_MMIO_REMAP.
+	 * The initial TTM resource assigned by amdgpu_bo_create() is
+	 * replaced below with a fixed MMIO_REMAP placement.
+	 */
 	memset(&bp, 0, sizeof(bp));
-
-	/* Create exactly one GEM BO in the MMIO_REMAP domain.
*/ - bp.type = ttm_bo_type_device; /* userspace-mappable GEM */ - bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */ + bp.type = ttm_bo_type_device; + bp.size = AMDGPU_GPU_PAGE_SIZE; bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP; + bp.domain = 0; bp.flags = 0; bp.resv = NULL; bp.bo_ptr_size = sizeof(struct amdgpu_bo); - r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo); if (r) return r; @@ -1953,42 +1956,60 @@ static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev) if (r) goto err_unref; + tbo = &adev->rmmio_remap.bo->tbo; + /* * MMIO_REMAP is a fixed I/O placement (AMDGPU_PL_MMIO_REMAP). - * Use TTM-level pin so the BO cannot be evicted/migrated, - * independent of GEM domains. This - * enforces the “fixed I/O window” */ - ttm_bo_pin(&adev->rmmio_remap.bo->tbo); + placement.num_placement = 1; + placement.placement = &placements; + placements.fpfn = 0; + placements.lpfn = 0; + placements.mem_type = AMDGPU_PL_MMIO_REMAP; + placements.flags = 0; + /* Force the BO into the fixed MMIO_REMAP placement */ + r = ttm_bo_mem_space(tbo, &placement, &tmp, &ctx); + if (unlikely(r)) + goto err_unlock; + + ttm_resource_free(tbo, &tbo->resource); + ttm_bo_assign_mem(tbo, tmp); + ttm_bo_pin(tbo); amdgpu_bo_unreserve(adev->rmmio_remap.bo); return 0; +err_unlock: + amdgpu_bo_unreserve(adev->rmmio_remap.bo); + err_unref: - if (adev->rmmio_remap.bo) - amdgpu_bo_unref(&adev->rmmio_remap.bo); + amdgpu_bo_unref(&adev->rmmio_remap.bo); adev->rmmio_remap.bo = NULL; return r; } /** - * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO + * amdgpu_ttm_free_mmio_remap_bo - Free the singleton MMIO_REMAP BO * @adev: amdgpu device * * Frees the kernel-owned MMIO_REMAP BO if it was allocated by * amdgpu_ttm_mmio_remap_bo_init(). */ -static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev) +static void amdgpu_ttm_free_mmio_remap_bo(struct amdgpu_device *adev) { - struct amdgpu_bo *bo = adev->rmmio_remap.bo; - - if (!bo) - return; /* <-- safest early exit */ + if (!adev->rmmio_remap.bo) + return; if (!amdgpu_bo_reserve(adev->rmmio_remap.bo, true)) { ttm_bo_unpin(&adev->rmmio_remap.bo->tbo); amdgpu_bo_unreserve(adev->rmmio_remap.bo); } + + /* + * At this point we rely on normal DRM teardown ordering: + * no new user ioctls can access the global MMIO_REMAP BO + * once TTM teardown begins. + */ amdgpu_bo_unref(&adev->rmmio_remap.bo); adev->rmmio_remap.bo = NULL; } @@ -2172,8 +2193,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } - /* Allocate the singleton MMIO_REMAP BO (4K) if supported */ - r = amdgpu_ttm_mmio_remap_bo_init(adev); + /* Allocate the singleton MMIO_REMAP BO if supported */ + r = amdgpu_ttm_alloc_mmio_remap_bo(adev); if (r) return r; @@ -2241,7 +2262,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); - amdgpu_ttm_mmio_remap_bo_fini(adev); + amdgpu_ttm_free_mmio_remap_bo(adev); amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_drv_reserve_vram_fini(adev); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index f902add31fc6..1d34daa0ebcd 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -105,8 +105,6 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for * signalling user mode queues. - * - * %AMDGPU_GEM_DOMAIN_MMIO_REMAP MMIO remap page (special mapping for HDP flushing). 
*/ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -115,15 +113,13 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 #define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 -#define AMDGPU_GEM_DOMAIN_MMIO_REMAP 0x80 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ AMDGPU_GEM_DOMAIN_OA | \ - AMDGPU_GEM_DOMAIN_DOORBELL | \ - AMDGPU_GEM_DOMAIN_MMIO_REMAP) + AMDGPU_GEM_DOMAIN_DOORBELL) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) -- cgit v1.2.3 From 57d00816c6a9c152f01b65bb7b3662f4d03ccd09 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Feb 2026 16:53:08 -0500 Subject: drm/amdgpu: set family for GC 11.5.4 Set the family for GC 11.5.4 Fixes: 47ae1f938d12 ("drm/amdgpu: add support for GC IP version 11.5.4") Cc: Tim Huang Cc: Pratik Vishwakarma Cc: Roman Li Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 4 +++- include/uapi/drm/amdgpu_drm.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/drm') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 41e63c286912..4143a25a498b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2988,9 +2988,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): - case IP_VERSION(11, 5, 4): adev->family = AMDGPU_FAMILY_GC_11_5_0; break; + case IP_VERSION(11, 5, 4): + adev->family = AMDGPU_FAMILY_GC_11_5_4; + break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): case IP_VERSION(12, 1, 0): diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1d34daa0ebcd..ebbd861ef0bc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1667,6 +1667,7 @@ struct drm_amdgpu_info_uq_metadata { #define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_11_5_4 154 /* GC 11.5.4 */ #define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ #if defined(__cplusplus) -- cgit v1.2.3