From 31e6595fd1a0517cdcdc74740ac96e76f25ad312 Mon Sep 17 00:00:00 2001 From: sguttula Date: Sat, 21 Feb 2026 10:03:32 +0530 Subject: drm/amdgpu/vcn5: Add SMU dpm interface type [ Upstream commit a5fe1a54513196e4bc8f9170006057dc31e7155e ] This will set AMDGPU_VCN_SMU_DPM_INTERFACE_* smu_type based on soc type and fixing ring timeout issue seen for DPM enabled case. Signed-off-by: sguttula Reviewed-by: Pratik Vishwakarma Signed-off-by: Alex Deucher (cherry picked from commit f0f23c315b38c55e8ce9484cf59b65811f350630) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 0202df5db1e1..6109124f852e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -174,6 +174,10 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); -- cgit v1.2.3 From 4c32155265b67a876a5ab0e36819f17659e7b8a5 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 3 Mar 2026 21:10:11 -0500 Subject: drm/amd/pm: add missing od setting PP_OD_FEATURE_ZERO_FAN_BIT for smu v13 [ Upstream commit cb47c882c31334aadc13ace80781728ed22a05ee ] add missing od setting PP_OD_FEATURE_ZERO_FAN_BIT for smu v13.0.0/13.0.7 Fixes: cfffd980bf21 ("drm/amd/pm: add zero RPM OD setting support for SMU13") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5018 Signed-off-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 576a10797b607ee9e4068218daf367b481564120) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 3 ++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 8d070a9ea2c1..651fe1926a69 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2289,7 +2289,8 @@ static int smu_v13_0_0_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v13_0_0_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index c96fa5e49ed6..87dfc3c3cd9c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2275,7 +2275,8 @@ static int smu_v13_0_7_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v13_0_7_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0) -- cgit v1.2.3 From 2ee3645e0f3f4343ccdec769d584c85359537c12 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 3 Mar 2026 21:14:10 -0500 Subject: drm/amd/pm: add missing od setting PP_OD_FEATURE_ZERO_FAN_BIT for smu v14 [ Upstream commit 9d4837a26149355ffe3a1f80de80531eafdd3353 ] add missing od setting PP_OD_FEATURE_ZERO_FAN_BIT for smu v14.0.2/14.0.3 Fixes: 9710b84e2a6a ("drm/amd/pm: add overdrive support on smu v14.0.2/3") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5018 Signed-off-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 1b5cf07d80bb16d1593579ccdb23f08ea4262c14) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index bad8dd786bff..470a901926f3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -2412,7 +2412,8 @@ static int smu_v14_0_2_restore_user_od_settings(struct smu_context *smu) user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | BIT(PP_OD_FEATURE_UCLK_BIT) | BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | - BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + BIT(PP_OD_FEATURE_FAN_CURVE_BIT) | + BIT(PP_OD_FEATURE_ZERO_FAN_BIT); res = smu_v14_0_2_upload_overdrive_table(smu, user_od_table); user_od_table->OverDriveTable.FeatureCtrlMask = 0; if (res == 0) -- cgit v1.2.3 From 529c985da1b277b36dc99aad660f96dc70f3c467 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 9 Dec 2025 15:13:23 -0500 Subject: drm/amdkfd: Unreserve bo if queue update failed [ Upstream commit 2ce75a0b7e1bfddbcb9bc8aeb2e5e7fa99971acf ] Error handling path should unreserve bo then return failed. Fixes: 305cd109b761 ("drm/amdkfd: Validate user queue update") Signed-off-by: Philip Yang Reviewed-by: Alex Sierra Signed-off-by: Alex Deucher (cherry picked from commit c24afed7de9ecce341825d8ab55a43a254348b33) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7fbb5c274ccc..7bf712032c52 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -606,6 +606,7 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, p->queue_size)) { pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", p->queue_address, p->queue_size); + amdgpu_bo_unreserve(vm->root.bo); return -EFAULT; } -- cgit v1.2.3 From 1051eb2f53886ec7e36896dfa356884d7212443a Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 28 Jan 2026 13:54:31 +0800 Subject: drm/amdgpu: ensure no_hw_access is visible before MMIO commit 31b153315b8702d0249aa44d83d9fbf42c5c7a79 upstream. Add a full memory barrier after clearing no_hw_access in amdgpu_device_mode1_reset() so subsequent PCI state restore access cannot observe stale state on other CPUs. Fixes: 7edb503fe4b6 ("drm/amd/pm: Disable MMIO access during SMU Mode 1 reset") Signed-off-by: Perry Yuan Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: Simon Liebold Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index dbcd55611a37..477f7b197415 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5743,6 +5743,9 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) /* enable mmio access after mode 1 reset completed */ adev->no_hw_access = false; + /* ensure no_hw_access is updated before we access hw */ + smp_mb(); + amdgpu_device_load_pci_state(adev->pdev); ret = amdgpu_psp_wait_for_bootloader(adev); if (ret) -- cgit v1.2.3 From 6fff5204d8aa26b1be50b6427f833bd3e8899c4f Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Fri, 20 Feb 2026 13:47:58 +0530 Subject: drm/amdgpu: add upper bound check on user inputs in signal ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ea78f8c68f4f6211c557df49174c54d167821962 upstream. Huge input values in amdgpu_userq_signal_ioctl can lead to a OOM and could be exploited. So check these input value against AMDGPU_USERQ_MAX_HANDLES which is big enough value for genuine use cases and could potentially avoid OOM. Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit be267e15f99bc97cbe202cd556717797cdcf79a5) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index ead153897445..395df0b9caf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -35,6 +35,8 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops; static struct kmem_cache *amdgpu_userq_fence_slab; +#define AMDGPU_USERQ_MAX_HANDLES (1U << 16) + int amdgpu_userq_fence_slab_init(void) { amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", @@ -475,6 +477,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; + if (args->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || + args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + num_syncobj_handles = args->num_syncobj_handles; syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles), size_mul(sizeof(u32), num_syncobj_handles)); -- cgit v1.2.3 From 762f47e2b824383d5be65eee2c40a1269b7d50c8 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Feb 2026 12:41:30 +0000 Subject: drm/amdgpu/userq: Fix reference leak in amdgpu_userq_wait_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 49abfa812617a7f2d0132c70d23ac98b389c6ec1 upstream. Drop reference to syncobj and timeline fence when aborting the ioctl due output array being too small. Reviewed-by: Alex Deucher Signed-off-by: Tvrtko Ursulin Fixes: a292fdecd728 ("drm/amdgpu: Implement userqueue signal/wait IOCTL") Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 68951e9c3e6bb22396bc42ef2359751c8315dd27) Cc: # v6.16+ Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 395df0b9caf9..42c4e04c64cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -879,6 +879,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, dma_fence_unwrap_for_each(f, &iter, fence) { if (num_fences >= wait_info->num_fences) { r = -EINVAL; + dma_fence_put(fence); goto free_fences; } @@ -903,6 +904,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (num_fences >= wait_info->num_fences) { r = -EINVAL; + dma_fence_put(fence); goto free_fences; } -- cgit v1.2.3 From b1d10508da559da2e0ca9cca6505094a7df948e1 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Tue, 24 Feb 2026 12:13:09 +0530 Subject: drm/amdgpu: add upper bound check on user inputs in wait ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 64ac7c09fc44985ec9bb6a9db740899fa40ca613 upstream. Huge input values in amdgpu_userq_wait_ioctl can lead to a OOM and could be exploited. So check these input value against AMDGPU_USERQ_MAX_HANDLES which is big enough value for genuine use cases and could potentially avoid OOM. v2: squash in Srini's fix Signed-off-by: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit fcec012c664247531aed3e662f4280ff804d1476) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 42c4e04c64cc..73e1827816a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -667,6 +667,11 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, if (!amdgpu_userq_enabled(dev)) return -ENOTSUPP; + if (wait_info->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES || + wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES) + return -EINVAL; + num_read_bo_handles = wait_info->num_bo_read_handles; bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles), size_mul(sizeof(u32), num_read_bo_handles)); -- cgit v1.2.3 From 06ef2ba582c68daa6bdaaef82827734d9f07b8fd Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 25 Feb 2026 10:51:16 -0600 Subject: drm/amd: Disable MES LR compute W/A commit 6b0d812971370c64b837a2db4275410f478272fe upstream. A workaround was introduced in commit 1fb710793ce2 ("drm/amdgpu: Enable MES lr_compute_wa by default") to help with some hangs observed in gfx1151. This WA didn't fully fix the issue. It was actually fixed by adjusting the VGPR size to the correct value that matched the hardware in commit b42f3bf9536c ("drm/amdkfd: bump minimum vgpr size for gfx1151"). There are reports of instability on other products with newer GC microcode versions, and I believe they're caused by this workaround. As we don't need the workaround any more, remove it. Fixes: b42f3bf9536c ("drm/amdkfd: bump minimum vgpr size for gfx1151") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 9973e64bd6ee7642860a6f3b6958cbf14e89cabd) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 5 ----- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 5 ----- 2 files changed, 10 deletions(-) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 05546a6e80ae..d5cc32fa7848 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -714,11 +714,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; - else - dev_info_once(mes->adev->dev, - "MES FW version must be >= 0x7f to enable LR compute workaround.\n"); if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 7f3512d9de07..4a424c1f9d55 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -775,11 +775,6 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; - if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82) - mes_set_hw_res_pkt.enable_lr_compute_wa = 1; - else - dev_info_once(adev->dev, - "MES FW version must be >= 0x82 to enable LR compute workaround.\n"); /* * Keep oversubscribe timer for sdma . When we have unmapped doorbell -- cgit v1.2.3 From 1a34999922ba6c95df6e3ba5c82624f61323f82b Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Wed, 18 Feb 2026 14:34:28 -0500 Subject: drm/amd/display: Fallback to boot snapshot for dispclk commit 30d937f63bd19bbcaafa4b892eb251f8bbbf04ef upstream. [WHY & HOW] If the dentist is unavailable, fallback to reading CLKIP via the boot snapshot to get the current dispclk. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Dillon Varone Signed-off-by: Alex Hung Cc: Mario Limonciello Cc: Alex Deucher Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 2ab77600d1e55a042c02437326d3c7563e853c6c) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index d4a0961f6b51..0a001efe1281 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -68,7 +68,11 @@ void dcn401_initialize_min_clocks(struct dc *dc) * audio corruption. Read current DISPCLK from DENTIST and request the same * freq to ensure that the timing is valid and unchanged. */ - clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr); + if (dc->clk_mgr->funcs->get_dispclk_from_dentist) { + clocks->dispclk_khz = dc->clk_mgr->funcs->get_dispclk_from_dentist(dc->clk_mgr); + } else { + clocks->dispclk_khz = dc->clk_mgr->boot_snapshot.dispclk * 1000; + } } clocks->ref_dtbclk_khz = dc->clk_mgr->bw_params->clk_table.entries[0].dtbclk_mhz * 1000; clocks->fclk_p_state_change_support = true; -- cgit v1.2.3 From 33c3a4db31719d414f0622659ca086b708270c9f Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 25 Feb 2026 22:51:06 -0500 Subject: drm/amd/pm: remove invalid gpu_metrics.energy_accumulator on smu v13.0.x commit 68785c5e79e0fc1eacf63026fbba32be3867f410 upstream. v1: The metrics->EnergyAccumulator field has been deprecated on newer pmfw. v2: add smu 13.0.0/13.0.7/13.0.10 support. Signed-off-by: Yang Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 8de9edb35976fa56565dc8fbb5d1310e8e10187c) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 +++++++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 651fe1926a69..2136db732893 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2109,6 +2109,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu, (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table; SmuMetricsExternal_t metrics_ext; SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + uint32_t mp1_ver = amdgpu_ip_version(smu->adev, MP1_HWIP, 0); int ret = 0; ret = smu_cmn_get_metrics_table(smu, @@ -2133,7 +2134,12 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu, metrics->Vcn1ActivityPercentage); gpu_metrics->average_socket_power = metrics->AverageSocketPower; - gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if ((mp1_ver == IP_VERSION(13, 0, 0) && smu->smc_fw_version <= 0x004e1e00) || + (mp1_ver == IP_VERSION(13, 0, 10) && smu->smc_fw_version <= 0x00500800)) + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + else + gpu_metrics->energy_accumulator = UINT_MAX; if (metrics->AverageGfxActivity <= SMU_13_0_0_BUSY_THRESHOLD) gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 87dfc3c3cd9c..2b6c407c6a8c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2119,7 +2119,8 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu, metrics->Vcn1ActivityPercentage); gpu_metrics->average_socket_power = metrics->AverageSocketPower; - gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + gpu_metrics->energy_accumulator = smu->smc_fw_version <= 0x00521400 ? + metrics->EnergyAccumulator : UINT_MAX; if (metrics->AverageGfxActivity <= SMU_13_0_7_BUSY_THRESHOLD) gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; -- cgit v1.2.3 From 7885eb335d8f9e9942925d57e300a85e3f82ded4 Mon Sep 17 00:00:00 2001 From: Alysa Liu Date: Thu, 5 Feb 2026 11:21:45 -0500 Subject: drm/amdgpu: Fix use-after-free race in VM acquire commit 2c1030f2e84885cc58bffef6af67d5b9d2e7098f upstream. Replace non-atomic vm->process_info assignment with cmpxchg() to prevent race when parent/child processes sharing a drm_file both try to acquire the same VM after fork(). Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alysa Liu Signed-off-by: Alex Deucher (cherry picked from commit c7c573275ec20db05be769288a3e3bb2250ec618) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 923f0fa7350c..d3f541d3108c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1421,7 +1421,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, *process_info = info; } - vm->process_info = *process_info; + if (cmpxchg(&vm->process_info, NULL, *process_info) != NULL) { + ret = -EINVAL; + goto already_acquired; + } /* Validate page directory and attach eviction fence */ ret = amdgpu_bo_reserve(vm->root.bo, true); @@ -1461,6 +1464,7 @@ validate_pd_fail: amdgpu_bo_unreserve(vm->root.bo); reserve_pd_fail: vm->process_info = NULL; +already_acquired: if (info) { dma_fence_put(&info->eviction_fence->base); *process_info = NULL; -- cgit v1.2.3 From 57579312e0e87dffa2aeca9acd4ba2ec25da999d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 10 Mar 2026 11:58:22 -0500 Subject: drm/amd: Set num IP blocks to 0 if discovery fails commit 3646ff28780b4c52c5b5081443199e7a430110e5 upstream. If discovery has failed for any reason (such as no support for a block) then there is no need to unwind all the IP blocks in fini. In this condition there can actually be failures during the unwind too. Reset num_ip_blocks to zero during failure path and skip the unnecessary cleanup path. Suggested-by: Lijo Lazar Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit fae5984296b981c8cc3acca35b701c1f332a6cd8) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 477f7b197415..8f49cd72b5fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2814,8 +2814,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; default: r = amdgpu_discovery_set_ip_blocks(adev); - if (r) + if (r) { + adev->num_ip_blocks = 0; return r; + } break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a8b507fd8567..d8c0154c5297 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -83,7 +83,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); - if (adev == NULL) + if (adev == NULL || !adev->num_ip_blocks) return; amdgpu_unregister_gpu_instance(adev); -- cgit v1.2.3 From 43025c941aced9a9009f9ff20eea4eb78c61deb8 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 4 Mar 2026 14:07:40 -0600 Subject: drm/amd: Fix NULL pointer dereference in device cleanup commit 062ea905fff7756b2e87143ffccaece5cdb44267 upstream. When GPU initialization fails due to an unsupported HW block IP blocks may have a NULL version pointer. During cleanup in amdgpu_device_fini_hw, the code calls amdgpu_device_set_pg_state and amdgpu_device_set_cg_state which iterate over all IP blocks and access adev->ip_blocks[i].version without NULL checks, leading to a kernel NULL pointer dereference. Add NULL checks for adev->ip_blocks[i].version in both amdgpu_device_set_cg_state and amdgpu_device_set_pg_state to prevent dereferencing NULL pointers during GPU teardown when initialization has failed. Fixes: 39fc2bc4da00 ("drm/amdgpu: Protect GPU register accesses in powergated state in some paths") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit b7ac77468cda92eecae560b05f62f997a12fe2f2) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8f49cd72b5fd..cb38326571ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3375,6 +3375,8 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; /* skip CG for GFX, SDMA on S0ix */ if (adev->in_s0ix && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || @@ -3414,6 +3416,8 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; /* skip PG for GFX, SDMA on S0ix */ if (adev->in_s0ix && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || -- cgit v1.2.3 From 38f1640db7f8bf57b9e09c5b0b8b205a598f1b3e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 5 Mar 2026 09:06:11 -0600 Subject: drm/amd: Fix a few more NULL pointer dereference in device cleanup commit 72ecb1dae72775fa9fea0159d8445d620a0a2295 upstream. I found a few more paths that cleanup fails due to a NULL version pointer on unsupported hardware. Add NULL checks as applicable. Fixes: 39fc2bc4da00 ("drm/amdgpu: Protect GPU register accesses in powergated state in some paths") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit f5a05f8414fc10f307eb965f303580c7778f8dd2) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cb38326571ce..c22aea46efcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3625,6 +3625,8 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].version) + continue; if (!adev->ip_blocks[i].version->funcs->early_fini) continue; @@ -3687,6 +3689,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.sw) continue; + if (!adev->ip_blocks[i].version) + continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { amdgpu_ucode_free_bo(adev); amdgpu_free_static_csa(&adev->virt.csa_obj); @@ -3713,6 +3717,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.late_initialized) continue; + if (!adev->ip_blocks[i].version) + continue; if (adev->ip_blocks[i].version->funcs->late_fini) adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]); adev->ip_blocks[i].status.late_initialized = false; -- cgit v1.2.3