summaryrefslogtreecommitdiff
path: root/drivers/target
diff options
context:
space:
mode:
authorJosef Bacik <josef@toxicpanda.com>2026-03-31 10:31:29 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2026-04-11 14:29:19 +0200
commit05ac3754467363558a0a54ae4bb7c89b2c9574cf (patch)
tree1f590a8286f54593e185e6c78450c46f0a5a29f2 /drivers/target
parent4eaff1728d0e69b95933412241bbccf4f797dba8 (diff)
scsi: target: tcm_loop: Drain commands in target_reset handler
[ Upstream commit 1333eee56cdf3f0cf67c6ab4114c2c9e0a952026 ] tcm_loop_target_reset() violates the SCSI EH contract: it returns SUCCESS without draining any in-flight commands. The SCSI EH documentation (scsi_eh.rst) requires that when a reset handler returns SUCCESS the driver has made lower layers "forget about timed out scmds" and is ready for new commands. Every other SCSI LLD (virtio_scsi, mpt3sas, ipr, scsi_debug, mpi3mr) enforces this by draining or completing outstanding commands before returning SUCCESS. Because tcm_loop_target_reset() doesn't drain, the SCSI EH reuses in-flight scsi_cmnd structures for recovery commands (e.g. TUR) while the target core still has async completion work queued for the old se_cmd. The memset in queuecommand zeroes se_lun and lun_ref_active, causing transport_lun_remove_cmd() to skip its percpu_ref_put(). The leaked LUN reference prevents transport_clear_lun_ref() from completing, hanging configfs LUN unlink forever in D-state: INFO: task rm:264 blocked for more than 122 seconds. rm D 0 264 258 0x00004000 Call Trace: __schedule+0x3d0/0x8e0 schedule+0x36/0xf0 transport_clear_lun_ref+0x78/0x90 [target_core_mod] core_tpg_remove_lun+0x28/0xb0 [target_core_mod] target_fabric_port_unlink+0x50/0x60 [target_core_mod] configfs_unlink+0x156/0x1f0 [configfs] vfs_unlink+0x109/0x290 do_unlinkat+0x1d5/0x2d0 Fix this by making tcm_loop_target_reset() actually drain commands: 1. Issue TMR_LUN_RESET via tcm_loop_issue_tmr() to drain all commands that the target core knows about (those not yet CMD_T_COMPLETE). 2. Use blk_mq_tagset_busy_iter() to iterate all started requests and flush_work() on each se_cmd — this drains any deferred completion work for commands that already had CMD_T_COMPLETE set before the TMR (which the TMR skips via __target_check_io_state()). This is the same pattern used by mpi3mr, scsi_debug, and libsas to drain outstanding commands during reset. Fixes: e0eb5d38b732 ("scsi: target: tcm_loop: Use block cmd allocator for se_cmds") Cc: stable@vger.kernel.org Assisted-by: Claude:claude-opus-4-6 Signed-off-by: Josef Bacik <josef@toxicpanda.com> Link: https://patch.msgid.link/27011aa34c8f6b1b94d2e3cf5655b6d037f53428.1773706803.git.josef@toxicpanda.com Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/target')
-rw-r--r--drivers/target/loopback/tcm_loop.c52
1 files changed, 46 insertions, 6 deletions
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 01a8e349dc4d..156f934049f1 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/configfs.h>
+#include <linux/blk-mq.h>
#include <scsi/scsi.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsi_host.h>
@@ -268,15 +269,27 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc)
return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
}
+static bool tcm_loop_flush_work_iter(struct request *rq, void *data)
+{
+ struct scsi_cmnd *sc = blk_mq_rq_to_pdu(rq);
+ struct tcm_loop_cmd *tl_cmd = scsi_cmd_priv(sc);
+ struct se_cmd *se_cmd = &tl_cmd->tl_se_cmd;
+
+ flush_work(&se_cmd->work);
+ return true;
+}
+
static int tcm_loop_target_reset(struct scsi_cmnd *sc)
{
struct tcm_loop_hba *tl_hba;
struct tcm_loop_tpg *tl_tpg;
+ struct Scsi_Host *sh = sc->device->host;
+ int ret;
/*
* Locate the tcm_loop_hba_t pointer
*/
- tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
+ tl_hba = *(struct tcm_loop_hba **)shost_priv(sh);
if (!tl_hba) {
pr_err("Unable to perform device reset without active I_T Nexus\n");
return FAILED;
@@ -285,11 +298,38 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc)
* Locate the tl_tpg pointer from TargetID in sc->device->id
*/
tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
- if (tl_tpg) {
- tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE;
- return SUCCESS;
- }
- return FAILED;
+ if (!tl_tpg)
+ return FAILED;
+
+ /*
+ * Issue a LUN_RESET to drain all commands that the target core
+ * knows about. This handles commands not yet marked CMD_T_COMPLETE.
+ */
+ ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, 0, TMR_LUN_RESET);
+ if (ret != TMR_FUNCTION_COMPLETE)
+ return FAILED;
+
+ /*
+ * Flush any deferred target core completion work that may still be
+ * queued. Commands that already had CMD_T_COMPLETE set before the TMR
+ * are skipped by the TMR drain, but their async completion work
+ * (transport_lun_remove_cmd → percpu_ref_put, release_cmd → scsi_done)
+ * may still be pending in target_completion_wq.
+ *
+ * The SCSI EH will reuse in-flight scsi_cmnd structures for recovery
+ * commands (e.g. TUR) immediately after this handler returns SUCCESS —
+ * if deferred work is still pending, the memset in queuecommand would
+ * zero the se_cmd while the work accesses it, leaking the LUN
+ * percpu_ref and hanging configfs unlink forever.
+ *
+ * Use blk_mq_tagset_busy_iter() to find all started requests and
+ * flush_work() on each — the same pattern used by mpi3mr, scsi_debug,
+ * and other SCSI drivers to drain outstanding commands during reset.
+ */
+ blk_mq_tagset_busy_iter(&sh->tag_set, tcm_loop_flush_work_iter, NULL);
+
+ tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE;
+ return SUCCESS;
}
static const struct scsi_host_template tcm_loop_driver_template = {