aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dan Williams <dan.j.williams@intel.com> 2011-11-28 14:29:20 -0500
committer: James Bottomley <JBottomley@Parallels.com> 2012-02-19 15:06:08 -0500
commit: 9095a64a9aead653df320e3a6fc70835c15d46e4 (patch)
tree: 63ad804f78668bf28d90ea2f8c9640ce92dacdea
parent: a3a142524aa4b1539a64a55087bf12ffa4b1f94e (diff)
[SCSI] libsas: fix timeout vs completion race
Until we have told the lldd to forget a task, a timed-out operation can return from the hardware at any time. Since completion frees the task, we need to make sure that no tasks run their normal completion handler once eh has decided to manage the task. Similar to ata_scsi_cmd_error_handler(), freeze completions to let eh judge the outcome of the race.

Task collector mode is problematic because it presents a situation where a task can be timed out and aborted before the lldd has even seen it. For this case we need to guarantee that a task that an lldd has been told to forget does not get queued after the lldd says "never seen it". With sas_scsi_timed_out we achieve this with the ->task_queue_flush mutex, rather than adding more time.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r-- drivers/scsi/libsas/sas_ata.c 35
-rw-r--r-- drivers/scsi/libsas/sas_internal.h 1
-rw-r--r-- drivers/scsi/libsas/sas_scsi_host.c 104
-rw-r--r-- include/scsi/libsas.h 3
-rw-r--r-- include/scsi/sas_ata.h 8
5 files changed, 68 insertions, 83 deletions
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 903bb441b9f9..4c2a1402373c 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -93,21 +93,30 @@ static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts)
93static void sas_ata_task_done(struct sas_task *task) 93static void sas_ata_task_done(struct sas_task *task)
94{ 94{
95 struct ata_queued_cmd *qc = task->uldd_task; 95 struct ata_queued_cmd *qc = task->uldd_task;
96 struct domain_device *dev; 96 struct domain_device *dev = task->dev;
97 struct task_status_struct *stat = &task->task_status; 97 struct task_status_struct *stat = &task->task_status;
98 struct ata_task_resp *resp = (struct ata_task_resp *)stat->buf; 98 struct ata_task_resp *resp = (struct ata_task_resp *)stat->buf;
99 struct sas_ha_struct *sas_ha; 99 struct sas_ha_struct *sas_ha = dev->port->ha;
100 enum ata_completion_errors ac; 100 enum ata_completion_errors ac;
101 unsigned long flags; 101 unsigned long flags;
102 struct ata_link *link; 102 struct ata_link *link;
103 struct ata_port *ap; 103 struct ata_port *ap;
104 104
105 spin_lock_irqsave(&dev->done_lock, flags);
106 if (test_bit(SAS_HA_FROZEN, &sas_ha->state))
107 task = NULL;
108 else if (qc && qc->scsicmd)
109 ASSIGN_SAS_TASK(qc->scsicmd, NULL);
110 spin_unlock_irqrestore(&dev->done_lock, flags);
111
112 /* check if libsas-eh got to the task before us */
113 if (unlikely(!task))
114 return;
115
105 if (!qc) 116 if (!qc)
106 goto qc_already_gone; 117 goto qc_already_gone;
107 118
108 ap = qc->ap; 119 ap = qc->ap;
109 dev = ap->private_data;
110 sas_ha = dev->port->ha;
111 link = &ap->link; 120 link = &ap->link;
112 121
113 spin_lock_irqsave(ap->lock, flags); 122 spin_lock_irqsave(ap->lock, flags);
@@ -156,8 +165,6 @@ static void sas_ata_task_done(struct sas_task *task)
156 } 165 }
157 166
158 qc->lldd_task = NULL; 167 qc->lldd_task = NULL;
159 if (qc->scsicmd)
160 ASSIGN_SAS_TASK(qc->scsicmd, NULL);
161 ata_qc_complete(qc); 168 ata_qc_complete(qc);
162 spin_unlock_irqrestore(ap->lock, flags); 169 spin_unlock_irqrestore(ap->lock, flags);
163 170
@@ -633,22 +640,6 @@ void sas_ata_strategy_handler(struct Scsi_Host *shost)
633 sas_enable_revalidation(sas_ha); 640 sas_enable_revalidation(sas_ha);
634} 641}
635 642
636int sas_ata_timed_out(struct scsi_cmnd *cmd, struct sas_task *task,
637 enum blk_eh_timer_return *rtn)
638{
639 struct domain_device *ddev = cmd_to_domain_dev(cmd);
640
641 if (!dev_is_sata(ddev) || task)
642 return 0;
643
644 /* we're a sata device with no task, so this must be a libata
645 * eh timeout. Ideally should hook into libata timeout
646 * handling, but there's no point, it just wants to activate
647 * the eh thread */
648 *rtn = BLK_EH_NOT_HANDLED;
649 return 1;
650}
651
652int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, 643int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
653 struct list_head *done_q) 644 struct list_head *done_q)
654{ 645{
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index ebe9b81ddef5..662ffcba99d2 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -142,6 +142,7 @@ static inline struct domain_device *sas_alloc_device(void)
142 INIT_LIST_HEAD(&dev->dev_list_node); 142 INIT_LIST_HEAD(&dev->dev_list_node);
143 INIT_LIST_HEAD(&dev->disco_list_node); 143 INIT_LIST_HEAD(&dev->disco_list_node);
144 kref_init(&dev->kref); 144 kref_init(&dev->kref);
145 spin_lock_init(&dev->done_lock);
145 } 146 }
146 return dev; 147 return dev;
147} 148}
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 50db8f971a06..0e3fdba7b510 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -119,9 +119,19 @@ static void sas_end_task(struct scsi_cmnd *sc, struct sas_task *task)
119static void sas_scsi_task_done(struct sas_task *task) 119static void sas_scsi_task_done(struct sas_task *task)
120{ 120{
121 struct scsi_cmnd *sc = task->uldd_task; 121 struct scsi_cmnd *sc = task->uldd_task;
122 struct domain_device *dev = task->dev;
123 struct sas_ha_struct *ha = dev->port->ha;
124 unsigned long flags;
125
126 spin_lock_irqsave(&dev->done_lock, flags);
127 if (test_bit(SAS_HA_FROZEN, &ha->state))
128 task = NULL;
129 else
130 ASSIGN_SAS_TASK(sc, NULL);
131 spin_unlock_irqrestore(&dev->done_lock, flags);
122 132
123 if (unlikely(task->task_state_flags & SAS_TASK_STATE_ABORTED)) { 133 if (unlikely(!task)) {
124 /* Aborted tasks will be completed by the error handler */ 134 /* task will be completed by the error handler */
125 SAS_DPRINTK("task done but aborted\n"); 135 SAS_DPRINTK("task done but aborted\n");
126 return; 136 return;
127 } 137 }
@@ -133,7 +143,6 @@ static void sas_scsi_task_done(struct sas_task *task)
133 return; 143 return;
134 } 144 }
135 145
136 ASSIGN_SAS_TASK(sc, NULL);
137 sas_end_task(sc, task); 146 sas_end_task(sc, task);
138 sc->scsi_done(sc); 147 sc->scsi_done(sc);
139} 148}
@@ -298,6 +307,7 @@ enum task_disposition {
298 TASK_IS_DONE, 307 TASK_IS_DONE,
299 TASK_IS_ABORTED, 308 TASK_IS_ABORTED,
300 TASK_IS_AT_LU, 309 TASK_IS_AT_LU,
310 TASK_IS_NOT_AT_HA,
301 TASK_IS_NOT_AT_LU, 311 TASK_IS_NOT_AT_LU,
302 TASK_ABORT_FAILED, 312 TASK_ABORT_FAILED,
303}; 313};
@@ -314,19 +324,18 @@ static enum task_disposition sas_scsi_find_task(struct sas_task *task)
314 struct scsi_core *core = &ha->core; 324 struct scsi_core *core = &ha->core;
315 struct sas_task *t, *n; 325 struct sas_task *t, *n;
316 326
327 mutex_lock(&core->task_queue_flush);
317 spin_lock_irqsave(&core->task_queue_lock, flags); 328 spin_lock_irqsave(&core->task_queue_lock, flags);
318 list_for_each_entry_safe(t, n, &core->task_queue, list) { 329 list_for_each_entry_safe(t, n, &core->task_queue, list)
319 if (task == t) { 330 if (task == t) {
320 list_del_init(&t->list); 331 list_del_init(&t->list);
321 spin_unlock_irqrestore(&core->task_queue_lock, 332 break;
322 flags);
323 SAS_DPRINTK("%s: task 0x%p aborted from "
324 "task_queue\n",
325 __func__, task);
326 return TASK_IS_ABORTED;
327 } 333 }
328 }
329 spin_unlock_irqrestore(&core->task_queue_lock, flags); 334 spin_unlock_irqrestore(&core->task_queue_lock, flags);
335 mutex_unlock(&core->task_queue_flush);
336
337 if (task == t)
338 return TASK_IS_NOT_AT_HA;
330 } 339 }
331 340
332 for (i = 0; i < 5; i++) { 341 for (i = 0; i < 5; i++) {
@@ -499,8 +508,7 @@ try_bus_reset:
499} 508}
500 509
501static int sas_eh_handle_sas_errors(struct Scsi_Host *shost, 510static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
502 struct list_head *work_q, 511 struct list_head *work_q)
503 struct list_head *done_q)
504{ 512{
505 struct scsi_cmnd *cmd, *n; 513 struct scsi_cmnd *cmd, *n;
506 enum task_disposition res = TASK_IS_DONE; 514 enum task_disposition res = TASK_IS_DONE;
@@ -511,7 +519,16 @@ static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
511 519
512Again: 520Again:
513 list_for_each_entry_safe(cmd, n, work_q, eh_entry) { 521 list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
514 struct sas_task *task = TO_SAS_TASK(cmd); 522 struct domain_device *dev = cmd_to_domain_dev(cmd);
523 struct sas_task *task;
524
525 spin_lock_irqsave(&dev->done_lock, flags);
526 /* by this point the lldd has either observed
527 * SAS_HA_FROZEN and is leaving the task alone, or has
528 * won the race with eh and decided to complete it
529 */
530 task = TO_SAS_TASK(cmd);
531 spin_unlock_irqrestore(&dev->done_lock, flags);
515 532
516 if (!task) 533 if (!task)
517 continue; 534 continue;
@@ -534,6 +551,14 @@ Again:
534 cmd->eh_eflags = 0; 551 cmd->eh_eflags = 0;
535 552
536 switch (res) { 553 switch (res) {
554 case TASK_IS_NOT_AT_HA:
555 SAS_DPRINTK("%s: task 0x%p is not at ha: %s\n",
556 __func__, task,
557 cmd->retries ? "retry" : "aborted");
558 if (cmd->retries)
559 cmd->retries--;
560 sas_eh_finish_cmd(cmd);
561 continue;
537 case TASK_IS_DONE: 562 case TASK_IS_DONE:
538 SAS_DPRINTK("%s: task 0x%p is done\n", __func__, 563 SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
539 task); 564 task);
@@ -635,7 +660,8 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
635 * Deal with commands that still have SAS tasks (i.e. they didn't 660 * Deal with commands that still have SAS tasks (i.e. they didn't
636 * complete via the normal sas_task completion mechanism) 661 * complete via the normal sas_task completion mechanism)
637 */ 662 */
638 if (sas_eh_handle_sas_errors(shost, &eh_work_q, &ha->eh_done_q)) 663 set_bit(SAS_HA_FROZEN, &ha->state);
664 if (sas_eh_handle_sas_errors(shost, &eh_work_q))
639 goto out; 665 goto out;
640 666
641 /* 667 /*
@@ -649,6 +675,10 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
649 scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q); 675 scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q);
650 676
651out: 677out:
678 clear_bit(SAS_HA_FROZEN, &ha->state);
679 if (ha->lldd_max_execute_num > 1)
680 wake_up_process(ha->core.queue_thread);
681
652 /* now link into libata eh --- if we have any ata devices */ 682 /* now link into libata eh --- if we have any ata devices */
653 sas_ata_strategy_handler(shost); 683 sas_ata_strategy_handler(shost);
654 684
@@ -660,43 +690,7 @@ out:
660 690
661enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) 691enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
662{ 692{
663 struct sas_task *task = TO_SAS_TASK(cmd); 693 scmd_printk(KERN_DEBUG, cmd, "command %p timed out\n", cmd);
664 unsigned long flags;
665 enum blk_eh_timer_return rtn;
666
667 if (sas_ata_timed_out(cmd, task, &rtn))
668 return rtn;
669
670 if (!task) {
671 cmd->request->timeout /= 2;
672 SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n",
673 cmd, task, (cmd->request->timeout ?
674 "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED"));
675 if (!cmd->request->timeout)
676 return BLK_EH_NOT_HANDLED;
677 return BLK_EH_RESET_TIMER;
678 }
679
680 spin_lock_irqsave(&task->task_state_lock, flags);
681 BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED);
682 if (task->task_state_flags & SAS_TASK_STATE_DONE) {
683 spin_unlock_irqrestore(&task->task_state_lock, flags);
684 SAS_DPRINTK("command 0x%p, task 0x%p, timed out: "
685 "BLK_EH_HANDLED\n", cmd, task);
686 return BLK_EH_HANDLED;
687 }
688 if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) {
689 spin_unlock_irqrestore(&task->task_state_lock, flags);
690 SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: "
691 "BLK_EH_RESET_TIMER\n",
692 cmd, task);
693 return BLK_EH_RESET_TIMER;
694 }
695 task->task_state_flags |= SAS_TASK_STATE_ABORTED;
696 spin_unlock_irqrestore(&task->task_state_lock, flags);
697
698 SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n",
699 cmd, task);
700 694
701 return BLK_EH_NOT_HANDLED; 695 return BLK_EH_NOT_HANDLED;
702} 696}
@@ -861,9 +855,11 @@ static void sas_queue(struct sas_ha_struct *sas_ha)
861 int res; 855 int res;
862 struct sas_internal *i = to_sas_internal(core->shost->transportt); 856 struct sas_internal *i = to_sas_internal(core->shost->transportt);
863 857
858 mutex_lock(&core->task_queue_flush);
864 spin_lock_irqsave(&core->task_queue_lock, flags); 859 spin_lock_irqsave(&core->task_queue_lock, flags);
865 while (!kthread_should_stop() && 860 while (!kthread_should_stop() &&
866 !list_empty(&core->task_queue)) { 861 !list_empty(&core->task_queue) &&
862 !test_bit(SAS_HA_FROZEN, &sas_ha->state)) {
867 863
868 can_queue = sas_ha->lldd_queue_size - core->task_queue_size; 864 can_queue = sas_ha->lldd_queue_size - core->task_queue_size;
869 if (can_queue >= 0) { 865 if (can_queue >= 0) {
@@ -899,6 +895,7 @@ static void sas_queue(struct sas_ha_struct *sas_ha)
899 } 895 }
900 } 896 }
901 spin_unlock_irqrestore(&core->task_queue_lock, flags); 897 spin_unlock_irqrestore(&core->task_queue_lock, flags);
898 mutex_unlock(&core->task_queue_flush);
902} 899}
903 900
904/** 901/**
@@ -925,6 +922,7 @@ int sas_init_queue(struct sas_ha_struct *sas_ha)
925 struct scsi_core *core = &sas_ha->core; 922 struct scsi_core *core = &sas_ha->core;
926 923
927 spin_lock_init(&core->task_queue_lock); 924 spin_lock_init(&core->task_queue_lock);
925 mutex_init(&core->task_queue_flush);
928 core->task_queue_size = 0; 926 core->task_queue_size = 0;
929 INIT_LIST_HEAD(&core->task_queue); 927 INIT_LIST_HEAD(&core->task_queue);
930 928
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 071041b290d6..aa7192ff4355 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -178,6 +178,7 @@ enum {
178}; 178};
179 179
180struct domain_device { 180struct domain_device {
181 spinlock_t done_lock;
181 enum sas_dev_type dev_type; 182 enum sas_dev_type dev_type;
182 183
183 enum sas_linkrate linkrate; 184 enum sas_linkrate linkrate;
@@ -321,6 +322,7 @@ struct asd_sas_phy {
321struct scsi_core { 322struct scsi_core {
322 struct Scsi_Host *shost; 323 struct Scsi_Host *shost;
323 324
325 struct mutex task_queue_flush;
324 spinlock_t task_queue_lock; 326 spinlock_t task_queue_lock;
325 struct list_head task_queue; 327 struct list_head task_queue;
326 int task_queue_size; 328 int task_queue_size;
@@ -337,6 +339,7 @@ enum sas_ha_state {
337 SAS_HA_REGISTERED, 339 SAS_HA_REGISTERED,
338 SAS_HA_DRAINING, 340 SAS_HA_DRAINING,
339 SAS_HA_ATA_EH_ACTIVE, 341 SAS_HA_ATA_EH_ACTIVE,
342 SAS_HA_FROZEN,
340}; 343};
341 344
342struct sas_ha_struct { 345struct sas_ha_struct {
diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h
index 557fc9a8559b..9f7a23d1146d 100644
--- a/include/scsi/sas_ata.h
+++ b/include/scsi/sas_ata.h
@@ -41,8 +41,6 @@ int sas_ata_init_host_and_port(struct domain_device *found_dev,
41 41
42void sas_ata_task_abort(struct sas_task *task); 42void sas_ata_task_abort(struct sas_task *task);
43void sas_ata_strategy_handler(struct Scsi_Host *shost); 43void sas_ata_strategy_handler(struct Scsi_Host *shost);
44int sas_ata_timed_out(struct scsi_cmnd *cmd, struct sas_task *task,
45 enum blk_eh_timer_return *rtn);
46int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, 44int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
47 struct list_head *done_q); 45 struct list_head *done_q);
48void sas_probe_sata(struct work_struct *work); 46void sas_probe_sata(struct work_struct *work);
@@ -67,12 +65,6 @@ static inline void sas_ata_strategy_handler(struct Scsi_Host *shost)
67{ 65{
68} 66}
69 67
70static inline int sas_ata_timed_out(struct scsi_cmnd *cmd,
71 struct sas_task *task,
72 enum blk_eh_timer_return *rtn)
73{
74 return 0;
75}
76static inline int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q, 68static inline int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
77 struct list_head *done_q) 69 struct list_head *done_q)
78{ 70{