about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/scsi/libata-eh.c 136
-rw-r--r-- include/linux/libata.h 3
2 files changed, 134 insertions, 5 deletions
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c
index cb4e2b8d32d9..0803231f6577 100644
--- a/drivers/scsi/libata-eh.c
+++ b/drivers/scsi/libata-eh.c
@@ -44,6 +44,8 @@
44 44
45#include "libata.h" 45#include "libata.h"
46 46
47static void __ata_port_freeze(struct ata_port *ap);
48
47/** 49/**
48 * ata_scsi_timed_out - SCSI layer time out callback 50 * ata_scsi_timed_out - SCSI layer time out callback
49 * @cmd: timed out SCSI command 51 * @cmd: timed out SCSI command
@@ -55,6 +57,8 @@
55 * from finishing it by setting EH_SCHEDULED and return 57 * from finishing it by setting EH_SCHEDULED and return
56 * EH_NOT_HANDLED. 58 * EH_NOT_HANDLED.
57 * 59 *
60 * TODO: kill this function once old EH is gone.
61 *
58 * LOCKING: 62 * LOCKING:
59 * Called from timer context 63 * Called from timer context
60 * 64 *
@@ -67,10 +71,16 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
67 struct ata_port *ap = ata_shost_to_port(host); 71 struct ata_port *ap = ata_shost_to_port(host);
68 unsigned long flags; 72 unsigned long flags;
69 struct ata_queued_cmd *qc; 73 struct ata_queued_cmd *qc;
70 enum scsi_eh_timer_return ret = EH_HANDLED; 74 enum scsi_eh_timer_return ret;
71 75
72 DPRINTK("ENTER\n"); 76 DPRINTK("ENTER\n");
73 77
78 if (ap->ops->error_handler) {
79 ret = EH_NOT_HANDLED;
80 goto out;
81 }
82
83 ret = EH_HANDLED;
74 spin_lock_irqsave(&ap->host_set->lock, flags); 84 spin_lock_irqsave(&ap->host_set->lock, flags);
75 qc = ata_qc_from_tag(ap, ap->active_tag); 85 qc = ata_qc_from_tag(ap, ap->active_tag);
76 if (qc) { 86 if (qc) {
@@ -81,6 +91,7 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
81 } 91 }
82 spin_unlock_irqrestore(&ap->host_set->lock, flags); 92 spin_unlock_irqrestore(&ap->host_set->lock, flags);
83 93
94 out:
84 DPRINTK("EXIT, ret=%d\n", ret); 95 DPRINTK("EXIT, ret=%d\n", ret);
85 return ret; 96 return ret;
86} 97}
@@ -100,21 +111,132 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
100void ata_scsi_error(struct Scsi_Host *host) 111void ata_scsi_error(struct Scsi_Host *host)
101{ 112{
102 struct ata_port *ap = ata_shost_to_port(host); 113 struct ata_port *ap = ata_shost_to_port(host);
114 spinlock_t *hs_lock = &ap->host_set->lock;
115 int i, repeat_cnt = ATA_EH_MAX_REPEAT;
116 unsigned long flags;
103 117
104 DPRINTK("ENTER\n"); 118 DPRINTK("ENTER\n");
105 119
106 /* synchronize with IRQ handler and port task */ 120 /* synchronize with port task */
107 spin_unlock_wait(&ap->host_set->lock);
108 ata_port_flush_task(ap); 121 ata_port_flush_task(ap);
109 122
110 WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL); 123 /* synchronize with host_set lock and sort out timeouts */
124
125 /* For new EH, all qcs are finished in one of three ways -
126 * normal completion, error completion, and SCSI timeout.
127 * Both completions can race against SCSI timeout. When normal
128 * completion wins, the qc never reaches EH. When error
129 * completion wins, the qc has ATA_QCFLAG_FAILED set.
130 *
131 * When SCSI timeout wins, things are a bit more complex.
132 * Normal or error completion can occur after the timeout but
133 * before this point. In such cases, both types of
134 * completions are honored. A scmd is determined to have
135 * timed out iff its associated qc is active and not failed.
136 */
137 if (ap->ops->error_handler) {
138 struct scsi_cmnd *scmd, *tmp;
139 int nr_timedout = 0;
140
141 spin_lock_irqsave(hs_lock, flags);
142
143 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
144 struct ata_queued_cmd *qc;
145
146 for (i = 0; i < ATA_MAX_QUEUE; i++) {
147 qc = __ata_qc_from_tag(ap, i);
148 if (qc->flags & ATA_QCFLAG_ACTIVE &&
149 qc->scsicmd == scmd)
150 break;
151 }
152
153 if (i < ATA_MAX_QUEUE) {
154 /* the scmd has an associated qc */
155 if (!(qc->flags & ATA_QCFLAG_FAILED)) {
156 /* which hasn't failed yet, timeout */
157 qc->err_mask |= AC_ERR_TIMEOUT;
158 qc->flags |= ATA_QCFLAG_FAILED;
159 nr_timedout++;
160 }
161 } else {
162 /* Normal completion occurred after
163 * SCSI timeout but before this point.
164 * Successfully complete it.
165 */
166 scmd->retries = scmd->allowed;
167 scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
168 }
169 }
170
171 /* If we have timed out qcs, they belong to EH from
172 * this point but the state of the controller is
173 * unknown. Freeze the port to make sure the IRQ
174 * handler doesn't diddle with those qcs. This must
175 * be done atomically w.r.t. setting QCFLAG_FAILED.
176 */
177 if (nr_timedout)
178 __ata_port_freeze(ap);
179
180 spin_unlock_irqrestore(hs_lock, flags);
181 } else
182 spin_unlock_wait(hs_lock);
183
184 repeat:
185 /* invoke error handler */
186 if (ap->ops->error_handler) {
187 /* clear EH pending */
188 spin_lock_irqsave(hs_lock, flags);
189 ap->flags &= ~ATA_FLAG_EH_PENDING;
190 spin_unlock_irqrestore(hs_lock, flags);
191
192 /* invoke EH */
193 ap->ops->error_handler(ap);
194
195 /* Exception might have happened after ->error_handler
196 * recovered the port but before this point. Repeat
197 * EH in such case.
198 */
199 spin_lock_irqsave(hs_lock, flags);
200
201 if (ap->flags & ATA_FLAG_EH_PENDING) {
202 if (--repeat_cnt) {
203 ata_port_printk(ap, KERN_INFO,
204 "EH pending after completion, "
205 "repeating EH (cnt=%d)\n", repeat_cnt);
206 spin_unlock_irqrestore(hs_lock, flags);
207 goto repeat;
208 }
209 ata_port_printk(ap, KERN_ERR, "EH pending after %d "
210 "tries, giving up\n", ATA_EH_MAX_REPEAT);
211 }
111 212
112 ap->ops->eng_timeout(ap); 213 /* Clear host_eh_scheduled while holding hs_lock such
214 * that if exception occurs after this point but
215 * before EH completion, SCSI midlayer will
216 * re-initiate EH.
217 */
218 host->host_eh_scheduled = 0;
219
220 spin_unlock_irqrestore(hs_lock, flags);
221 } else {
222 WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
223 ap->ops->eng_timeout(ap);
224 }
113 225
226 /* finish or retry handled scmd's and clean up */
114 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 227 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
115 228
116 scsi_eh_flush_done_q(&ap->eh_done_q); 229 scsi_eh_flush_done_q(&ap->eh_done_q);
117 230
231 /* clean up */
232 spin_lock_irqsave(hs_lock, flags);
233
234 if (ap->flags & ATA_FLAG_RECOVERED)
235 ata_port_printk(ap, KERN_INFO, "EH complete\n");
236 ap->flags &= ~ATA_FLAG_RECOVERED;
237
238 spin_unlock_irqrestore(hs_lock, flags);
239
118 DPRINTK("EXIT\n"); 240 DPRINTK("EXIT\n");
119} 241}
120 242
@@ -133,6 +255,8 @@ void ata_scsi_error(struct Scsi_Host *host)
133 * an interrupt was not delivered to the driver, even though the 255 * an interrupt was not delivered to the driver, even though the
134 * transaction completed successfully. 256 * transaction completed successfully.
135 * 257 *
258 * TODO: kill this function once old EH is gone.
259 *
136 * LOCKING: 260 * LOCKING:
137 * Inherited from SCSI layer (none, can sleep) 261 * Inherited from SCSI layer (none, can sleep)
138 */ 262 */
@@ -198,6 +322,8 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc)
198 * an interrupt was not delivered to the driver, even though the 322 * an interrupt was not delivered to the driver, even though the
199 * transaction completed successfully. 323 * transaction completed successfully.
200 * 324 *
325 * TODO: kill this function once old EH is gone.
326 *
201 * LOCKING: 327 * LOCKING:
202 * Inherited from SCSI layer (none, can sleep) 328 * Inherited from SCSI layer (none, can sleep)
203 */ 329 */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 6758b4d374a0..5ad50163c8ef 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -225,6 +225,9 @@ enum {
225 ATA_PORT_PRIMARY = (1 << 0), 225 ATA_PORT_PRIMARY = (1 << 0),
226 ATA_PORT_SECONDARY = (1 << 1), 226 ATA_PORT_SECONDARY = (1 << 1),
227 227
228 /* max repeat if error condition is still set after ->error_handler */
229 ATA_EH_MAX_REPEAT = 5,
230
228 /* how hard are we gonna try to probe/recover devices */ 231 /* how hard are we gonna try to probe/recover devices */
229 ATA_PROBE_MAX_TRIES = 3, 232 ATA_PROBE_MAX_TRIES = 3,
230}; 233};