diff options
author | Tejun Heo <htejun@gmail.com> | 2006-05-15 07:58:12 -0400 |
---|---|---|
committer | Tejun Heo <htejun@gmail.com> | 2006-05-15 07:58:12 -0400 |
commit | ad9e27624479bd167dd7eac0cea4bb3ad13bc926 (patch) | |
tree | 47b863c91509a29c9b0f760faa52d60a95272ba6 | |
parent | dafadcde8d4dc5ea8c742faa7ff4403336b542b8 (diff) |
[PATCH] libata-eh-fw: update ata_scsi_error() for new EH
Update ata_scsi_error() for new EH. ata_scsi_error() is responsible
for claiming timed out qcs and invoking ->error_handler in a safe and
synchronized manner. As the state of the controller is unknown if a
qc has timed out, the port is frozen in such cases.
Note that ata_scsi_timed_out() isn't used for new EH. This is because
a timed out qc cannot be claimed by EH without freezing the port and
freezing the port in ata_scsi_timed_out() results in unnecessary
abortion of other active qcs. ata_scsi_timed_out() can be removed
once all drivers are converted to new EH.
While at it, add 'TODO: kill' comments to old EH functions.
Signed-off-by: Tejun Heo <htejun@gmail.com>
-rw-r--r-- | drivers/scsi/libata-eh.c | 136 | ||||
-rw-r--r-- | include/linux/libata.h | 3 |
2 files changed, 134 insertions, 5 deletions
diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index cb4e2b8d32d9..0803231f6577 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c | |||
@@ -44,6 +44,8 @@ | |||
44 | 44 | ||
45 | #include "libata.h" | 45 | #include "libata.h" |
46 | 46 | ||
47 | static void __ata_port_freeze(struct ata_port *ap); | ||
48 | |||
47 | /** | 49 | /** |
48 | * ata_scsi_timed_out - SCSI layer time out callback | 50 | * ata_scsi_timed_out - SCSI layer time out callback |
49 | * @cmd: timed out SCSI command | 51 | * @cmd: timed out SCSI command |
@@ -55,6 +57,8 @@ | |||
55 | * from finishing it by setting EH_SCHEDULED and return | 57 | * from finishing it by setting EH_SCHEDULED and return |
56 | * EH_NOT_HANDLED. | 58 | * EH_NOT_HANDLED. |
57 | * | 59 | * |
60 | * TODO: kill this function once old EH is gone. | ||
61 | * | ||
58 | * LOCKING: | 62 | * LOCKING: |
59 | * Called from timer context | 63 | * Called from timer context |
60 | * | 64 | * |
@@ -67,10 +71,16 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) | |||
67 | struct ata_port *ap = ata_shost_to_port(host); | 71 | struct ata_port *ap = ata_shost_to_port(host); |
68 | unsigned long flags; | 72 | unsigned long flags; |
69 | struct ata_queued_cmd *qc; | 73 | struct ata_queued_cmd *qc; |
70 | enum scsi_eh_timer_return ret = EH_HANDLED; | 74 | enum scsi_eh_timer_return ret; |
71 | 75 | ||
72 | DPRINTK("ENTER\n"); | 76 | DPRINTK("ENTER\n"); |
73 | 77 | ||
78 | if (ap->ops->error_handler) { | ||
79 | ret = EH_NOT_HANDLED; | ||
80 | goto out; | ||
81 | } | ||
82 | |||
83 | ret = EH_HANDLED; | ||
74 | spin_lock_irqsave(&ap->host_set->lock, flags); | 84 | spin_lock_irqsave(&ap->host_set->lock, flags); |
75 | qc = ata_qc_from_tag(ap, ap->active_tag); | 85 | qc = ata_qc_from_tag(ap, ap->active_tag); |
76 | if (qc) { | 86 | if (qc) { |
@@ -81,6 +91,7 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) | |||
81 | } | 91 | } |
82 | spin_unlock_irqrestore(&ap->host_set->lock, flags); | 92 | spin_unlock_irqrestore(&ap->host_set->lock, flags); |
83 | 93 | ||
94 | out: | ||
84 | DPRINTK("EXIT, ret=%d\n", ret); | 95 | DPRINTK("EXIT, ret=%d\n", ret); |
85 | return ret; | 96 | return ret; |
86 | } | 97 | } |
@@ -100,21 +111,132 @@ enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) | |||
100 | void ata_scsi_error(struct Scsi_Host *host) | 111 | void ata_scsi_error(struct Scsi_Host *host) |
101 | { | 112 | { |
102 | struct ata_port *ap = ata_shost_to_port(host); | 113 | struct ata_port *ap = ata_shost_to_port(host); |
114 | spinlock_t *hs_lock = &ap->host_set->lock; | ||
115 | int i, repeat_cnt = ATA_EH_MAX_REPEAT; | ||
116 | unsigned long flags; | ||
103 | 117 | ||
104 | DPRINTK("ENTER\n"); | 118 | DPRINTK("ENTER\n"); |
105 | 119 | ||
106 | /* synchronize with IRQ handler and port task */ | 120 | /* synchronize with port task */ |
107 | spin_unlock_wait(&ap->host_set->lock); | ||
108 | ata_port_flush_task(ap); | 121 | ata_port_flush_task(ap); |
109 | 122 | ||
110 | WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL); | 123 | /* synchronize with host_set lock and sort out timeouts */ |
124 | |||
125 | /* For new EH, all qcs are finished in one of three ways - | ||
126 | * normal completion, error completion, and SCSI timeout. | ||
127 | * Both completions can race against SCSI timeout. When normal | ||
128 | * completion wins, the qc never reaches EH. When error | ||
129 | * completion wins, the qc has ATA_QCFLAG_FAILED set. | ||
130 | * | ||
131 | * When SCSI timeout wins, things are a bit more complex. | ||
132 | * Normal or error completion can occur after the timeout but | ||
133 | * before this point. In such cases, both types of | ||
134 | * completions are honored. A scmd is determined to have | ||
135 | * timed out iff its associated qc is active and not failed. | ||
136 | */ | ||
137 | if (ap->ops->error_handler) { | ||
138 | struct scsi_cmnd *scmd, *tmp; | ||
139 | int nr_timedout = 0; | ||
140 | |||
141 | spin_lock_irqsave(hs_lock, flags); | ||
142 | |||
143 | list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { | ||
144 | struct ata_queued_cmd *qc; | ||
145 | |||
146 | for (i = 0; i < ATA_MAX_QUEUE; i++) { | ||
147 | qc = __ata_qc_from_tag(ap, i); | ||
148 | if (qc->flags & ATA_QCFLAG_ACTIVE && | ||
149 | qc->scsicmd == scmd) | ||
150 | break; | ||
151 | } | ||
152 | |||
153 | if (i < ATA_MAX_QUEUE) { | ||
154 | /* the scmd has an associated qc */ | ||
155 | if (!(qc->flags & ATA_QCFLAG_FAILED)) { | ||
156 | /* which hasn't failed yet, timeout */ | ||
157 | qc->err_mask |= AC_ERR_TIMEOUT; | ||
158 | qc->flags |= ATA_QCFLAG_FAILED; | ||
159 | nr_timedout++; | ||
160 | } | ||
161 | } else { | ||
162 | /* Normal completion occurred after | ||
163 | * SCSI timeout but before this point. | ||
164 | * Successfully complete it. | ||
165 | */ | ||
166 | scmd->retries = scmd->allowed; | ||
167 | scsi_eh_finish_cmd(scmd, &ap->eh_done_q); | ||
168 | } | ||
169 | } | ||
170 | |||
171 | /* If we have timed out qcs, they belong to EH from | ||
172 | * this point but the state of the controller is | ||
173 | * unknown. Freeze the port to make sure the IRQ | ||
174 | * handler doesn't diddle with those qcs. This must | ||
175 | * be done atomically w.r.t. setting QCFLAG_FAILED. | ||
176 | */ | ||
177 | if (nr_timedout) | ||
178 | __ata_port_freeze(ap); | ||
179 | |||
180 | spin_unlock_irqrestore(hs_lock, flags); | ||
181 | } else | ||
182 | spin_unlock_wait(hs_lock); | ||
183 | |||
184 | repeat: | ||
185 | /* invoke error handler */ | ||
186 | if (ap->ops->error_handler) { | ||
187 | /* clear EH pending */ | ||
188 | spin_lock_irqsave(hs_lock, flags); | ||
189 | ap->flags &= ~ATA_FLAG_EH_PENDING; | ||
190 | spin_unlock_irqrestore(hs_lock, flags); | ||
191 | |||
192 | /* invoke EH */ | ||
193 | ap->ops->error_handler(ap); | ||
194 | |||
195 | /* Exception might have happened after ->error_handler | ||
196 | * recovered the port but before this point. Repeat | ||
197 | * EH in such case. | ||
198 | */ | ||
199 | spin_lock_irqsave(hs_lock, flags); | ||
200 | |||
201 | if (ap->flags & ATA_FLAG_EH_PENDING) { | ||
202 | if (--repeat_cnt) { | ||
203 | ata_port_printk(ap, KERN_INFO, | ||
204 | "EH pending after completion, " | ||
205 | "repeating EH (cnt=%d)\n", repeat_cnt); | ||
206 | spin_unlock_irqrestore(hs_lock, flags); | ||
207 | goto repeat; | ||
208 | } | ||
209 | ata_port_printk(ap, KERN_ERR, "EH pending after %d " | ||
210 | "tries, giving up\n", ATA_EH_MAX_REPEAT); | ||
211 | } | ||
111 | 212 | ||
112 | ap->ops->eng_timeout(ap); | 213 | /* Clear host_eh_scheduled while holding hs_lock such |
214 | * that if exception occurs after this point but | ||
215 | * before EH completion, SCSI midlayer will | ||
216 | * re-initiate EH. | ||
217 | */ | ||
218 | host->host_eh_scheduled = 0; | ||
219 | |||
220 | spin_unlock_irqrestore(hs_lock, flags); | ||
221 | } else { | ||
222 | WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL); | ||
223 | ap->ops->eng_timeout(ap); | ||
224 | } | ||
113 | 225 | ||
226 | /* finish or retry handled scmd's and clean up */ | ||
114 | WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); | 227 | WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); |
115 | 228 | ||
116 | scsi_eh_flush_done_q(&ap->eh_done_q); | 229 | scsi_eh_flush_done_q(&ap->eh_done_q); |
117 | 230 | ||
231 | /* clean up */ | ||
232 | spin_lock_irqsave(hs_lock, flags); | ||
233 | |||
234 | if (ap->flags & ATA_FLAG_RECOVERED) | ||
235 | ata_port_printk(ap, KERN_INFO, "EH complete\n"); | ||
236 | ap->flags &= ~ATA_FLAG_RECOVERED; | ||
237 | |||
238 | spin_unlock_irqrestore(hs_lock, flags); | ||
239 | |||
118 | DPRINTK("EXIT\n"); | 240 | DPRINTK("EXIT\n"); |
119 | } | 241 | } |
120 | 242 | ||
@@ -133,6 +255,8 @@ void ata_scsi_error(struct Scsi_Host *host) | |||
133 | * an interrupt was not delivered to the driver, even though the | 255 | * an interrupt was not delivered to the driver, even though the |
134 | * transaction completed successfully. | 256 | * transaction completed successfully. |
135 | * | 257 | * |
258 | * TODO: kill this function once old EH is gone. | ||
259 | * | ||
136 | * LOCKING: | 260 | * LOCKING: |
137 | * Inherited from SCSI layer (none, can sleep) | 261 | * Inherited from SCSI layer (none, can sleep) |
138 | */ | 262 | */ |
@@ -198,6 +322,8 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) | |||
198 | * an interrupt was not delivered to the driver, even though the | 322 | * an interrupt was not delivered to the driver, even though the |
199 | * transaction completed successfully. | 323 | * transaction completed successfully. |
200 | * | 324 | * |
325 | * TODO: kill this function once old EH is gone. | ||
326 | * | ||
201 | * LOCKING: | 327 | * LOCKING: |
202 | * Inherited from SCSI layer (none, can sleep) | 328 | * Inherited from SCSI layer (none, can sleep) |
203 | */ | 329 | */ |
diff --git a/include/linux/libata.h b/include/linux/libata.h index 6758b4d374a0..5ad50163c8ef 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h | |||
@@ -225,6 +225,9 @@ enum { | |||
225 | ATA_PORT_PRIMARY = (1 << 0), | 225 | ATA_PORT_PRIMARY = (1 << 0), |
226 | ATA_PORT_SECONDARY = (1 << 1), | 226 | ATA_PORT_SECONDARY = (1 << 1), |
227 | 227 | ||
228 | /* max repeat if error condition is still set after ->error_handler */ | ||
229 | ATA_EH_MAX_REPEAT = 5, | ||
230 | |||
228 | /* how hard are we gonna try to probe/recover devices */ | 231 | /* how hard are we gonna try to probe/recover devices */ |
229 | ATA_PROBE_MAX_TRIES = 3, | 232 | ATA_PROBE_MAX_TRIES = 3, |
230 | }; | 233 | }; |