Commit | Line | Data |
---|---|---|
ece1d636 TH |
1 | /* |
2 | * libata-eh.c - libata error handling | |
3 | * | |
4 | * Maintained by: Jeff Garzik <jgarzik@pobox.com> | |
5 | * Please ALWAYS copy linux-ide@vger.kernel.org | |
6 | * on emails. | |
7 | * | |
8 | * Copyright 2006 Tejun Heo <htejun@gmail.com> | |
9 | * | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU General Public License as | |
13 | * published by the Free Software Foundation; either version 2, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | * General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; see the file COPYING. If not, write to | |
23 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | |
24 | * USA. | |
25 | * | |
26 | * | |
27 | * libata documentation is available via 'make {ps|pdf}docs', | |
28 | * as Documentation/DocBook/libata.* | |
29 | * | |
30 | * Hardware documentation available from http://www.t13.org/ and | |
31 | * http://www.sata-io.org/ | |
32 | * | |
33 | */ | |
34 | ||
35 | #include <linux/config.h> | |
36 | #include <linux/kernel.h> | |
37 | #include <scsi/scsi.h> | |
38 | #include <scsi/scsi_host.h> | |
39 | #include <scsi/scsi_eh.h> | |
40 | #include <scsi/scsi_device.h> | |
41 | #include <scsi/scsi_cmnd.h> | |
42 | ||
43 | #include <linux/libata.h> | |
44 | ||
45 | #include "libata.h" | |
46 | ||
ad9e2762 TH |
47 | static void __ata_port_freeze(struct ata_port *ap); |
48 | ||
ece1d636 TH |
49 | /** |
50 | * ata_scsi_timed_out - SCSI layer time out callback | |
51 | * @cmd: timed out SCSI command | |
52 | * | |
53 | * Handles SCSI layer timeout. We race with normal completion of | |
54 | * the qc for @cmd. If the qc is already gone, we lose and let | |
55 | * the scsi command finish (EH_HANDLED). Otherwise, the qc has | |
56 | * timed out and EH should be invoked. Prevent ata_qc_complete() | |
57 | * from finishing it by setting EH_SCHEDULED and return | |
58 | * EH_NOT_HANDLED. | |
59 | * | |
ad9e2762 TH |
60 | * TODO: kill this function once old EH is gone. |
61 | * | |
ece1d636 TH |
62 | * LOCKING: |
63 | * Called from timer context | |
64 | * | |
65 | * RETURNS: | |
66 | * EH_HANDLED or EH_NOT_HANDLED | |
67 | */ | |
68 | enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) | |
69 | { | |
70 | struct Scsi_Host *host = cmd->device->host; | |
35bb94b1 | 71 | struct ata_port *ap = ata_shost_to_port(host); |
ece1d636 TH |
72 | unsigned long flags; |
73 | struct ata_queued_cmd *qc; | |
ad9e2762 | 74 | enum scsi_eh_timer_return ret; |
ece1d636 TH |
75 | |
76 | DPRINTK("ENTER\n"); | |
77 | ||
ad9e2762 TH |
78 | if (ap->ops->error_handler) { |
79 | ret = EH_NOT_HANDLED; | |
80 | goto out; | |
81 | } | |
82 | ||
83 | ret = EH_HANDLED; | |
ece1d636 TH |
84 | spin_lock_irqsave(&ap->host_set->lock, flags); |
85 | qc = ata_qc_from_tag(ap, ap->active_tag); | |
86 | if (qc) { | |
87 | WARN_ON(qc->scsicmd != cmd); | |
88 | qc->flags |= ATA_QCFLAG_EH_SCHEDULED; | |
89 | qc->err_mask |= AC_ERR_TIMEOUT; | |
90 | ret = EH_NOT_HANDLED; | |
91 | } | |
92 | spin_unlock_irqrestore(&ap->host_set->lock, flags); | |
93 | ||
ad9e2762 | 94 | out: |
ece1d636 TH |
95 | DPRINTK("EXIT, ret=%d\n", ret); |
96 | return ret; | |
97 | } | |
98 | ||
99 | /** | |
100 | * ata_scsi_error - SCSI layer error handler callback | |
101 | * @host: SCSI host on which error occurred | |
102 | * | |
103 | * Handles SCSI-layer-thrown error events. | |
104 | * | |
105 | * LOCKING: | |
106 | * Inherited from SCSI layer (none, can sleep) | |
107 | * | |
108 | * RETURNS: | |
109 | * Zero. | |
110 | */ | |
/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	spinlock_t *hs_lock = &ap->host_set->lock;
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host_set lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(hs_lock, flags);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			/* find the active qc, if any, owning this scmd */
			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs.  They belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(hs_lock, flags);
	} else
		/* old EH: just make sure no in-flight lock holder races us */
		spin_unlock_wait(hs_lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* clear EH pending */
		spin_lock_irqsave(hs_lock, flags);
		ap->flags &= ~ATA_FLAG_EH_PENDING;
		spin_unlock_irqrestore(hs_lock, flags);

		/* invoke EH */
		ap->ops->error_handler(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(hs_lock, flags);

		if (ap->flags & ATA_FLAG_EH_PENDING) {
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(hs_lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* Clear host_eh_scheduled while holding hs_lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(hs_lock, flags);
	} else {
		/* old EH: one active qc at a time, via ->eng_timeout */
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(hs_lock, flags);

	if (ap->flags & ATA_FLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");
	ap->flags &= ~ATA_FLAG_RECOVERED;

	spin_unlock_irqrestore(hs_lock, flags);

	DPRINTK("EXIT\n");
}
242 | ||
243 | /** | |
244 | * ata_qc_timeout - Handle timeout of queued command | |
245 | * @qc: Command that timed out | |
246 | * | |
247 | * Some part of the kernel (currently, only the SCSI layer) | |
248 | * has noticed that the active command on port @ap has not | |
249 | * completed after a specified length of time. Handle this | |
250 | * condition by disabling DMA (if necessary) and completing | |
251 | * transactions, with error if necessary. | |
252 | * | |
253 | * This also handles the case of the "lost interrupt", where | |
254 | * for some reason (possibly hardware bug, possibly driver bug) | |
255 | * an interrupt was not delivered to the driver, even though the | |
256 | * transaction completed successfully. | |
257 | * | |
ad9e2762 TH |
258 | * TODO: kill this function once old EH is gone. |
259 | * | |
ece1d636 TH |
260 | * LOCKING: |
261 | * Inherited from SCSI layer (none, can sleep) | |
262 | */ | |
/**
 *	ata_qc_timeout - Handle timeout of queued command
 *	@qc: Command that timed out
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct ata_host_set *host_set = ap->host_set;
	u8 host_stat = 0, drv_stat;
	unsigned long flags;

	DPRINTK("ENTER\n");

	ap->hsm_task_state = HSM_ST_IDLE;

	spin_lock_irqsave(&host_set->lock, flags);

	switch (qc->tf.protocol) {

	case ATA_PROT_DMA:
	case ATA_PROT_ATAPI_DMA:
		/* read BMDMA status before stopping the engine -
		 * bmdma_stop may clear the error bits we need */
		host_stat = ap->ops->bmdma_status(ap);

		/* before we do anything else, clear DMA-Start bit */
		ap->ops->bmdma_stop(qc);

		/* fall through */

	default:
		ata_altstatus(ap);
		drv_stat = ata_chk_status(ap);

		/* ack bmdma irq events */
		ap->ops->irq_clear(ap);

		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
			       "stat 0x%x host_stat 0x%x\n",
			       qc->tf.command, drv_stat, host_stat);

		/* complete taskfile transaction */
		qc->err_mask |= ac_err_mask(drv_stat);
		break;
	}

	spin_unlock_irqrestore(&host_set->lock, flags);

	ata_eh_qc_complete(qc);

	DPRINTK("EXIT\n");
}
309 | ||
310 | /** | |
311 | * ata_eng_timeout - Handle timeout of queued command | |
312 | * @ap: Port on which timed-out command is active | |
313 | * | |
314 | * Some part of the kernel (currently, only the SCSI layer) | |
315 | * has noticed that the active command on port @ap has not | |
316 | * completed after a specified length of time. Handle this | |
317 | * condition by disabling DMA (if necessary) and completing | |
318 | * transactions, with error if necessary. | |
319 | * | |
320 | * This also handles the case of the "lost interrupt", where | |
321 | * for some reason (possibly hardware bug, possibly driver bug) | |
322 | * an interrupt was not delivered to the driver, even though the | |
323 | * transaction completed successfully. | |
324 | * | |
ad9e2762 TH |
325 | * TODO: kill this function once old EH is gone. |
326 | * | |
ece1d636 TH |
327 | * LOCKING: |
328 | * Inherited from SCSI layer (none, can sleep) | |
329 | */ | |
330 | void ata_eng_timeout(struct ata_port *ap) | |
331 | { | |
332 | DPRINTK("ENTER\n"); | |
333 | ||
334 | ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag)); | |
335 | ||
336 | DPRINTK("EXIT\n"); | |
337 | } | |
338 | ||
f686bcb8 TH |
339 | /** |
340 | * ata_qc_schedule_eh - schedule qc for error handling | |
341 | * @qc: command to schedule error handling for | |
342 | * | |
343 | * Schedule error handling for @qc. EH will kick in as soon as | |
344 | * other commands are drained. | |
345 | * | |
346 | * LOCKING: | |
347 | * spin_lock_irqsave(host_set lock) | |
348 | */ | |
349 | void ata_qc_schedule_eh(struct ata_queued_cmd *qc) | |
350 | { | |
351 | struct ata_port *ap = qc->ap; | |
352 | ||
353 | WARN_ON(!ap->ops->error_handler); | |
354 | ||
355 | qc->flags |= ATA_QCFLAG_FAILED; | |
356 | qc->ap->flags |= ATA_FLAG_EH_PENDING; | |
357 | ||
358 | /* The following will fail if timeout has already expired. | |
359 | * ata_scsi_error() takes care of such scmds on EH entry. | |
360 | * Note that ATA_QCFLAG_FAILED is unconditionally set after | |
361 | * this function completes. | |
362 | */ | |
363 | scsi_req_abort_cmd(qc->scsicmd); | |
364 | } | |
365 | ||
7b70fc03 TH |
366 | /** |
367 | * ata_port_schedule_eh - schedule error handling without a qc | |
368 | * @ap: ATA port to schedule EH for | |
369 | * | |
370 | * Schedule error handling for @ap. EH will kick in as soon as | |
371 | * all commands are drained. | |
372 | * | |
373 | * LOCKING: | |
374 | * spin_lock_irqsave(host_set lock) | |
375 | */ | |
376 | void ata_port_schedule_eh(struct ata_port *ap) | |
377 | { | |
378 | WARN_ON(!ap->ops->error_handler); | |
379 | ||
380 | ap->flags |= ATA_FLAG_EH_PENDING; | |
381 | ata_schedule_scsi_eh(ap->host); | |
382 | ||
383 | DPRINTK("port EH scheduled\n"); | |
384 | } | |
385 | ||
386 | /** | |
387 | * ata_port_abort - abort all qc's on the port | |
388 | * @ap: ATA port to abort qc's for | |
389 | * | |
390 | * Abort all active qc's of @ap and schedule EH. | |
391 | * | |
392 | * LOCKING: | |
393 | * spin_lock_irqsave(host_set lock) | |
394 | * | |
395 | * RETURNS: | |
396 | * Number of aborted qc's. | |
397 | */ | |
398 | int ata_port_abort(struct ata_port *ap) | |
399 | { | |
400 | int tag, nr_aborted = 0; | |
401 | ||
402 | WARN_ON(!ap->ops->error_handler); | |
403 | ||
404 | for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { | |
405 | struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); | |
406 | ||
407 | if (qc) { | |
408 | qc->flags |= ATA_QCFLAG_FAILED; | |
409 | ata_qc_complete(qc); | |
410 | nr_aborted++; | |
411 | } | |
412 | } | |
413 | ||
414 | if (!nr_aborted) | |
415 | ata_port_schedule_eh(ap); | |
416 | ||
417 | return nr_aborted; | |
418 | } | |
419 | ||
e3180499 TH |
420 | /** |
421 | * __ata_port_freeze - freeze port | |
422 | * @ap: ATA port to freeze | |
423 | * | |
424 | * This function is called when HSM violation or some other | |
425 | * condition disrupts normal operation of the port. Frozen port | |
426 | * is not allowed to perform any operation until the port is | |
427 | * thawed, which usually follows a successful reset. | |
428 | * | |
429 | * ap->ops->freeze() callback can be used for freezing the port | |
430 | * hardware-wise (e.g. mask interrupt and stop DMA engine). If a | |
431 | * port cannot be frozen hardware-wise, the interrupt handler | |
432 | * must ack and clear interrupts unconditionally while the port | |
433 | * is frozen. | |
434 | * | |
435 | * LOCKING: | |
436 | * spin_lock_irqsave(host_set lock) | |
437 | */ | |
438 | static void __ata_port_freeze(struct ata_port *ap) | |
439 | { | |
440 | WARN_ON(!ap->ops->error_handler); | |
441 | ||
442 | if (ap->ops->freeze) | |
443 | ap->ops->freeze(ap); | |
444 | ||
445 | ap->flags |= ATA_FLAG_FROZEN; | |
446 | ||
447 | DPRINTK("ata%u port frozen\n", ap->id); | |
448 | } | |
449 | ||
450 | /** | |
451 | * ata_port_freeze - abort & freeze port | |
452 | * @ap: ATA port to freeze | |
453 | * | |
454 | * Abort and freeze @ap. | |
455 | * | |
456 | * LOCKING: | |
457 | * spin_lock_irqsave(host_set lock) | |
458 | * | |
459 | * RETURNS: | |
460 | * Number of aborted commands. | |
461 | */ | |
462 | int ata_port_freeze(struct ata_port *ap) | |
463 | { | |
464 | int nr_aborted; | |
465 | ||
466 | WARN_ON(!ap->ops->error_handler); | |
467 | ||
468 | nr_aborted = ata_port_abort(ap); | |
469 | __ata_port_freeze(ap); | |
470 | ||
471 | return nr_aborted; | |
472 | } | |
473 | ||
474 | /** | |
475 | * ata_eh_freeze_port - EH helper to freeze port | |
476 | * @ap: ATA port to freeze | |
477 | * | |
478 | * Freeze @ap. | |
479 | * | |
480 | * LOCKING: | |
481 | * None. | |
482 | */ | |
483 | void ata_eh_freeze_port(struct ata_port *ap) | |
484 | { | |
485 | unsigned long flags; | |
486 | ||
487 | if (!ap->ops->error_handler) | |
488 | return; | |
489 | ||
490 | spin_lock_irqsave(&ap->host_set->lock, flags); | |
491 | __ata_port_freeze(ap); | |
492 | spin_unlock_irqrestore(&ap->host_set->lock, flags); | |
493 | } | |
494 | ||
495 | /** | |
496 | * ata_port_thaw_port - EH helper to thaw port | |
497 | * @ap: ATA port to thaw | |
498 | * | |
499 | * Thaw frozen port @ap. | |
500 | * | |
501 | * LOCKING: | |
502 | * None. | |
503 | */ | |
504 | void ata_eh_thaw_port(struct ata_port *ap) | |
505 | { | |
506 | unsigned long flags; | |
507 | ||
508 | if (!ap->ops->error_handler) | |
509 | return; | |
510 | ||
511 | spin_lock_irqsave(&ap->host_set->lock, flags); | |
512 | ||
513 | ap->flags &= ~ATA_FLAG_FROZEN; | |
514 | ||
515 | if (ap->ops->thaw) | |
516 | ap->ops->thaw(ap); | |
517 | ||
518 | spin_unlock_irqrestore(&ap->host_set->lock, flags); | |
519 | ||
520 | DPRINTK("ata%u port thawed\n", ap->id); | |
521 | } | |
522 | ||
ece1d636 TH |
/* Dummy ->scsi_done callback: completion of EH-owned scmds is driven
 * through ap->eh_done_q by scsi_eh_finish_cmd(), so the normal done
 * path must do nothing.
 */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}
527 | ||
528 | static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) | |
529 | { | |
530 | struct ata_port *ap = qc->ap; | |
531 | struct scsi_cmnd *scmd = qc->scsicmd; | |
532 | unsigned long flags; | |
533 | ||
534 | spin_lock_irqsave(&ap->host_set->lock, flags); | |
535 | qc->scsidone = ata_eh_scsidone; | |
536 | __ata_qc_complete(qc); | |
537 | WARN_ON(ata_tag_valid(qc->tag)); | |
538 | spin_unlock_irqrestore(&ap->host_set->lock, flags); | |
539 | ||
540 | scsi_eh_finish_cmd(scmd, &ap->eh_done_q); | |
541 | } | |
542 | ||
543 | /** | |
544 | * ata_eh_qc_complete - Complete an active ATA command from EH | |
545 | * @qc: Command to complete | |
546 | * | |
547 | * Indicate to the mid and upper layers that an ATA command has | |
548 | * completed. To be used from EH. | |
549 | */ | |
550 | void ata_eh_qc_complete(struct ata_queued_cmd *qc) | |
551 | { | |
552 | struct scsi_cmnd *scmd = qc->scsicmd; | |
553 | scmd->retries = scmd->allowed; | |
554 | __ata_eh_qc_complete(qc); | |
555 | } | |
556 | ||
557 | /** | |
558 | * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH | |
559 | * @qc: Command to retry | |
560 | * | |
561 | * Indicate to the mid and upper layers that an ATA command | |
562 | * should be retried. To be used from EH. | |
563 | * | |
564 | * SCSI midlayer limits the number of retries to scmd->allowed. | |
565 | * scmd->retries is decremented for commands which get retried | |
566 | * due to unrelated failures (qc->err_mask is zero). | |
567 | */ | |
568 | void ata_eh_qc_retry(struct ata_queued_cmd *qc) | |
569 | { | |
570 | struct scsi_cmnd *scmd = qc->scsicmd; | |
571 | if (!qc->err_mask && scmd->retries) | |
572 | scmd->retries--; | |
573 | __ata_eh_qc_complete(qc); | |
574 | } |