[SCSI] allow sleeping in ->eh_host_reset_handler()
[deliverable/linux.git] / drivers / scsi / scsi_error.c
CommitLineData
1da177e4
LT
1/*
2 * scsi_error.c Copyright (C) 1997 Eric Youngdale
3 *
4 * SCSI error/timeout handling
5 * Initial versions: Eric Youngdale. Based upon conversations with
6 * Leonard Zubkoff and David Miller at Linux Expo,
7 * ideas originating from all over the place.
8 *
9 * Restructured scsi_unjam_host and associated functions.
10 * September 04, 2002 Mike Anderson (andmike@us.ibm.com)
11 *
12 * Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
13 * minor cleanups.
14 * September 30, 2002 Mike Anderson (andmike@us.ibm.com)
15 */
16
17#include <linux/module.h>
18#include <linux/sched.h>
19#include <linux/timer.h>
20#include <linux/string.h>
21#include <linux/slab.h>
22#include <linux/kernel.h>
23#include <linux/interrupt.h>
24#include <linux/blkdev.h>
25#include <linux/delay.h>
26
27#include <scsi/scsi.h>
28#include <scsi/scsi_dbg.h>
29#include <scsi/scsi_device.h>
30#include <scsi/scsi_eh.h>
31#include <scsi/scsi_host.h>
32#include <scsi/scsi_ioctl.h>
33#include <scsi/scsi_request.h>
34
35#include "scsi_priv.h"
36#include "scsi_logging.h"
37
38#define SENSE_TIMEOUT (10*HZ)
39#define START_UNIT_TIMEOUT (30*HZ)
40
41/*
42 * These should *probably* be handled by the host itself.
43 * Since it is allowed to sleep, it probably should.
44 */
45#define BUS_RESET_SETTLE_TIME (10)
46#define HOST_RESET_SETTLE_TIME (10)
47
48/* called with shost->host_lock held */
49void scsi_eh_wakeup(struct Scsi_Host *shost)
50{
51 if (shost->host_busy == shost->host_failed) {
52 up(shost->eh_wait);
53 SCSI_LOG_ERROR_RECOVERY(5,
54 printk("Waking error handler thread\n"));
55 }
56}
57
58/**
59 * scsi_eh_scmd_add - add scsi cmd to error handling.
60 * @scmd: scmd to run eh on.
61 * @eh_flag: optional SCSI_EH flag.
62 *
63 * Return value:
64 * 0 on failure.
65 **/
66int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
67{
68 struct Scsi_Host *shost = scmd->device->host;
69 unsigned long flags;
70
71 if (shost->eh_wait == NULL)
72 return 0;
73
74 spin_lock_irqsave(shost->host_lock, flags);
75
76 scsi_eh_eflags_set(scmd, eh_flag);
77 /*
78 * FIXME: Can we stop setting owner and state.
79 */
80 scmd->owner = SCSI_OWNER_ERROR_HANDLER;
81 scmd->state = SCSI_STATE_FAILED;
1da177e4
LT
82 list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
83 set_bit(SHOST_RECOVERY, &shost->shost_state);
84 shost->host_failed++;
85 scsi_eh_wakeup(shost);
86 spin_unlock_irqrestore(shost->host_lock, flags);
87 return 1;
88}
89
90/**
91 * scsi_add_timer - Start timeout timer for a single scsi command.
92 * @scmd: scsi command that is about to start running.
93 * @timeout: amount of time to allow this command to run.
94 * @complete: timeout function to call if timer isn't canceled.
95 *
96 * Notes:
97 * This should be turned into an inline function. Each scsi command
98 * has its own timer, and as it is added to the queue, we set up the
99 * timer. When the command completes, we cancel the timer.
100 **/
101void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
102 void (*complete)(struct scsi_cmnd *))
103{
104
105 /*
106 * If the clock was already running for this command, then
107 * first delete the timer. The timer handling code gets rather
108 * confused if we don't do this.
109 */
110 if (scmd->eh_timeout.function)
111 del_timer(&scmd->eh_timeout);
112
113 scmd->eh_timeout.data = (unsigned long)scmd;
114 scmd->eh_timeout.expires = jiffies + timeout;
115 scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
116
117 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
118 " %d, (%p)\n", __FUNCTION__,
119 scmd, timeout, complete));
120
121 add_timer(&scmd->eh_timeout);
122}
123EXPORT_SYMBOL(scsi_add_timer);
124
125/**
126 * scsi_delete_timer - Delete/cancel timer for a given function.
127 * @scmd: Cmd that we are canceling timer for
128 *
129 * Notes:
130 * This should be turned into an inline function.
131 *
132 * Return value:
133 * 1 if we were able to detach the timer. 0 if we blew it, and the
134 * timer function has already started to run.
135 **/
136int scsi_delete_timer(struct scsi_cmnd *scmd)
137{
138 int rtn;
139
140 rtn = del_timer(&scmd->eh_timeout);
141
142 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
143 " rtn: %d\n", __FUNCTION__,
144 scmd, rtn));
145
146 scmd->eh_timeout.data = (unsigned long)NULL;
147 scmd->eh_timeout.function = NULL;
148
149 return rtn;
150}
151EXPORT_SYMBOL(scsi_delete_timer);
152
153/**
154 * scsi_times_out - Timeout function for normal scsi commands.
155 * @scmd: Cmd that is timing out.
156 *
157 * Notes:
158 * We do not need to lock this. There is the potential for a race
159 * only in that the normal completion handling might run, but if the
160 * normal completion function determines that the timer has already
161 * fired, then it mustn't do anything.
162 **/
163void scsi_times_out(struct scsi_cmnd *scmd)
164{
165 scsi_log_completion(scmd, TIMEOUT_ERROR);
166
167 if (scmd->device->host->hostt->eh_timed_out)
168 switch (scmd->device->host->hostt->eh_timed_out(scmd)) {
169 case EH_HANDLED:
170 __scsi_done(scmd);
171 return;
172 case EH_RESET_TIMER:
173 /* This allows a single retry even of a command
174 * with allowed == 0 */
175 if (scmd->retries++ > scmd->allowed)
176 break;
177 scsi_add_timer(scmd, scmd->timeout_per_command,
178 scsi_times_out);
179 return;
180 case EH_NOT_HANDLED:
181 break;
182 }
183
184 if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
185 panic("Error handler thread not present at %p %p %s %d",
186 scmd, scmd->device->host, __FILE__, __LINE__);
187 }
188}
189
190/**
191 * scsi_block_when_processing_errors - Prevent cmds from being queued.
192 * @sdev: Device on which we are performing recovery.
193 *
194 * Description:
195 * We block until the host is out of error recovery, and then check to
196 * see whether the host or the device is offline.
197 *
198 * Return value:
199 * 0 when dev was taken offline by error recovery. 1 OK to proceed.
200 **/
201int scsi_block_when_processing_errors(struct scsi_device *sdev)
202{
203 int online;
204
205 wait_event(sdev->host->host_wait, (!test_bit(SHOST_RECOVERY, &sdev->host->shost_state)));
206
207 online = scsi_device_online(sdev);
208
209 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
210 online));
211
212 return online;
213}
214EXPORT_SYMBOL(scsi_block_when_processing_errors);
215
#ifdef CONFIG_SCSI_LOGGING
/**
 * scsi_eh_prt_fail_stats - log a per-device summary of failed commands.
 * @shost:	scsi host being recovered.
 * @work_q:	queue of scsi cmds awaiting error handling.
 *
 * For every device of @shost that owns at least one command on @work_q,
 * the number of failed and of to-be-canceled commands is logged; a grand
 * total is logged at the end.
 **/
static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
					  struct list_head *work_q)
{
	struct scsi_cmnd *scmd;
	struct scsi_device *sdev;
	int total = 0;
	int devices = 0;

	shost_for_each_device(sdev, shost) {
		/* Counters for the current device only. */
		int failed = 0;
		int cancelled = 0;

		list_for_each_entry(scmd, work_q, eh_entry) {
			if (scmd->device != sdev)
				continue;

			total++;
			if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD))
				cancelled++;
			else
				failed++;
		}

		/* Devices without any queued command are not reported. */
		if (!cancelled && !failed)
			continue;

		SCSI_LOG_ERROR_RECOVERY(3,
			printk("%s: %d:%d:%d:%d cmds failed: %d,"
			       " cancel: %d\n",
			       __FUNCTION__, shost->host_no,
			       sdev->channel, sdev->id, sdev->lun,
			       failed, cancelled));
		devices++;
	}

	SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"
					  " devices require eh work\n",
					  total, devices));
}
#endif
262
263/**
264 * scsi_check_sense - Examine scsi cmd sense
265 * @scmd: Cmd to have sense checked.
266 *
267 * Return value:
268 * SUCCESS or FAILED or NEEDS_RETRY
269 *
270 * Notes:
271 * When a deferred error is detected the current command has
272 * not been executed and needs retrying.
273 **/
274static int scsi_check_sense(struct scsi_cmnd *scmd)
275{
276 struct scsi_sense_hdr sshdr;
277
278 if (! scsi_command_normalize_sense(scmd, &sshdr))
279 return FAILED; /* no valid sense data */
280
281 if (scsi_sense_is_deferred(&sshdr))
282 return NEEDS_RETRY;
283
284 /*
285 * Previous logic looked for FILEMARK, EOM or ILI which are
286 * mainly associated with tapes and returned SUCCESS.
287 */
288 if (sshdr.response_code == 0x70) {
289 /* fixed format */
290 if (scmd->sense_buffer[2] & 0xe0)
291 return SUCCESS;
292 } else {
293 /*
294 * descriptor format: look for "stream commands sense data
295 * descriptor" (see SSC-3). Assume single sense data
296 * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG.
297 */
298 if ((sshdr.additional_length > 3) &&
299 (scmd->sense_buffer[8] == 0x4) &&
300 (scmd->sense_buffer[11] & 0xe0))
301 return SUCCESS;
302 }
303
304 switch (sshdr.sense_key) {
305 case NO_SENSE:
306 return SUCCESS;
307 case RECOVERED_ERROR:
308 return /* soft_error */ SUCCESS;
309
310 case ABORTED_COMMAND:
311 return NEEDS_RETRY;
312 case NOT_READY:
313 case UNIT_ATTENTION:
314 /*
315 * if we are expecting a cc/ua because of a bus reset that we
316 * performed, treat this just as a retry. otherwise this is
317 * information that we should pass up to the upper-level driver
318 * so that we can deal with it there.
319 */
320 if (scmd->device->expecting_cc_ua) {
321 scmd->device->expecting_cc_ua = 0;
322 return NEEDS_RETRY;
323 }
324 /*
325 * if the device is in the process of becoming ready, we
326 * should retry.
327 */
328 if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
329 return NEEDS_RETRY;
330 /*
331 * if the device is not started, we need to wake
332 * the error handler to start the motor
333 */
334 if (scmd->device->allow_restart &&
335 (sshdr.asc == 0x04) && (sshdr.ascq == 0x02))
336 return FAILED;
337 return SUCCESS;
338
339 /* these three are not supported */
340 case COPY_ABORTED:
341 case VOLUME_OVERFLOW:
342 case MISCOMPARE:
343 return SUCCESS;
344
345 case MEDIUM_ERROR:
346 return NEEDS_RETRY;
347
348 case HARDWARE_ERROR:
349 if (scmd->device->retry_hwerror)
350 return NEEDS_RETRY;
351 else
352 return SUCCESS;
353
354 case ILLEGAL_REQUEST:
355 case BLANK_CHECK:
356 case DATA_PROTECT:
357 default:
358 return SUCCESS;
359 }
360}
361
362/**
363 * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD.
364 * @scmd: SCSI cmd to examine.
365 *
366 * Notes:
367 * This is *only* called when we are examining the status of commands
368 * queued during error recovery. the main difference here is that we
369 * don't allow for the possibility of retries here, and we are a lot
370 * more restrictive about what we consider acceptable.
371 **/
372static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
373{
374 /*
375 * first check the host byte, to see if there is anything in there
376 * that would indicate what we need to do.
377 */
378 if (host_byte(scmd->result) == DID_RESET) {
379 /*
380 * rats. we are already in the error handler, so we now
381 * get to try and figure out what to do next. if the sense
382 * is valid, we have a pretty good idea of what to do.
383 * if not, we mark it as FAILED.
384 */
385 return scsi_check_sense(scmd);
386 }
387 if (host_byte(scmd->result) != DID_OK)
388 return FAILED;
389
390 /*
391 * next, check the message byte.
392 */
393 if (msg_byte(scmd->result) != COMMAND_COMPLETE)
394 return FAILED;
395
396 /*
397 * now, check the status byte to see if this indicates
398 * anything special.
399 */
400 switch (status_byte(scmd->result)) {
401 case GOOD:
402 case COMMAND_TERMINATED:
403 return SUCCESS;
404 case CHECK_CONDITION:
405 return scsi_check_sense(scmd);
406 case CONDITION_GOOD:
407 case INTERMEDIATE_GOOD:
408 case INTERMEDIATE_C_GOOD:
409 /*
410 * who knows? FIXME(eric)
411 */
412 return SUCCESS;
413 case BUSY:
414 case QUEUE_FULL:
415 case RESERVATION_CONFLICT:
416 default:
417 return FAILED;
418 }
419 return FAILED;
420}
421
422/**
423 * scsi_eh_times_out - timeout function for error handling.
424 * @scmd: Cmd that is timing out.
425 *
426 * Notes:
427 * During error handling, the kernel thread will be sleeping waiting
428 * for some action to complete on the device. our only job is to
429 * record that it timed out, and to wake up the thread.
430 **/
431static void scsi_eh_times_out(struct scsi_cmnd *scmd)
432{
433 scsi_eh_eflags_set(scmd, SCSI_EH_REC_TIMEOUT);
434 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
435 scmd));
436
5b8ef842 437 up(scmd->device->host->eh_action);
1da177e4
LT
438}
439
440/**
441 * scsi_eh_done - Completion function for error handling.
442 * @scmd: Cmd that is done.
443 **/
444static void scsi_eh_done(struct scsi_cmnd *scmd)
445{
446 /*
447 * if the timeout handler is already running, then just set the
448 * flag which says we finished late, and return. we have no
449 * way of stopping the timeout handler from running, so we must
450 * always defer to it.
451 */
452 if (del_timer(&scmd->eh_timeout)) {
453 scmd->request->rq_status = RQ_SCSI_DONE;
454 scmd->owner = SCSI_OWNER_ERROR_HANDLER;
455
456 SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
457 __FUNCTION__, scmd, scmd->result));
458
5b8ef842 459 up(scmd->device->host->eh_action);
1da177e4
LT
460 }
461}
462
463/**
464 * scsi_send_eh_cmnd - send a cmd to a device as part of error recovery.
465 * @scmd: SCSI Cmd to send.
466 * @timeout: Timeout for cmd.
467 *
468 * Notes:
469 * The initialization of the structures is quite a bit different in
470 * this case, and furthermore, there is a different completion handler
471 * vs scsi_dispatch_cmd.
472 * Return value:
473 * SUCCESS or FAILED or NEEDS_RETRY
474 **/
475static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
476{
f59114b7 477 struct scsi_device *sdev = scmd->device;
478 struct Scsi_Host *shost = sdev->host;
1da177e4
LT
479 DECLARE_MUTEX_LOCKED(sem);
480 unsigned long flags;
481 int rtn = SUCCESS;
482
483 /*
484 * we will use a queued command if possible, otherwise we will
485 * emulate the queuing and calling of completion function ourselves.
486 */
487 scmd->owner = SCSI_OWNER_LOWLEVEL;
488
f59114b7 489 if (sdev->scsi_level <= SCSI_2)
1da177e4 490 scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
f59114b7 491 (sdev->lun << 5 & 0xe0);
1da177e4
LT
492
493 scsi_add_timer(scmd, timeout, scsi_eh_times_out);
494
495 /*
496 * set up the semaphore so we wait for the command to complete.
497 */
f59114b7 498 shost->eh_action = &sem;
1da177e4
LT
499 scmd->request->rq_status = RQ_SCSI_BUSY;
500
f59114b7 501 spin_lock_irqsave(shost->host_lock, flags);
1da177e4 502 scsi_log_send(scmd);
f59114b7 503 shost->hostt->queuecommand(scmd, scsi_eh_done);
504 spin_unlock_irqrestore(shost->host_lock, flags);
1da177e4
LT
505
506 down(&sem);
507 scsi_log_completion(scmd, SUCCESS);
508
f59114b7 509 shost->eh_action = NULL;
1da177e4
LT
510
511 /*
512 * see if timeout. if so, tell the host to forget about it.
513 * in other words, we don't want a callback any more.
514 */
515 if (scsi_eh_eflags_chk(scmd, SCSI_EH_REC_TIMEOUT)) {
516 scsi_eh_eflags_clr(scmd, SCSI_EH_REC_TIMEOUT);
517 scmd->owner = SCSI_OWNER_LOWLEVEL;
518
519 /*
520 * as far as the low level driver is
521 * concerned, this command is still active, so
522 * we must give the low level driver a chance
523 * to abort it. (db)
524 *
525 * FIXME(eric) - we are not tracking whether we could
526 * abort a timed out command or not. not sure how
527 * we should treat them differently anyways.
528 */
f59114b7 529 if (shost->hostt->eh_abort_handler)
530 shost->hostt->eh_abort_handler(scmd);
1da177e4
LT
531
532 scmd->request->rq_status = RQ_SCSI_DONE;
533 scmd->owner = SCSI_OWNER_ERROR_HANDLER;
534
535 rtn = FAILED;
536 }
537
538 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
539 __FUNCTION__, scmd, rtn));
540
541 /*
542 * now examine the actual status codes to see whether the command
543 * actually did complete normally.
544 */
545 if (rtn == SUCCESS) {
546 rtn = scsi_eh_completed_normally(scmd);
547 SCSI_LOG_ERROR_RECOVERY(3,
548 printk("%s: scsi_eh_completed_normally %x\n",
549 __FUNCTION__, rtn));
550 switch (rtn) {
551 case SUCCESS:
552 case NEEDS_RETRY:
553 case FAILED:
554 break;
555 default:
556 rtn = FAILED;
557 break;
558 }
559 }
560
561 return rtn;
562}
563
564/**
565 * scsi_request_sense - Request sense data from a particular target.
566 * @scmd: SCSI cmd for request sense.
567 *
568 * Notes:
569 * Some hosts automatically obtain this information, others require
570 * that we obtain it on our own. This function will *not* return until
571 * the command either times out, or it completes.
572 **/
573static int scsi_request_sense(struct scsi_cmnd *scmd)
574{
575 static unsigned char generic_sense[6] =
576 {REQUEST_SENSE, 0, 0, 0, 252, 0};
577 unsigned char *scsi_result;
578 int saved_result;
579 int rtn;
580
581 memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense));
582
bc86120a 583 scsi_result = kmalloc(252, GFP_ATOMIC | ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0));
1da177e4
LT
584
585
586 if (unlikely(!scsi_result)) {
587 printk(KERN_ERR "%s: cannot allocate scsi_result.\n",
588 __FUNCTION__);
589 return FAILED;
590 }
591
592 /*
593 * zero the sense buffer. some host adapters automatically always
594 * request sense, so it is not a good idea that
595 * scmd->request_buffer and scmd->sense_buffer point to the same
596 * address (db). 0 is not a valid sense code.
597 */
598 memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
599 memset(scsi_result, 0, 252);
600
601 saved_result = scmd->result;
602 scmd->request_buffer = scsi_result;
603 scmd->request_bufflen = 252;
604 scmd->use_sg = 0;
605 scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
606 scmd->sc_data_direction = DMA_FROM_DEVICE;
607 scmd->underflow = 0;
608
609 rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
610
611 /* last chance to have valid sense data */
612 if(!SCSI_SENSE_VALID(scmd)) {
613 memcpy(scmd->sense_buffer, scmd->request_buffer,
614 sizeof(scmd->sense_buffer));
615 }
616
617 kfree(scsi_result);
618
619 /*
620 * when we eventually call scsi_finish, we really wish to complete
621 * the original request, so let's restore the original data. (db)
622 */
623 scsi_setup_cmd_retry(scmd);
624 scmd->result = saved_result;
625 return rtn;
626}
627
628/**
629 * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
630 * @scmd: Original SCSI cmd that eh has finished.
631 * @done_q: Queue for processed commands.
632 *
633 * Notes:
634 * We don't want to use the normal command completion while we are are
635 * still handling errors - it may cause other commands to be queued,
636 * and that would disturb what we are doing. thus we really want to
637 * keep a list of pending commands for final completion, and once we
638 * are ready to leave error handling we handle completion for real.
639 **/
640static void scsi_eh_finish_cmd(struct scsi_cmnd *scmd,
641 struct list_head *done_q)
642{
643 scmd->device->host->host_failed--;
644 scmd->state = SCSI_STATE_BHQUEUE;
645
646 scsi_eh_eflags_clr_all(scmd);
647
648 /*
649 * set this back so that the upper level can correctly free up
650 * things.
651 */
652 scsi_setup_cmd_retry(scmd);
653 list_move_tail(&scmd->eh_entry, done_q);
654}
655
656/**
657 * scsi_eh_get_sense - Get device sense data.
658 * @work_q: Queue of commands to process.
659 * @done_q: Queue of proccessed commands..
660 *
661 * Description:
662 * See if we need to request sense information. if so, then get it
663 * now, so we have a better idea of what to do.
664 *
665 * Notes:
666 * This has the unfortunate side effect that if a shost adapter does
667 * not automatically request sense information, that we end up shutting
668 * it down before we request it.
669 *
670 * All drivers should request sense information internally these days,
671 * so for now all I have to say is tough noogies if you end up in here.
672 *
673 * XXX: Long term this code should go away, but that needs an audit of
674 * all LLDDs first.
675 **/
676static int scsi_eh_get_sense(struct list_head *work_q,
677 struct list_head *done_q)
678{
679 struct list_head *lh, *lh_sf;
680 struct scsi_cmnd *scmd;
681 int rtn;
682
683 list_for_each_safe(lh, lh_sf, work_q) {
684 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
685 if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD) ||
686 SCSI_SENSE_VALID(scmd))
687 continue;
688
689 SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
690 " for id: %d\n",
691 current->comm,
692 scmd->device->id));
693 rtn = scsi_request_sense(scmd);
694 if (rtn != SUCCESS)
695 continue;
696
697 SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
698 " result %x\n", scmd,
699 scmd->result));
700 SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd));
701
702 rtn = scsi_decide_disposition(scmd);
703
704 /*
705 * if the result was normal, then just pass it along to the
706 * upper level.
707 */
708 if (rtn == SUCCESS)
709 /* we don't want this command reissued, just
710 * finished with the sense data, so set
711 * retries to the max allowed to ensure it
712 * won't get reissued */
713 scmd->retries = scmd->allowed;
714 else if (rtn != NEEDS_RETRY)
715 continue;
716
717 scsi_eh_finish_cmd(scmd, done_q);
718 }
719
720 return list_empty(work_q);
721}
722
723/**
724 * scsi_try_to_abort_cmd - Ask host to abort a running command.
725 * @scmd: SCSI cmd to abort from Lower Level.
726 *
727 * Notes:
728 * This function will not return until the user's completion function
729 * has been called. there is no timeout on this operation. if the
730 * author of the low-level driver wishes this operation to be timed,
731 * they can provide this facility themselves. helper functions in
732 * scsi_error.c can be supplied to make this easier to do.
733 **/
734static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
735{
1da177e4 736 if (!scmd->device->host->hostt->eh_abort_handler)
8fa728a2 737 return FAILED;
1da177e4
LT
738
739 /*
740 * scsi_done was called just after the command timed out and before
741 * we had a chance to process it. (db)
742 */
743 if (scmd->serial_number == 0)
744 return SUCCESS;
745
746 scmd->owner = SCSI_OWNER_LOWLEVEL;
747
8fa728a2 748 return scmd->device->host->hostt->eh_abort_handler(scmd);
1da177e4
LT
749}
750
751/**
752 * scsi_eh_tur - Send TUR to device.
753 * @scmd: Scsi cmd to send TUR
754 *
755 * Return value:
756 * 0 - Device is ready. 1 - Device NOT ready.
757 **/
758static int scsi_eh_tur(struct scsi_cmnd *scmd)
759{
760 static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0};
761 int retry_cnt = 1, rtn;
793698ce 762 int saved_result;
1da177e4
LT
763
764retry_tur:
765 memcpy(scmd->cmnd, tur_command, sizeof(tur_command));
766
767 /*
768 * zero the sense buffer. the scsi spec mandates that any
769 * untransferred sense data should be interpreted as being zero.
770 */
771 memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
772
793698ce 773 saved_result = scmd->result;
1da177e4
LT
774 scmd->request_buffer = NULL;
775 scmd->request_bufflen = 0;
776 scmd->use_sg = 0;
777 scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
778 scmd->underflow = 0;
779 scmd->sc_data_direction = DMA_NONE;
780
781 rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
782
783 /*
784 * when we eventually call scsi_finish, we really wish to complete
785 * the original request, so let's restore the original data. (db)
786 */
787 scsi_setup_cmd_retry(scmd);
793698ce 788 scmd->result = saved_result;
1da177e4
LT
789
790 /*
791 * hey, we are done. let's look to see what happened.
792 */
793 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
794 __FUNCTION__, scmd, rtn));
795 if (rtn == SUCCESS)
796 return 0;
797 else if (rtn == NEEDS_RETRY)
798 if (retry_cnt--)
799 goto retry_tur;
800 return 1;
801}
802
803/**
804 * scsi_eh_abort_cmds - abort canceled commands.
805 * @shost: scsi host being recovered.
806 * @eh_done_q: list_head for processed commands.
807 *
808 * Decription:
809 * Try and see whether or not it makes sense to try and abort the
810 * running command. this only works out to be the case if we have one
811 * command that has timed out. if the command simply failed, it makes
812 * no sense to try and abort the command, since as far as the shost
813 * adapter is concerned, it isn't running.
814 **/
815static int scsi_eh_abort_cmds(struct list_head *work_q,
816 struct list_head *done_q)
817{
818 struct list_head *lh, *lh_sf;
819 struct scsi_cmnd *scmd;
820 int rtn;
821
822 list_for_each_safe(lh, lh_sf, work_q) {
823 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
824 if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD))
825 continue;
826 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
827 "0x%p\n", current->comm,
828 scmd));
829 rtn = scsi_try_to_abort_cmd(scmd);
830 if (rtn == SUCCESS) {
831 scsi_eh_eflags_clr(scmd, SCSI_EH_CANCEL_CMD);
832 if (!scsi_device_online(scmd->device) ||
833 !scsi_eh_tur(scmd)) {
834 scsi_eh_finish_cmd(scmd, done_q);
835 }
836
837 } else
838 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
839 " cmd failed:"
840 "0x%p\n",
841 current->comm,
842 scmd));
843 }
844
845 return list_empty(work_q);
846}
847
848/**
849 * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
850 * @scmd: SCSI cmd used to send BDR
851 *
852 * Notes:
853 * There is no timeout for this operation. if this operation is
854 * unreliable for a given host, then the host itself needs to put a
855 * timer on it, and set the host back to a consistent state prior to
856 * returning.
857 **/
858static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
859{
94d0e7b8 860 int rtn;
1da177e4
LT
861
862 if (!scmd->device->host->hostt->eh_device_reset_handler)
94d0e7b8 863 return FAILED;
1da177e4
LT
864
865 scmd->owner = SCSI_OWNER_LOWLEVEL;
866
1da177e4 867 rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
1da177e4
LT
868
869 if (rtn == SUCCESS) {
870 scmd->device->was_reset = 1;
871 scmd->device->expecting_cc_ua = 1;
872 }
873
874 return rtn;
875}
876
877/**
878 * scsi_eh_try_stu - Send START_UNIT to device.
879 * @scmd: Scsi cmd to send START_UNIT
880 *
881 * Return value:
882 * 0 - Device is ready. 1 - Device NOT ready.
883 **/
884static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
885{
886 static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0};
887 int rtn;
793698ce 888 int saved_result;
1da177e4
LT
889
890 if (!scmd->device->allow_restart)
891 return 1;
892
893 memcpy(scmd->cmnd, stu_command, sizeof(stu_command));
894
895 /*
896 * zero the sense buffer. the scsi spec mandates that any
897 * untransferred sense data should be interpreted as being zero.
898 */
899 memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
900
793698ce 901 saved_result = scmd->result;
1da177e4
LT
902 scmd->request_buffer = NULL;
903 scmd->request_bufflen = 0;
904 scmd->use_sg = 0;
905 scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
906 scmd->underflow = 0;
907 scmd->sc_data_direction = DMA_NONE;
908
909 rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT);
910
911 /*
912 * when we eventually call scsi_finish, we really wish to complete
913 * the original request, so let's restore the original data. (db)
914 */
915 scsi_setup_cmd_retry(scmd);
793698ce 916 scmd->result = saved_result;
1da177e4
LT
917
918 /*
919 * hey, we are done. let's look to see what happened.
920 */
921 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
922 __FUNCTION__, scmd, rtn));
923 if (rtn == SUCCESS)
924 return 0;
925 return 1;
926}
927
928 /**
929 * scsi_eh_stu - send START_UNIT if needed
930 * @shost: scsi host being recovered.
931 * @eh_done_q: list_head for processed commands.
932 *
933 * Notes:
934 * If commands are failing due to not ready, initializing command required,
935 * try revalidating the device, which will end up sending a start unit.
936 **/
937static int scsi_eh_stu(struct Scsi_Host *shost,
938 struct list_head *work_q,
939 struct list_head *done_q)
940{
941 struct list_head *lh, *lh_sf;
942 struct scsi_cmnd *scmd, *stu_scmd;
943 struct scsi_device *sdev;
944
945 shost_for_each_device(sdev, shost) {
946 stu_scmd = NULL;
947 list_for_each_entry(scmd, work_q, eh_entry)
948 if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
949 scsi_check_sense(scmd) == FAILED ) {
950 stu_scmd = scmd;
951 break;
952 }
953
954 if (!stu_scmd)
955 continue;
956
957 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:"
958 " 0x%p\n", current->comm, sdev));
959
960 if (!scsi_eh_try_stu(stu_scmd)) {
961 if (!scsi_device_online(sdev) ||
962 !scsi_eh_tur(stu_scmd)) {
963 list_for_each_safe(lh, lh_sf, work_q) {
964 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
965 if (scmd->device == sdev)
966 scsi_eh_finish_cmd(scmd, done_q);
967 }
968 }
969 } else {
970 SCSI_LOG_ERROR_RECOVERY(3,
971 printk("%s: START_UNIT failed to sdev:"
972 " 0x%p\n", current->comm, sdev));
973 }
974 }
975
976 return list_empty(work_q);
977}
978
979
980/**
981 * scsi_eh_bus_device_reset - send bdr if needed
982 * @shost: scsi host being recovered.
983 * @eh_done_q: list_head for processed commands.
984 *
985 * Notes:
986 * Try a bus device reset. still, look to see whether we have multiple
987 * devices that are jammed or not - if we have multiple devices, it
988 * makes no sense to try bus_device_reset - we really would need to try
989 * a bus_reset instead.
990 **/
991static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
992 struct list_head *work_q,
993 struct list_head *done_q)
994{
995 struct list_head *lh, *lh_sf;
996 struct scsi_cmnd *scmd, *bdr_scmd;
997 struct scsi_device *sdev;
998 int rtn;
999
1000 shost_for_each_device(sdev, shost) {
1001 bdr_scmd = NULL;
1002 list_for_each_entry(scmd, work_q, eh_entry)
1003 if (scmd->device == sdev) {
1004 bdr_scmd = scmd;
1005 break;
1006 }
1007
1008 if (!bdr_scmd)
1009 continue;
1010
1011 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
1012 " 0x%p\n", current->comm,
1013 sdev));
1014 rtn = scsi_try_bus_device_reset(bdr_scmd);
1015 if (rtn == SUCCESS) {
1016 if (!scsi_device_online(sdev) ||
1017 !scsi_eh_tur(bdr_scmd)) {
1018 list_for_each_safe(lh, lh_sf,
1019 work_q) {
1020 scmd = list_entry(lh, struct
1021 scsi_cmnd,
1022 eh_entry);
1023 if (scmd->device == sdev)
1024 scsi_eh_finish_cmd(scmd,
1025 done_q);
1026 }
1027 }
1028 } else {
1029 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
1030 " failed sdev:"
1031 "0x%p\n",
1032 current->comm,
1033 sdev));
1034 }
1035 }
1036
1037 return list_empty(work_q);
1038}
1039
1040/**
1041 * scsi_try_bus_reset - ask host to perform a bus reset
1042 * @scmd: SCSI cmd to send bus reset.
1043 **/
1044static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
1045{
1046 unsigned long flags;
1047 int rtn;
1048
1049 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
1050 __FUNCTION__));
1051 scmd->owner = SCSI_OWNER_LOWLEVEL;
1da177e4
LT
1052
1053 if (!scmd->device->host->hostt->eh_bus_reset_handler)
1054 return FAILED;
1055
1da177e4 1056 rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
1da177e4
LT
1057
1058 if (rtn == SUCCESS) {
1059 if (!scmd->device->host->hostt->skip_settle_delay)
1060 ssleep(BUS_RESET_SETTLE_TIME);
1061 spin_lock_irqsave(scmd->device->host->host_lock, flags);
1062 scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
1063 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1064 }
1065
1066 return rtn;
1067}
1068
1069/**
1070 * scsi_try_host_reset - ask host adapter to reset itself
1071 * @scmd: SCSI cmd to send hsot reset.
1072 **/
1073static int scsi_try_host_reset(struct scsi_cmnd *scmd)
1074{
1075 unsigned long flags;
1076 int rtn;
1077
1078 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
1079 __FUNCTION__));
1080 scmd->owner = SCSI_OWNER_LOWLEVEL;
1da177e4
LT
1081
1082 if (!scmd->device->host->hostt->eh_host_reset_handler)
1083 return FAILED;
1084
1da177e4 1085 rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
1da177e4
LT
1086
1087 if (rtn == SUCCESS) {
1088 if (!scmd->device->host->hostt->skip_settle_delay)
1089 ssleep(HOST_RESET_SETTLE_TIME);
1090 spin_lock_irqsave(scmd->device->host->host_lock, flags);
1091 scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
1092 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1093 }
1094
1095 return rtn;
1096}
1097
1098/**
1099 * scsi_eh_bus_reset - send a bus reset
1100 * @shost: scsi host being recovered.
1101 * @eh_done_q: list_head for processed commands.
1102 **/
1103static int scsi_eh_bus_reset(struct Scsi_Host *shost,
1104 struct list_head *work_q,
1105 struct list_head *done_q)
1106{
1107 struct list_head *lh, *lh_sf;
1108 struct scsi_cmnd *scmd;
1109 struct scsi_cmnd *chan_scmd;
1110 unsigned int channel;
1111 int rtn;
1112
1113 /*
1114 * we really want to loop over the various channels, and do this on
1115 * a channel by channel basis. we should also check to see if any
1116 * of the failed commands are on soft_reset devices, and if so, skip
1117 * the reset.
1118 */
1119
1120 for (channel = 0; channel <= shost->max_channel; channel++) {
1121 chan_scmd = NULL;
1122 list_for_each_entry(scmd, work_q, eh_entry) {
1123 if (channel == scmd->device->channel) {
1124 chan_scmd = scmd;
1125 break;
1126 /*
1127 * FIXME add back in some support for
1128 * soft_reset devices.
1129 */
1130 }
1131 }
1132
1133 if (!chan_scmd)
1134 continue;
1135 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
1136 " %d\n", current->comm,
1137 channel));
1138 rtn = scsi_try_bus_reset(chan_scmd);
1139 if (rtn == SUCCESS) {
1140 list_for_each_safe(lh, lh_sf, work_q) {
1141 scmd = list_entry(lh, struct scsi_cmnd,
1142 eh_entry);
1143 if (channel == scmd->device->channel)
1144 if (!scsi_device_online(scmd->device) ||
1145 !scsi_eh_tur(scmd))
1146 scsi_eh_finish_cmd(scmd,
1147 done_q);
1148 }
1149 } else {
1150 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
1151 " failed chan: %d\n",
1152 current->comm,
1153 channel));
1154 }
1155 }
1156 return list_empty(work_q);
1157}
1158
1159/**
1160 * scsi_eh_host_reset - send a host reset
1161 * @work_q: list_head for processed commands.
1162 * @done_q: list_head for processed commands.
1163 **/
1164static int scsi_eh_host_reset(struct list_head *work_q,
1165 struct list_head *done_q)
1166{
1167 int rtn;
1168 struct list_head *lh, *lh_sf;
1169 struct scsi_cmnd *scmd;
1170
1171 if (!list_empty(work_q)) {
1172 scmd = list_entry(work_q->next,
1173 struct scsi_cmnd, eh_entry);
1174
1175 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
1176 , current->comm));
1177
1178 rtn = scsi_try_host_reset(scmd);
1179 if (rtn == SUCCESS) {
1180 list_for_each_safe(lh, lh_sf, work_q) {
1181 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
1182 if (!scsi_device_online(scmd->device) ||
1183 (!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) ||
1184 !scsi_eh_tur(scmd))
1185 scsi_eh_finish_cmd(scmd, done_q);
1186 }
1187 } else {
1188 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
1189 " failed\n",
1190 current->comm));
1191 }
1192 }
1193 return list_empty(work_q);
1194}
1195
1196/**
1197 * scsi_eh_offline_sdevs - offline scsi devices that fail to recover
1198 * @work_q: list_head for processed commands.
1199 * @done_q: list_head for processed commands.
1200 *
1201 **/
1202static void scsi_eh_offline_sdevs(struct list_head *work_q,
1203 struct list_head *done_q)
1204{
1205 struct list_head *lh, *lh_sf;
1206 struct scsi_cmnd *scmd;
1207
1208 list_for_each_safe(lh, lh_sf, work_q) {
1209 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
1210 printk(KERN_INFO "scsi: Device offlined - not"
1211 " ready after error recovery: host"
1212 " %d channel %d id %d lun %d\n",
1213 scmd->device->host->host_no,
1214 scmd->device->channel,
1215 scmd->device->id,
1216 scmd->device->lun);
1217 scsi_device_set_state(scmd->device, SDEV_OFFLINE);
1218 if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD)) {
1219 /*
1220 * FIXME: Handle lost cmds.
1221 */
1222 }
1223 scsi_eh_finish_cmd(scmd, done_q);
1224 }
1225 return;
1226}
1227
1228/**
1229 * scsi_decide_disposition - Disposition a cmd on return from LLD.
1230 * @scmd: SCSI cmd to examine.
1231 *
1232 * Notes:
1233 * This is *only* called when we are examining the status after sending
1234 * out the actual data command. any commands that are queued for error
1235 * recovery (e.g. test_unit_ready) do *not* come through here.
1236 *
1237 * When this routine returns failed, it means the error handler thread
1238 * is woken. In cases where the error code indicates an error that
1239 * doesn't require the error handler read (i.e. we don't need to
1240 * abort/reset), this function should return SUCCESS.
1241 **/
1242int scsi_decide_disposition(struct scsi_cmnd *scmd)
1243{
1244 int rtn;
1245
1246 /*
1247 * if the device is offline, then we clearly just pass the result back
1248 * up to the top level.
1249 */
1250 if (!scsi_device_online(scmd->device)) {
1251 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
1252 " as SUCCESS\n",
1253 __FUNCTION__));
1254 return SUCCESS;
1255 }
1256
1257 /*
1258 * first check the host byte, to see if there is anything in there
1259 * that would indicate what we need to do.
1260 */
1261 switch (host_byte(scmd->result)) {
1262 case DID_PASSTHROUGH:
1263 /*
1264 * no matter what, pass this through to the upper layer.
1265 * nuke this special code so that it looks like we are saying
1266 * did_ok.
1267 */
1268 scmd->result &= 0xff00ffff;
1269 return SUCCESS;
1270 case DID_OK:
1271 /*
1272 * looks good. drop through, and check the next byte.
1273 */
1274 break;
1275 case DID_NO_CONNECT:
1276 case DID_BAD_TARGET:
1277 case DID_ABORT:
1278 /*
1279 * note - this means that we just report the status back
1280 * to the top level driver, not that we actually think
1281 * that it indicates SUCCESS.
1282 */
1283 return SUCCESS;
1284 /*
1285 * when the low level driver returns did_soft_error,
1286 * it is responsible for keeping an internal retry counter
1287 * in order to avoid endless loops (db)
1288 *
1289 * actually this is a bug in this function here. we should
1290 * be mindful of the maximum number of retries specified
1291 * and not get stuck in a loop.
1292 */
1293 case DID_SOFT_ERROR:
1294 goto maybe_retry;
1295 case DID_IMM_RETRY:
1296 return NEEDS_RETRY;
1297
bf341919 1298 case DID_REQUEUE:
1299 return ADD_TO_MLQUEUE;
1300
1da177e4
LT
1301 case DID_ERROR:
1302 if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
1303 status_byte(scmd->result) == RESERVATION_CONFLICT)
1304 /*
1305 * execute reservation conflict processing code
1306 * lower down
1307 */
1308 break;
1309 /* fallthrough */
1310
1311 case DID_BUS_BUSY:
1312 case DID_PARITY:
1313 goto maybe_retry;
1314 case DID_TIME_OUT:
1315 /*
1316 * when we scan the bus, we get timeout messages for
1317 * these commands if there is no device available.
1318 * other hosts report did_no_connect for the same thing.
1319 */
1320 if ((scmd->cmnd[0] == TEST_UNIT_READY ||
1321 scmd->cmnd[0] == INQUIRY)) {
1322 return SUCCESS;
1323 } else {
1324 return FAILED;
1325 }
1326 case DID_RESET:
1327 return SUCCESS;
1328 default:
1329 return FAILED;
1330 }
1331
1332 /*
1333 * next, check the message byte.
1334 */
1335 if (msg_byte(scmd->result) != COMMAND_COMPLETE)
1336 return FAILED;
1337
1338 /*
1339 * check the status byte to see if this indicates anything special.
1340 */
1341 switch (status_byte(scmd->result)) {
1342 case QUEUE_FULL:
1343 /*
1344 * the case of trying to send too many commands to a
1345 * tagged queueing device.
1346 */
1347 case BUSY:
1348 /*
1349 * device can't talk to us at the moment. Should only
1350 * occur (SAM-3) when the task queue is empty, so will cause
1351 * the empty queue handling to trigger a stall in the
1352 * device.
1353 */
1354 return ADD_TO_MLQUEUE;
1355 case GOOD:
1356 case COMMAND_TERMINATED:
1357 case TASK_ABORTED:
1358 return SUCCESS;
1359 case CHECK_CONDITION:
1360 rtn = scsi_check_sense(scmd);
1361 if (rtn == NEEDS_RETRY)
1362 goto maybe_retry;
1363 /* if rtn == FAILED, we have no sense information;
1364 * returning FAILED will wake the error handler thread
1365 * to collect the sense and redo the decide
1366 * disposition */
1367 return rtn;
1368 case CONDITION_GOOD:
1369 case INTERMEDIATE_GOOD:
1370 case INTERMEDIATE_C_GOOD:
1371 case ACA_ACTIVE:
1372 /*
1373 * who knows? FIXME(eric)
1374 */
1375 return SUCCESS;
1376
1377 case RESERVATION_CONFLICT:
1378 printk(KERN_INFO "scsi: reservation conflict: host"
1379 " %d channel %d id %d lun %d\n",
1380 scmd->device->host->host_no, scmd->device->channel,
1381 scmd->device->id, scmd->device->lun);
1382 return SUCCESS; /* causes immediate i/o error */
1383 default:
1384 return FAILED;
1385 }
1386 return FAILED;
1387
1388 maybe_retry:
1389
1390 /* we requeue for retry because the error was retryable, and
1391 * the request was not marked fast fail. Note that above,
1392 * even if the request is marked fast fail, we still requeue
1393 * for queue congestion conditions (QUEUE_FULL or BUSY) */
1394 if ((++scmd->retries) < scmd->allowed
1395 && !blk_noretry_request(scmd->request)) {
1396 return NEEDS_RETRY;
1397 } else {
1398 /*
1399 * no more retries - report this one back to upper level.
1400 */
1401 return SUCCESS;
1402 }
1403}
1404
1405/**
1406 * scsi_eh_lock_done - done function for eh door lock request
1407 * @scmd: SCSI command block for the door lock request
1408 *
1409 * Notes:
1410 * We completed the asynchronous door lock request, and it has either
1411 * locked the door or failed. We must free the command structures
1412 * associated with this request.
1413 **/
1414static void scsi_eh_lock_done(struct scsi_cmnd *scmd)
1415{
1416 struct scsi_request *sreq = scmd->sc_request;
1417
1418 scsi_release_request(sreq);
1419}
1420
1421
1422/**
1423 * scsi_eh_lock_door - Prevent medium removal for the specified device
1424 * @sdev: SCSI device to prevent medium removal
1425 *
1426 * Locking:
1427 * We must be called from process context; scsi_allocate_request()
1428 * may sleep.
1429 *
1430 * Notes:
1431 * We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
1432 * head of the devices request queue, and continue.
1433 *
1434 * Bugs:
1435 * scsi_allocate_request() may sleep waiting for existing requests to
1436 * be processed. However, since we haven't kicked off any request
1437 * processing for this host, this may deadlock.
1438 *
1439 * If scsi_allocate_request() fails for what ever reason, we
1440 * completely forget to lock the door.
1441 **/
1442static void scsi_eh_lock_door(struct scsi_device *sdev)
1443{
1444 struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
1445
1446 if (unlikely(!sreq)) {
1447 printk(KERN_ERR "%s: request allocate failed,"
1448 "prevent media removal cmd not sent\n", __FUNCTION__);
1449 return;
1450 }
1451
1452 sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL;
1453 sreq->sr_cmnd[1] = 0;
1454 sreq->sr_cmnd[2] = 0;
1455 sreq->sr_cmnd[3] = 0;
1456 sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT;
1457 sreq->sr_cmnd[5] = 0;
1458 sreq->sr_data_direction = DMA_NONE;
1459 sreq->sr_bufflen = 0;
1460 sreq->sr_buffer = NULL;
1461 sreq->sr_allowed = 5;
1462 sreq->sr_done = scsi_eh_lock_done;
1463 sreq->sr_timeout_per_command = 10 * HZ;
1464 sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
1465
1466 scsi_insert_special_req(sreq, 1);
1467}
1468
1469
1470/**
1471 * scsi_restart_operations - restart io operations to the specified host.
1472 * @shost: Host we are restarting.
1473 *
1474 * Notes:
1475 * When we entered the error handler, we blocked all further i/o to
1476 * this device. we need to 'reverse' this process.
1477 **/
1478static void scsi_restart_operations(struct Scsi_Host *shost)
1479{
1480 struct scsi_device *sdev;
1481
1482 /*
1483 * If the door was locked, we need to insert a door lock request
1484 * onto the head of the SCSI request queue for the device. There
1485 * is no point trying to lock the door of an off-line device.
1486 */
1487 shost_for_each_device(sdev, shost) {
1488 if (scsi_device_online(sdev) && sdev->locked)
1489 scsi_eh_lock_door(sdev);
1490 }
1491
1492 /*
1493 * next free up anything directly waiting upon the host. this
1494 * will be requests for character device operations, and also for
1495 * ioctls to queued block devices.
1496 */
1497 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
1498 __FUNCTION__));
1499
1500 clear_bit(SHOST_RECOVERY, &shost->shost_state);
1501
1502 wake_up(&shost->host_wait);
1503
1504 /*
1505 * finally we need to re-initiate requests that may be pending. we will
1506 * have had everything blocked while error handling is taking place, and
1507 * now that error recovery is done, we will need to ensure that these
1508 * requests are started.
1509 */
1510 scsi_run_host_queues(shost);
1511}
1512
/**
 * scsi_eh_ready_devs - check device ready state and recover if not.
 * @shost:	host to be recovered.
 * @work_q:	list_head of commands still awaiting recovery.
 * @done_q:	list_head for processed commands.
 *
 * Notes:
 *    Recovery steps are tried in order of increasing severity: start
 *    unit, bus device reset, bus reset, host reset.  The sequence stops
 *    at the first step that returns non-zero; if none does, the
 *    remaining devices are taken offline.
 **/
static void scsi_eh_ready_devs(struct Scsi_Host *shost,
			       struct list_head *work_q,
			       struct list_head *done_q)
{
	if (scsi_eh_stu(shost, work_q, done_q))
		return;
	if (scsi_eh_bus_device_reset(shost, work_q, done_q))
		return;
	if (scsi_eh_bus_reset(shost, work_q, done_q))
		return;
	if (scsi_eh_host_reset(work_q, done_q))
		return;

	scsi_eh_offline_sdevs(work_q, done_q);
}
1529
1530/**
1531 * scsi_eh_flush_done_q - finish processed commands or retry them.
1532 * @done_q: list_head of processed commands.
1533 *
1534 **/
1535static void scsi_eh_flush_done_q(struct list_head *done_q)
1536{
1537 struct list_head *lh, *lh_sf;
1538 struct scsi_cmnd *scmd;
1539
1540 list_for_each_safe(lh, lh_sf, done_q) {
1541 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
1542 list_del_init(lh);
1543 if (scsi_device_online(scmd->device) &&
1544 !blk_noretry_request(scmd->request) &&
1545 (++scmd->retries < scmd->allowed)) {
1546 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"
1547 " retry cmd: %p\n",
1548 current->comm,
1549 scmd));
1550 scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
1551 } else {
793698ce
PM
1552 /*
1553 * If just we got sense for the device (called
1554 * scsi_eh_get_sense), scmd->result is already
1555 * set, do not set DRIVER_TIMEOUT.
1556 */
1da177e4
LT
1557 if (!scmd->result)
1558 scmd->result |= (DRIVER_TIMEOUT << 24);
1559 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
1560 " cmd: %p\n",
1561 current->comm, scmd));
1562 scsi_finish_command(scmd);
1563 }
1564 }
1565}
1566
1567/**
1568 * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
1569 * @shost: Host to unjam.
1570 *
1571 * Notes:
1572 * When we come in here, we *know* that all commands on the bus have
1573 * either completed, failed or timed out. we also know that no further
1574 * commands are being sent to the host, so things are relatively quiet
1575 * and we have freedom to fiddle with things as we wish.
1576 *
1577 * This is only the *default* implementation. it is possible for
1578 * individual drivers to supply their own version of this function, and
1579 * if the maintainer wishes to do this, it is strongly suggested that
1580 * this function be taken as a template and modified. this function
1581 * was designed to correctly handle problems for about 95% of the
1582 * different cases out there, and it should always provide at least a
1583 * reasonable amount of error recovery.
1584 *
1585 * Any command marked 'failed' or 'timeout' must eventually have
1586 * scsi_finish_cmd() called for it. we do all of the retry stuff
1587 * here, so when we restart the host after we return it should have an
1588 * empty queue.
1589 **/
1590static void scsi_unjam_host(struct Scsi_Host *shost)
1591{
1592 unsigned long flags;
1593 LIST_HEAD(eh_work_q);
1594 LIST_HEAD(eh_done_q);
1595
1596 spin_lock_irqsave(shost->host_lock, flags);
1597 list_splice_init(&shost->eh_cmd_q, &eh_work_q);
1598 spin_unlock_irqrestore(shost->host_lock, flags);
1599
1600 SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
1601
1602 if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
1603 if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
1604 scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
1605
1606 scsi_eh_flush_done_q(&eh_done_q);
1607}
1608
1609/**
1610 * scsi_error_handler - Handle errors/timeouts of SCSI cmds.
1611 * @data: Host for which we are running.
1612 *
1613 * Notes:
1614 * This is always run in the context of a kernel thread. The idea is
1615 * that we start this thing up when the kernel starts up (one per host
1616 * that we detect), and it immediately goes to sleep and waits for some
1617 * event (i.e. failure). When this takes place, we have the job of
1618 * trying to unjam the bus and restarting things.
1619 **/
1620int scsi_error_handler(void *data)
1621{
1622 struct Scsi_Host *shost = (struct Scsi_Host *) data;
1623 int rtn;
1624 DECLARE_MUTEX_LOCKED(sem);
1625
1626 /*
1627 * Flush resources
1628 */
1629
1630 daemonize("scsi_eh_%d", shost->host_no);
1631
1632 current->flags |= PF_NOFREEZE;
1633
1634 shost->eh_wait = &sem;
1635 shost->ehandler = current;
1636
1637 /*
1638 * Wake up the thread that created us.
1639 */
1640 SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"
1641 " scsi_eh_%d\n",shost->host_no));
1642
1643 complete(shost->eh_notify);
1644
1645 while (1) {
1646 /*
1647 * If we get a signal, it means we are supposed to go
1648 * away and die. This typically happens if the user is
1649 * trying to unload a module.
1650 */
1651 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
1652 " scsi_eh_%d"
1653 " sleeping\n",shost->host_no));
1654
1655 /*
1656 * Note - we always use down_interruptible with the semaphore
1657 * even if the module was loaded as part of the kernel. The
1658 * reason is that down() will cause this thread to be counted
1659 * in the load average as a running process, and down
1660 * interruptible doesn't. Given that we need to allow this
1661 * thread to die if the driver was loaded as a module, using
1662 * semaphores isn't unreasonable.
1663 */
1664 down_interruptible(&sem);
1665 if (shost->eh_kill)
1666 break;
1667
1668 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
1669 " scsi_eh_%d waking"
1670 " up\n",shost->host_no));
1671
1672 shost->eh_active = 1;
1673
1674 /*
1675 * We have a host that is failing for some reason. Figure out
1676 * what we need to do to get it up and online again (if we can).
1677 * If we fail, we end up taking the thing offline.
1678 */
1679 if (shost->hostt->eh_strategy_handler)
1680 rtn = shost->hostt->eh_strategy_handler(shost);
1681 else
1682 scsi_unjam_host(shost);
1683
1684 shost->eh_active = 0;
1685
1686 /*
1687 * Note - if the above fails completely, the action is to take
1688 * individual devices offline and flush the queue of any
1689 * outstanding requests that may have been pending. When we
1690 * restart, we restart any I/O to any other devices on the bus
1691 * which are still online.
1692 */
1693 scsi_restart_operations(shost);
1694
1695 }
1696
1697 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
1698 " exiting\n",shost->host_no));
1699
1700 /*
1701 * Make sure that nobody tries to wake us up again.
1702 */
1703 shost->eh_wait = NULL;
1704
1705 /*
1706 * Knock this down too. From this point on, the host is flying
1707 * without a pilot. If this is because the module is being unloaded,
1708 * that's fine. If the user sent a signal to this thing, we are
1709 * potentially in real danger.
1710 */
1711 shost->eh_active = 0;
1712 shost->ehandler = NULL;
1713
1714 /*
1715 * If anyone is waiting for us to exit (i.e. someone trying to unload
1716 * a driver), then wake up that process to let them know we are on
1717 * the way out the door.
1718 */
1719 complete_and_exit(shost->eh_notify, 0);
1720 return 0;
1721}
1722
1723/*
1724 * Function: scsi_report_bus_reset()
1725 *
1726 * Purpose: Utility function used by low-level drivers to report that
1727 * they have observed a bus reset on the bus being handled.
1728 *
1729 * Arguments: shost - Host in question
1730 * channel - channel on which reset was observed.
1731 *
1732 * Returns: Nothing
1733 *
1734 * Lock status: Host lock must be held.
1735 *
1736 * Notes: This only needs to be called if the reset is one which
1737 * originates from an unknown location. Resets originated
1738 * by the mid-level itself don't need to call this, but there
1739 * should be no harm.
1740 *
1741 * The main purpose of this is to make sure that a CHECK_CONDITION
1742 * is properly treated.
1743 */
1744void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
1745{
1746 struct scsi_device *sdev;
1747
1748 __shost_for_each_device(sdev, shost) {
1749 if (channel == sdev->channel) {
1750 sdev->was_reset = 1;
1751 sdev->expecting_cc_ua = 1;
1752 }
1753 }
1754}
1755EXPORT_SYMBOL(scsi_report_bus_reset);
1756
1757/*
1758 * Function: scsi_report_device_reset()
1759 *
1760 * Purpose: Utility function used by low-level drivers to report that
1761 * they have observed a device reset on the device being handled.
1762 *
1763 * Arguments: shost - Host in question
1764 * channel - channel on which reset was observed
1765 * target - target on which reset was observed
1766 *
1767 * Returns: Nothing
1768 *
1769 * Lock status: Host lock must be held
1770 *
1771 * Notes: This only needs to be called if the reset is one which
1772 * originates from an unknown location. Resets originated
1773 * by the mid-level itself don't need to call this, but there
1774 * should be no harm.
1775 *
1776 * The main purpose of this is to make sure that a CHECK_CONDITION
1777 * is properly treated.
1778 */
1779void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
1780{
1781 struct scsi_device *sdev;
1782
1783 __shost_for_each_device(sdev, shost) {
1784 if (channel == sdev->channel &&
1785 target == sdev->id) {
1786 sdev->was_reset = 1;
1787 sdev->expecting_cc_ua = 1;
1788 }
1789 }
1790}
1791EXPORT_SYMBOL(scsi_report_device_reset);
1792
/*
 * Completion callback installed as scmd->scsi_done by
 * scsi_reset_provider(); intentionally does nothing.
 */
static void scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
{
	/* nothing to do */
}
1797
1798/*
1799 * Function: scsi_reset_provider
1800 *
1801 * Purpose: Send requested reset to a bus or device at any phase.
1802 *
1803 * Arguments: device - device to send reset to
1804 * flag - reset type (see scsi.h)
1805 *
1806 * Returns: SUCCESS/FAILURE.
1807 *
1808 * Notes: This is used by the SCSI Generic driver to provide
1809 * Bus/Device reset capability.
1810 */
1811int
1812scsi_reset_provider(struct scsi_device *dev, int flag)
1813{
1814 struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
1815 struct request req;
1816 int rtn;
1817
1818 scmd->request = &req;
1819 memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
1820 scmd->request->rq_status = RQ_SCSI_BUSY;
1821 scmd->state = SCSI_STATE_INITIALIZING;
1822 scmd->owner = SCSI_OWNER_MIDLEVEL;
1823
1824 memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));
1825
1826 scmd->scsi_done = scsi_reset_provider_done_command;
1827 scmd->done = NULL;
1828 scmd->buffer = NULL;
1829 scmd->bufflen = 0;
1830 scmd->request_buffer = NULL;
1831 scmd->request_bufflen = 0;
1da177e4
LT
1832 scmd->abort_reason = DID_ABORT;
1833
1834 scmd->cmd_len = 0;
1835
1836 scmd->sc_data_direction = DMA_BIDIRECTIONAL;
1837 scmd->sc_request = NULL;
1838 scmd->sc_magic = SCSI_CMND_MAGIC;
1839
1840 init_timer(&scmd->eh_timeout);
1841
1842 /*
1843 * Sometimes the command can get back into the timer chain,
1844 * so use the pid as an identifier.
1845 */
1846 scmd->pid = 0;
1847
1848 switch (flag) {
1849 case SCSI_TRY_RESET_DEVICE:
1850 rtn = scsi_try_bus_device_reset(scmd);
1851 if (rtn == SUCCESS)
1852 break;
1853 /* FALLTHROUGH */
1854 case SCSI_TRY_RESET_BUS:
1855 rtn = scsi_try_bus_reset(scmd);
1856 if (rtn == SUCCESS)
1857 break;
1858 /* FALLTHROUGH */
1859 case SCSI_TRY_RESET_HOST:
1860 rtn = scsi_try_host_reset(scmd);
1861 break;
1862 default:
1863 rtn = FAILED;
1864 }
1865
1da177e4
LT
1866 scsi_next_command(scmd);
1867 return rtn;
1868}
1869EXPORT_SYMBOL(scsi_reset_provider);
1870
1871/**
1872 * scsi_normalize_sense - normalize main elements from either fixed or
1873 * descriptor sense data format into a common format.
1874 *
1875 * @sense_buffer: byte array containing sense data returned by device
1876 * @sb_len: number of valid bytes in sense_buffer
1877 * @sshdr: pointer to instance of structure that common
1878 * elements are written to.
1879 *
1880 * Notes:
1881 * The "main elements" from sense data are: response_code, sense_key,
1882 * asc, ascq and additional_length (only for descriptor format).
1883 *
1884 * Typically this function can be called after a device has
1885 * responded to a SCSI command with the CHECK_CONDITION status.
1886 *
1887 * Return value:
1888 * 1 if valid sense data information found, else 0;
1889 **/
1890int scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
1891 struct scsi_sense_hdr *sshdr)
1892{
1893 if (!sense_buffer || !sb_len || (sense_buffer[0] & 0x70) != 0x70)
1894 return 0;
1895
1896 memset(sshdr, 0, sizeof(struct scsi_sense_hdr));
1897
1898 sshdr->response_code = (sense_buffer[0] & 0x7f);
1899 if (sshdr->response_code >= 0x72) {
1900 /*
1901 * descriptor format
1902 */
1903 if (sb_len > 1)
1904 sshdr->sense_key = (sense_buffer[1] & 0xf);
1905 if (sb_len > 2)
1906 sshdr->asc = sense_buffer[2];
1907 if (sb_len > 3)
1908 sshdr->ascq = sense_buffer[3];
1909 if (sb_len > 7)
1910 sshdr->additional_length = sense_buffer[7];
1911 } else {
1912 /*
1913 * fixed format
1914 */
1915 if (sb_len > 2)
1916 sshdr->sense_key = (sense_buffer[2] & 0xf);
1917 if (sb_len > 7) {
1918 sb_len = (sb_len < (sense_buffer[7] + 8)) ?
1919 sb_len : (sense_buffer[7] + 8);
1920 if (sb_len > 12)
1921 sshdr->asc = sense_buffer[12];
1922 if (sb_len > 13)
1923 sshdr->ascq = sense_buffer[13];
1924 }
1925 }
1926
1927 return 1;
1928}
1929EXPORT_SYMBOL(scsi_normalize_sense);
1930
1931int scsi_request_normalize_sense(struct scsi_request *sreq,
1932 struct scsi_sense_hdr *sshdr)
1933{
1934 return scsi_normalize_sense(sreq->sr_sense_buffer,
1935 sizeof(sreq->sr_sense_buffer), sshdr);
1936}
1937EXPORT_SYMBOL(scsi_request_normalize_sense);
1938
1939int scsi_command_normalize_sense(struct scsi_cmnd *cmd,
1940 struct scsi_sense_hdr *sshdr)
1941{
1942 return scsi_normalize_sense(cmd->sense_buffer,
1943 sizeof(cmd->sense_buffer), sshdr);
1944}
1945EXPORT_SYMBOL(scsi_command_normalize_sense);
1946
1947/**
1948 * scsi_sense_desc_find - search for a given descriptor type in
1949 * descriptor sense data format.
1950 *
1951 * @sense_buffer: byte array of descriptor format sense data
1952 * @sb_len: number of valid bytes in sense_buffer
1953 * @desc_type: value of descriptor type to find
1954 * (e.g. 0 -> information)
1955 *
1956 * Notes:
1957 * only valid when sense data is in descriptor format
1958 *
1959 * Return value:
1960 * pointer to start of (first) descriptor if found else NULL
1961 **/
1962const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
1963 int desc_type)
1964{
1965 int add_sen_len, add_len, desc_len, k;
1966 const u8 * descp;
1967
1968 if ((sb_len < 8) || (0 == (add_sen_len = sense_buffer[7])))
1969 return NULL;
1970 if ((sense_buffer[0] < 0x72) || (sense_buffer[0] > 0x73))
1971 return NULL;
1972 add_sen_len = (add_sen_len < (sb_len - 8)) ?
1973 add_sen_len : (sb_len - 8);
1974 descp = &sense_buffer[8];
1975 for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) {
1976 descp += desc_len;
1977 add_len = (k < (add_sen_len - 1)) ? descp[1]: -1;
1978 desc_len = add_len + 2;
1979 if (descp[0] == desc_type)
1980 return descp;
1981 if (add_len < 0) // short descriptor ??
1982 break;
1983 }
1984 return NULL;
1985}
1986EXPORT_SYMBOL(scsi_sense_desc_find);
1987
1988/**
1989 * scsi_get_sense_info_fld - attempts to get information field from
1990 * sense data (either fixed or descriptor format)
1991 *
1992 * @sense_buffer: byte array of sense data
1993 * @sb_len: number of valid bytes in sense_buffer
1994 * @info_out: pointer to 64 integer where 8 or 4 byte information
1995 * field will be placed if found.
1996 *
1997 * Return value:
1998 * 1 if information field found, 0 if not found.
1999 **/
2000int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len,
2001 u64 * info_out)
2002{
2003 int j;
2004 const u8 * ucp;
2005 u64 ull;
2006
2007 if (sb_len < 7)
2008 return 0;
2009 switch (sense_buffer[0] & 0x7f) {
2010 case 0x70:
2011 case 0x71:
2012 if (sense_buffer[0] & 0x80) {
2013 *info_out = (sense_buffer[3] << 24) +
2014 (sense_buffer[4] << 16) +
2015 (sense_buffer[5] << 8) + sense_buffer[6];
2016 return 1;
2017 } else
2018 return 0;
2019 case 0x72:
2020 case 0x73:
2021 ucp = scsi_sense_desc_find(sense_buffer, sb_len,
2022 0 /* info desc */);
2023 if (ucp && (0xa == ucp[1])) {
2024 ull = 0;
2025 for (j = 0; j < 8; ++j) {
2026 if (j > 0)
2027 ull <<= 8;
2028 ull |= ucp[4 + j];
2029 }
2030 *info_out = ull;
2031 return 1;
2032 } else
2033 return 0;
2034 default:
2035 return 0;
2036 }
2037}
2038EXPORT_SYMBOL(scsi_get_sense_info_fld);
This page took 0.111215 seconds and 5 git commands to generate.