about summary refs log tree commit diff stats
path: root/drivers/scsi/scsi_error.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/scsi_error.c')
-rw-r--r-- drivers/scsi/scsi_error.c 157
1 files changed, 46 insertions, 111 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 0c5b02d4c7f8..18c5d2523014 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -417,43 +417,15 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
417} 417}
418 418
419/** 419/**
420 * scsi_eh_times_out - timeout function for error handling.
421 * @scmd: Cmd that is timing out.
422 *
423 * Notes:
424 * During error handling, the kernel thread will be sleeping waiting
425 * for some action to complete on the device. our only job is to
426 * record that it timed out, and to wake up the thread.
427 **/
428static void scsi_eh_times_out(struct scsi_cmnd *scmd)
429{
430 scmd->eh_eflags |= SCSI_EH_REC_TIMEOUT;
431 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
432 scmd));
433
434 up(scmd->device->host->eh_action);
435}
436
437/**
438 * scsi_eh_done - Completion function for error handling. 420 * scsi_eh_done - Completion function for error handling.
439 * @scmd: Cmd that is done. 421 * @scmd: Cmd that is done.
440 **/ 422 **/
441static void scsi_eh_done(struct scsi_cmnd *scmd) 423static void scsi_eh_done(struct scsi_cmnd *scmd)
442{ 424{
443 /* 425 SCSI_LOG_ERROR_RECOVERY(3,
444 * if the timeout handler is already running, then just set the 426 printk("%s scmd: %p result: %x\n",
445 * flag which says we finished late, and return. we have no 427 __FUNCTION__, scmd, scmd->result));
446 * way of stopping the timeout handler from running, so we must 428 complete(scmd->device->host->eh_action);
447 * always defer to it.
448 */
449 if (del_timer(&scmd->eh_timeout)) {
450 scmd->request->rq_status = RQ_SCSI_DONE;
451
452 SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
453 __FUNCTION__, scmd, scmd->result));
454
455 up(scmd->device->host->eh_action);
456 }
457} 429}
458 430
459/** 431/**
@@ -461,10 +433,6 @@ static void scsi_eh_done(struct scsi_cmnd *scmd)
461 * @scmd: SCSI Cmd to send. 433 * @scmd: SCSI Cmd to send.
462 * @timeout: Timeout for cmd. 434 * @timeout: Timeout for cmd.
463 * 435 *
464 * Notes:
465 * The initialization of the structures is quite a bit different in
466 * this case, and furthermore, there is a different completion handler
467 * vs scsi_dispatch_cmd.
468 * Return value: 436 * Return value:
469 * SUCCESS or FAILED or NEEDS_RETRY 437 * SUCCESS or FAILED or NEEDS_RETRY
470 **/ 438 **/
@@ -472,24 +440,16 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
472{ 440{
473 struct scsi_device *sdev = scmd->device; 441 struct scsi_device *sdev = scmd->device;
474 struct Scsi_Host *shost = sdev->host; 442 struct Scsi_Host *shost = sdev->host;
475 DECLARE_MUTEX_LOCKED(sem); 443 DECLARE_COMPLETION(done);
444 unsigned long timeleft;
476 unsigned long flags; 445 unsigned long flags;
477 int rtn = SUCCESS; 446 int rtn;
478 447
479 /*
480 * we will use a queued command if possible, otherwise we will
481 * emulate the queuing and calling of completion function ourselves.
482 */
483 if (sdev->scsi_level <= SCSI_2) 448 if (sdev->scsi_level <= SCSI_2)
484 scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | 449 scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
485 (sdev->lun << 5 & 0xe0); 450 (sdev->lun << 5 & 0xe0);
486 451
487 scsi_add_timer(scmd, timeout, scsi_eh_times_out); 452 shost->eh_action = &done;
488
489 /*
490 * set up the semaphore so we wait for the command to complete.
491 */
492 shost->eh_action = &sem;
493 scmd->request->rq_status = RQ_SCSI_BUSY; 453 scmd->request->rq_status = RQ_SCSI_BUSY;
494 454
495 spin_lock_irqsave(shost->host_lock, flags); 455 spin_lock_irqsave(shost->host_lock, flags);
@@ -497,47 +457,29 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
497 shost->hostt->queuecommand(scmd, scsi_eh_done); 457 shost->hostt->queuecommand(scmd, scsi_eh_done);
498 spin_unlock_irqrestore(shost->host_lock, flags); 458 spin_unlock_irqrestore(shost->host_lock, flags);
499 459
500 down(&sem); 460 timeleft = wait_for_completion_timeout(&done, timeout);
501 scsi_log_completion(scmd, SUCCESS);
502 461
462 scmd->request->rq_status = RQ_SCSI_DONE;
503 shost->eh_action = NULL; 463 shost->eh_action = NULL;
504 464
505 /* 465 scsi_log_completion(scmd, SUCCESS);
506 * see if timeout. if so, tell the host to forget about it.
507 * in other words, we don't want a callback any more.
508 */
509 if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) {
510 scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT;
511
512 /*
513 * as far as the low level driver is
514 * concerned, this command is still active, so
515 * we must give the low level driver a chance
516 * to abort it. (db)
517 *
518 * FIXME(eric) - we are not tracking whether we could
519 * abort a timed out command or not. not sure how
520 * we should treat them differently anyways.
521 */
522 if (shost->hostt->eh_abort_handler)
523 shost->hostt->eh_abort_handler(scmd);
524
525 scmd->request->rq_status = RQ_SCSI_DONE;
526 rtn = FAILED;
527 }
528 466
529 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n", 467 SCSI_LOG_ERROR_RECOVERY(3,
530 __FUNCTION__, scmd, rtn)); 468 printk("%s: scmd: %p, timeleft: %ld\n",
469 __FUNCTION__, scmd, timeleft));
531 470
532 /* 471 /*
533 * now examine the actual status codes to see whether the command 472 * If there is time left scsi_eh_done got called, and we will
534 * actually did complete normally. 473 * examine the actual status codes to see whether the command
474 * actually did complete normally, else tell the host to forget
475 * about this command.
535 */ 476 */
536 if (rtn == SUCCESS) { 477 if (timeleft) {
537 rtn = scsi_eh_completed_normally(scmd); 478 rtn = scsi_eh_completed_normally(scmd);
538 SCSI_LOG_ERROR_RECOVERY(3, 479 SCSI_LOG_ERROR_RECOVERY(3,
539 printk("%s: scsi_eh_completed_normally %x\n", 480 printk("%s: scsi_eh_completed_normally %x\n",
540 __FUNCTION__, rtn)); 481 __FUNCTION__, rtn));
482
541 switch (rtn) { 483 switch (rtn) {
542 case SUCCESS: 484 case SUCCESS:
543 case NEEDS_RETRY: 485 case NEEDS_RETRY:
@@ -547,6 +489,15 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
547 rtn = FAILED; 489 rtn = FAILED;
548 break; 490 break;
549 } 491 }
492 } else {
493 /*
494 * FIXME(eric) - we are not tracking whether we could
495 * abort a timed out command or not. not sure how
496 * we should treat them differently anyways.
497 */
498 if (shost->hostt->eh_abort_handler)
499 shost->hostt->eh_abort_handler(scmd);
500 rtn = FAILED;
550 } 501 }
551 502
552 return rtn; 503 return rtn;
@@ -1571,50 +1522,41 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
1571} 1522}
1572 1523
1573/** 1524/**
1574 * scsi_error_handler - Handle errors/timeouts of SCSI cmds. 1525 * scsi_error_handler - SCSI error handler thread
1575 * @data: Host for which we are running. 1526 * @data: Host for which we are running.
1576 * 1527 *
1577 * Notes: 1528 * Notes:
1578 * This is always run in the context of a kernel thread. The idea is 1529 * This is the main error handling loop. This is run as a kernel thread
1579 * that we start this thing up when the kernel starts up (one per host 1530 * for every SCSI host and handles all error handling activity.
1580 * that we detect), and it immediately goes to sleep and waits for some
1581 * event (i.e. failure). When this takes place, we have the job of
1582 * trying to unjam the bus and restarting things.
1583 **/ 1531 **/
1584int scsi_error_handler(void *data) 1532int scsi_error_handler(void *data)
1585{ 1533{
1586 struct Scsi_Host *shost = (struct Scsi_Host *) data; 1534 struct Scsi_Host *shost = data;
1587 int rtn;
1588 1535
1589 current->flags |= PF_NOFREEZE; 1536 current->flags |= PF_NOFREEZE;
1590 1537
1591
1592 /* 1538 /*
1593 * Note - we always use TASK_INTERRUPTIBLE even if the module 1539 * We use TASK_INTERRUPTIBLE so that the thread is not
1594 * was loaded as part of the kernel. The reason is that 1540 * counted against the load average as a running process.
1595 * UNINTERRUPTIBLE would cause this thread to be counted in 1541 * We never actually get interrupted because kthread_run
1596 * the load average as a running process, and an interruptible 1542 * disables signal delivery for the created thread.
1597 * wait doesn't.
1598 */ 1543 */
1599 set_current_state(TASK_INTERRUPTIBLE); 1544 set_current_state(TASK_INTERRUPTIBLE);
1600 while (!kthread_should_stop()) { 1545 while (!kthread_should_stop()) {
1601 if (shost->host_failed == 0 || 1546 if (shost->host_failed == 0 ||
1602 shost->host_failed != shost->host_busy) { 1547 shost->host_failed != shost->host_busy) {
1603 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" 1548 SCSI_LOG_ERROR_RECOVERY(1,
1604 " scsi_eh_%d" 1549 printk("Error handler scsi_eh_%d sleeping\n",
1605 " sleeping\n", 1550 shost->host_no));
1606 shost->host_no));
1607 schedule(); 1551 schedule();
1608 set_current_state(TASK_INTERRUPTIBLE); 1552 set_current_state(TASK_INTERRUPTIBLE);
1609 continue; 1553 continue;
1610 } 1554 }
1611 1555
1612 __set_current_state(TASK_RUNNING); 1556 __set_current_state(TASK_RUNNING);
1613 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" 1557 SCSI_LOG_ERROR_RECOVERY(1,
1614 " scsi_eh_%d waking" 1558 printk("Error handler scsi_eh_%d waking up\n",
1615 " up\n",shost->host_no)); 1559 shost->host_no));
1616
1617 shost->eh_active = 1;
1618 1560
1619 /* 1561 /*
1620 * We have a host that is failing for some reason. Figure out 1562 * We have a host that is failing for some reason. Figure out
@@ -1622,12 +1564,10 @@ int scsi_error_handler(void *data)
1622 * If we fail, we end up taking the thing offline. 1564 * If we fail, we end up taking the thing offline.
1623 */ 1565 */
1624 if (shost->hostt->eh_strategy_handler) 1566 if (shost->hostt->eh_strategy_handler)
1625 rtn = shost->hostt->eh_strategy_handler(shost); 1567 shost->hostt->eh_strategy_handler(shost);
1626 else 1568 else
1627 scsi_unjam_host(shost); 1569 scsi_unjam_host(shost);
1628 1570
1629 shost->eh_active = 0;
1630
1631 /* 1571 /*
1632 * Note - if the above fails completely, the action is to take 1572 * Note - if the above fails completely, the action is to take
1633 * individual devices offline and flush the queue of any 1573 * individual devices offline and flush the queue of any
@@ -1638,15 +1578,10 @@ int scsi_error_handler(void *data)
1638 scsi_restart_operations(shost); 1578 scsi_restart_operations(shost);
1639 set_current_state(TASK_INTERRUPTIBLE); 1579 set_current_state(TASK_INTERRUPTIBLE);
1640 } 1580 }
1641
1642 __set_current_state(TASK_RUNNING); 1581 __set_current_state(TASK_RUNNING);
1643 1582
1644 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d" 1583 SCSI_LOG_ERROR_RECOVERY(1,
1645 " exiting\n",shost->host_no)); 1584 printk("Error handler scsi_eh_%d exiting\n", shost->host_no));
1646
1647 /*
1648 * Make sure that nobody tries to wake us up again.
1649 */
1650 shost->ehandler = NULL; 1585 shost->ehandler = NULL;
1651 return 0; 1586 return 0;
1652} 1587}