diff options
Diffstat (limited to 'drivers/scsi/scsi_error.c')
-rw-r--r-- | drivers/scsi/scsi_error.c | 157 |
1 files changed, 46 insertions, 111 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 0c5b02d4c7f8..18c5d2523014 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c | |||
@@ -417,43 +417,15 @@ static int scsi_eh_completed_normally(struct scsi_cmnd *scmd) | |||
417 | } | 417 | } |
418 | 418 | ||
419 | /** | 419 | /** |
420 | * scsi_eh_times_out - timeout function for error handling. | ||
421 | * @scmd: Cmd that is timing out. | ||
422 | * | ||
423 | * Notes: | ||
424 | * During error handling, the kernel thread will be sleeping waiting | ||
425 | * for some action to complete on the device. our only job is to | ||
426 | * record that it timed out, and to wake up the thread. | ||
427 | **/ | ||
428 | static void scsi_eh_times_out(struct scsi_cmnd *scmd) | ||
429 | { | ||
430 | scmd->eh_eflags |= SCSI_EH_REC_TIMEOUT; | ||
431 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__, | ||
432 | scmd)); | ||
433 | |||
434 | up(scmd->device->host->eh_action); | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * scsi_eh_done - Completion function for error handling. | 420 | * scsi_eh_done - Completion function for error handling. |
439 | * @scmd: Cmd that is done. | 421 | * @scmd: Cmd that is done. |
440 | **/ | 422 | **/ |
441 | static void scsi_eh_done(struct scsi_cmnd *scmd) | 423 | static void scsi_eh_done(struct scsi_cmnd *scmd) |
442 | { | 424 | { |
443 | /* | 425 | SCSI_LOG_ERROR_RECOVERY(3, |
444 | * if the timeout handler is already running, then just set the | 426 | printk("%s scmd: %p result: %x\n", |
445 | * flag which says we finished late, and return. we have no | 427 | __FUNCTION__, scmd, scmd->result)); |
446 | * way of stopping the timeout handler from running, so we must | 428 | complete(scmd->device->host->eh_action); |
447 | * always defer to it. | ||
448 | */ | ||
449 | if (del_timer(&scmd->eh_timeout)) { | ||
450 | scmd->request->rq_status = RQ_SCSI_DONE; | ||
451 | |||
452 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n", | ||
453 | __FUNCTION__, scmd, scmd->result)); | ||
454 | |||
455 | up(scmd->device->host->eh_action); | ||
456 | } | ||
457 | } | 429 | } |
458 | 430 | ||
459 | /** | 431 | /** |
@@ -461,10 +433,6 @@ static void scsi_eh_done(struct scsi_cmnd *scmd) | |||
461 | * @scmd: SCSI Cmd to send. | 433 | * @scmd: SCSI Cmd to send. |
462 | * @timeout: Timeout for cmd. | 434 | * @timeout: Timeout for cmd. |
463 | * | 435 | * |
464 | * Notes: | ||
465 | * The initialization of the structures is quite a bit different in | ||
466 | * this case, and furthermore, there is a different completion handler | ||
467 | * vs scsi_dispatch_cmd. | ||
468 | * Return value: | 436 | * Return value: |
469 | * SUCCESS or FAILED or NEEDS_RETRY | 437 | * SUCCESS or FAILED or NEEDS_RETRY |
470 | **/ | 438 | **/ |
@@ -472,24 +440,16 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) | |||
472 | { | 440 | { |
473 | struct scsi_device *sdev = scmd->device; | 441 | struct scsi_device *sdev = scmd->device; |
474 | struct Scsi_Host *shost = sdev->host; | 442 | struct Scsi_Host *shost = sdev->host; |
475 | DECLARE_MUTEX_LOCKED(sem); | 443 | DECLARE_COMPLETION(done); |
444 | unsigned long timeleft; | ||
476 | unsigned long flags; | 445 | unsigned long flags; |
477 | int rtn = SUCCESS; | 446 | int rtn; |
478 | 447 | ||
479 | /* | ||
480 | * we will use a queued command if possible, otherwise we will | ||
481 | * emulate the queuing and calling of completion function ourselves. | ||
482 | */ | ||
483 | if (sdev->scsi_level <= SCSI_2) | 448 | if (sdev->scsi_level <= SCSI_2) |
484 | scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | | 449 | scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | |
485 | (sdev->lun << 5 & 0xe0); | 450 | (sdev->lun << 5 & 0xe0); |
486 | 451 | ||
487 | scsi_add_timer(scmd, timeout, scsi_eh_times_out); | 452 | shost->eh_action = &done; |
488 | |||
489 | /* | ||
490 | * set up the semaphore so we wait for the command to complete. | ||
491 | */ | ||
492 | shost->eh_action = &sem; | ||
493 | scmd->request->rq_status = RQ_SCSI_BUSY; | 453 | scmd->request->rq_status = RQ_SCSI_BUSY; |
494 | 454 | ||
495 | spin_lock_irqsave(shost->host_lock, flags); | 455 | spin_lock_irqsave(shost->host_lock, flags); |
@@ -497,47 +457,29 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) | |||
497 | shost->hostt->queuecommand(scmd, scsi_eh_done); | 457 | shost->hostt->queuecommand(scmd, scsi_eh_done); |
498 | spin_unlock_irqrestore(shost->host_lock, flags); | 458 | spin_unlock_irqrestore(shost->host_lock, flags); |
499 | 459 | ||
500 | down(&sem); | 460 | timeleft = wait_for_completion_timeout(&done, timeout); |
501 | scsi_log_completion(scmd, SUCCESS); | ||
502 | 461 | ||
462 | scmd->request->rq_status = RQ_SCSI_DONE; | ||
503 | shost->eh_action = NULL; | 463 | shost->eh_action = NULL; |
504 | 464 | ||
505 | /* | 465 | scsi_log_completion(scmd, SUCCESS); |
506 | * see if timeout. if so, tell the host to forget about it. | ||
507 | * in other words, we don't want a callback any more. | ||
508 | */ | ||
509 | if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) { | ||
510 | scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT; | ||
511 | |||
512 | /* | ||
513 | * as far as the low level driver is | ||
514 | * concerned, this command is still active, so | ||
515 | * we must give the low level driver a chance | ||
516 | * to abort it. (db) | ||
517 | * | ||
518 | * FIXME(eric) - we are not tracking whether we could | ||
519 | * abort a timed out command or not. not sure how | ||
520 | * we should treat them differently anyways. | ||
521 | */ | ||
522 | if (shost->hostt->eh_abort_handler) | ||
523 | shost->hostt->eh_abort_handler(scmd); | ||
524 | |||
525 | scmd->request->rq_status = RQ_SCSI_DONE; | ||
526 | rtn = FAILED; | ||
527 | } | ||
528 | 466 | ||
529 | SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n", | 467 | SCSI_LOG_ERROR_RECOVERY(3, |
530 | __FUNCTION__, scmd, rtn)); | 468 | printk("%s: scmd: %p, timeleft: %ld\n", |
469 | __FUNCTION__, scmd, timeleft)); | ||
531 | 470 | ||
532 | /* | 471 | /* |
533 | * now examine the actual status codes to see whether the command | 472 | * If there is time left scsi_eh_done got called, and we will |
534 | * actually did complete normally. | 473 | * examine the actual status codes to see whether the command |
474 | * actually did complete normally, else tell the host to forget | ||
475 | * about this command. | ||
535 | */ | 476 | */ |
536 | if (rtn == SUCCESS) { | 477 | if (timeleft) { |
537 | rtn = scsi_eh_completed_normally(scmd); | 478 | rtn = scsi_eh_completed_normally(scmd); |
538 | SCSI_LOG_ERROR_RECOVERY(3, | 479 | SCSI_LOG_ERROR_RECOVERY(3, |
539 | printk("%s: scsi_eh_completed_normally %x\n", | 480 | printk("%s: scsi_eh_completed_normally %x\n", |
540 | __FUNCTION__, rtn)); | 481 | __FUNCTION__, rtn)); |
482 | |||
541 | switch (rtn) { | 483 | switch (rtn) { |
542 | case SUCCESS: | 484 | case SUCCESS: |
543 | case NEEDS_RETRY: | 485 | case NEEDS_RETRY: |
@@ -547,6 +489,15 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) | |||
547 | rtn = FAILED; | 489 | rtn = FAILED; |
548 | break; | 490 | break; |
549 | } | 491 | } |
492 | } else { | ||
493 | /* | ||
494 | * FIXME(eric) - we are not tracking whether we could | ||
495 | * abort a timed out command or not. not sure how | ||
496 | * we should treat them differently anyways. | ||
497 | */ | ||
498 | if (shost->hostt->eh_abort_handler) | ||
499 | shost->hostt->eh_abort_handler(scmd); | ||
500 | rtn = FAILED; | ||
550 | } | 501 | } |
551 | 502 | ||
552 | return rtn; | 503 | return rtn; |
@@ -1571,50 +1522,41 @@ static void scsi_unjam_host(struct Scsi_Host *shost) | |||
1571 | } | 1522 | } |
1572 | 1523 | ||
1573 | /** | 1524 | /** |
1574 | * scsi_error_handler - Handle errors/timeouts of SCSI cmds. | 1525 | * scsi_error_handler - SCSI error handler thread |
1575 | * @data: Host for which we are running. | 1526 | * @data: Host for which we are running. |
1576 | * | 1527 | * |
1577 | * Notes: | 1528 | * Notes: |
1578 | * This is always run in the context of a kernel thread. The idea is | 1529 | * This is the main error handling loop. This is run as a kernel thread |
1579 | * that we start this thing up when the kernel starts up (one per host | 1530 | * for every SCSI host and handles all error handling activity. |
1580 | * that we detect), and it immediately goes to sleep and waits for some | ||
1581 | * event (i.e. failure). When this takes place, we have the job of | ||
1582 | * trying to unjam the bus and restarting things. | ||
1583 | **/ | 1531 | **/ |
1584 | int scsi_error_handler(void *data) | 1532 | int scsi_error_handler(void *data) |
1585 | { | 1533 | { |
1586 | struct Scsi_Host *shost = (struct Scsi_Host *) data; | 1534 | struct Scsi_Host *shost = data; |
1587 | int rtn; | ||
1588 | 1535 | ||
1589 | current->flags |= PF_NOFREEZE; | 1536 | current->flags |= PF_NOFREEZE; |
1590 | 1537 | ||
1591 | |||
1592 | /* | 1538 | /* |
1593 | * Note - we always use TASK_INTERRUPTIBLE even if the module | 1539 | * We use TASK_INTERRUPTIBLE so that the thread is not |
1594 | * was loaded as part of the kernel. The reason is that | 1540 | * counted against the load average as a running process. |
1595 | * UNINTERRUPTIBLE would cause this thread to be counted in | 1541 | * We never actually get interrupted because kthread_run |
1596 | * the load average as a running process, and an interruptible | 1542 | * disables signal delivery for the created thread. |
1597 | * wait doesn't. | ||
1598 | */ | 1543 | */ |
1599 | set_current_state(TASK_INTERRUPTIBLE); | 1544 | set_current_state(TASK_INTERRUPTIBLE); |
1600 | while (!kthread_should_stop()) { | 1545 | while (!kthread_should_stop()) { |
1601 | if (shost->host_failed == 0 || | 1546 | if (shost->host_failed == 0 || |
1602 | shost->host_failed != shost->host_busy) { | 1547 | shost->host_failed != shost->host_busy) { |
1603 | SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" | 1548 | SCSI_LOG_ERROR_RECOVERY(1, |
1604 | " scsi_eh_%d" | 1549 | printk("Error handler scsi_eh_%d sleeping\n", |
1605 | " sleeping\n", | 1550 | shost->host_no)); |
1606 | shost->host_no)); | ||
1607 | schedule(); | 1551 | schedule(); |
1608 | set_current_state(TASK_INTERRUPTIBLE); | 1552 | set_current_state(TASK_INTERRUPTIBLE); |
1609 | continue; | 1553 | continue; |
1610 | } | 1554 | } |
1611 | 1555 | ||
1612 | __set_current_state(TASK_RUNNING); | 1556 | __set_current_state(TASK_RUNNING); |
1613 | SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" | 1557 | SCSI_LOG_ERROR_RECOVERY(1, |
1614 | " scsi_eh_%d waking" | 1558 | printk("Error handler scsi_eh_%d waking up\n", |
1615 | " up\n",shost->host_no)); | 1559 | shost->host_no)); |
1616 | |||
1617 | shost->eh_active = 1; | ||
1618 | 1560 | ||
1619 | /* | 1561 | /* |
1620 | * We have a host that is failing for some reason. Figure out | 1562 | * We have a host that is failing for some reason. Figure out |
@@ -1622,12 +1564,10 @@ int scsi_error_handler(void *data) | |||
1622 | * If we fail, we end up taking the thing offline. | 1564 | * If we fail, we end up taking the thing offline. |
1623 | */ | 1565 | */ |
1624 | if (shost->hostt->eh_strategy_handler) | 1566 | if (shost->hostt->eh_strategy_handler) |
1625 | rtn = shost->hostt->eh_strategy_handler(shost); | 1567 | shost->hostt->eh_strategy_handler(shost); |
1626 | else | 1568 | else |
1627 | scsi_unjam_host(shost); | 1569 | scsi_unjam_host(shost); |
1628 | 1570 | ||
1629 | shost->eh_active = 0; | ||
1630 | |||
1631 | /* | 1571 | /* |
1632 | * Note - if the above fails completely, the action is to take | 1572 | * Note - if the above fails completely, the action is to take |
1633 | * individual devices offline and flush the queue of any | 1573 | * individual devices offline and flush the queue of any |
@@ -1638,15 +1578,10 @@ int scsi_error_handler(void *data) | |||
1638 | scsi_restart_operations(shost); | 1578 | scsi_restart_operations(shost); |
1639 | set_current_state(TASK_INTERRUPTIBLE); | 1579 | set_current_state(TASK_INTERRUPTIBLE); |
1640 | } | 1580 | } |
1641 | |||
1642 | __set_current_state(TASK_RUNNING); | 1581 | __set_current_state(TASK_RUNNING); |
1643 | 1582 | ||
1644 | SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d" | 1583 | SCSI_LOG_ERROR_RECOVERY(1, |
1645 | " exiting\n",shost->host_no)); | 1584 | printk("Error handler scsi_eh_%d exiting\n", shost->host_no)); |
1646 | |||
1647 | /* | ||
1648 | * Make sure that nobody tries to wake us up again. | ||
1649 | */ | ||
1650 | shost->ehandler = NULL; | 1585 | shost->ehandler = NULL; |
1651 | return 0; | 1586 | return 0; |
1652 | } | 1587 | } |