diff options
author | Darrick J. Wong <djwong@us.ibm.com> | 2007-01-26 17:08:52 -0500 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2007-01-27 11:06:51 -0500 |
commit | ad689233bee854dced741c91aff12a8771a22f6f (patch) | |
tree | ae1a786c97c779c16d5e10ac17efdb24c72aa5ae /drivers/scsi | |
parent | dca84e4694419adf61ad052b1e5a50ac82726597 (diff) |
[SCSI] libsas: Handle SCSI commands that complete with failure codes
This patch moves the code that handles SAS failures out of the main EH
function and into a separate function. It also detects commands that have
no sas_task (i.e. they completed, but with error data) and sends them into
scsi_error for processing. This allows us to handle SCSI errors (and
enables auto-spinup as a side effect) instead of dropping them on the
floor and falling into an infinite loop. It also requires the
implementation of a device reset function, which the SAS failure code has
been modified to employ for REQ_DEVICE_RESET.
Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'drivers/scsi')
-rw-r--r-- | drivers/scsi/libsas/sas_scsi_host.c | 148 |
1 file changed, 104 insertions, 44 deletions
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 9ffe7605fb9d..eac1d2dde743 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <scsi/scsi_transport_sas.h> | 34 | #include <scsi/scsi_transport_sas.h> |
35 | #include "../scsi_sas_internal.h" | 35 | #include "../scsi_sas_internal.h" |
36 | #include "../scsi_transport_api.h" | 36 | #include "../scsi_transport_api.h" |
37 | #include "../scsi_priv.h" | ||
37 | 38 | ||
38 | #include <linux/err.h> | 39 | #include <linux/err.h> |
39 | #include <linux/blkdev.h> | 40 | #include <linux/blkdev.h> |
@@ -396,54 +397,80 @@ static int sas_recover_I_T(struct domain_device *dev) | |||
396 | return res; | 397 | return res; |
397 | } | 398 | } |
398 | 399 | ||
399 | static int eh_reset_phy_helper(struct sas_phy *phy) | 400 | /* Find the sas_phy that's attached to this device */ |
401 | struct sas_phy *find_local_sas_phy(struct domain_device *dev) | ||
400 | { | 402 | { |
401 | int tmf_resp; | 403 | struct domain_device *pdev = dev->parent; |
404 | struct ex_phy *exphy = NULL; | ||
405 | int i; | ||
406 | |||
407 | /* Directly attached device */ | ||
408 | if (!pdev) | ||
409 | return dev->port->phy; | ||
402 | 410 | ||
403 | tmf_resp = sas_phy_reset(phy, 1); | 411 | /* Otherwise look in the expander */ |
404 | if (tmf_resp) | 412 | for (i = 0; i < pdev->ex_dev.num_phys; i++) |
405 | SAS_DPRINTK("Hard reset of phy %d failed 0x%x\n", | 413 | if (!memcmp(dev->sas_addr, |
406 | phy->identify.phy_identifier, | 414 | pdev->ex_dev.ex_phy[i].attached_sas_addr, |
407 | tmf_resp); | 415 | SAS_ADDR_SIZE)) { |
416 | exphy = &pdev->ex_dev.ex_phy[i]; | ||
417 | break; | ||
418 | } | ||
408 | 419 | ||
409 | return tmf_resp; | 420 | BUG_ON(!exphy); |
421 | return exphy->phy; | ||
410 | } | 422 | } |
411 | 423 | ||
412 | void sas_scsi_recover_host(struct Scsi_Host *shost) | 424 | /* Attempt to send a target reset message to a device */ |
425 | int sas_eh_device_reset_handler(struct scsi_cmnd *cmd) | ||
426 | { | ||
427 | struct domain_device *dev = cmd_to_domain_dev(cmd); | ||
428 | struct sas_phy *phy = find_local_sas_phy(dev); | ||
429 | int res; | ||
430 | |||
431 | res = sas_phy_reset(phy, 1); | ||
432 | if (res) | ||
433 | SAS_DPRINTK("Device reset of %s failed 0x%x\n", | ||
434 | phy->dev.kobj.k_name, | ||
435 | res); | ||
436 | if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE) | ||
437 | return SUCCESS; | ||
438 | |||
439 | return FAILED; | ||
440 | } | ||
441 | |||
442 | /* Try to reset a device */ | ||
443 | static int try_to_reset_cmd_device(struct Scsi_Host *shost, | ||
444 | struct scsi_cmnd *cmd) | ||
445 | { | ||
446 | if (!shost->hostt->eh_device_reset_handler) | ||
447 | return FAILED; | ||
448 | |||
449 | return shost->hostt->eh_device_reset_handler(cmd); | ||
450 | } | ||
451 | |||
452 | static int sas_eh_handle_sas_errors(struct Scsi_Host *shost, | ||
453 | struct list_head *work_q, | ||
454 | struct list_head *done_q) | ||
413 | { | 455 | { |
414 | struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); | ||
415 | unsigned long flags; | ||
416 | LIST_HEAD(error_q); | ||
417 | struct scsi_cmnd *cmd, *n; | 456 | struct scsi_cmnd *cmd, *n; |
418 | enum task_disposition res = TASK_IS_DONE; | 457 | enum task_disposition res = TASK_IS_DONE; |
419 | int tmf_resp, need_reset; | 458 | int tmf_resp, need_reset; |
420 | struct sas_internal *i = to_sas_internal(shost->transportt); | 459 | struct sas_internal *i = to_sas_internal(shost->transportt); |
421 | struct sas_phy *task_sas_phy = NULL; | 460 | unsigned long flags; |
422 | 461 | struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); | |
423 | spin_lock_irqsave(shost->host_lock, flags); | ||
424 | list_splice_init(&shost->eh_cmd_q, &error_q); | ||
425 | spin_unlock_irqrestore(shost->host_lock, flags); | ||
426 | |||
427 | SAS_DPRINTK("Enter %s\n", __FUNCTION__); | ||
428 | 462 | ||
429 | /* All tasks on this list were marked SAS_TASK_STATE_ABORTED | ||
430 | * by sas_scsi_timed_out() callback. | ||
431 | */ | ||
432 | Again: | 463 | Again: |
433 | SAS_DPRINTK("going over list...\n"); | 464 | list_for_each_entry_safe(cmd, n, work_q, eh_entry) { |
434 | list_for_each_entry_safe(cmd, n, &error_q, eh_entry) { | ||
435 | struct sas_task *task = TO_SAS_TASK(cmd); | 465 | struct sas_task *task = TO_SAS_TASK(cmd); |
436 | list_del_init(&cmd->eh_entry); | ||
437 | 466 | ||
438 | if (!task) { | 467 | if (!task) |
439 | SAS_DPRINTK("%s: taskless cmd?!\n", __FUNCTION__); | ||
440 | continue; | 468 | continue; |
441 | } | 469 | |
470 | list_del_init(&cmd->eh_entry); | ||
442 | 471 | ||
443 | spin_lock_irqsave(&task->task_state_lock, flags); | 472 | spin_lock_irqsave(&task->task_state_lock, flags); |
444 | need_reset = task->task_state_flags & SAS_TASK_NEED_DEV_RESET; | 473 | need_reset = task->task_state_flags & SAS_TASK_NEED_DEV_RESET; |
445 | if (need_reset) | ||
446 | task_sas_phy = task->dev->port->phy; | ||
447 | spin_unlock_irqrestore(&task->task_state_lock, flags); | 474 | spin_unlock_irqrestore(&task->task_state_lock, flags); |
448 | 475 | ||
449 | SAS_DPRINTK("trying to find task 0x%p\n", task); | 476 | SAS_DPRINTK("trying to find task 0x%p\n", task); |
@@ -457,14 +484,14 @@ Again: | |||
457 | task); | 484 | task); |
458 | task->task_done(task); | 485 | task->task_done(task); |
459 | if (need_reset) | 486 | if (need_reset) |
460 | eh_reset_phy_helper(task_sas_phy); | 487 | try_to_reset_cmd_device(shost, cmd); |
461 | continue; | 488 | continue; |
462 | case TASK_IS_ABORTED: | 489 | case TASK_IS_ABORTED: |
463 | SAS_DPRINTK("%s: task 0x%p is aborted\n", | 490 | SAS_DPRINTK("%s: task 0x%p is aborted\n", |
464 | __FUNCTION__, task); | 491 | __FUNCTION__, task); |
465 | task->task_done(task); | 492 | task->task_done(task); |
466 | if (need_reset) | 493 | if (need_reset) |
467 | eh_reset_phy_helper(task_sas_phy); | 494 | try_to_reset_cmd_device(shost, cmd); |
468 | continue; | 495 | continue; |
469 | case TASK_IS_AT_LU: | 496 | case TASK_IS_AT_LU: |
470 | SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); | 497 | SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); |
@@ -476,8 +503,8 @@ Again: | |||
476 | cmd->device->lun); | 503 | cmd->device->lun); |
477 | task->task_done(task); | 504 | task->task_done(task); |
478 | if (need_reset) | 505 | if (need_reset) |
479 | eh_reset_phy_helper(task_sas_phy); | 506 | try_to_reset_cmd_device(shost, cmd); |
480 | sas_scsi_clear_queue_lu(&error_q, cmd); | 507 | sas_scsi_clear_queue_lu(work_q, cmd); |
481 | goto Again; | 508 | goto Again; |
482 | } | 509 | } |
483 | /* fallthrough */ | 510 | /* fallthrough */ |
@@ -491,8 +518,8 @@ Again: | |||
491 | SAS_ADDR(task->dev->sas_addr)); | 518 | SAS_ADDR(task->dev->sas_addr)); |
492 | task->task_done(task); | 519 | task->task_done(task); |
493 | if (need_reset) | 520 | if (need_reset) |
494 | eh_reset_phy_helper(task_sas_phy); | 521 | try_to_reset_cmd_device(shost, cmd); |
495 | sas_scsi_clear_queue_I_T(&error_q, task->dev); | 522 | sas_scsi_clear_queue_I_T(work_q, task->dev); |
496 | goto Again; | 523 | goto Again; |
497 | } | 524 | } |
498 | /* Hammer time :-) */ | 525 | /* Hammer time :-) */ |
@@ -506,8 +533,8 @@ Again: | |||
506 | "succeeded\n", port->id); | 533 | "succeeded\n", port->id); |
507 | task->task_done(task); | 534 | task->task_done(task); |
508 | if (need_reset) | 535 | if (need_reset) |
509 | eh_reset_phy_helper(task_sas_phy); | 536 | try_to_reset_cmd_device(shost, cmd); |
510 | sas_scsi_clear_queue_port(&error_q, | 537 | sas_scsi_clear_queue_port(work_q, |
511 | port); | 538 | port); |
512 | goto Again; | 539 | goto Again; |
513 | } | 540 | } |
@@ -520,7 +547,7 @@ Again: | |||
520 | "succeeded\n"); | 547 | "succeeded\n"); |
521 | task->task_done(task); | 548 | task->task_done(task); |
522 | if (need_reset) | 549 | if (need_reset) |
523 | eh_reset_phy_helper(task_sas_phy); | 550 | try_to_reset_cmd_device(shost, cmd); |
524 | goto out; | 551 | goto out; |
525 | } | 552 | } |
526 | } | 553 | } |
@@ -535,21 +562,53 @@ Again: | |||
535 | 562 | ||
536 | task->task_done(task); | 563 | task->task_done(task); |
537 | if (need_reset) | 564 | if (need_reset) |
538 | eh_reset_phy_helper(task_sas_phy); | 565 | try_to_reset_cmd_device(shost, cmd); |
539 | goto clear_q; | 566 | goto clear_q; |
540 | } | 567 | } |
541 | } | 568 | } |
542 | out: | 569 | out: |
543 | scsi_eh_flush_done_q(&ha->eh_done_q); | 570 | return list_empty(work_q); |
544 | SAS_DPRINTK("--- Exit %s\n", __FUNCTION__); | ||
545 | return; | ||
546 | clear_q: | 571 | clear_q: |
547 | SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__); | 572 | SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__); |
548 | list_for_each_entry_safe(cmd, n, &error_q, eh_entry) { | 573 | list_for_each_entry_safe(cmd, n, work_q, eh_entry) { |
549 | struct sas_task *task = TO_SAS_TASK(cmd); | 574 | struct sas_task *task = TO_SAS_TASK(cmd); |
550 | list_del_init(&cmd->eh_entry); | 575 | list_del_init(&cmd->eh_entry); |
551 | task->task_done(task); | 576 | task->task_done(task); |
552 | } | 577 | } |
578 | return list_empty(work_q); | ||
579 | } | ||
580 | |||
581 | void sas_scsi_recover_host(struct Scsi_Host *shost) | ||
582 | { | ||
583 | struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); | ||
584 | unsigned long flags; | ||
585 | LIST_HEAD(eh_work_q); | ||
586 | |||
587 | spin_lock_irqsave(shost->host_lock, flags); | ||
588 | list_splice_init(&shost->eh_cmd_q, &eh_work_q); | ||
589 | spin_unlock_irqrestore(shost->host_lock, flags); | ||
590 | |||
591 | SAS_DPRINTK("Enter %s\n", __FUNCTION__); | ||
592 | /* | ||
593 | * Deal with commands that still have SAS tasks (i.e. they didn't | ||
594 | * complete via the normal sas_task completion mechanism) | ||
595 | */ | ||
596 | if (sas_eh_handle_sas_errors(shost, &eh_work_q, &ha->eh_done_q)) | ||
597 | goto out; | ||
598 | |||
599 | /* | ||
600 | * Now deal with SCSI commands that completed ok but have a an error | ||
601 | * code (and hopefully sense data) attached. This is roughly what | ||
602 | * scsi_unjam_host does, but we skip scsi_eh_abort_cmds because any | ||
603 | * command we see here has no sas_task and is thus unknown to the HA. | ||
604 | */ | ||
605 | if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q)) | ||
606 | scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q); | ||
607 | |||
608 | out: | ||
609 | scsi_eh_flush_done_q(&ha->eh_done_q); | ||
610 | SAS_DPRINTK("--- Exit %s\n", __FUNCTION__); | ||
611 | return; | ||
553 | } | 612 | } |
554 | 613 | ||
555 | enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) | 614 | enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) |
@@ -914,3 +973,4 @@ EXPORT_SYMBOL_GPL(__sas_task_abort); | |||
914 | EXPORT_SYMBOL_GPL(sas_task_abort); | 973 | EXPORT_SYMBOL_GPL(sas_task_abort); |
915 | EXPORT_SYMBOL_GPL(sas_phy_reset); | 974 | EXPORT_SYMBOL_GPL(sas_phy_reset); |
916 | EXPORT_SYMBOL_GPL(sas_phy_enable); | 975 | EXPORT_SYMBOL_GPL(sas_phy_enable); |
976 | EXPORT_SYMBOL_GPL(sas_eh_device_reset_handler); | ||