aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@us.ibm.com>2007-01-26 17:08:52 -0500
committerJames Bottomley <jejb@mulgrave.il.steeleye.com>2007-01-27 11:06:51 -0500
commitad689233bee854dced741c91aff12a8771a22f6f (patch)
treeae1a786c97c779c16d5e10ac17efdb24c72aa5ae
parentdca84e4694419adf61ad052b1e5a50ac82726597 (diff)
[SCSI] libsas: Handle SCSI commands that complete with failure codes
This patch moves the code that handles SAS failures out of the main EH function and into a separate function. It also detects commands that have no sas_task (i.e. they completed, but with error data) and sends them into scsi_error for processing. This allows us to handle SCSI errors (and enables auto-spinup as a side effect) instead of dropping them on the floor and falling into an infinite loop. It also requires the implementation of a device reset function, which the SAS failure code has been modified to employ for REQ_DEVICE_RESET. Signed-off-by: Darrick J. Wong <djwong@us.ibm.com> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
-rw-r--r--drivers/scsi/libsas/sas_scsi_host.c148
-rw-r--r--include/scsi/libsas.h1
2 files changed, 105 insertions, 44 deletions
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 9ffe7605fb9d..eac1d2dde743 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -34,6 +34,7 @@
34#include <scsi/scsi_transport_sas.h> 34#include <scsi/scsi_transport_sas.h>
35#include "../scsi_sas_internal.h" 35#include "../scsi_sas_internal.h"
36#include "../scsi_transport_api.h" 36#include "../scsi_transport_api.h"
37#include "../scsi_priv.h"
37 38
38#include <linux/err.h> 39#include <linux/err.h>
39#include <linux/blkdev.h> 40#include <linux/blkdev.h>
@@ -396,54 +397,80 @@ static int sas_recover_I_T(struct domain_device *dev)
396 return res; 397 return res;
397} 398}
398 399
399static int eh_reset_phy_helper(struct sas_phy *phy) 400/* Find the sas_phy that's attached to this device */
401struct sas_phy *find_local_sas_phy(struct domain_device *dev)
400{ 402{
401 int tmf_resp; 403 struct domain_device *pdev = dev->parent;
404 struct ex_phy *exphy = NULL;
405 int i;
406
407 /* Directly attached device */
408 if (!pdev)
409 return dev->port->phy;
402 410
403 tmf_resp = sas_phy_reset(phy, 1); 411 /* Otherwise look in the expander */
404 if (tmf_resp) 412 for (i = 0; i < pdev->ex_dev.num_phys; i++)
405 SAS_DPRINTK("Hard reset of phy %d failed 0x%x\n", 413 if (!memcmp(dev->sas_addr,
406 phy->identify.phy_identifier, 414 pdev->ex_dev.ex_phy[i].attached_sas_addr,
407 tmf_resp); 415 SAS_ADDR_SIZE)) {
416 exphy = &pdev->ex_dev.ex_phy[i];
417 break;
418 }
408 419
409 return tmf_resp; 420 BUG_ON(!exphy);
421 return exphy->phy;
410} 422}
411 423
412void sas_scsi_recover_host(struct Scsi_Host *shost) 424/* Attempt to send a target reset message to a device */
425int sas_eh_device_reset_handler(struct scsi_cmnd *cmd)
426{
427 struct domain_device *dev = cmd_to_domain_dev(cmd);
428 struct sas_phy *phy = find_local_sas_phy(dev);
429 int res;
430
431 res = sas_phy_reset(phy, 1);
432 if (res)
433 SAS_DPRINTK("Device reset of %s failed 0x%x\n",
434 phy->dev.kobj.k_name,
435 res);
436 if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE)
437 return SUCCESS;
438
439 return FAILED;
440}
441
442/* Try to reset a device */
443static int try_to_reset_cmd_device(struct Scsi_Host *shost,
444 struct scsi_cmnd *cmd)
445{
446 if (!shost->hostt->eh_device_reset_handler)
447 return FAILED;
448
449 return shost->hostt->eh_device_reset_handler(cmd);
450}
451
452static int sas_eh_handle_sas_errors(struct Scsi_Host *shost,
453 struct list_head *work_q,
454 struct list_head *done_q)
413{ 455{
414 struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
415 unsigned long flags;
416 LIST_HEAD(error_q);
417 struct scsi_cmnd *cmd, *n; 456 struct scsi_cmnd *cmd, *n;
418 enum task_disposition res = TASK_IS_DONE; 457 enum task_disposition res = TASK_IS_DONE;
419 int tmf_resp, need_reset; 458 int tmf_resp, need_reset;
420 struct sas_internal *i = to_sas_internal(shost->transportt); 459 struct sas_internal *i = to_sas_internal(shost->transportt);
421 struct sas_phy *task_sas_phy = NULL; 460 unsigned long flags;
422 461 struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
423 spin_lock_irqsave(shost->host_lock, flags);
424 list_splice_init(&shost->eh_cmd_q, &error_q);
425 spin_unlock_irqrestore(shost->host_lock, flags);
426
427 SAS_DPRINTK("Enter %s\n", __FUNCTION__);
428 462
429 /* All tasks on this list were marked SAS_TASK_STATE_ABORTED
430 * by sas_scsi_timed_out() callback.
431 */
432Again: 463Again:
433 SAS_DPRINTK("going over list...\n"); 464 list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
434 list_for_each_entry_safe(cmd, n, &error_q, eh_entry) {
435 struct sas_task *task = TO_SAS_TASK(cmd); 465 struct sas_task *task = TO_SAS_TASK(cmd);
436 list_del_init(&cmd->eh_entry);
437 466
438 if (!task) { 467 if (!task)
439 SAS_DPRINTK("%s: taskless cmd?!\n", __FUNCTION__);
440 continue; 468 continue;
441 } 469
470 list_del_init(&cmd->eh_entry);
442 471
443 spin_lock_irqsave(&task->task_state_lock, flags); 472 spin_lock_irqsave(&task->task_state_lock, flags);
444 need_reset = task->task_state_flags & SAS_TASK_NEED_DEV_RESET; 473 need_reset = task->task_state_flags & SAS_TASK_NEED_DEV_RESET;
445 if (need_reset)
446 task_sas_phy = task->dev->port->phy;
447 spin_unlock_irqrestore(&task->task_state_lock, flags); 474 spin_unlock_irqrestore(&task->task_state_lock, flags);
448 475
449 SAS_DPRINTK("trying to find task 0x%p\n", task); 476 SAS_DPRINTK("trying to find task 0x%p\n", task);
@@ -457,14 +484,14 @@ Again:
457 task); 484 task);
458 task->task_done(task); 485 task->task_done(task);
459 if (need_reset) 486 if (need_reset)
460 eh_reset_phy_helper(task_sas_phy); 487 try_to_reset_cmd_device(shost, cmd);
461 continue; 488 continue;
462 case TASK_IS_ABORTED: 489 case TASK_IS_ABORTED:
463 SAS_DPRINTK("%s: task 0x%p is aborted\n", 490 SAS_DPRINTK("%s: task 0x%p is aborted\n",
464 __FUNCTION__, task); 491 __FUNCTION__, task);
465 task->task_done(task); 492 task->task_done(task);
466 if (need_reset) 493 if (need_reset)
467 eh_reset_phy_helper(task_sas_phy); 494 try_to_reset_cmd_device(shost, cmd);
468 continue; 495 continue;
469 case TASK_IS_AT_LU: 496 case TASK_IS_AT_LU:
470 SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); 497 SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
@@ -476,8 +503,8 @@ Again:
476 cmd->device->lun); 503 cmd->device->lun);
477 task->task_done(task); 504 task->task_done(task);
478 if (need_reset) 505 if (need_reset)
479 eh_reset_phy_helper(task_sas_phy); 506 try_to_reset_cmd_device(shost, cmd);
480 sas_scsi_clear_queue_lu(&error_q, cmd); 507 sas_scsi_clear_queue_lu(work_q, cmd);
481 goto Again; 508 goto Again;
482 } 509 }
483 /* fallthrough */ 510 /* fallthrough */
@@ -491,8 +518,8 @@ Again:
491 SAS_ADDR(task->dev->sas_addr)); 518 SAS_ADDR(task->dev->sas_addr));
492 task->task_done(task); 519 task->task_done(task);
493 if (need_reset) 520 if (need_reset)
494 eh_reset_phy_helper(task_sas_phy); 521 try_to_reset_cmd_device(shost, cmd);
495 sas_scsi_clear_queue_I_T(&error_q, task->dev); 522 sas_scsi_clear_queue_I_T(work_q, task->dev);
496 goto Again; 523 goto Again;
497 } 524 }
498 /* Hammer time :-) */ 525 /* Hammer time :-) */
@@ -506,8 +533,8 @@ Again:
506 "succeeded\n", port->id); 533 "succeeded\n", port->id);
507 task->task_done(task); 534 task->task_done(task);
508 if (need_reset) 535 if (need_reset)
509 eh_reset_phy_helper(task_sas_phy); 536 try_to_reset_cmd_device(shost, cmd);
510 sas_scsi_clear_queue_port(&error_q, 537 sas_scsi_clear_queue_port(work_q,
511 port); 538 port);
512 goto Again; 539 goto Again;
513 } 540 }
@@ -520,7 +547,7 @@ Again:
520 "succeeded\n"); 547 "succeeded\n");
521 task->task_done(task); 548 task->task_done(task);
522 if (need_reset) 549 if (need_reset)
523 eh_reset_phy_helper(task_sas_phy); 550 try_to_reset_cmd_device(shost, cmd);
524 goto out; 551 goto out;
525 } 552 }
526 } 553 }
@@ -535,21 +562,53 @@ Again:
535 562
536 task->task_done(task); 563 task->task_done(task);
537 if (need_reset) 564 if (need_reset)
538 eh_reset_phy_helper(task_sas_phy); 565 try_to_reset_cmd_device(shost, cmd);
539 goto clear_q; 566 goto clear_q;
540 } 567 }
541 } 568 }
542out: 569out:
543 scsi_eh_flush_done_q(&ha->eh_done_q); 570 return list_empty(work_q);
544 SAS_DPRINTK("--- Exit %s\n", __FUNCTION__);
545 return;
546clear_q: 571clear_q:
547 SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__); 572 SAS_DPRINTK("--- Exit %s -- clear_q\n", __FUNCTION__);
548 list_for_each_entry_safe(cmd, n, &error_q, eh_entry) { 573 list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
549 struct sas_task *task = TO_SAS_TASK(cmd); 574 struct sas_task *task = TO_SAS_TASK(cmd);
550 list_del_init(&cmd->eh_entry); 575 list_del_init(&cmd->eh_entry);
551 task->task_done(task); 576 task->task_done(task);
552 } 577 }
578 return list_empty(work_q);
579}
580
581void sas_scsi_recover_host(struct Scsi_Host *shost)
582{
583 struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
584 unsigned long flags;
585 LIST_HEAD(eh_work_q);
586
587 spin_lock_irqsave(shost->host_lock, flags);
588 list_splice_init(&shost->eh_cmd_q, &eh_work_q);
589 spin_unlock_irqrestore(shost->host_lock, flags);
590
591 SAS_DPRINTK("Enter %s\n", __FUNCTION__);
592 /*
593 * Deal with commands that still have SAS tasks (i.e. they didn't
594 * complete via the normal sas_task completion mechanism)
595 */
596 if (sas_eh_handle_sas_errors(shost, &eh_work_q, &ha->eh_done_q))
597 goto out;
598
599 /*
600 * Now deal with SCSI commands that completed ok but have an error
601 * code (and hopefully sense data) attached. This is roughly what
602 * scsi_unjam_host does, but we skip scsi_eh_abort_cmds because any
603 * command we see here has no sas_task and is thus unknown to the HA.
604 */
605 if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q))
606 scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q);
607
608out:
609 scsi_eh_flush_done_q(&ha->eh_done_q);
610 SAS_DPRINTK("--- Exit %s\n", __FUNCTION__);
611 return;
553} 612}
554 613
555enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) 614enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
@@ -914,3 +973,4 @@ EXPORT_SYMBOL_GPL(__sas_task_abort);
914EXPORT_SYMBOL_GPL(sas_task_abort); 973EXPORT_SYMBOL_GPL(sas_task_abort);
915EXPORT_SYMBOL_GPL(sas_phy_reset); 974EXPORT_SYMBOL_GPL(sas_phy_reset);
916EXPORT_SYMBOL_GPL(sas_phy_enable); 975EXPORT_SYMBOL_GPL(sas_phy_enable);
976EXPORT_SYMBOL_GPL(sas_eh_device_reset_handler);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index ca393929c10b..b200233cc6f4 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -660,5 +660,6 @@ void sas_init_dev(struct domain_device *);
660 660
661void sas_task_abort(struct sas_task *); 661void sas_task_abort(struct sas_task *);
662int __sas_task_abort(struct sas_task *); 662int __sas_task_abort(struct sas_task *);
663int sas_eh_device_reset_handler(struct scsi_cmnd *cmd);
663 664
664#endif /* _SASLIB_H_ */ 665#endif /* _SASLIB_H_ */