aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen M. Cameron <scameron@beardog.cce.hp.com>2011-05-03 15:53:52 -0400
committerJens Axboe <jaxboe@fusionio.com>2011-05-06 10:23:56 -0400
commit5afe278114a8dd9480813377c75b5e40a42c5066 (patch)
tree40f9546c8c0ecd6fcfb05d2174c336cc657b0d10
parentbf2e2e6b87ae38fab460a36abfe272d99ae8be49 (diff)
cciss: do soft reset if hard reset is broken
On driver load, if reset_devices is set and the hard reset attempts fail, try to bring the controller up to the point that a command can be sent, and send it a soft reset command. After the reset, undo whatever driver initialization was done to get the controller to the point where it could take a command, and re-do that initialization. This is to get kdump to work on all the "non-resettable" controllers (except 64xx controllers, which can't be reset due to the potentially shared cache module). Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r--drivers/block/cciss.c236
1 files changed, 221 insertions, 15 deletions
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index df58c59d9031..23b0ba49300a 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2477,6 +2477,31 @@ static int deregister_disk(ctlr_info_t *h, int drv_index,
2477 return 0; 2477 return 0;
2478} 2478}
2479 2479
2480static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr,
2481 u8 reset_type)
2482{
2483 CommandList_struct *c;
2484 int return_status;
2485
2486 c = cmd_alloc(h);
2487 if (!c)
2488 return -ENOMEM;
2489 return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0,
2490 CTLR_LUNID, TYPE_MSG);
2491 c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */
2492 if (return_status != IO_OK) {
2493 cmd_special_free(h, c);
2494 return return_status;
2495 }
2496 c->waiting = NULL;
2497 enqueue_cmd_and_start_io(h, c);
2498 /* Don't wait for completion, the reset won't complete. Don't free
2499 * the command either. This is the last command we will send before
2500 * re-initializing everything, so it doesn't matter and won't leak.
2501 */
2502 return 0;
2503}
2504
2480static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, 2505static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
2481 size_t size, __u8 page_code, unsigned char *scsi3addr, 2506 size_t size, __u8 page_code, unsigned char *scsi3addr,
2482 int cmd_type) 2507 int cmd_type)
@@ -3463,6 +3488,63 @@ static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
3463 return next_command(h); 3488 return next_command(h);
3464} 3489}
3465 3490
3491/* Some controllers, like p400, will give us one interrupt
3492 * after a soft reset, even if we turned interrupts off.
3493 * Only need to check for this in the cciss_xxx_discard_completions
3494 * functions.
3495 */
3496static int ignore_bogus_interrupt(ctlr_info_t *h)
3497{
3498 if (likely(!reset_devices))
3499 return 0;
3500
3501 if (likely(h->interrupts_enabled))
3502 return 0;
3503
3504 dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled "
3505 "(known firmware bug.) Ignoring.\n");
3506
3507 return 1;
3508}
3509
3510static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id)
3511{
3512 ctlr_info_t *h = dev_id;
3513 unsigned long flags;
3514 u32 raw_tag;
3515
3516 if (ignore_bogus_interrupt(h))
3517 return IRQ_NONE;
3518
3519 if (interrupt_not_for_us(h))
3520 return IRQ_NONE;
3521 spin_lock_irqsave(&h->lock, flags);
3522 while (interrupt_pending(h)) {
3523 raw_tag = get_next_completion(h);
3524 while (raw_tag != FIFO_EMPTY)
3525 raw_tag = next_command(h);
3526 }
3527 spin_unlock_irqrestore(&h->lock, flags);
3528 return IRQ_HANDLED;
3529}
3530
3531static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id)
3532{
3533 ctlr_info_t *h = dev_id;
3534 unsigned long flags;
3535 u32 raw_tag;
3536
3537 if (ignore_bogus_interrupt(h))
3538 return IRQ_NONE;
3539
3540 spin_lock_irqsave(&h->lock, flags);
3541 raw_tag = get_next_completion(h);
3542 while (raw_tag != FIFO_EMPTY)
3543 raw_tag = next_command(h);
3544 spin_unlock_irqrestore(&h->lock, flags);
3545 return IRQ_HANDLED;
3546}
3547
3466static irqreturn_t do_cciss_intx(int irq, void *dev_id) 3548static irqreturn_t do_cciss_intx(int irq, void *dev_id)
3467{ 3549{
3468 ctlr_info_t *h = dev_id; 3550 ctlr_info_t *h = dev_id;
@@ -4380,7 +4462,6 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
4380 return 0; 4462 return 0;
4381} 4463}
4382 4464
4383#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
4384#define cciss_noop(p) cciss_message(p, 3, 0) 4465#define cciss_noop(p) cciss_message(p, 3, 0)
4385 4466
4386static int cciss_controller_hard_reset(struct pci_dev *pdev, 4467static int cciss_controller_hard_reset(struct pci_dev *pdev,
@@ -4591,13 +4672,17 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
4591 /* Wait for board to become not ready, then ready. */ 4672 /* Wait for board to become not ready, then ready. */
4592 dev_info(&pdev->dev, "Waiting for board to reset.\n"); 4673 dev_info(&pdev->dev, "Waiting for board to reset.\n");
4593 rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); 4674 rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY);
4594 if (rc) /* Don't bail, might be E500, etc. which can't be reset */ 4675 if (rc) {
4595 dev_warn(&pdev->dev, 4676 dev_warn(&pdev->dev, "Failed waiting for board to hard reset."
4596 "failed waiting for board to reset\n"); 4677 " Will try soft reset.\n");
4678 rc = -ENOTSUPP; /* Not expected, but try soft reset later */
4679 goto unmap_cfgtable;
4680 }
4597 rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY); 4681 rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY);
4598 if (rc) { 4682 if (rc) {
4599 dev_warn(&pdev->dev, 4683 dev_warn(&pdev->dev,
4600 "failed waiting for board to become ready\n"); 4684 "failed waiting for board to become ready "
4685 "after hard reset\n");
4601 goto unmap_cfgtable; 4686 goto unmap_cfgtable;
4602 } 4687 }
4603 4688
@@ -4605,16 +4690,13 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
4605 if (rc < 0) 4690 if (rc < 0)
4606 goto unmap_cfgtable; 4691 goto unmap_cfgtable;
4607 if (rc) { 4692 if (rc) {
4608 dev_warn(&pdev->dev, "Unable to successfully reset controller," 4693 dev_warn(&pdev->dev, "Unable to successfully hard reset "
4609 " Ignoring controller.\n"); 4694 "controller. Will try soft reset.\n");
4610 rc = -ENODEV; 4695 rc = -ENOTSUPP; /* Not expected, but try soft reset later */
4611 goto unmap_cfgtable;
4612 } else { 4696 } else {
4613 dev_info(&pdev->dev, "board ready.\n"); 4697 dev_info(&pdev->dev, "Board ready after hard reset.\n");
4614 } 4698 }
4615 4699
4616 dev_info(&pdev->dev, "board ready.\n");
4617
4618unmap_cfgtable: 4700unmap_cfgtable:
4619 iounmap(cfgtable); 4701 iounmap(cfgtable);
4620 4702
@@ -4639,7 +4721,7 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
4639 * due to concerns about shared bbwc between 6402/6404 pair. 4721 * due to concerns about shared bbwc between 6402/6404 pair.
4640 */ 4722 */
4641 if (rc == -ENOTSUPP) 4723 if (rc == -ENOTSUPP)
4642 return 0; /* just try to do the kdump anyhow. */ 4724 return rc; /* just try to do the kdump anyhow. */
4643 if (rc) 4725 if (rc)
4644 return -ENODEV; 4726 return -ENODEV;
4645 4727
@@ -4745,6 +4827,60 @@ static int cciss_request_irq(ctlr_info_t *h,
4745 return -1; 4827 return -1;
4746} 4828}
4747 4829
4830static int __devinit cciss_kdump_soft_reset(ctlr_info_t *h)
4831{
4832 if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) {
4833 dev_warn(&h->pdev->dev, "Resetting array controller failed.\n");
4834 return -EIO;
4835 }
4836
4837 dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n");
4838 if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) {
4839 dev_warn(&h->pdev->dev, "Soft reset had no effect.\n");
4840 return -1;
4841 }
4842
4843 dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n");
4844 if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) {
4845 dev_warn(&h->pdev->dev, "Board failed to become ready "
4846 "after soft reset.\n");
4847 return -1;
4848 }
4849
4850 return 0;
4851}
4852
4853static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
4854{
4855 int ctlr = h->ctlr;
4856
4857 free_irq(h->intr[PERF_MODE_INT], h);
4858#ifdef CONFIG_PCI_MSI
4859 if (h->msix_vector)
4860 pci_disable_msix(h->pdev);
4861 else if (h->msi_vector)
4862 pci_disable_msi(h->pdev);
4863#endif /* CONFIG_PCI_MSI */
4864 cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
4865 cciss_free_scatterlists(h);
4866 cciss_free_cmd_pool(h);
4867 kfree(h->blockFetchTable);
4868 if (h->reply_pool)
4869 pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
4870 h->reply_pool, h->reply_pool_dhandle);
4871 if (h->transtable)
4872 iounmap(h->transtable);
4873 if (h->cfgtable)
4874 iounmap(h->cfgtable);
4875 if (h->vaddr)
4876 iounmap(h->vaddr);
4877 unregister_blkdev(h->major, h->devname);
4878 cciss_destroy_hba_sysfs_entry(h);
4879 pci_release_regions(h->pdev);
4880 kfree(h);
4881 hba[ctlr] = NULL;
4882}
4883
4748/* 4884/*
4749 * This is it. Find all the controllers and register them. I really hate 4885 * This is it. Find all the controllers and register them. I really hate
4750 * stealing all these major device numbers. 4886 * stealing all these major device numbers.
@@ -4756,13 +4892,27 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
4756 int i; 4892 int i;
4757 int j = 0; 4893 int j = 0;
4758 int rc; 4894 int rc;
4895 int try_soft_reset = 0;
4759 int dac, return_code; 4896 int dac, return_code;
4760 InquiryData_struct *inq_buff; 4897 InquiryData_struct *inq_buff;
4761 ctlr_info_t *h; 4898 ctlr_info_t *h;
4899 unsigned long flags;
4762 4900
4763 rc = cciss_init_reset_devices(pdev); 4901 rc = cciss_init_reset_devices(pdev);
4764 if (rc) 4902 if (rc) {
4765 return rc; 4903 if (rc != -ENOTSUPP)
4904 return rc;
4905 /* If the reset fails in a particular way (it has no way to do
4906 * a proper hard reset, so returns -ENOTSUPP) we can try to do
4907 * a soft reset once we get the controller configured up to the
4908 * point that it can accept a command.
4909 */
4910 try_soft_reset = 1;
4911 rc = 0;
4912 }
4913
4914reinit_after_soft_reset:
4915
4766 i = alloc_cciss_hba(pdev); 4916 i = alloc_cciss_hba(pdev);
4767 if (i < 0) 4917 if (i < 0)
4768 return -1; 4918 return -1;
@@ -4852,6 +5002,62 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
4852 h->gendisk[j] = NULL; 5002 h->gendisk[j] = NULL;
4853 } 5003 }
4854 5004
5005 /* At this point, the controller is ready to take commands.
5006 * Now, if reset_devices and the hard reset didn't work, try
5007 * the soft reset and see if that works.
5008 */
5009 if (try_soft_reset) {
5010
5011 /* This is kind of gross. We may or may not get a completion
5012 * from the soft reset command, and if we do, then the value
5013 * from the fifo may or may not be valid. So, we wait 10 secs
5014 * after the reset throwing away any completions we get during
5015 * that time. Unregister the interrupt handler and register
5016 * fake ones to scoop up any residual completions.
5017 */
5018 spin_lock_irqsave(&h->lock, flags);
5019 h->access.set_intr_mask(h, CCISS_INTR_OFF);
5020 spin_unlock_irqrestore(&h->lock, flags);
5021 free_irq(h->intr[PERF_MODE_INT], h);
5022 rc = cciss_request_irq(h, cciss_msix_discard_completions,
5023 cciss_intx_discard_completions);
5024 if (rc) {
5025 dev_warn(&h->pdev->dev, "Failed to request_irq after "
5026 "soft reset.\n");
5027 goto clean4;
5028 }
5029
5030 rc = cciss_kdump_soft_reset(h);
5031 if (rc) {
5032 dev_warn(&h->pdev->dev, "Soft reset failed.\n");
5033 goto clean4;
5034 }
5035
5036 dev_info(&h->pdev->dev, "Board READY.\n");
5037 dev_info(&h->pdev->dev,
5038 "Waiting for stale completions to drain.\n");
5039 h->access.set_intr_mask(h, CCISS_INTR_ON);
5040 msleep(10000);
5041 h->access.set_intr_mask(h, CCISS_INTR_OFF);
5042
5043 rc = controller_reset_failed(h->cfgtable);
5044 if (rc)
5045 dev_info(&h->pdev->dev,
5046 "Soft reset appears to have failed.\n");
5047
5048 /* since the controller's reset, we have to go back and re-init
5049 * everything. Easiest to just forget what we've done and do it
5050 * all over again.
5051 */
5052 cciss_undo_allocations_after_kdump_soft_reset(h);
5053 try_soft_reset = 0;
5054 if (rc)
5055 /* don't go to clean4, we already unallocated */
5056 return -ENODEV;
5057
5058 goto reinit_after_soft_reset;
5059 }
5060
4855 cciss_scsi_setup(h); 5061 cciss_scsi_setup(h);
4856 5062
4857 /* Turn the interrupts on so we can service requests */ 5063 /* Turn the interrupts on so we can service requests */