diff options
author | Stephen M. Cameron <scameron@beardog.cce.hp.com> | 2011-05-03 15:53:52 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2011-05-06 10:23:56 -0400 |
commit | 5afe278114a8dd9480813377c75b5e40a42c5066 (patch) | |
tree | 40f9546c8c0ecd6fcfb05d2174c336cc657b0d10 | |
parent | bf2e2e6b87ae38fab460a36abfe272d99ae8be49 (diff) |
cciss: do soft reset if hard reset is broken
On driver load, if reset_devices is set and the hard reset attempts
fail, bring the controller up to the point where it can accept a
command and send it a soft reset command. After the reset, undo
whatever driver initialization was done to get the controller to
that point, then redo the initialization from scratch.
This is to get kdump to work on all the "non-resettable" controllers
(except 64xx controllers, which can't be reset due to the potentially
shared cache module).
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r-- | drivers/block/cciss.c | 236 |
1 files changed, 221 insertions, 15 deletions
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index df58c59d9031..23b0ba49300a 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c | |||
@@ -2477,6 +2477,31 @@ static int deregister_disk(ctlr_info_t *h, int drv_index, | |||
2477 | return 0; | 2477 | return 0; |
2478 | } | 2478 | } |
2479 | 2479 | ||
2480 | static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr, | ||
2481 | u8 reset_type) | ||
2482 | { | ||
2483 | CommandList_struct *c; | ||
2484 | int return_status; | ||
2485 | |||
2486 | c = cmd_alloc(h); | ||
2487 | if (!c) | ||
2488 | return -ENOMEM; | ||
2489 | return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0, | ||
2490 | CTLR_LUNID, TYPE_MSG); | ||
2491 | c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */ | ||
2492 | if (return_status != IO_OK) { | ||
2493 | cmd_special_free(h, c); | ||
2494 | return return_status; | ||
2495 | } | ||
2496 | c->waiting = NULL; | ||
2497 | enqueue_cmd_and_start_io(h, c); | ||
2498 | /* Don't wait for completion, the reset won't complete. Don't free | ||
2499 | * the command either. This is the last command we will send before | ||
2500 | * re-initializing everything, so it doesn't matter and won't leak. | ||
2501 | */ | ||
2502 | return 0; | ||
2503 | } | ||
2504 | |||
2480 | static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, | 2505 | static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff, |
2481 | size_t size, __u8 page_code, unsigned char *scsi3addr, | 2506 | size_t size, __u8 page_code, unsigned char *scsi3addr, |
2482 | int cmd_type) | 2507 | int cmd_type) |
@@ -3463,6 +3488,63 @@ static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag) | |||
3463 | return next_command(h); | 3488 | return next_command(h); |
3464 | } | 3489 | } |
3465 | 3490 | ||
3491 | /* Some controllers, like p400, will give us one interrupt | ||
3492 | * after a soft reset, even if we turned interrupts off. | ||
3493 | * Only need to check for this in the cciss_xxx_discard_completions | ||
3494 | * functions. | ||
3495 | */ | ||
3496 | static int ignore_bogus_interrupt(ctlr_info_t *h) | ||
3497 | { | ||
3498 | if (likely(!reset_devices)) | ||
3499 | return 0; | ||
3500 | |||
3501 | if (likely(h->interrupts_enabled)) | ||
3502 | return 0; | ||
3503 | |||
3504 | dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled " | ||
3505 | "(known firmware bug.) Ignoring.\n"); | ||
3506 | |||
3507 | return 1; | ||
3508 | } | ||
3509 | |||
3510 | static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id) | ||
3511 | { | ||
3512 | ctlr_info_t *h = dev_id; | ||
3513 | unsigned long flags; | ||
3514 | u32 raw_tag; | ||
3515 | |||
3516 | if (ignore_bogus_interrupt(h)) | ||
3517 | return IRQ_NONE; | ||
3518 | |||
3519 | if (interrupt_not_for_us(h)) | ||
3520 | return IRQ_NONE; | ||
3521 | spin_lock_irqsave(&h->lock, flags); | ||
3522 | while (interrupt_pending(h)) { | ||
3523 | raw_tag = get_next_completion(h); | ||
3524 | while (raw_tag != FIFO_EMPTY) | ||
3525 | raw_tag = next_command(h); | ||
3526 | } | ||
3527 | spin_unlock_irqrestore(&h->lock, flags); | ||
3528 | return IRQ_HANDLED; | ||
3529 | } | ||
3530 | |||
3531 | static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id) | ||
3532 | { | ||
3533 | ctlr_info_t *h = dev_id; | ||
3534 | unsigned long flags; | ||
3535 | u32 raw_tag; | ||
3536 | |||
3537 | if (ignore_bogus_interrupt(h)) | ||
3538 | return IRQ_NONE; | ||
3539 | |||
3540 | spin_lock_irqsave(&h->lock, flags); | ||
3541 | raw_tag = get_next_completion(h); | ||
3542 | while (raw_tag != FIFO_EMPTY) | ||
3543 | raw_tag = next_command(h); | ||
3544 | spin_unlock_irqrestore(&h->lock, flags); | ||
3545 | return IRQ_HANDLED; | ||
3546 | } | ||
3547 | |||
3466 | static irqreturn_t do_cciss_intx(int irq, void *dev_id) | 3548 | static irqreturn_t do_cciss_intx(int irq, void *dev_id) |
3467 | { | 3549 | { |
3468 | ctlr_info_t *h = dev_id; | 3550 | ctlr_info_t *h = dev_id; |
@@ -4380,7 +4462,6 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u | |||
4380 | return 0; | 4462 | return 0; |
4381 | } | 4463 | } |
4382 | 4464 | ||
4383 | #define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) | ||
4384 | #define cciss_noop(p) cciss_message(p, 3, 0) | 4465 | #define cciss_noop(p) cciss_message(p, 3, 0) |
4385 | 4466 | ||
4386 | static int cciss_controller_hard_reset(struct pci_dev *pdev, | 4467 | static int cciss_controller_hard_reset(struct pci_dev *pdev, |
@@ -4591,13 +4672,17 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
4591 | /* Wait for board to become not ready, then ready. */ | 4672 | /* Wait for board to become not ready, then ready. */ |
4592 | dev_info(&pdev->dev, "Waiting for board to reset.\n"); | 4673 | dev_info(&pdev->dev, "Waiting for board to reset.\n"); |
4593 | rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); | 4674 | rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); |
4594 | if (rc) /* Don't bail, might be E500, etc. which can't be reset */ | 4675 | if (rc) { |
4595 | dev_warn(&pdev->dev, | 4676 | dev_warn(&pdev->dev, "Failed waiting for board to hard reset." |
4596 | "failed waiting for board to reset\n"); | 4677 | " Will try soft reset.\n"); |
4678 | rc = -ENOTSUPP; /* Not expected, but try soft reset later */ | ||
4679 | goto unmap_cfgtable; | ||
4680 | } | ||
4597 | rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY); | 4681 | rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY); |
4598 | if (rc) { | 4682 | if (rc) { |
4599 | dev_warn(&pdev->dev, | 4683 | dev_warn(&pdev->dev, |
4600 | "failed waiting for board to become ready\n"); | 4684 | "failed waiting for board to become ready " |
4685 | "after hard reset\n"); | ||
4601 | goto unmap_cfgtable; | 4686 | goto unmap_cfgtable; |
4602 | } | 4687 | } |
4603 | 4688 | ||
@@ -4605,16 +4690,13 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
4605 | if (rc < 0) | 4690 | if (rc < 0) |
4606 | goto unmap_cfgtable; | 4691 | goto unmap_cfgtable; |
4607 | if (rc) { | 4692 | if (rc) { |
4608 | dev_warn(&pdev->dev, "Unable to successfully reset controller," | 4693 | dev_warn(&pdev->dev, "Unable to successfully hard reset " |
4609 | " Ignoring controller.\n"); | 4694 | "controller. Will try soft reset.\n"); |
4610 | rc = -ENODEV; | 4695 | rc = -ENOTSUPP; /* Not expected, but try soft reset later */ |
4611 | goto unmap_cfgtable; | ||
4612 | } else { | 4696 | } else { |
4613 | dev_info(&pdev->dev, "board ready.\n"); | 4697 | dev_info(&pdev->dev, "Board ready after hard reset.\n"); |
4614 | } | 4698 | } |
4615 | 4699 | ||
4616 | dev_info(&pdev->dev, "board ready.\n"); | ||
4617 | |||
4618 | unmap_cfgtable: | 4700 | unmap_cfgtable: |
4619 | iounmap(cfgtable); | 4701 | iounmap(cfgtable); |
4620 | 4702 | ||
@@ -4639,7 +4721,7 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev) | |||
4639 | * due to concerns about shared bbwc between 6402/6404 pair. | 4721 | * due to concerns about shared bbwc between 6402/6404 pair. |
4640 | */ | 4722 | */ |
4641 | if (rc == -ENOTSUPP) | 4723 | if (rc == -ENOTSUPP) |
4642 | return 0; /* just try to do the kdump anyhow. */ | 4724 | return rc; /* just try to do the kdump anyhow. */ |
4643 | if (rc) | 4725 | if (rc) |
4644 | return -ENODEV; | 4726 | return -ENODEV; |
4645 | 4727 | ||
@@ -4745,6 +4827,60 @@ static int cciss_request_irq(ctlr_info_t *h, | |||
4745 | return -1; | 4827 | return -1; |
4746 | } | 4828 | } |
4747 | 4829 | ||
4830 | static int __devinit cciss_kdump_soft_reset(ctlr_info_t *h) | ||
4831 | { | ||
4832 | if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) { | ||
4833 | dev_warn(&h->pdev->dev, "Resetting array controller failed.\n"); | ||
4834 | return -EIO; | ||
4835 | } | ||
4836 | |||
4837 | dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n"); | ||
4838 | if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) { | ||
4839 | dev_warn(&h->pdev->dev, "Soft reset had no effect.\n"); | ||
4840 | return -1; | ||
4841 | } | ||
4842 | |||
4843 | dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n"); | ||
4844 | if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) { | ||
4845 | dev_warn(&h->pdev->dev, "Board failed to become ready " | ||
4846 | "after soft reset.\n"); | ||
4847 | return -1; | ||
4848 | } | ||
4849 | |||
4850 | return 0; | ||
4851 | } | ||
4852 | |||
4853 | static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h) | ||
4854 | { | ||
4855 | int ctlr = h->ctlr; | ||
4856 | |||
4857 | free_irq(h->intr[PERF_MODE_INT], h); | ||
4858 | #ifdef CONFIG_PCI_MSI | ||
4859 | if (h->msix_vector) | ||
4860 | pci_disable_msix(h->pdev); | ||
4861 | else if (h->msi_vector) | ||
4862 | pci_disable_msi(h->pdev); | ||
4863 | #endif /* CONFIG_PCI_MSI */ | ||
4864 | cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); | ||
4865 | cciss_free_scatterlists(h); | ||
4866 | cciss_free_cmd_pool(h); | ||
4867 | kfree(h->blockFetchTable); | ||
4868 | if (h->reply_pool) | ||
4869 | pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64), | ||
4870 | h->reply_pool, h->reply_pool_dhandle); | ||
4871 | if (h->transtable) | ||
4872 | iounmap(h->transtable); | ||
4873 | if (h->cfgtable) | ||
4874 | iounmap(h->cfgtable); | ||
4875 | if (h->vaddr) | ||
4876 | iounmap(h->vaddr); | ||
4877 | unregister_blkdev(h->major, h->devname); | ||
4878 | cciss_destroy_hba_sysfs_entry(h); | ||
4879 | pci_release_regions(h->pdev); | ||
4880 | kfree(h); | ||
4881 | hba[ctlr] = NULL; | ||
4882 | } | ||
4883 | |||
4748 | /* | 4884 | /* |
4749 | * This is it. Find all the controllers and register them. I really hate | 4885 | * This is it. Find all the controllers and register them. I really hate |
4750 | * stealing all these major device numbers. | 4886 | * stealing all these major device numbers. |
@@ -4756,13 +4892,27 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, | |||
4756 | int i; | 4892 | int i; |
4757 | int j = 0; | 4893 | int j = 0; |
4758 | int rc; | 4894 | int rc; |
4895 | int try_soft_reset = 0; | ||
4759 | int dac, return_code; | 4896 | int dac, return_code; |
4760 | InquiryData_struct *inq_buff; | 4897 | InquiryData_struct *inq_buff; |
4761 | ctlr_info_t *h; | 4898 | ctlr_info_t *h; |
4899 | unsigned long flags; | ||
4762 | 4900 | ||
4763 | rc = cciss_init_reset_devices(pdev); | 4901 | rc = cciss_init_reset_devices(pdev); |
4764 | if (rc) | 4902 | if (rc) { |
4765 | return rc; | 4903 | if (rc != -ENOTSUPP) |
4904 | return rc; | ||
4905 | /* If the reset fails in a particular way (it has no way to do | ||
4906 | * a proper hard reset, so returns -ENOTSUPP) we can try to do | ||
4907 | * a soft reset once we get the controller configured up to the | ||
4908 | * point that it can accept a command. | ||
4909 | */ | ||
4910 | try_soft_reset = 1; | ||
4911 | rc = 0; | ||
4912 | } | ||
4913 | |||
4914 | reinit_after_soft_reset: | ||
4915 | |||
4766 | i = alloc_cciss_hba(pdev); | 4916 | i = alloc_cciss_hba(pdev); |
4767 | if (i < 0) | 4917 | if (i < 0) |
4768 | return -1; | 4918 | return -1; |
@@ -4852,6 +5002,62 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, | |||
4852 | h->gendisk[j] = NULL; | 5002 | h->gendisk[j] = NULL; |
4853 | } | 5003 | } |
4854 | 5004 | ||
5005 | /* At this point, the controller is ready to take commands. | ||
5006 | * Now, if reset_devices and the hard reset didn't work, try | ||
5007 | * the soft reset and see if that works. | ||
5008 | */ | ||
5009 | if (try_soft_reset) { | ||
5010 | |||
5011 | /* This is kind of gross. We may or may not get a completion | ||
5012 | * from the soft reset command, and if we do, then the value | ||
5013 | * from the fifo may or may not be valid. So, we wait 10 secs | ||
5014 | * after the reset throwing away any completions we get during | ||
5015 | * that time. Unregister the interrupt handler and register | ||
5016 | * fake ones to scoop up any residual completions. | ||
5017 | */ | ||
5018 | spin_lock_irqsave(&h->lock, flags); | ||
5019 | h->access.set_intr_mask(h, CCISS_INTR_OFF); | ||
5020 | spin_unlock_irqrestore(&h->lock, flags); | ||
5021 | free_irq(h->intr[PERF_MODE_INT], h); | ||
5022 | rc = cciss_request_irq(h, cciss_msix_discard_completions, | ||
5023 | cciss_intx_discard_completions); | ||
5024 | if (rc) { | ||
5025 | dev_warn(&h->pdev->dev, "Failed to request_irq after " | ||
5026 | "soft reset.\n"); | ||
5027 | goto clean4; | ||
5028 | } | ||
5029 | |||
5030 | rc = cciss_kdump_soft_reset(h); | ||
5031 | if (rc) { | ||
5032 | dev_warn(&h->pdev->dev, "Soft reset failed.\n"); | ||
5033 | goto clean4; | ||
5034 | } | ||
5035 | |||
5036 | dev_info(&h->pdev->dev, "Board READY.\n"); | ||
5037 | dev_info(&h->pdev->dev, | ||
5038 | "Waiting for stale completions to drain.\n"); | ||
5039 | h->access.set_intr_mask(h, CCISS_INTR_ON); | ||
5040 | msleep(10000); | ||
5041 | h->access.set_intr_mask(h, CCISS_INTR_OFF); | ||
5042 | |||
5043 | rc = controller_reset_failed(h->cfgtable); | ||
5044 | if (rc) | ||
5045 | dev_info(&h->pdev->dev, | ||
5046 | "Soft reset appears to have failed.\n"); | ||
5047 | |||
5048 | /* since the controller's reset, we have to go back and re-init | ||
5049 | * everything. Easiest to just forget what we've done and do it | ||
5050 | * all over again. | ||
5051 | */ | ||
5052 | cciss_undo_allocations_after_kdump_soft_reset(h); | ||
5053 | try_soft_reset = 0; | ||
5054 | if (rc) | ||
5055 | /* don't go to clean4, we already unallocated */ | ||
5056 | return -ENODEV; | ||
5057 | |||
5058 | goto reinit_after_soft_reset; | ||
5059 | } | ||
5060 | |||
4855 | cciss_scsi_setup(h); | 5061 | cciss_scsi_setup(h); |
4856 | 5062 | ||
4857 | /* Turn the interrupts on so we can service requests */ | 5063 | /* Turn the interrupts on so we can service requests */ |