diff options
author | Stephen M. Cameron <scameron@beardog.cce.hp.com> | 2011-05-03 15:59:51 -0400 |
---|---|---|
committer | James Bottomley <jbottomley@parallels.com> | 2011-05-17 03:07:28 -0400 |
commit | 64670ac8702ec37a00ad6e479f3cacbde0fd4efa (patch) | |
tree | 6b116d86149c21b24386bf34537d672ba1550fb7 /drivers/scsi/hpsa.c | |
parent | cf0b08d0cd87ada9d284925834d08fb8026da888 (diff) |
[SCSI] hpsa: do soft reset if hard reset is broken
On driver load, if reset_devices is set and the hard reset
attempts fail, bring the controller up to the point where it can
accept a command and send it a soft reset command. After the
reset, undo whatever driver initialization was done to get the
controller to that point, then redo that initialization from scratch.
This is to get kdump to work on all the "non-resettable" controllers
(except 64xx controllers, which can't be reset due to the potentially
shared cache module).
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: James Bottomley <jbottomley@parallels.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/hpsa.c')
-rw-r--r-- | drivers/scsi/hpsa.c | 226 |
1 files changed, 212 insertions, 14 deletions
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index c096cda3a6f4..6fe77d0575ca 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c | |||
@@ -2743,6 +2743,26 @@ static int hpsa_ioctl(struct scsi_device *dev, int cmd, void *arg) | |||
2743 | } | 2743 | } |
2744 | } | 2744 | } |
2745 | 2745 | ||
2746 | static int __devinit hpsa_send_host_reset(struct ctlr_info *h, | ||
2747 | unsigned char *scsi3addr, u8 reset_type) | ||
2748 | { | ||
2749 | struct CommandList *c; | ||
2750 | |||
2751 | c = cmd_alloc(h); | ||
2752 | if (!c) | ||
2753 | return -ENOMEM; | ||
2754 | fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0, | ||
2755 | RAID_CTLR_LUNID, TYPE_MSG); | ||
2756 | c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */ | ||
2757 | c->waiting = NULL; | ||
2758 | enqueue_cmd_and_start_io(h, c); | ||
2759 | /* Don't wait for completion, the reset won't complete. Don't free | ||
2760 | * the command either. This is the last command we will send before | ||
2761 | * re-initializing everything, so it doesn't matter and won't leak. | ||
2762 | */ | ||
2763 | return 0; | ||
2764 | } | ||
2765 | |||
2746 | static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, | 2766 | static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, |
2747 | void *buff, size_t size, u8 page_code, unsigned char *scsi3addr, | 2767 | void *buff, size_t size, u8 page_code, unsigned char *scsi3addr, |
2748 | int cmd_type) | 2768 | int cmd_type) |
@@ -2820,7 +2840,8 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, | |||
2820 | c->Request.Type.Attribute = ATTR_SIMPLE; | 2840 | c->Request.Type.Attribute = ATTR_SIMPLE; |
2821 | c->Request.Type.Direction = XFER_NONE; | 2841 | c->Request.Type.Direction = XFER_NONE; |
2822 | c->Request.Timeout = 0; /* Don't time out */ | 2842 | c->Request.Timeout = 0; /* Don't time out */ |
2823 | c->Request.CDB[0] = 0x01; /* RESET_MSG is 0x01 */ | 2843 | memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB)); |
2844 | c->Request.CDB[0] = cmd; | ||
2824 | c->Request.CDB[1] = 0x03; /* Reset target above */ | 2845 | c->Request.CDB[1] = 0x03; /* Reset target above */ |
2825 | /* If bytes 4-7 are zero, it means reset the */ | 2846 | /* If bytes 4-7 are zero, it means reset the */ |
2826 | /* LunID device */ | 2847 | /* LunID device */ |
@@ -2986,6 +3007,63 @@ static inline u32 process_nonindexed_cmd(struct ctlr_info *h, | |||
2986 | return next_command(h); | 3007 | return next_command(h); |
2987 | } | 3008 | } |
2988 | 3009 | ||
3010 | /* Some controllers, like p400, will give us one interrupt | ||
3011 | * after a soft reset, even if we turned interrupts off. | ||
3012 | * Only need to check for this in the hpsa_xxx_discard_completions | ||
3013 | * functions. | ||
3014 | */ | ||
3015 | static int ignore_bogus_interrupt(struct ctlr_info *h) | ||
3016 | { | ||
3017 | if (likely(!reset_devices)) | ||
3018 | return 0; | ||
3019 | |||
3020 | if (likely(h->interrupts_enabled)) | ||
3021 | return 0; | ||
3022 | |||
3023 | dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled " | ||
3024 | "(known firmware bug.) Ignoring.\n"); | ||
3025 | |||
3026 | return 1; | ||
3027 | } | ||
3028 | |||
3029 | static irqreturn_t hpsa_intx_discard_completions(int irq, void *dev_id) | ||
3030 | { | ||
3031 | struct ctlr_info *h = dev_id; | ||
3032 | unsigned long flags; | ||
3033 | u32 raw_tag; | ||
3034 | |||
3035 | if (ignore_bogus_interrupt(h)) | ||
3036 | return IRQ_NONE; | ||
3037 | |||
3038 | if (interrupt_not_for_us(h)) | ||
3039 | return IRQ_NONE; | ||
3040 | spin_lock_irqsave(&h->lock, flags); | ||
3041 | while (interrupt_pending(h)) { | ||
3042 | raw_tag = get_next_completion(h); | ||
3043 | while (raw_tag != FIFO_EMPTY) | ||
3044 | raw_tag = next_command(h); | ||
3045 | } | ||
3046 | spin_unlock_irqrestore(&h->lock, flags); | ||
3047 | return IRQ_HANDLED; | ||
3048 | } | ||
3049 | |||
3050 | static irqreturn_t hpsa_msix_discard_completions(int irq, void *dev_id) | ||
3051 | { | ||
3052 | struct ctlr_info *h = dev_id; | ||
3053 | unsigned long flags; | ||
3054 | u32 raw_tag; | ||
3055 | |||
3056 | if (ignore_bogus_interrupt(h)) | ||
3057 | return IRQ_NONE; | ||
3058 | |||
3059 | spin_lock_irqsave(&h->lock, flags); | ||
3060 | raw_tag = get_next_completion(h); | ||
3061 | while (raw_tag != FIFO_EMPTY) | ||
3062 | raw_tag = next_command(h); | ||
3063 | spin_unlock_irqrestore(&h->lock, flags); | ||
3064 | return IRQ_HANDLED; | ||
3065 | } | ||
3066 | |||
2989 | static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id) | 3067 | static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id) |
2990 | { | 3068 | { |
2991 | struct ctlr_info *h = dev_id; | 3069 | struct ctlr_info *h = dev_id; |
@@ -3124,7 +3202,6 @@ static __devinit int hpsa_message(struct pci_dev *pdev, unsigned char opcode, | |||
3124 | return 0; | 3202 | return 0; |
3125 | } | 3203 | } |
3126 | 3204 | ||
3127 | #define hpsa_soft_reset_controller(p) hpsa_message(p, 1, 0) | ||
3128 | #define hpsa_noop(p) hpsa_message(p, 3, 0) | 3205 | #define hpsa_noop(p) hpsa_message(p, 3, 0) |
3129 | 3206 | ||
3130 | static int hpsa_controller_hard_reset(struct pci_dev *pdev, | 3207 | static int hpsa_controller_hard_reset(struct pci_dev *pdev, |
@@ -3320,7 +3397,7 @@ static __devinit int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
3320 | "'Bit 2 doorbell reset' is " | 3397 | "'Bit 2 doorbell reset' is " |
3321 | "supported, but not 'bit 5 doorbell reset'. " | 3398 | "supported, but not 'bit 5 doorbell reset'. " |
3322 | "Firmware update is recommended.\n"); | 3399 | "Firmware update is recommended.\n"); |
3323 | rc = -ENODEV; | 3400 | rc = -ENOTSUPP; /* try soft reset */ |
3324 | goto unmap_cfgtable; | 3401 | goto unmap_cfgtable; |
3325 | } | 3402 | } |
3326 | } | 3403 | } |
@@ -3344,13 +3421,18 @@ static __devinit int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
3344 | /* Wait for board to become not ready, then ready. */ | 3421 | /* Wait for board to become not ready, then ready. */ |
3345 | dev_info(&pdev->dev, "Waiting for board to reset.\n"); | 3422 | dev_info(&pdev->dev, "Waiting for board to reset.\n"); |
3346 | rc = hpsa_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); | 3423 | rc = hpsa_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY); |
3347 | if (rc) | 3424 | if (rc) { |
3348 | dev_warn(&pdev->dev, | 3425 | dev_warn(&pdev->dev, |
3349 | "failed waiting for board to reset\n"); | 3426 | "failed waiting for board to reset." |
3427 | " Will try soft reset.\n"); | ||
3428 | rc = -ENOTSUPP; /* Not expected, but try soft reset later */ | ||
3429 | goto unmap_cfgtable; | ||
3430 | } | ||
3350 | rc = hpsa_wait_for_board_state(pdev, vaddr, BOARD_READY); | 3431 | rc = hpsa_wait_for_board_state(pdev, vaddr, BOARD_READY); |
3351 | if (rc) { | 3432 | if (rc) { |
3352 | dev_warn(&pdev->dev, | 3433 | dev_warn(&pdev->dev, |
3353 | "failed waiting for board to become ready\n"); | 3434 | "failed waiting for board to become ready " |
3435 | "after hard reset\n"); | ||
3354 | goto unmap_cfgtable; | 3436 | goto unmap_cfgtable; |
3355 | } | 3437 | } |
3356 | 3438 | ||
@@ -3358,11 +3440,11 @@ static __devinit int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev) | |||
3358 | if (rc < 0) | 3440 | if (rc < 0) |
3359 | goto unmap_cfgtable; | 3441 | goto unmap_cfgtable; |
3360 | if (rc) { | 3442 | if (rc) { |
3361 | dev_warn(&pdev->dev, "Unable to successfully reset controller," | 3443 | dev_warn(&pdev->dev, "Unable to successfully reset " |
3362 | " Ignoring controller.\n"); | 3444 | "controller. Will try soft reset.\n"); |
3363 | rc = -ENODEV; | 3445 | rc = -ENOTSUPP; |
3364 | } else { | 3446 | } else { |
3365 | dev_info(&pdev->dev, "board ready.\n"); | 3447 | dev_info(&pdev->dev, "board ready after hard reset.\n"); |
3366 | } | 3448 | } |
3367 | 3449 | ||
3368 | unmap_cfgtable: | 3450 | unmap_cfgtable: |
@@ -3840,7 +3922,7 @@ static __devinit int hpsa_init_reset_devices(struct pci_dev *pdev) | |||
3840 | * due to concerns about shared bbwc between 6402/6404 pair. | 3922 | * due to concerns about shared bbwc between 6402/6404 pair. |
3841 | */ | 3923 | */ |
3842 | if (rc == -ENOTSUPP) | 3924 | if (rc == -ENOTSUPP) |
3843 | return 0; /* just try to do the kdump anyhow. */ | 3925 | return rc; /* just try to do the kdump anyhow. */ |
3844 | if (rc) | 3926 | if (rc) |
3845 | return -ENODEV; | 3927 | return -ENODEV; |
3846 | 3928 | ||
@@ -3910,18 +3992,79 @@ static int hpsa_request_irq(struct ctlr_info *h, | |||
3910 | return 0; | 3992 | return 0; |
3911 | } | 3993 | } |
3912 | 3994 | ||
3995 | static int __devinit hpsa_kdump_soft_reset(struct ctlr_info *h) | ||
3996 | { | ||
3997 | if (hpsa_send_host_reset(h, RAID_CTLR_LUNID, | ||
3998 | HPSA_RESET_TYPE_CONTROLLER)) { | ||
3999 | dev_warn(&h->pdev->dev, "Resetting array controller failed.\n"); | ||
4000 | return -EIO; | ||
4001 | } | ||
4002 | |||
4003 | dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n"); | ||
4004 | if (hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) { | ||
4005 | dev_warn(&h->pdev->dev, "Soft reset had no effect.\n"); | ||
4006 | return -1; | ||
4007 | } | ||
4008 | |||
4009 | dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n"); | ||
4010 | if (hpsa_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) { | ||
4011 | dev_warn(&h->pdev->dev, "Board failed to become ready " | ||
4012 | "after soft reset.\n"); | ||
4013 | return -1; | ||
4014 | } | ||
4015 | |||
4016 | return 0; | ||
4017 | } | ||
4018 | |||
4019 | static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h) | ||
4020 | { | ||
4021 | free_irq(h->intr[h->intr_mode], h); | ||
4022 | #ifdef CONFIG_PCI_MSI | ||
4023 | if (h->msix_vector) | ||
4024 | pci_disable_msix(h->pdev); | ||
4025 | else if (h->msi_vector) | ||
4026 | pci_disable_msi(h->pdev); | ||
4027 | #endif /* CONFIG_PCI_MSI */ | ||
4028 | hpsa_free_sg_chain_blocks(h); | ||
4029 | hpsa_free_cmd_pool(h); | ||
4030 | kfree(h->blockFetchTable); | ||
4031 | pci_free_consistent(h->pdev, h->reply_pool_size, | ||
4032 | h->reply_pool, h->reply_pool_dhandle); | ||
4033 | if (h->vaddr) | ||
4034 | iounmap(h->vaddr); | ||
4035 | if (h->transtable) | ||
4036 | iounmap(h->transtable); | ||
4037 | if (h->cfgtable) | ||
4038 | iounmap(h->cfgtable); | ||
4039 | pci_release_regions(h->pdev); | ||
4040 | kfree(h); | ||
4041 | } | ||
4042 | |||
3913 | static int __devinit hpsa_init_one(struct pci_dev *pdev, | 4043 | static int __devinit hpsa_init_one(struct pci_dev *pdev, |
3914 | const struct pci_device_id *ent) | 4044 | const struct pci_device_id *ent) |
3915 | { | 4045 | { |
3916 | int dac, rc; | 4046 | int dac, rc; |
3917 | struct ctlr_info *h; | 4047 | struct ctlr_info *h; |
4048 | int try_soft_reset = 0; | ||
4049 | unsigned long flags; | ||
3918 | 4050 | ||
3919 | if (number_of_controllers == 0) | 4051 | if (number_of_controllers == 0) |
3920 | printk(KERN_INFO DRIVER_NAME "\n"); | 4052 | printk(KERN_INFO DRIVER_NAME "\n"); |
3921 | 4053 | ||
3922 | rc = hpsa_init_reset_devices(pdev); | 4054 | rc = hpsa_init_reset_devices(pdev); |
3923 | if (rc) | 4055 | if (rc) { |
3924 | return rc; | 4056 | if (rc != -ENOTSUPP) |
4057 | return rc; | ||
4058 | /* If the reset fails in a particular way (it has no way to do | ||
4059 | * a proper hard reset, so returns -ENOTSUPP) we can try to do | ||
4060 | * a soft reset once we get the controller configured up to the | ||
4061 | * point that it can accept a command. | ||
4062 | */ | ||
4063 | try_soft_reset = 1; | ||
4064 | rc = 0; | ||
4065 | } | ||
4066 | |||
4067 | reinit_after_soft_reset: | ||
3925 | 4068 | ||
3926 | /* Command structures must be aligned on a 32-byte boundary because | 4069 | /* Command structures must be aligned on a 32-byte boundary because |
3927 | * the 5 lower bits of the address are used by the hardware. and by | 4070 | * the 5 lower bits of the address are used by the hardware. and by |
@@ -3981,11 +4124,66 @@ static int __devinit hpsa_init_one(struct pci_dev *pdev, | |||
3981 | h->ndevices = 0; | 4124 | h->ndevices = 0; |
3982 | h->scsi_host = NULL; | 4125 | h->scsi_host = NULL; |
3983 | spin_lock_init(&h->devlock); | 4126 | spin_lock_init(&h->devlock); |
4127 | hpsa_put_ctlr_into_performant_mode(h); | ||
4128 | |||
4129 | /* At this point, the controller is ready to take commands. | ||
4130 | * Now, if reset_devices and the hard reset didn't work, try | ||
4131 | * the soft reset and see if that works. | ||
4132 | */ | ||
4133 | if (try_soft_reset) { | ||
4134 | |||
4135 | /* This is kind of gross. We may or may not get a completion | ||
4136 | * from the soft reset command, and if we do, then the value | ||
4137 | * from the fifo may or may not be valid. So, we wait 10 secs | ||
4138 | * after the reset throwing away any completions we get during | ||
4139 | * that time. Unregister the interrupt handler and register | ||
4140 | * fake ones to scoop up any residual completions. | ||
4141 | */ | ||
4142 | spin_lock_irqsave(&h->lock, flags); | ||
4143 | h->access.set_intr_mask(h, HPSA_INTR_OFF); | ||
4144 | spin_unlock_irqrestore(&h->lock, flags); | ||
4145 | free_irq(h->intr[h->intr_mode], h); | ||
4146 | rc = hpsa_request_irq(h, hpsa_msix_discard_completions, | ||
4147 | hpsa_intx_discard_completions); | ||
4148 | if (rc) { | ||
4149 | dev_warn(&h->pdev->dev, "Failed to request_irq after " | ||
4150 | "soft reset.\n"); | ||
4151 | goto clean4; | ||
4152 | } | ||
4153 | |||
4154 | rc = hpsa_kdump_soft_reset(h); | ||
4155 | if (rc) | ||
4156 | /* Neither hard nor soft reset worked, we're hosed. */ | ||
4157 | goto clean4; | ||
4158 | |||
4159 | dev_info(&h->pdev->dev, "Board READY.\n"); | ||
4160 | dev_info(&h->pdev->dev, | ||
4161 | "Waiting for stale completions to drain.\n"); | ||
4162 | h->access.set_intr_mask(h, HPSA_INTR_ON); | ||
4163 | msleep(10000); | ||
4164 | h->access.set_intr_mask(h, HPSA_INTR_OFF); | ||
4165 | |||
4166 | rc = controller_reset_failed(h->cfgtable); | ||
4167 | if (rc) | ||
4168 | dev_info(&h->pdev->dev, | ||
4169 | "Soft reset appears to have failed.\n"); | ||
4170 | |||
4171 | /* since the controller's reset, we have to go back and re-init | ||
4172 | * everything. Easiest to just forget what we've done and do it | ||
4173 | * all over again. | ||
4174 | */ | ||
4175 | hpsa_undo_allocations_after_kdump_soft_reset(h); | ||
4176 | try_soft_reset = 0; | ||
4177 | if (rc) | ||
4178 | /* don't go to clean4, we already unallocated */ | ||
4179 | return -ENODEV; | ||
4180 | |||
4181 | goto reinit_after_soft_reset; | ||
4182 | } | ||
3984 | 4183 | ||
3985 | /* Turn the interrupts on so we can service requests */ | 4184 | /* Turn the interrupts on so we can service requests */ |
3986 | h->access.set_intr_mask(h, HPSA_INTR_ON); | 4185 | h->access.set_intr_mask(h, HPSA_INTR_ON); |
3987 | 4186 | ||
3988 | hpsa_put_ctlr_into_performant_mode(h); | ||
3989 | hpsa_hba_inquiry(h); | 4187 | hpsa_hba_inquiry(h); |
3990 | hpsa_register_scsi(h); /* hook ourselves into SCSI subsystem */ | 4188 | hpsa_register_scsi(h); /* hook ourselves into SCSI subsystem */ |
3991 | h->busy_initializing = 0; | 4189 | h->busy_initializing = 0; |