diff options
author | nagalakshmi.nandigama@lsi.com <nagalakshmi.nandigama@lsi.com> | 2011-11-30 21:12:04 -0500 |
---|---|---|
committer | James Bottomley <JBottomley@Parallels.com> | 2011-12-15 01:57:31 -0500 |
commit | 845a0e40afb77bebdbda353b44ebf48784aa51f4 (patch) | |
tree | 8a4279211ddb3a6c727eafdcaf6cbbdd6c0f005b | |
parent | 4053a4be525d3441cad6cd1ae207177f03eb9ce7 (diff) |
[SCSI] mpt2sas: Better handling DEAD IOC (PCI-E Link down) error condition
Detection of a dead IOC is done in the fault_reset_work thread.
If the IOC Doorbell is 0xFFFFFFFF, the IOC is detected as non-operational (DEAD).
When a DEAD IOC is detected, the code now removes that IOC and
all its attached devices from the OS.
The PCI layer API pci_remove_bus_device() is called to remove the dead IOC.
Signed-off-by: Nagalakshmi Nandigama <nagalakshmi.nandigama@lsi.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r-- | drivers/scsi/mpt2sas/mpt2sas_base.c | 59 | ||||
-rw-r--r-- | drivers/scsi/mpt2sas/mpt2sas_base.h | 3 | ||||
-rw-r--r-- | drivers/scsi/mpt2sas/mpt2sas_scsih.c | 1 |
3 files changed, 63 insertions, 0 deletions
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index beda04a8404b..c041cc70a25f 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/sort.h> | 57 | #include <linux/sort.h> |
58 | #include <linux/io.h> | 58 | #include <linux/io.h> |
59 | #include <linux/time.h> | 59 | #include <linux/time.h> |
60 | #include <linux/kthread.h> | ||
60 | #include <linux/aer.h> | 61 | #include <linux/aer.h> |
61 | 62 | ||
62 | #include "mpt2sas_base.h" | 63 | #include "mpt2sas_base.h" |
@@ -120,10 +121,34 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp) | |||
120 | ioc->fwfault_debug = mpt2sas_fwfault_debug; | 121 | ioc->fwfault_debug = mpt2sas_fwfault_debug; |
121 | return 0; | 122 | return 0; |
122 | } | 123 | } |
124 | |||
123 | module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug, | 125 | module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug, |
124 | param_get_int, &mpt2sas_fwfault_debug, 0644); | 126 | param_get_int, &mpt2sas_fwfault_debug, 0644); |
125 | 127 | ||
126 | /** | 128 | /** |
129 | * mpt2sas_remove_dead_ioc_func - kthread context to remove dead ioc | ||
130 | * @arg: input argument, used to derive ioc | ||
131 | * | ||
132 | * Return 0 if controller is removed from pci subsystem. | ||
133 | * Return -1 for other case. | ||
134 | */ | ||
135 | static int mpt2sas_remove_dead_ioc_func(void *arg) | ||
136 | { | ||
137 | struct MPT2SAS_ADAPTER *ioc = (struct MPT2SAS_ADAPTER *)arg; | ||
138 | struct pci_dev *pdev; | ||
139 | |||
140 | if ((ioc == NULL)) | ||
141 | return -1; | ||
142 | |||
143 | pdev = ioc->pdev; | ||
144 | if ((pdev == NULL)) | ||
145 | return -1; | ||
146 | pci_remove_bus_device(pdev); | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | |||
151 | /** | ||
127 | * _base_fault_reset_work - workq handling ioc fault conditions | 152 | * _base_fault_reset_work - workq handling ioc fault conditions |
128 | * @work: input argument, used to derive ioc | 153 | * @work: input argument, used to derive ioc |
129 | * Context: sleep. | 154 | * Context: sleep. |
@@ -138,6 +163,7 @@ _base_fault_reset_work(struct work_struct *work) | |||
138 | unsigned long flags; | 163 | unsigned long flags; |
139 | u32 doorbell; | 164 | u32 doorbell; |
140 | int rc; | 165 | int rc; |
166 | struct task_struct *p; | ||
141 | 167 | ||
142 | spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); | 168 | spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); |
143 | if (ioc->shost_recovery) | 169 | if (ioc->shost_recovery) |
@@ -145,6 +171,39 @@ _base_fault_reset_work(struct work_struct *work) | |||
145 | spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); | 171 | spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); |
146 | 172 | ||
147 | doorbell = mpt2sas_base_get_iocstate(ioc, 0); | 173 | doorbell = mpt2sas_base_get_iocstate(ioc, 0); |
174 | if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_MASK) { | ||
175 | printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n", | ||
176 | ioc->name, __func__); | ||
177 | |||
178 | /* | ||
179 | * Call _scsih_flush_pending_cmds callback so that we flush all | ||
180 | * pending commands back to OS. This call is required to avoid | ||
181 | * deadlock at block layer. Dead IOC will fail to do diag reset, | ||
182 | * and this call is safe since dead ioc will never return any | ||
183 | * command back from HW. | ||
184 | */ | ||
185 | ioc->schedule_dead_ioc_flush_running_cmds(ioc); | ||
186 | /* | ||
187 | * Set remove_host flag early since kernel thread will | ||
188 | * take some time to execute. | ||
189 | */ | ||
190 | ioc->remove_host = 1; | ||
191 | /*Remove the Dead Host */ | ||
192 | p = kthread_run(mpt2sas_remove_dead_ioc_func, ioc, | ||
193 | "mpt2sas_dead_ioc_%d", ioc->id); | ||
194 | if (IS_ERR(p)) { | ||
195 | printk(MPT2SAS_ERR_FMT | ||
196 | "%s: Running mpt2sas_dead_ioc thread failed !!!!\n", | ||
197 | ioc->name, __func__); | ||
198 | } else { | ||
199 | printk(MPT2SAS_ERR_FMT | ||
200 | "%s: Running mpt2sas_dead_ioc thread success !!!!\n", | ||
201 | ioc->name, __func__); | ||
202 | } | ||
203 | |||
204 | return; /* don't rearm timer */ | ||
205 | } | ||
206 | |||
148 | if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { | 207 | if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { |
149 | rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, | 208 | rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, |
150 | FORCE_BIG_HAMMER); | 209 | FORCE_BIG_HAMMER); |
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 3c3babc7d260..61e5b2400aa8 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h | |||
@@ -623,6 +623,7 @@ enum mutex_type { | |||
623 | TM_MUTEX_ON = 1, | 623 | TM_MUTEX_ON = 1, |
624 | }; | 624 | }; |
625 | 625 | ||
626 | typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc); | ||
626 | /** | 627 | /** |
627 | * struct MPT2SAS_ADAPTER - per adapter struct | 628 | * struct MPT2SAS_ADAPTER - per adapter struct |
628 | * @list: ioc_list | 629 | * @list: ioc_list |
@@ -665,6 +666,7 @@ enum mutex_type { | |||
665 | * @msix_vector_count: number msix vectors | 666 | * @msix_vector_count: number msix vectors |
666 | * @cpu_msix_table: table for mapping cpus to msix index | 667 | * @cpu_msix_table: table for mapping cpus to msix index |
667 | * @cpu_msix_table_sz: table size | 668 | * @cpu_msix_table_sz: table size |
669 | * @schedule_dead_ioc_flush_running_cmds: callback to flush pending commands | ||
668 | * @scsi_io_cb_idx: shost generated commands | 670 | * @scsi_io_cb_idx: shost generated commands |
669 | * @tm_cb_idx: task management commands | 671 | * @tm_cb_idx: task management commands |
670 | * @scsih_cb_idx: scsih internal commands | 672 | * @scsih_cb_idx: scsih internal commands |
@@ -816,6 +818,7 @@ struct MPT2SAS_ADAPTER { | |||
816 | resource_size_t **reply_post_host_index; | 818 | resource_size_t **reply_post_host_index; |
817 | u16 cpu_msix_table_sz; | 819 | u16 cpu_msix_table_sz; |
818 | u32 ioc_reset_count; | 820 | u32 ioc_reset_count; |
821 | MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds; | ||
819 | 822 | ||
820 | /* internal commands, callback index */ | 823 | /* internal commands, callback index */ |
821 | u8 scsi_io_cb_idx; | 824 | u8 scsi_io_cb_idx; |
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index d570573b7963..0b6b6b44e362 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c | |||
@@ -7928,6 +7928,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
7928 | ioc->tm_tr_volume_cb_idx = tm_tr_volume_cb_idx; | 7928 | ioc->tm_tr_volume_cb_idx = tm_tr_volume_cb_idx; |
7929 | ioc->tm_sas_control_cb_idx = tm_sas_control_cb_idx; | 7929 | ioc->tm_sas_control_cb_idx = tm_sas_control_cb_idx; |
7930 | ioc->logging_level = logging_level; | 7930 | ioc->logging_level = logging_level; |
7931 | ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds; | ||
7931 | /* misc semaphores and spin locks */ | 7932 | /* misc semaphores and spin locks */ |
7932 | mutex_init(&ioc->reset_in_progress_mutex); | 7933 | mutex_init(&ioc->reset_in_progress_mutex); |
7933 | spin_lock_init(&ioc->ioc_reset_in_progress_lock); | 7934 | spin_lock_init(&ioc->ioc_reset_in_progress_lock); |