aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornagalakshmi.nandigama@lsi.com <nagalakshmi.nandigama@lsi.com>2011-11-30 21:12:04 -0500
committerJames Bottomley <JBottomley@Parallels.com>2011-12-15 01:57:31 -0500
commit845a0e40afb77bebdbda353b44ebf48784aa51f4 (patch)
tree8a4279211ddb3a6c727eafdcaf6cbbdd6c0f005b
parent4053a4be525d3441cad6cd1ae207177f03eb9ce7 (diff)
[SCSI] mpt2sas: Better handling of DEAD IOC (PCI-E Link down) error condition
Detection of a dead IOC is done in the fault_reset_work thread. If the IOC Doorbell reads 0xFFFFFFFF, the IOC is treated as non-operational (DEAD IOC). When a dead IOC is detected, the code now removes that IOC and all its attached devices from the OS. The PCI layer API pci_remove_bus_device() is called to remove the dead IOC. Signed-off-by: Nagalakshmi Nandigama <nagalakshmi.nandigama@lsi.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_base.c59
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_base.h3
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_scsih.c1
3 files changed, 63 insertions, 0 deletions
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index beda04a8404b..c041cc70a25f 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -57,6 +57,7 @@
57#include <linux/sort.h> 57#include <linux/sort.h>
58#include <linux/io.h> 58#include <linux/io.h>
59#include <linux/time.h> 59#include <linux/time.h>
60#include <linux/kthread.h>
60#include <linux/aer.h> 61#include <linux/aer.h>
61 62
62#include "mpt2sas_base.h" 63#include "mpt2sas_base.h"
@@ -120,10 +121,34 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
120 ioc->fwfault_debug = mpt2sas_fwfault_debug; 121 ioc->fwfault_debug = mpt2sas_fwfault_debug;
121 return 0; 122 return 0;
122} 123}
124
123module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug, 125module_param_call(mpt2sas_fwfault_debug, _scsih_set_fwfault_debug,
124 param_get_int, &mpt2sas_fwfault_debug, 0644); 126 param_get_int, &mpt2sas_fwfault_debug, 0644);
125 127
126/** 128/**
129 * mpt2sas_remove_dead_ioc_func - kthread context to remove dead ioc
130 * @arg: input argument, used to derive ioc
131 *
132 * Return 0 after asking the pci subsystem to remove the controller.
133 * Return -1 if @arg or its pci device pointer is NULL.
134 */
135static int mpt2sas_remove_dead_ioc_func(void *arg)
136{
 /* arg is the adapter pointer that the caller passed to kthread_run() */
137 struct MPT2SAS_ADAPTER *ioc = (struct MPT2SAS_ADAPTER *)arg;
138 struct pci_dev *pdev;
139
 /* nothing to remove without an adapter */
140 if ((ioc == NULL))
141 return -1;
142
 /* the adapter must still reference its pci device */
143 pdev = ioc->pdev;
144 if ((pdev == NULL))
145 return -1;
 /* hand the device to the PCI layer for removal */
146 pci_remove_bus_device(pdev);
147 return 0;
148}
149
150
151/**
127 * _base_fault_reset_work - workq handling ioc fault conditions 152 * _base_fault_reset_work - workq handling ioc fault conditions
128 * @work: input argument, used to derive ioc 153 * @work: input argument, used to derive ioc
129 * Context: sleep. 154 * Context: sleep.
@@ -138,6 +163,7 @@ _base_fault_reset_work(struct work_struct *work)
138 unsigned long flags; 163 unsigned long flags;
139 u32 doorbell; 164 u32 doorbell;
140 int rc; 165 int rc;
166 struct task_struct *p;
141 167
142 spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); 168 spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
143 if (ioc->shost_recovery) 169 if (ioc->shost_recovery)
@@ -145,6 +171,39 @@ _base_fault_reset_work(struct work_struct *work)
145 spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); 171 spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
146 172
147 doorbell = mpt2sas_base_get_iocstate(ioc, 0); 173 doorbell = mpt2sas_base_get_iocstate(ioc, 0);
174 if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_MASK) {
175 printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n",
176 ioc->name, __func__);
177
178 /*
179 * Call _scsih_flush_pending_cmds callback so that we flush all
180 * pending commands back to OS. This call is required to avoid
181 * deadlock at block layer. Dead IOC will fail to do diag reset,
182 * and this call is safe since dead ioc will never return any
183 * command back from HW.
184 */
185 ioc->schedule_dead_ioc_flush_running_cmds(ioc);
186 /*
187 * Set remove_host flag early since kernel thread will
188 * take some time to execute.
189 */
190 ioc->remove_host = 1;
191 /*Remove the Dead Host */
192 p = kthread_run(mpt2sas_remove_dead_ioc_func, ioc,
193 "mpt2sas_dead_ioc_%d", ioc->id);
194 if (IS_ERR(p)) {
195 printk(MPT2SAS_ERR_FMT
196 "%s: Running mpt2sas_dead_ioc thread failed !!!!\n",
197 ioc->name, __func__);
198 } else {
199 printk(MPT2SAS_ERR_FMT
200 "%s: Running mpt2sas_dead_ioc thread success !!!!\n",
201 ioc->name, __func__);
202 }
203
204 return; /* don't rearm timer */
205 }
206
148 if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { 207 if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
149 rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, 208 rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP,
150 FORCE_BIG_HAMMER); 209 FORCE_BIG_HAMMER);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 3c3babc7d260..61e5b2400aa8 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -623,6 +623,7 @@ enum mutex_type {
623 TM_MUTEX_ON = 1, 623 TM_MUTEX_ON = 1,
624}; 624};
625 625
626typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc);
626/** 627/**
627 * struct MPT2SAS_ADAPTER - per adapter struct 628 * struct MPT2SAS_ADAPTER - per adapter struct
628 * @list: ioc_list 629 * @list: ioc_list
@@ -665,6 +666,7 @@ enum mutex_type {
665 * @msix_vector_count: number msix vectors 666 * @msix_vector_count: number msix vectors
666 * @cpu_msix_table: table for mapping cpus to msix index 667 * @cpu_msix_table: table for mapping cpus to msix index
667 * @cpu_msix_table_sz: table size 668 * @cpu_msix_table_sz: table size
669 * @schedule_dead_ioc_flush_running_cmds: callback to flush pending commands
668 * @scsi_io_cb_idx: shost generated commands 670 * @scsi_io_cb_idx: shost generated commands
669 * @tm_cb_idx: task management commands 671 * @tm_cb_idx: task management commands
670 * @scsih_cb_idx: scsih internal commands 672 * @scsih_cb_idx: scsih internal commands
@@ -816,6 +818,7 @@ struct MPT2SAS_ADAPTER {
816 resource_size_t **reply_post_host_index; 818 resource_size_t **reply_post_host_index;
817 u16 cpu_msix_table_sz; 819 u16 cpu_msix_table_sz;
818 u32 ioc_reset_count; 820 u32 ioc_reset_count;
821 MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds;
819 822
820 /* internal commands, callback index */ 823 /* internal commands, callback index */
821 u8 scsi_io_cb_idx; 824 u8 scsi_io_cb_idx;
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index d570573b7963..0b6b6b44e362 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -7928,6 +7928,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
7928 ioc->tm_tr_volume_cb_idx = tm_tr_volume_cb_idx; 7928 ioc->tm_tr_volume_cb_idx = tm_tr_volume_cb_idx;
7929 ioc->tm_sas_control_cb_idx = tm_sas_control_cb_idx; 7929 ioc->tm_sas_control_cb_idx = tm_sas_control_cb_idx;
7930 ioc->logging_level = logging_level; 7930 ioc->logging_level = logging_level;
7931 ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds;
7931 /* misc semaphores and spin locks */ 7932 /* misc semaphores and spin locks */
7932 mutex_init(&ioc->reset_in_progress_mutex); 7933 mutex_init(&ioc->reset_in_progress_mutex);
7933 spin_lock_init(&ioc->ioc_reset_in_progress_lock); 7934 spin_lock_init(&ioc->ioc_reset_in_progress_lock);