summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNagarajkumar Narayanan <nagarajkumar.narayanan@seagate.com>2015-08-18 03:57:10 -0400
committerJames Bottomley <JBottomley@Odin.com>2015-09-06 14:57:25 -0400
commit6229b414b3adb3aac0b54e67d72d6462fc230c0d (patch)
treedf110926c0947fd46d57eba75c88dafee6f35f92
parent50acde8ed35620fdfad88f4e20a39a1eb0d8d4c9 (diff)
mpt2sas: setpci reset kernel oops fix
mpt2sas: setpci reset on nytro warpdrive card along with sysfs access and cli ioctl access resulted in kernel oops 1. pci_access_mutex lock added to provide synchronization between IOCTL, sysfs, PCI resource handling path 2. gioc_lock spinlock to protect list operations over multiple controllers From c53a1cff4c07528b8b9ec7f6716e94950283e8f9 Mon Sep 17 00:00:00 2001 From: Nagarajkumar Narayanan <nagarajkumar.narayanan@seagate.com> Date: Tue, 18 Aug 2015 11:58:13 +0530 Subject: [PATCH] mpt2sas setpci reset oops fix In the mpt2sas driver, due to lack of synchronization between ioctl, BRM status access through sysfs, and pci resource removal, kernel oopses happen as the ioctl path and BRM status sysfs access path still try to access the removed resources. Two locks added to provide synchronization 1. pci_access_mutex: Mutex to synchronize ioctl, sysfs show path and pci resource handling. PCI resource freeing will lead to freeing vital hardware/memory resources, which might be in use by cli/sysfs path functions, resulting in a NULL pointer dereference followed by kernel crash. To avoid the above race condition we use mutex synchronization, which ensures the synchronization between cli/sysfs_show path Note: pci_access_mutex is used only if nytro warpdrive cards (ioc->is_warpdrive based on device id) are used as we could not test this case with other SAS2 HBA cards. We can remove this check if this behaviour is confirmed from other cards. 2. spinlock on list operations over IOCs Case: when multiple warpdrive cards(IOCs) are in use Each IOC will be added to the ioc list structure on initialization. 
Watchdog threads run at regular intervals to check the IOC for any fault conditions, which will trigger the dead_ioc thread to deallocate pci resources, resulting in deletion of the IOC entry from the list. This deletion needs to be protected by a spinlock to ensure that ioc removal is synchronized; if not synchronized, it might lead to list_del corruption as the ioc list is traversed in the cli path. Signed-off-by: Nagarajkumar Narayanan <nagarajkumar.narayanan@seagate.com> Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de> Acked-by: Sreekanth Reddy <sreekanth.reddy@avagotech.com> Signed-off-by: James Bottomley <JBottomley@Odin.com>
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_base.c6
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_base.h19
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_ctl.c38
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_scsih.c13
4 files changed, 67 insertions, 9 deletions
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index 6dec7cff316f..c167911221e9 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -112,9 +112,12 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
112 if (ret) 112 if (ret)
113 return ret; 113 return ret;
114 114
115 /* global ioc spinlock to protect controller list on list operations */
115 printk(KERN_INFO "setting fwfault_debug(%d)\n", mpt2sas_fwfault_debug); 116 printk(KERN_INFO "setting fwfault_debug(%d)\n", mpt2sas_fwfault_debug);
117 spin_lock(&gioc_lock);
116 list_for_each_entry(ioc, &mpt2sas_ioc_list, list) 118 list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
117 ioc->fwfault_debug = mpt2sas_fwfault_debug; 119 ioc->fwfault_debug = mpt2sas_fwfault_debug;
120 spin_unlock(&gioc_lock);
118 return 0; 121 return 0;
119} 122}
120 123
@@ -4437,6 +4440,8 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc)
4437 dexitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, 4440 dexitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
4438 __func__)); 4441 __func__));
4439 4442
4443 /* synchronizing freeing resource with pci_access_mutex lock */
4444 mutex_lock(&ioc->pci_access_mutex);
4440 if (ioc->chip_phys && ioc->chip) { 4445 if (ioc->chip_phys && ioc->chip) {
4441 _base_mask_interrupts(ioc); 4446 _base_mask_interrupts(ioc);
4442 ioc->shost_recovery = 1; 4447 ioc->shost_recovery = 1;
@@ -4456,6 +4461,7 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc)
4456 pci_disable_pcie_error_reporting(pdev); 4461 pci_disable_pcie_error_reporting(pdev);
4457 pci_disable_device(pdev); 4462 pci_disable_device(pdev);
4458 } 4463 }
4464 mutex_unlock(&ioc->pci_access_mutex);
4459 return; 4465 return;
4460} 4466}
4461 4467
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 78f41aca9598..97ea360c6920 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -817,6 +817,12 @@ typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc);
817 * @delayed_tr_list: target reset link list 817 * @delayed_tr_list: target reset link list
818 * @delayed_tr_volume_list: volume target reset link list 818 * @delayed_tr_volume_list: volume target reset link list
819 * @@temp_sensors_count: flag to carry the number of temperature sensors 819 * @@temp_sensors_count: flag to carry the number of temperature sensors
820 * @pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and
821 * pci resource handling. PCI resource freeing will lead to free
822 * vital hardware/memory resource, which might be in use by cli/sysfs
823 * path functions resulting in Null pointer reference followed by kernel
824 * crash. To avoid the above race condition we use mutex syncrhonization
825 * which ensures the syncrhonization between cli/sysfs_show path
820 */ 826 */
821struct MPT2SAS_ADAPTER { 827struct MPT2SAS_ADAPTER {
822 struct list_head list; 828 struct list_head list;
@@ -1033,6 +1039,7 @@ struct MPT2SAS_ADAPTER {
1033 u8 mfg_pg10_hide_flag; 1039 u8 mfg_pg10_hide_flag;
1034 u8 hide_drives; 1040 u8 hide_drives;
1035 1041
1042 struct mutex pci_access_mutex;
1036}; 1043};
1037 1044
1038typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, 1045typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
@@ -1041,6 +1048,17 @@ typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
1041 1048
1042/* base shared API */ 1049/* base shared API */
1043extern struct list_head mpt2sas_ioc_list; 1050extern struct list_head mpt2sas_ioc_list;
1051/* spinlock on list operations over IOCs
1052 * Case: when multiple warpdrive cards(IOCs) are in use
1053 * Each IOC will added to the ioc list stucture on initialization.
1054 * Watchdog threads run at regular intervals to check IOC for any
1055 * fault conditions which will trigger the dead_ioc thread to
1056 * deallocate pci resource, resulting deleting the IOC netry from list,
1057 * this deletion need to protected by spinlock to enusre that
1058 * ioc removal is syncrhonized, if not synchronized it might lead to
1059 * list_del corruption as the ioc list is traversed in cli path
1060 */
1061extern spinlock_t gioc_lock;
1044void mpt2sas_base_start_watchdog(struct MPT2SAS_ADAPTER *ioc); 1062void mpt2sas_base_start_watchdog(struct MPT2SAS_ADAPTER *ioc);
1045void mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc); 1063void mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc);
1046 1064
@@ -1119,7 +1137,6 @@ struct _sas_device *__mpt2sas_get_sdev_by_addr(
1119 struct MPT2SAS_ADAPTER *ioc, u64 sas_address); 1137 struct MPT2SAS_ADAPTER *ioc, u64 sas_address);
1120 1138
1121void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc); 1139void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc);
1122
1123void mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase); 1140void mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase);
1124 1141
1125/* config shared API */ 1142/* config shared API */
diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index 4e509604b571..3694b63bd993 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
@@ -427,13 +427,16 @@ static int
427_ctl_verify_adapter(int ioc_number, struct MPT2SAS_ADAPTER **iocpp) 427_ctl_verify_adapter(int ioc_number, struct MPT2SAS_ADAPTER **iocpp)
428{ 428{
429 struct MPT2SAS_ADAPTER *ioc; 429 struct MPT2SAS_ADAPTER *ioc;
430 430 /* global ioc lock to protect controller on list operations */
431 spin_lock(&gioc_lock);
431 list_for_each_entry(ioc, &mpt2sas_ioc_list, list) { 432 list_for_each_entry(ioc, &mpt2sas_ioc_list, list) {
432 if (ioc->id != ioc_number) 433 if (ioc->id != ioc_number)
433 continue; 434 continue;
435 spin_unlock(&gioc_lock);
434 *iocpp = ioc; 436 *iocpp = ioc;
435 return ioc_number; 437 return ioc_number;
436 } 438 }
439 spin_unlock(&gioc_lock);
437 *iocpp = NULL; 440 *iocpp = NULL;
438 return -1; 441 return -1;
439} 442}
@@ -522,10 +525,15 @@ _ctl_poll(struct file *filep, poll_table *wait)
522 525
523 poll_wait(filep, &ctl_poll_wait, wait); 526 poll_wait(filep, &ctl_poll_wait, wait);
524 527
528 /* global ioc lock to protect controller on list operations */
529 spin_lock(&gioc_lock);
525 list_for_each_entry(ioc, &mpt2sas_ioc_list, list) { 530 list_for_each_entry(ioc, &mpt2sas_ioc_list, list) {
526 if (ioc->aen_event_read_flag) 531 if (ioc->aen_event_read_flag) {
532 spin_unlock(&gioc_lock);
527 return POLLIN | POLLRDNORM; 533 return POLLIN | POLLRDNORM;
534 }
528 } 535 }
536 spin_unlock(&gioc_lock);
529 return 0; 537 return 0;
530} 538}
531 539
@@ -2168,16 +2176,23 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
2168 2176
2169 if (_ctl_verify_adapter(ioctl_header.ioc_number, &ioc) == -1 || !ioc) 2177 if (_ctl_verify_adapter(ioctl_header.ioc_number, &ioc) == -1 || !ioc)
2170 return -ENODEV; 2178 return -ENODEV;
2179 /* pci_access_mutex lock acquired by ioctl path */
2180 mutex_lock(&ioc->pci_access_mutex);
2171 if (ioc->shost_recovery || ioc->pci_error_recovery || 2181 if (ioc->shost_recovery || ioc->pci_error_recovery ||
2172 ioc->is_driver_loading) 2182 ioc->is_driver_loading || ioc->remove_host) {
2173 return -EAGAIN; 2183 ret = -EAGAIN;
2184 goto out_unlock_pciaccess;
2185 }
2174 2186
2175 state = (file->f_flags & O_NONBLOCK) ? NON_BLOCKING : BLOCKING; 2187 state = (file->f_flags & O_NONBLOCK) ? NON_BLOCKING : BLOCKING;
2176 if (state == NON_BLOCKING) { 2188 if (state == NON_BLOCKING) {
2177 if (!mutex_trylock(&ioc->ctl_cmds.mutex)) 2189 if (!mutex_trylock(&ioc->ctl_cmds.mutex)) {
2178 return -EAGAIN; 2190 ret = -EAGAIN;
2191 goto out_unlock_pciaccess;
2192 }
2179 } else if (mutex_lock_interruptible(&ioc->ctl_cmds.mutex)) { 2193 } else if (mutex_lock_interruptible(&ioc->ctl_cmds.mutex)) {
2180 return -ERESTARTSYS; 2194 ret = -ERESTARTSYS;
2195 goto out_unlock_pciaccess;
2181 } 2196 }
2182 2197
2183 switch (cmd) { 2198 switch (cmd) {
@@ -2258,6 +2273,8 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
2258 } 2273 }
2259 2274
2260 mutex_unlock(&ioc->ctl_cmds.mutex); 2275 mutex_unlock(&ioc->ctl_cmds.mutex);
2276out_unlock_pciaccess:
2277 mutex_unlock(&ioc->pci_access_mutex);
2261 return ret; 2278 return ret;
2262} 2279}
2263 2280
@@ -2711,6 +2728,12 @@ _ctl_BRM_status_show(struct device *cdev, struct device_attribute *attr,
2711 "warpdrive\n", ioc->name, __func__); 2728 "warpdrive\n", ioc->name, __func__);
2712 goto out; 2729 goto out;
2713 } 2730 }
2731 /* pci_access_mutex lock acquired by sysfs show path */
2732 mutex_lock(&ioc->pci_access_mutex);
2733 if (ioc->pci_error_recovery || ioc->remove_host) {
2734 mutex_unlock(&ioc->pci_access_mutex);
2735 return 0;
2736 }
2714 2737
2715 /* allocate upto GPIOVal 36 entries */ 2738 /* allocate upto GPIOVal 36 entries */
2716 sz = offsetof(Mpi2IOUnitPage3_t, GPIOVal) + (sizeof(u16) * 36); 2739 sz = offsetof(Mpi2IOUnitPage3_t, GPIOVal) + (sizeof(u16) * 36);
@@ -2749,6 +2772,7 @@ _ctl_BRM_status_show(struct device *cdev, struct device_attribute *attr,
2749 2772
2750 out: 2773 out:
2751 kfree(io_unit_pg3); 2774 kfree(io_unit_pg3);
2775 mutex_unlock(&ioc->pci_access_mutex);
2752 return rc; 2776 return rc;
2753} 2777}
2754static DEVICE_ATTR(BRM_status, S_IRUGO, _ctl_BRM_status_show, NULL); 2778static DEVICE_ATTR(BRM_status, S_IRUGO, _ctl_BRM_status_show, NULL);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index c0ff55b0d3cc..0ad09b2bff9c 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -79,7 +79,8 @@ static int _scsih_scan_finished(struct Scsi_Host *shost, unsigned long time);
79 79
80/* global parameters */ 80/* global parameters */
81LIST_HEAD(mpt2sas_ioc_list); 81LIST_HEAD(mpt2sas_ioc_list);
82 82/* global ioc lock for list operations */
83DEFINE_SPINLOCK(gioc_lock);
83/* local parameters */ 84/* local parameters */
84static u8 scsi_io_cb_idx = -1; 85static u8 scsi_io_cb_idx = -1;
85static u8 tm_cb_idx = -1; 86static u8 tm_cb_idx = -1;
@@ -321,8 +322,10 @@ _scsih_set_debug_level(const char *val, struct kernel_param *kp)
321 return ret; 322 return ret;
322 323
323 printk(KERN_INFO "setting logging_level(0x%08x)\n", logging_level); 324 printk(KERN_INFO "setting logging_level(0x%08x)\n", logging_level);
325 spin_lock(&gioc_lock);
324 list_for_each_entry(ioc, &mpt2sas_ioc_list, list) 326 list_for_each_entry(ioc, &mpt2sas_ioc_list, list)
325 ioc->logging_level = logging_level; 327 ioc->logging_level = logging_level;
328 spin_unlock(&gioc_lock);
326 return 0; 329 return 0;
327} 330}
328module_param_call(logging_level, _scsih_set_debug_level, param_get_int, 331module_param_call(logging_level, _scsih_set_debug_level, param_get_int,
@@ -8081,7 +8084,9 @@ _scsih_remove(struct pci_dev *pdev)
8081 sas_remove_host(shost); 8084 sas_remove_host(shost);
8082 scsi_remove_host(shost); 8085 scsi_remove_host(shost);
8083 mpt2sas_base_detach(ioc); 8086 mpt2sas_base_detach(ioc);
8087 spin_lock(&gioc_lock);
8084 list_del(&ioc->list); 8088 list_del(&ioc->list);
8089 spin_unlock(&gioc_lock);
8085 scsi_host_put(shost); 8090 scsi_host_put(shost);
8086} 8091}
8087 8092
@@ -8394,7 +8399,9 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
8394 ioc = shost_priv(shost); 8399 ioc = shost_priv(shost);
8395 memset(ioc, 0, sizeof(struct MPT2SAS_ADAPTER)); 8400 memset(ioc, 0, sizeof(struct MPT2SAS_ADAPTER));
8396 INIT_LIST_HEAD(&ioc->list); 8401 INIT_LIST_HEAD(&ioc->list);
8402 spin_lock(&gioc_lock);
8397 list_add_tail(&ioc->list, &mpt2sas_ioc_list); 8403 list_add_tail(&ioc->list, &mpt2sas_ioc_list);
8404 spin_unlock(&gioc_lock);
8398 ioc->shost = shost; 8405 ioc->shost = shost;
8399 ioc->id = mpt_ids++; 8406 ioc->id = mpt_ids++;
8400 sprintf(ioc->name, "%s%d", MPT2SAS_DRIVER_NAME, ioc->id); 8407 sprintf(ioc->name, "%s%d", MPT2SAS_DRIVER_NAME, ioc->id);
@@ -8419,6 +8426,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
8419 ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds; 8426 ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds;
8420 /* misc semaphores and spin locks */ 8427 /* misc semaphores and spin locks */
8421 mutex_init(&ioc->reset_in_progress_mutex); 8428 mutex_init(&ioc->reset_in_progress_mutex);
8429 /* initializing pci_access_mutex lock */
8430 mutex_init(&ioc->pci_access_mutex);
8422 spin_lock_init(&ioc->ioc_reset_in_progress_lock); 8431 spin_lock_init(&ioc->ioc_reset_in_progress_lock);
8423 spin_lock_init(&ioc->scsi_lookup_lock); 8432 spin_lock_init(&ioc->scsi_lookup_lock);
8424 spin_lock_init(&ioc->sas_device_lock); 8433 spin_lock_init(&ioc->sas_device_lock);
@@ -8521,7 +8530,9 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
8521 out_attach_fail: 8530 out_attach_fail:
8522 destroy_workqueue(ioc->firmware_event_thread); 8531 destroy_workqueue(ioc->firmware_event_thread);
8523 out_thread_fail: 8532 out_thread_fail:
8533 spin_lock(&gioc_lock);
8524 list_del(&ioc->list); 8534 list_del(&ioc->list);
8535 spin_unlock(&gioc_lock);
8525 scsi_host_put(shost); 8536 scsi_host_put(shost);
8526 return rv; 8537 return rv;
8527} 8538}