aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/s390
diff options
context:
space:
mode:
authorMartin Peschke <mpeschke@linux.vnet.ibm.com>2013-08-22 11:45:37 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-08-29 12:47:39 -0400
commite1a289ee6734dd5f9a81a260f3027ad8010f530a (patch)
treee1fc09dfa08ffe812e7d7d3374b92925fe2055bb /drivers/s390
parentbda5d1efa09527e443a6990f81459779a313e24d (diff)
SCSI: zfcp: fix schedule-inside-lock in scsi_device list loops
commit 924dd584b198a58aa7cb3efefd8a03326550ce8f upstream.

BUG: sleeping function called from invalid context at kernel/workqueue.c:2752
in_atomic(): 1, irqs_disabled(): 1, pid: 360, name: zfcperp0.0.1700
CPU: 1 Not tainted 3.9.3+ #69
Process zfcperp0.0.1700 (pid: 360, task: 0000000075b7e080, ksp: 000000007476bc30)
<snip>
Call Trace:
([<00000000001165de>] show_trace+0x106/0x154)
 [<00000000001166a0>] show_stack+0x74/0xf4
 [<00000000006ff646>] dump_stack+0xc6/0xd4
 [<000000000017f3a0>] __might_sleep+0x128/0x148
 [<000000000015ece8>] flush_work+0x54/0x1f8
 [<00000000001630de>] __cancel_work_timer+0xc6/0x128
 [<00000000005067ac>] scsi_device_dev_release_usercontext+0x164/0x23c
 [<0000000000161816>] execute_in_process_context+0x96/0xa8
 [<00000000004d33d8>] device_release+0x60/0xc0
 [<000000000048af48>] kobject_release+0xa8/0x1c4
 [<00000000004f4bf2>] __scsi_iterate_devices+0xfa/0x130
 [<000003ff801b307a>] zfcp_erp_strategy+0x4da/0x1014 [zfcp]
 [<000003ff801b3caa>] zfcp_erp_thread+0xf6/0x2b0 [zfcp]
 [<000000000016b75a>] kthread+0xf2/0xfc
 [<000000000070c9de>] kernel_thread_starter+0x6/0xc
 [<000000000070c9d8>] kernel_thread_starter+0x0/0xc

Apparently, the ref_count for some scsi_device drops down to zero, triggering device removal through execute_in_process_context(), while the lldd error recovery thread iterates through a scsi device list. Unfortunately, execute_in_process_context() decides to immediately execute that device removal function, instead of scheduling asynchronous execution, since it detects process context and thinks it is safe to do so. But almost all calls to shost_for_each_device() in our lldd are inside spin_lock_irq, even in thread context. Obviously, schedule() inside spin_lock_irq sections is a bad idea.

Change the lldd to use the proper iterator function, __shost_for_each_device(), in combination with required locking.
Occurrences that need to be changed include all calls in zfcp_erp.c, since those might be executed in zfcp error recovery thread context with a lock held. Other occurrences of shost_for_each_device() in zfcp_fsf.c do not need to be changed (no process context, no surrounding locking).

The problem was introduced in Linux 2.6.37 by commit b62a8d9b45b971a67a0f8413338c230e3117dff5 "[SCSI] zfcp: Use SCSI device data zfcp_scsi_dev instead of zfcp_unit".

Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Peschke <mpeschke@linux.vnet.ibm.com>
Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/s390')
-rw-r--r--drivers/s390/scsi/zfcp_erp.c29
1 files changed, 22 insertions, 7 deletions
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 4133ab6e20f1..8e8f3533d2a1 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -102,10 +102,13 @@ static void zfcp_erp_action_dismiss_port(struct zfcp_port *port)
102 102
103 if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE) 103 if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE)
104 zfcp_erp_action_dismiss(&port->erp_action); 104 zfcp_erp_action_dismiss(&port->erp_action);
105 else 105 else {
106 shost_for_each_device(sdev, port->adapter->scsi_host) 106 spin_lock(port->adapter->scsi_host->host_lock);
107 __shost_for_each_device(sdev, port->adapter->scsi_host)
107 if (sdev_to_zfcp(sdev)->port == port) 108 if (sdev_to_zfcp(sdev)->port == port)
108 zfcp_erp_action_dismiss_lun(sdev); 109 zfcp_erp_action_dismiss_lun(sdev);
110 spin_unlock(port->adapter->scsi_host->host_lock);
111 }
109} 112}
110 113
111static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter) 114static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
@@ -592,9 +595,11 @@ static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear,
592{ 595{
593 struct scsi_device *sdev; 596 struct scsi_device *sdev;
594 597
595 shost_for_each_device(sdev, port->adapter->scsi_host) 598 spin_lock(port->adapter->scsi_host->host_lock);
599 __shost_for_each_device(sdev, port->adapter->scsi_host)
596 if (sdev_to_zfcp(sdev)->port == port) 600 if (sdev_to_zfcp(sdev)->port == port)
597 _zfcp_erp_lun_reopen(sdev, clear, id, 0); 601 _zfcp_erp_lun_reopen(sdev, clear, id, 0);
602 spin_unlock(port->adapter->scsi_host->host_lock);
598} 603}
599 604
600static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) 605static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act)
@@ -1435,8 +1440,10 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask)
1435 atomic_set_mask(common_mask, &port->status); 1440 atomic_set_mask(common_mask, &port->status);
1436 read_unlock_irqrestore(&adapter->port_list_lock, flags); 1441 read_unlock_irqrestore(&adapter->port_list_lock, flags);
1437 1442
1438 shost_for_each_device(sdev, adapter->scsi_host) 1443 spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
1444 __shost_for_each_device(sdev, adapter->scsi_host)
1439 atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status); 1445 atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status);
1446 spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
1440} 1447}
1441 1448
1442/** 1449/**
@@ -1470,11 +1477,13 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
1470 } 1477 }
1471 read_unlock_irqrestore(&adapter->port_list_lock, flags); 1478 read_unlock_irqrestore(&adapter->port_list_lock, flags);
1472 1479
1473 shost_for_each_device(sdev, adapter->scsi_host) { 1480 spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
1481 __shost_for_each_device(sdev, adapter->scsi_host) {
1474 atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status); 1482 atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status);
1475 if (clear_counter) 1483 if (clear_counter)
1476 atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); 1484 atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
1477 } 1485 }
1486 spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
1478} 1487}
1479 1488
1480/** 1489/**
@@ -1488,16 +1497,19 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask)
1488{ 1497{
1489 struct scsi_device *sdev; 1498 struct scsi_device *sdev;
1490 u32 common_mask = mask & ZFCP_COMMON_FLAGS; 1499 u32 common_mask = mask & ZFCP_COMMON_FLAGS;
1500 unsigned long flags;
1491 1501
1492 atomic_set_mask(mask, &port->status); 1502 atomic_set_mask(mask, &port->status);
1493 1503
1494 if (!common_mask) 1504 if (!common_mask)
1495 return; 1505 return;
1496 1506
1497 shost_for_each_device(sdev, port->adapter->scsi_host) 1507 spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
1508 __shost_for_each_device(sdev, port->adapter->scsi_host)
1498 if (sdev_to_zfcp(sdev)->port == port) 1509 if (sdev_to_zfcp(sdev)->port == port)
1499 atomic_set_mask(common_mask, 1510 atomic_set_mask(common_mask,
1500 &sdev_to_zfcp(sdev)->status); 1511 &sdev_to_zfcp(sdev)->status);
1512 spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
1501} 1513}
1502 1514
1503/** 1515/**
@@ -1512,6 +1524,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
1512 struct scsi_device *sdev; 1524 struct scsi_device *sdev;
1513 u32 common_mask = mask & ZFCP_COMMON_FLAGS; 1525 u32 common_mask = mask & ZFCP_COMMON_FLAGS;
1514 u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED; 1526 u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
1527 unsigned long flags;
1515 1528
1516 atomic_clear_mask(mask, &port->status); 1529 atomic_clear_mask(mask, &port->status);
1517 1530
@@ -1521,13 +1534,15 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
1521 if (clear_counter) 1534 if (clear_counter)
1522 atomic_set(&port->erp_counter, 0); 1535 atomic_set(&port->erp_counter, 0);
1523 1536
1524 shost_for_each_device(sdev, port->adapter->scsi_host) 1537 spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
1538 __shost_for_each_device(sdev, port->adapter->scsi_host)
1525 if (sdev_to_zfcp(sdev)->port == port) { 1539 if (sdev_to_zfcp(sdev)->port == port) {
1526 atomic_clear_mask(common_mask, 1540 atomic_clear_mask(common_mask,
1527 &sdev_to_zfcp(sdev)->status); 1541 &sdev_to_zfcp(sdev)->status);
1528 if (clear_counter) 1542 if (clear_counter)
1529 atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); 1543 atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
1530 } 1544 }
1545 spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
1531} 1546}
1532 1547
1533/** 1548/**