aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Peschke <mpeschke@linux.vnet.ibm.com>2013-08-22 11:45:37 -0400
committerJames Bottomley <JBottomley@Parallels.com>2013-08-22 12:24:02 -0400
commit924dd584b198a58aa7cb3efefd8a03326550ce8f (patch)
treebe09f6ede505e15b0fa42c5609c504c03080a03c
parentd79ff142624e1be080ad8d09101f7004d79c36e1 (diff)
[SCSI] zfcp: fix schedule-inside-lock in scsi_device list loops
BUG: sleeping function called from invalid context at kernel/workqueue.c:2752 in_atomic(): 1, irqs_disabled(): 1, pid: 360, name: zfcperp0.0.1700 CPU: 1 Not tainted 3.9.3+ #69 Process zfcperp0.0.1700 (pid: 360, task: 0000000075b7e080, ksp: 000000007476bc30) <snip> Call Trace: ([<00000000001165de>] show_trace+0x106/0x154) [<00000000001166a0>] show_stack+0x74/0xf4 [<00000000006ff646>] dump_stack+0xc6/0xd4 [<000000000017f3a0>] __might_sleep+0x128/0x148 [<000000000015ece8>] flush_work+0x54/0x1f8 [<00000000001630de>] __cancel_work_timer+0xc6/0x128 [<00000000005067ac>] scsi_device_dev_release_usercontext+0x164/0x23c [<0000000000161816>] execute_in_process_context+0x96/0xa8 [<00000000004d33d8>] device_release+0x60/0xc0 [<000000000048af48>] kobject_release+0xa8/0x1c4 [<00000000004f4bf2>] __scsi_iterate_devices+0xfa/0x130 [<000003ff801b307a>] zfcp_erp_strategy+0x4da/0x1014 [zfcp] [<000003ff801b3caa>] zfcp_erp_thread+0xf6/0x2b0 [zfcp] [<000000000016b75a>] kthread+0xf2/0xfc [<000000000070c9de>] kernel_thread_starter+0x6/0xc [<000000000070c9d8>] kernel_thread_starter+0x0/0xc Apparently, the ref_count for some scsi_device drops down to zero, triggering device removal through execute_in_process_context(), while the lldd error recovery thread iterates through a scsi device list. Unfortunately, execute_in_process_context() decides to immediately execute that device removal function, instead of scheduling asynchronous execution, since it detects process context and thinks it is safe to do so. But almost all calls to shost_for_each_device() in our lldd are inside spin_lock_irq, even in thread context. Obviously, schedule() inside spin_lock_irq sections is a bad idea. Change the lldd to use the proper iterator function, __shost_for_each_device(), in combination with required locking. Occurences that need to be changed include all calls in zfcp_erp.c, since those might be executed in zfcp error recovery thread context with a lock held. Other occurences of shost_for_each_device() in zfcp_fsf.c do not need to be changed (no process context, no surrounding locking). The problem was introduced in Linux 2.6.37 by commit b62a8d9b45b971a67a0f8413338c230e3117dff5 "[SCSI] zfcp: Use SCSI device data zfcp_scsi_dev instead of zfcp_unit". Reported-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Martin Peschke <mpeschke@linux.vnet.ibm.com> Cc: stable@vger.kernel.org #2.6.37+ Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r--drivers/s390/scsi/zfcp_erp.c29
1 files changed, 22 insertions, 7 deletions
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 1d4c8fe72752..c82fe65c4128 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -102,10 +102,13 @@ static void zfcp_erp_action_dismiss_port(struct zfcp_port *port)
102 102
103 if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE) 103 if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE)
104 zfcp_erp_action_dismiss(&port->erp_action); 104 zfcp_erp_action_dismiss(&port->erp_action);
105 else 105 else {
106 shost_for_each_device(sdev, port->adapter->scsi_host) 106 spin_lock(port->adapter->scsi_host->host_lock);
107 __shost_for_each_device(sdev, port->adapter->scsi_host)
107 if (sdev_to_zfcp(sdev)->port == port) 108 if (sdev_to_zfcp(sdev)->port == port)
108 zfcp_erp_action_dismiss_lun(sdev); 109 zfcp_erp_action_dismiss_lun(sdev);
110 spin_unlock(port->adapter->scsi_host->host_lock);
111 }
109} 112}
110 113
111static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter) 114static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
@@ -592,9 +595,11 @@ static void _zfcp_erp_lun_reopen_all(struct zfcp_port *port, int clear,
592{ 595{
593 struct scsi_device *sdev; 596 struct scsi_device *sdev;
594 597
595 shost_for_each_device(sdev, port->adapter->scsi_host) 598 spin_lock(port->adapter->scsi_host->host_lock);
599 __shost_for_each_device(sdev, port->adapter->scsi_host)
596 if (sdev_to_zfcp(sdev)->port == port) 600 if (sdev_to_zfcp(sdev)->port == port)
597 _zfcp_erp_lun_reopen(sdev, clear, id, 0); 601 _zfcp_erp_lun_reopen(sdev, clear, id, 0);
602 spin_unlock(port->adapter->scsi_host->host_lock);
598} 603}
599 604
600static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act) 605static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act)
@@ -1434,8 +1439,10 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask)
1434 atomic_set_mask(common_mask, &port->status); 1439 atomic_set_mask(common_mask, &port->status);
1435 read_unlock_irqrestore(&adapter->port_list_lock, flags); 1440 read_unlock_irqrestore(&adapter->port_list_lock, flags);
1436 1441
1437 shost_for_each_device(sdev, adapter->scsi_host) 1442 spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
1443 __shost_for_each_device(sdev, adapter->scsi_host)
1438 atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status); 1444 atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status);
1445 spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
1439} 1446}
1440 1447
1441/** 1448/**
@@ -1469,11 +1476,13 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
1469 } 1476 }
1470 read_unlock_irqrestore(&adapter->port_list_lock, flags); 1477 read_unlock_irqrestore(&adapter->port_list_lock, flags);
1471 1478
1472 shost_for_each_device(sdev, adapter->scsi_host) { 1479 spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
1480 __shost_for_each_device(sdev, adapter->scsi_host) {
1473 atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status); 1481 atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status);
1474 if (clear_counter) 1482 if (clear_counter)
1475 atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); 1483 atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
1476 } 1484 }
1485 spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
1477} 1486}
1478 1487
1479/** 1488/**
@@ -1487,16 +1496,19 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask)
1487{ 1496{
1488 struct scsi_device *sdev; 1497 struct scsi_device *sdev;
1489 u32 common_mask = mask & ZFCP_COMMON_FLAGS; 1498 u32 common_mask = mask & ZFCP_COMMON_FLAGS;
1499 unsigned long flags;
1490 1500
1491 atomic_set_mask(mask, &port->status); 1501 atomic_set_mask(mask, &port->status);
1492 1502
1493 if (!common_mask) 1503 if (!common_mask)
1494 return; 1504 return;
1495 1505
1496 shost_for_each_device(sdev, port->adapter->scsi_host) 1506 spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
1507 __shost_for_each_device(sdev, port->adapter->scsi_host)
1497 if (sdev_to_zfcp(sdev)->port == port) 1508 if (sdev_to_zfcp(sdev)->port == port)
1498 atomic_set_mask(common_mask, 1509 atomic_set_mask(common_mask,
1499 &sdev_to_zfcp(sdev)->status); 1510 &sdev_to_zfcp(sdev)->status);
1511 spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
1500} 1512}
1501 1513
1502/** 1514/**
@@ -1511,6 +1523,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
1511 struct scsi_device *sdev; 1523 struct scsi_device *sdev;
1512 u32 common_mask = mask & ZFCP_COMMON_FLAGS; 1524 u32 common_mask = mask & ZFCP_COMMON_FLAGS;
1513 u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED; 1525 u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
1526 unsigned long flags;
1514 1527
1515 atomic_clear_mask(mask, &port->status); 1528 atomic_clear_mask(mask, &port->status);
1516 1529
@@ -1520,13 +1533,15 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
1520 if (clear_counter) 1533 if (clear_counter)
1521 atomic_set(&port->erp_counter, 0); 1534 atomic_set(&port->erp_counter, 0);
1522 1535
1523 shost_for_each_device(sdev, port->adapter->scsi_host) 1536 spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
1537 __shost_for_each_device(sdev, port->adapter->scsi_host)
1524 if (sdev_to_zfcp(sdev)->port == port) { 1538 if (sdev_to_zfcp(sdev)->port == port) {
1525 atomic_clear_mask(common_mask, 1539 atomic_clear_mask(common_mask,
1526 &sdev_to_zfcp(sdev)->status); 1540 &sdev_to_zfcp(sdev)->status);
1527 if (clear_counter) 1541 if (clear_counter)
1528 atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0); 1542 atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
1529 } 1543 }
1544 spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
1530} 1545}
1531 1546
1532/** 1547/**