aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Haberland <sth@linux.vnet.ibm.com>2017-05-16 04:30:13 -0400
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2017-06-12 10:26:01 -0400
commit2757fe1d8ebd0e6ab1dbf1105978b8c8369dcc49 (patch)
tree581299b3b20bcea725808b7a30ed0138df0fb25d
parentb487a914f853545842a0899329b6b72fe56c4081 (diff)
s390/dasd: fix unusable device after safe offline processing
The safe offline processing needs, just like the normal offline processing, to be locked against multiple parallel executions. But it should be possible for a normal offline processing to overtake it, to make sure that the device does not wait forever for outstanding I/O if the user does not want to wait. Unfortunately the parallel processing of safe offline and normal offline might lead to a race situation where both threads report successful execution to the CIO layer, which in turn tries to deregister the kobject of the device twice. This leads to a "refcount_t: underflow; use-after-free." error, and the device cannot be set online again afterwards without a reboot. Correct the locking of the safe offline processing by doing the following: - Use the cdev lock to secure all set and test operations to the device flags. - Two safe offline processes are locked against each other using the DASD_FLAG_SAFE_OFFLINE and DASD_FLAG_SAFE_OFFLINE_RUNNING device flags. The differentiation between offline triggered and offline running is needed since the normal offline attribute is owned by CIO and we have to pass over control in between. - The dasd_generic_set_offline process handles the offline processing. It is locked against parallel execution using the DASD_FLAG_OFFLINE. - Only a running safe offline should be able to be overtaken by a single normal offline. This is ensured by clearing the DASD_FLAG_SAFE_OFFLINE_RUNNING flag when a normal offline overtakes. So this can only happen once. - The safe offline just aborts in this case, doing nothing, and the normal offline processing finishes as usual. Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r--drivers/s390/block/dasd.c68
-rw-r--r--drivers/s390/block/dasd_devmap.c7
2 files changed, 44 insertions, 31 deletions
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6fb3fd5efc11..b0c65dcb6865 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3562,57 +3562,69 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
3562 else 3562 else
3563 pr_warn("%s: The DASD cannot be set offline while it is in use\n", 3563 pr_warn("%s: The DASD cannot be set offline while it is in use\n",
3564 dev_name(&cdev->dev)); 3564 dev_name(&cdev->dev));
3565 clear_bit(DASD_FLAG_OFFLINE, &device->flags); 3565 rc = -EBUSY;
3566 goto out_busy; 3566 goto out_err;
3567 } 3567 }
3568 } 3568 }
3569 3569
3570 if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { 3570 /*
3571 /* 3571 * Test if the offline processing is already running and exit if so.
3572 * safe offline already running 3572 * If a safe offline is being processed this could only be a normal
3573 * could only be called by normal offline so safe_offline flag 3573 * offline that should be able to overtake the safe offline and
3574 * needs to be removed to run normal offline and kill all I/O 3574 * cancel any I/O we do not want to wait for any longer
3575 */ 3575 */
3576 if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) 3576 if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
3577 /* Already doing normal offline processing */ 3577 if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
3578 goto out_busy; 3578 clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING,
3579 else 3579 &device->flags);
3580 clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); 3580 } else {
3581 } else { 3581 rc = -EBUSY;
3582 if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) 3582 goto out_err;
3583 /* Already doing offline processing */ 3583 }
3584 goto out_busy;
3585 } 3584 }
3586
3587 set_bit(DASD_FLAG_OFFLINE, &device->flags); 3585 set_bit(DASD_FLAG_OFFLINE, &device->flags);
3588 spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
3589 3586
3590 /* 3587 /*
3591 * if safe_offline called set safe_offline_running flag and 3588 * if safe_offline is called set safe_offline_running flag and
3592 * clear safe_offline so that a call to normal offline 3589 * clear safe_offline so that a call to normal offline
3593 * can overrun safe_offline processing 3590 * can overrun safe_offline processing
3594 */ 3591 */
3595 if (test_and_clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags) && 3592 if (test_and_clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags) &&
3596 !test_and_set_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { 3593 !test_and_set_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
3594 /* need to unlock here to wait for outstanding I/O */
3595 spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
3597 /* 3596 /*
3598 * If we want to set the device safe offline all IO operations 3597 * If we want to set the device safe offline all IO operations
3599 * should be finished before continuing the offline process 3598 * should be finished before continuing the offline process
3600 * so sync bdev first and then wait for our queues to become 3599 * so sync bdev first and then wait for our queues to become
3601 * empty 3600 * empty
3602 */ 3601 */
3603 /* sync blockdev and partitions */
3604 if (device->block) { 3602 if (device->block) {
3605 rc = fsync_bdev(device->block->bdev); 3603 rc = fsync_bdev(device->block->bdev);
3606 if (rc != 0) 3604 if (rc != 0)
3607 goto interrupted; 3605 goto interrupted;
3608 } 3606 }
3609 /* schedule device tasklet and wait for completion */
3610 dasd_schedule_device_bh(device); 3607 dasd_schedule_device_bh(device);
3611 rc = wait_event_interruptible(shutdown_waitq, 3608 rc = wait_event_interruptible(shutdown_waitq,
3612 _wait_for_empty_queues(device)); 3609 _wait_for_empty_queues(device));
3613 if (rc != 0) 3610 if (rc != 0)
3614 goto interrupted; 3611 goto interrupted;
3612
3613 /*
3614 * check if a normal offline process overtook the offline
3615 * processing in this case simply do nothing beside returning
3616 * that we got interrupted
3617 * otherwise mark safe offline as not running any longer and
3618 * continue with normal offline
3619 */
3620 spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
3621 if (!test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
3622 rc = -ERESTARTSYS;
3623 goto out_err;
3624 }
3625 clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
3615 } 3626 }
3627 spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
3616 3628
3617 dasd_set_target_state(device, DASD_STATE_NEW); 3629 dasd_set_target_state(device, DASD_STATE_NEW);
3618 /* dasd_delete_device destroys the device reference. */ 3630 /* dasd_delete_device destroys the device reference. */
@@ -3624,22 +3636,18 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
3624 */ 3636 */
3625 if (block) 3637 if (block)
3626 dasd_free_block(block); 3638 dasd_free_block(block);
3639
3627 return 0; 3640 return 0;
3628 3641
3629interrupted: 3642interrupted:
3630 /* interrupted by signal */ 3643 /* interrupted by signal */
3631 clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); 3644 spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
3632 clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags); 3645 clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
3633 clear_bit(DASD_FLAG_OFFLINE, &device->flags); 3646 clear_bit(DASD_FLAG_OFFLINE, &device->flags);
3634 dasd_put_device(device); 3647out_err:
3635
3636 return rc;
3637
3638out_busy:
3639 dasd_put_device(device); 3648 dasd_put_device(device);
3640 spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); 3649 spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
3641 3650 return rc;
3642 return -EBUSY;
3643} 3651}
3644EXPORT_SYMBOL_GPL(dasd_generic_set_offline); 3652EXPORT_SYMBOL_GPL(dasd_generic_set_offline);
3645 3653
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 0ce84f0a4d7f..e943d9c48926 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -950,11 +950,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr,
950{ 950{
951 struct ccw_device *cdev = to_ccwdev(dev); 951 struct ccw_device *cdev = to_ccwdev(dev);
952 struct dasd_device *device; 952 struct dasd_device *device;
953 unsigned long flags;
953 int rc; 954 int rc;
954 955
955 device = dasd_device_from_cdev(cdev); 956 spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
957 device = dasd_device_from_cdev_locked(cdev);
956 if (IS_ERR(device)) { 958 if (IS_ERR(device)) {
957 rc = PTR_ERR(device); 959 rc = PTR_ERR(device);
960 spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
958 goto out; 961 goto out;
959 } 962 }
960 963
@@ -962,12 +965,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr,
962 test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { 965 test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
963 /* Already doing offline processing */ 966 /* Already doing offline processing */
964 dasd_put_device(device); 967 dasd_put_device(device);
968 spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
965 rc = -EBUSY; 969 rc = -EBUSY;
966 goto out; 970 goto out;
967 } 971 }
968 972
969 set_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); 973 set_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
970 dasd_put_device(device); 974 dasd_put_device(device);
975 spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
971 976
972 rc = ccw_device_set_offline(cdev); 977 rc = ccw_device_set_offline(cdev);
973 978