diff options
author | NeilBrown <neilb@suse.de> | 2013-08-27 02:44:13 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2013-08-27 02:45:00 -0400 |
commit | 260fa034ef7a4ff8b73068b48ac497edd5217491 (patch) | |
tree | 6a328502545c8f155b34c08c72f7023166991bce /drivers/md/md.c | |
parent | 7a0a5355cbc71efa430c3730ffbd67ae04abfe31 (diff) |
md: avoid deadlock when dirty buffers during md_stop.
When the last process closes /dev/mdX sync_blockdev will be called so
that all buffers get flushed.
So if it is then opened for the STOP_ARRAY ioctl to be sent there will
be nothing to flush.
However if we open /dev/mdX in order to send the STOP_ARRAY ioctl just
moments before some other process which was writing closes their file
descriptor, then there won't be a 'last close' and the buffers might
not get flushed.
So do_md_stop() calls sync_blockdev(). However at this point it is
holding ->reconfig_mutex. So if the array is currently 'clean' then
the writes from sync_blockdev() will not complete until the array
can be marked dirty and that won't happen until some other thread
can get ->reconfig_mutex. So we deadlock.
We need to move the sync_blockdev() call to before we take
->reconfig_mutex.
However then some other thread could open /dev/mdX and write to it
after we call sync_blockdev() and before we actually stop the array.
This can leave dirty data in the page cache which is awkward.
So introduce new flag MD_STILL_CLOSED. Set it before calling
sync_blockdev(), clear it if anyone does open the file, and abort the
STOP_ARRAY attempt if it gets set before we lock against further
opens.
It is still possible to get problems if you open /dev/mdX, write to
it, then issue the STOP_ARRAY ioctl. Just don't do that.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 39 |
1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 084a6540a4bd..adf4d7e1d5e1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -5337,8 +5337,14 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) | |||
5337 | err = -EBUSY; | 5337 | err = -EBUSY; |
5338 | goto out; | 5338 | goto out; |
5339 | } | 5339 | } |
5340 | if (bdev) | 5340 | if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) { |
5341 | sync_blockdev(bdev); | 5341 | /* Someone opened the device since we flushed it |
5342 | * so page cache could be dirty and it is too late | ||
5343 | * to flush. So abort | ||
5344 | */ | ||
5345 | mutex_unlock(&mddev->open_mutex); | ||
5346 | return -EBUSY; | ||
5347 | } | ||
5342 | if (mddev->pers) { | 5348 | if (mddev->pers) { |
5343 | __md_stop_writes(mddev); | 5349 | __md_stop_writes(mddev); |
5344 | 5350 | ||
@@ -5373,14 +5379,14 @@ static int do_md_stop(struct mddev * mddev, int mode, | |||
5373 | mutex_unlock(&mddev->open_mutex); | 5379 | mutex_unlock(&mddev->open_mutex); |
5374 | return -EBUSY; | 5380 | return -EBUSY; |
5375 | } | 5381 | } |
5376 | if (bdev) | 5382 | if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) { |
5377 | /* It is possible IO was issued on some other | 5383 | /* Someone opened the device since we flushed it |
5378 | * open file which was closed before we took ->open_mutex. | 5384 | * so page cache could be dirty and it is too late |
5379 | * As that was not the last close __blkdev_put will not | 5385 | * to flush. So abort |
5380 | * have called sync_blockdev, so we must. | ||
5381 | */ | 5386 | */ |
5382 | sync_blockdev(bdev); | 5387 | mutex_unlock(&mddev->open_mutex); |
5383 | 5388 | return -EBUSY; | |
5389 | } | ||
5384 | if (mddev->pers) { | 5390 | if (mddev->pers) { |
5385 | if (mddev->ro) | 5391 | if (mddev->ro) |
5386 | set_disk_ro(disk, 0); | 5392 | set_disk_ro(disk, 0); |
@@ -6417,6 +6423,20 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
6417 | !test_bit(MD_RECOVERY_NEEDED, | 6423 | !test_bit(MD_RECOVERY_NEEDED, |
6418 | &mddev->flags), | 6424 | &mddev->flags), |
6419 | msecs_to_jiffies(5000)); | 6425 | msecs_to_jiffies(5000)); |
6426 | if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) { | ||
6427 | /* Need to flush page cache, and ensure no-one else opens | ||
6428 | * and writes | ||
6429 | */ | ||
6430 | mutex_lock(&mddev->open_mutex); | ||
6431 | if (atomic_read(&mddev->openers) > 1) { | ||
6432 | mutex_unlock(&mddev->open_mutex); | ||
6433 | err = -EBUSY; | ||
6434 | goto abort; | ||
6435 | } | ||
6436 | set_bit(MD_STILL_CLOSED, &mddev->flags); | ||
6437 | mutex_unlock(&mddev->open_mutex); | ||
6438 | sync_blockdev(bdev); | ||
6439 | } | ||
6420 | err = mddev_lock(mddev); | 6440 | err = mddev_lock(mddev); |
6421 | if (err) { | 6441 | if (err) { |
6422 | printk(KERN_INFO | 6442 | printk(KERN_INFO |
@@ -6670,6 +6690,7 @@ static int md_open(struct block_device *bdev, fmode_t mode) | |||
6670 | 6690 | ||
6671 | err = 0; | 6691 | err = 0; |
6672 | atomic_inc(&mddev->openers); | 6692 | atomic_inc(&mddev->openers); |
6693 | clear_bit(MD_STILL_CLOSED, &mddev->flags); | ||
6673 | mutex_unlock(&mddev->open_mutex); | 6694 | mutex_unlock(&mddev->open_mutex); |
6674 | 6695 | ||
6675 | check_disk_change(bdev); | 6696 | check_disk_change(bdev); |