diff options
author | NeilBrown <neilb@suse.de> | 2007-01-26 03:57:11 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-01-26 16:51:00 -0500 |
commit | 2a2275d630b982e5f90206f9bc497f6695a3ec5d (patch) | |
tree | 88cddee709b2107b74e5424810d4ffb6e3772382 | |
parent | a0ad13ef643a5829d63c456ab6143bbda60b44a9 (diff) |
[PATCH] md: fix potential memalloc deadlock in md
If a GFP_KERNEL allocation is attempted in md while the mddev_lock is held,
it is possible for a deadlock to eventuate.
This happens if the array was marked 'clean', and the memalloc triggers a
write-out to the md device.
For the writeout to succeed, the array must be marked 'dirty', and that
requires getting the mddev_lock.
So, before attempting a GFP_KERNEL allocation while holding the lock, make
sure the array is marked 'dirty' (unless it is currently read-only).
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/md/md.c | 29 | ||||
-rw-r--r-- | drivers/md/raid1.c | 2 | ||||
-rw-r--r-- | drivers/md/raid5.c | 3 | ||||
-rw-r--r-- | include/linux/raid/md.h | 2 |
4 files changed, 35 insertions, 1 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index ec3d8e8a0bd3..e8807ea5377d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -3564,6 +3564,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg) | |||
3564 | char *ptr, *buf = NULL; | 3564 | char *ptr, *buf = NULL; |
3565 | int err = -ENOMEM; | 3565 | int err = -ENOMEM; |
3566 | 3566 | ||
3567 | md_allow_write(mddev); | ||
3568 | |||
3567 | file = kmalloc(sizeof(*file), GFP_KERNEL); | 3569 | file = kmalloc(sizeof(*file), GFP_KERNEL); |
3568 | if (!file) | 3570 | if (!file) |
3569 | goto out; | 3571 | goto out; |
@@ -5032,6 +5034,33 @@ void md_write_end(mddev_t *mddev) | |||
5032 | } | 5034 | } |
5033 | } | 5035 | } |
5034 | 5036 | ||
5037 | /* md_allow_write(mddev) | ||
5038 | * Calling this ensures that the array is marked 'active' so that writes | ||
5039 | * may proceed without blocking. It is important to call this before | ||
5040 | * attempting a GFP_KERNEL allocation while holding the mddev lock. | ||
5041 | * Must be called with mddev_lock held. | ||
5042 | */ | ||
5043 | void md_allow_write(mddev_t *mddev) | ||
5044 | { | ||
5045 | if (!mddev->pers) | ||
5046 | return; | ||
5047 | if (mddev->ro) | ||
5048 | return; | ||
5049 | |||
5050 | spin_lock_irq(&mddev->write_lock); | ||
5051 | if (mddev->in_sync) { | ||
5052 | mddev->in_sync = 0; | ||
5053 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
5054 | if (mddev->safemode_delay && | ||
5055 | mddev->safemode == 0) | ||
5056 | mddev->safemode = 1; | ||
5057 | spin_unlock_irq(&mddev->write_lock); | ||
5058 | md_update_sb(mddev, 0); | ||
5059 | } else | ||
5060 | spin_unlock_irq(&mddev->write_lock); | ||
5061 | } | ||
5062 | EXPORT_SYMBOL_GPL(md_allow_write); | ||
5063 | |||
5035 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | 5064 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); |
5036 | 5065 | ||
5037 | #define SYNC_MARKS 10 | 5066 | #define SYNC_MARKS 10 |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ab74d40cac98..97ee870b265d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -2104,6 +2104,8 @@ static int raid1_reshape(mddev_t *mddev) | |||
2104 | return -EINVAL; | 2104 | return -EINVAL; |
2105 | } | 2105 | } |
2106 | 2106 | ||
2107 | md_allow_write(mddev); | ||
2108 | |||
2107 | raid_disks = mddev->raid_disks + mddev->delta_disks; | 2109 | raid_disks = mddev->raid_disks + mddev->delta_disks; |
2108 | 2110 | ||
2109 | if (raid_disks < conf->raid_disks) { | 2111 | if (raid_disks < conf->raid_disks) { |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index be008f034ada..8a30b297ac3a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -405,6 +405,8 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
405 | if (newsize <= conf->pool_size) | 405 | if (newsize <= conf->pool_size) |
406 | return 0; /* never bother to shrink */ | 406 | return 0; /* never bother to shrink */ |
407 | 407 | ||
408 | md_allow_write(conf->mddev); | ||
409 | |||
408 | /* Step 1 */ | 410 | /* Step 1 */ |
409 | sc = kmem_cache_create(conf->cache_name[1-conf->active_name], | 411 | sc = kmem_cache_create(conf->cache_name[1-conf->active_name], |
410 | sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), | 412 | sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), |
@@ -3250,6 +3252,7 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | |||
3250 | else | 3252 | else |
3251 | break; | 3253 | break; |
3252 | } | 3254 | } |
3255 | md_allow_write(mddev); | ||
3253 | while (new > conf->max_nr_stripes) { | 3256 | while (new > conf->max_nr_stripes) { |
3254 | if (grow_one_stripe(conf)) | 3257 | if (grow_one_stripe(conf)) |
3255 | conf->max_nr_stripes++; | 3258 | conf->max_nr_stripes++; |
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index 866a1e2b0ce0..fbaeda79b2e9 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h | |||
@@ -94,7 +94,7 @@ extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, | |||
94 | struct page *page, int rw); | 94 | struct page *page, int rw); |
95 | extern void md_do_sync(mddev_t *mddev); | 95 | extern void md_do_sync(mddev_t *mddev); |
96 | extern void md_new_event(mddev_t *mddev); | 96 | extern void md_new_event(mddev_t *mddev); |
97 | 97 | extern void md_allow_write(mddev_t *mddev); | |
98 | 98 | ||
99 | #endif /* CONFIG_MD */ | 99 | #endif /* CONFIG_MD */ |
100 | #endif | 100 | #endif |