aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorKiyoshi Ueda <k-ueda@ct.jp.nec.com>2010-08-11 23:13:56 -0400
committerAlasdair G Kergon <agk@redhat.com>2010-08-11 23:13:56 -0400
commit3f77316de0ec0fd208467fbee8d9edc70e2c73b2 (patch)
tree73a91aa53eefe9537432a344ebf116cc7d106e51 /drivers/md
parent98f332855effef02aeb738e4d62e9a5b903c52fd (diff)
dm: separate device deletion from dm_put
This patch separates the device deletion code from dm_put() to make sure the deletion happens in the process context. By this patch, device deletion always occurs in an ioctl (process) context and dm_put() can be called in interrupt context. As a result, the request-based dm's bad dm_put() usage pointed out by Mikulas below disappears. http://marc.info/?l=dm-devel&m=126699981019735&w=2 Without this patch, I confirmed there is a case to crash the system: dm_put() => dm_table_destroy() => vfree() => BUG_ON(in_interrupt()) Some more backgrounds and details: In request-based dm, a device opener can remove a mapped_device while the last request is still completing, because bios in the last request complete first and then the device opener can close and remove the mapped_device before the last request completes: CPU0 CPU1 ================================================================= <<INTERRUPT>> blk_end_request_all(clone_rq) blk_update_request(clone_rq) bio_endio(clone_bio) == end_clone_bio blk_update_request(orig_rq) bio_endio(orig_bio) <<I/O completed>> dm_blk_close() dev_remove() dm_put(md) <<Free md>> blk_finish_request(clone_rq) .... dm_end_request(clone_rq) free_rq_clone(clone_rq) blk_end_request_all(orig_rq) rq_completed(md) So request-based dm used dm_get()/dm_put() to hold md for each I/O until its request completion handling is fully done. However, the final dm_put() can call the device deletion code which must not be run in interrupt context and may cause kernel panic. To solve the problem, this patch moves the device deletion code, dm_destroy(), to predetermined places that is actually deleting the mapped_device in ioctl (process) context, and changes dm_put() just to decrement the reference count of the mapped_device. By this change, dm_put() can be used in any context and the symmetric model below is introduced: dm_create(): create a mapped_device dm_destroy(): destroy a mapped_device dm_get(): increment the reference count of a mapped_device dm_put(): decrement the reference count of a mapped_device dm_destroy() waits for all references of the mapped_device to disappear, then deletes the mapped_device. dm_destroy() uses active waiting with msleep(1), since deleting the mapped_device isn't performance-critical task. And since at this point, nobody opens the mapped_device and no new reference will be taken, the pending counts are just for racing completing activity and will eventually decrease to zero. For the unlikely case of the forced module unload, dm_destroy_immediate(), which doesn't wait and forcibly deletes the mapped_device, is also introduced and used in dm_hash_remove_all(). Otherwise, "rmmod -f" may be stuck and never return. And now, because the mapped_device is deleted at this point, subsequent accesses to the mapped_device may cause NULL pointer references. Cc: stable@kernel.org Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com> Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-ioctl.c15
-rw-r--r--drivers/md/dm.c62
-rw-r--r--drivers/md/dm.h5
3 files changed, 62 insertions, 20 deletions
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 6a6d475f8e8..feb64d65fbf 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -274,6 +274,10 @@ retry:
274 up_write(&_hash_lock); 274 up_write(&_hash_lock);
275 275
276 dm_put(md); 276 dm_put(md);
277 if (likely(keep_open_devices))
278 dm_destroy(md);
279 else
280 dm_destroy_immediate(md);
277 281
278 /* 282 /*
279 * Some mapped devices may be using other mapped 283 * Some mapped devices may be using other mapped
@@ -644,17 +648,19 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
644 return r; 648 return r;
645 649
646 r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); 650 r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
647 if (r) 651 if (r) {
648 goto out; 652 dm_put(md);
653 dm_destroy(md);
654 return r;
655 }
649 656
650 param->flags &= ~DM_INACTIVE_PRESENT_FLAG; 657 param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
651 658
652 __dev_status(md, param); 659 __dev_status(md, param);
653 660
654out:
655 dm_put(md); 661 dm_put(md);
656 662
657 return r; 663 return 0;
658} 664}
659 665
660/* 666/*
@@ -748,6 +754,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
748 param->flags |= DM_UEVENT_GENERATED_FLAG; 754 param->flags |= DM_UEVENT_GENERATED_FLAG;
749 755
750 dm_put(md); 756 dm_put(md);
757 dm_destroy(md);
751 return 0; 758 return 0;
752} 759}
753 760
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ba6934c3e2c..a503b95ecbf 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,6 +20,7 @@
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/idr.h> 21#include <linux/idr.h>
22#include <linux/hdreg.h> 22#include <linux/hdreg.h>
23#include <linux/delay.h>
23 24
24#include <trace/events/block.h> 25#include <trace/events/block.h>
25 26
@@ -2171,6 +2172,7 @@ void dm_set_mdptr(struct mapped_device *md, void *ptr)
2171void dm_get(struct mapped_device *md) 2172void dm_get(struct mapped_device *md)
2172{ 2173{
2173 atomic_inc(&md->holders); 2174 atomic_inc(&md->holders);
2175 BUG_ON(test_bit(DMF_FREEING, &md->flags));
2174} 2176}
2175 2177
2176const char *dm_device_name(struct mapped_device *md) 2178const char *dm_device_name(struct mapped_device *md)
@@ -2179,27 +2181,55 @@ const char *dm_device_name(struct mapped_device *md)
2179} 2181}
2180EXPORT_SYMBOL_GPL(dm_device_name); 2182EXPORT_SYMBOL_GPL(dm_device_name);
2181 2183
2182void dm_put(struct mapped_device *md) 2184static void __dm_destroy(struct mapped_device *md, bool wait)
2183{ 2185{
2184 struct dm_table *map; 2186 struct dm_table *map;
2185 2187
2186 BUG_ON(test_bit(DMF_FREEING, &md->flags)); 2188 might_sleep();
2187 2189
2188 if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { 2190 spin_lock(&_minor_lock);
2189 map = dm_get_live_table(md); 2191 map = dm_get_live_table(md);
2190 idr_replace(&_minor_idr, MINOR_ALLOCED, 2192 idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
2191 MINOR(disk_devt(dm_disk(md)))); 2193 set_bit(DMF_FREEING, &md->flags);
2192 set_bit(DMF_FREEING, &md->flags); 2194 spin_unlock(&_minor_lock);
2193 spin_unlock(&_minor_lock); 2195
2194 if (!dm_suspended_md(md)) { 2196 if (!dm_suspended_md(md)) {
2195 dm_table_presuspend_targets(map); 2197 dm_table_presuspend_targets(map);
2196 dm_table_postsuspend_targets(map); 2198 dm_table_postsuspend_targets(map);
2197 }
2198 dm_sysfs_exit(md);
2199 dm_table_put(map);
2200 dm_table_destroy(__unbind(md));
2201 free_dev(md);
2202 } 2199 }
2200
2201 /*
2202 * Rare, but there may be I/O requests still going to complete,
2203 * for example. Wait for all references to disappear.
2204 * No one should increment the reference count of the mapped_device,
2205 * after the mapped_device state becomes DMF_FREEING.
2206 */
2207 if (wait)
2208 while (atomic_read(&md->holders))
2209 msleep(1);
2210 else if (atomic_read(&md->holders))
2211 DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
2212 dm_device_name(md), atomic_read(&md->holders));
2213
2214 dm_sysfs_exit(md);
2215 dm_table_put(map);
2216 dm_table_destroy(__unbind(md));
2217 free_dev(md);
2218}
2219
2220void dm_destroy(struct mapped_device *md)
2221{
2222 __dm_destroy(md, true);
2223}
2224
2225void dm_destroy_immediate(struct mapped_device *md)
2226{
2227 __dm_destroy(md, false);
2228}
2229
2230void dm_put(struct mapped_device *md)
2231{
2232 atomic_dec(&md->holders);
2203} 2233}
2204EXPORT_SYMBOL_GPL(dm_put); 2234EXPORT_SYMBOL_GPL(dm_put);
2205 2235
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index bad1724d486..8223671e490 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -122,6 +122,11 @@ void dm_linear_exit(void);
122int dm_stripe_init(void); 122int dm_stripe_init(void);
123void dm_stripe_exit(void); 123void dm_stripe_exit(void);
124 124
125/*
126 * mapped_device operations
127 */
128void dm_destroy(struct mapped_device *md);
129void dm_destroy_immediate(struct mapped_device *md);
125int dm_open_count(struct mapped_device *md); 130int dm_open_count(struct mapped_device *md);
126int dm_lock_for_deletion(struct mapped_device *md); 131int dm_lock_for_deletion(struct mapped_device *md);
127 132