aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorMikulas Patocka <mpatocka@redhat.com>2014-01-13 19:37:54 -0500
committerMike Snitzer <snitzer@redhat.com>2014-01-14 23:23:04 -0500
commit2995fa78e423d7193f3b57835f6c1c75006a0315 (patch)
tree9f107c0503a3c0ba58559a62847e9236f3fd1951 /drivers
parent55b082e614e219fb5199a6f93e648ed35d3c96d5 (diff)
dm sysfs: fix a module unload race
This reverts commit be35f48610 ("dm: wait until embedded kobject is released before destroying a device") and provides an improved fix. The kobject release code that calls the completion must be placed in a non-module file, otherwise there is a module unload race (if the process calling dm_kobject_release is preempted and the DM module unloaded after the completion is triggered, but before dm_kobject_release returns). To fix this race, this patch moves the completion code to dm-builtin.c which is always compiled directly into the kernel if BLK_DEV_DM is selected. The patch introduces a new dm_kobject_holder structure, its purpose is to keep the completion and kobject in one place, so that it can be accessed from non-module code without the need to export the layout of struct mapped_device to that code. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> Cc: stable@vger.kernel.org
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/Kconfig4
-rw-r--r--drivers/md/Makefile1
-rw-r--r--drivers/md/dm-builtin.c48
-rw-r--r--drivers/md/dm-sysfs.c5
-rw-r--r--drivers/md/dm.c20
-rw-r--r--drivers/md/dm.h17
6 files changed, 74 insertions, 21 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 39b540a13369..9a06fe883766 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -176,8 +176,12 @@ config MD_FAULTY
176 176
177source "drivers/md/bcache/Kconfig" 177source "drivers/md/bcache/Kconfig"
178 178
179config BLK_DEV_DM_BUILTIN
180 boolean
181
179config BLK_DEV_DM 182config BLK_DEV_DM
180 tristate "Device mapper support" 183 tristate "Device mapper support"
184 select BLK_DEV_DM_BUILTIN
181 ---help--- 185 ---help---
182 Device-mapper is a low level volume manager. It works by allowing 186 Device-mapper is a low level volume manager. It works by allowing
183 people to specify mappings for ranges of logical sectors. Various 187 people to specify mappings for ranges of logical sectors. Various
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 2acc43fe0229..f26d83292579 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o
32obj-$(CONFIG_BCACHE) += bcache/ 32obj-$(CONFIG_BCACHE) += bcache/
33obj-$(CONFIG_BLK_DEV_MD) += md-mod.o 33obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
34obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o 34obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
35obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o
35obj-$(CONFIG_DM_BUFIO) += dm-bufio.o 36obj-$(CONFIG_DM_BUFIO) += dm-bufio.o
36obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o 37obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o
37obj-$(CONFIG_DM_CRYPT) += dm-crypt.o 38obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
diff --git a/drivers/md/dm-builtin.c b/drivers/md/dm-builtin.c
new file mode 100644
index 000000000000..6c9049c51b2b
--- /dev/null
+++ b/drivers/md/dm-builtin.c
@@ -0,0 +1,48 @@
1#include "dm.h"
2
3/*
4 * The kobject release method must not be placed in the module itself,
5 * otherwise we are subject to module unload races.
6 *
7 * The release method is called when the last reference to the kobject is
8 * dropped. It may be called by any other kernel code that drops the last
9 * reference.
10 *
11 * The release method suffers from module unload race. We may prevent the
12 * module from being unloaded at the start of the release method (using
13 * increased module reference count or synchronizing against the release
14 * method), however there is no way to prevent the module from being
15 * unloaded at the end of the release method.
16 *
17 * If this code were placed in the dm module, the following race may
18 * happen:
19 * 1. Some other process takes a reference to dm kobject
20 * 2. The user issues ioctl function to unload the dm device
21 * 3. dm_sysfs_exit calls kobject_put, however the object is not released
22 * because of the other reference taken at step 1
23 * 4. dm_sysfs_exit waits on the completion
24 * 5. The other process that took the reference in step 1 drops it,
25 * dm_kobject_release is called from this process
26 * 6. dm_kobject_release calls complete()
27 * 7. a reschedule happens before dm_kobject_release returns
28 * 8. dm_sysfs_exit continues, the dm device is unloaded, module reference
29 * count is decremented
30 * 9. The user unloads the dm module
31 * 10. The other process that was rescheduled in step 7 continues to run,
32 * it is now executing code in unloaded module, so it crashes
33 *
34 * Note that if the process that takes the foreign reference to dm kobject
35 * has a low priority and the system is sufficiently loaded with
36 * higher-priority processes that prevent the low-priority process from
37 * being scheduled long enough, this bug may really happen.
38 *
39 * In order to fix this module unload race, we place the release method
40 * into a helper code that is compiled directly into the kernel.
41 */
42
43void dm_kobject_release(struct kobject *kobj)
44{
45 complete(dm_get_completion_from_kobject(kobj));
46}
47
48EXPORT_SYMBOL(dm_kobject_release);
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
index e0cc5d6a9e46..c62c5ab6aed5 100644
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -79,11 +79,6 @@ static const struct sysfs_ops dm_sysfs_ops = {
79 .show = dm_attr_show, 79 .show = dm_attr_show,
80}; 80};
81 81
82static void dm_kobject_release(struct kobject *kobj)
83{
84 complete(dm_get_completion_from_kobject(kobj));
85}
86
87/* 82/*
88 * dm kobject is embedded in mapped_device structure 83 * dm kobject is embedded in mapped_device structure
89 * no need to define release function here 84 * no need to define release function here
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e290e72922a4..b49c76284241 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -200,11 +200,8 @@ struct mapped_device {
200 /* forced geometry settings */ 200 /* forced geometry settings */
201 struct hd_geometry geometry; 201 struct hd_geometry geometry;
202 202
203 /* sysfs handle */ 203 /* kobject and completion */
204 struct kobject kobj; 204 struct dm_kobject_holder kobj_holder;
205
206 /* wait until the kobject is released */
207 struct completion kobj_completion;
208 205
209 /* zero-length flush that will be cloned and submitted to targets */ 206 /* zero-length flush that will be cloned and submitted to targets */
210 struct bio flush_bio; 207 struct bio flush_bio;
@@ -2044,7 +2041,7 @@ static struct mapped_device *alloc_dev(int minor)
2044 init_waitqueue_head(&md->wait); 2041 init_waitqueue_head(&md->wait);
2045 INIT_WORK(&md->work, dm_wq_work); 2042 INIT_WORK(&md->work, dm_wq_work);
2046 init_waitqueue_head(&md->eventq); 2043 init_waitqueue_head(&md->eventq);
2047 init_completion(&md->kobj_completion); 2044 init_completion(&md->kobj_holder.completion);
2048 2045
2049 md->disk->major = _major; 2046 md->disk->major = _major;
2050 md->disk->first_minor = minor; 2047 md->disk->first_minor = minor;
@@ -2906,14 +2903,14 @@ struct gendisk *dm_disk(struct mapped_device *md)
2906 2903
2907struct kobject *dm_kobject(struct mapped_device *md) 2904struct kobject *dm_kobject(struct mapped_device *md)
2908{ 2905{
2909 return &md->kobj; 2906 return &md->kobj_holder.kobj;
2910} 2907}
2911 2908
2912struct mapped_device *dm_get_from_kobject(struct kobject *kobj) 2909struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
2913{ 2910{
2914 struct mapped_device *md; 2911 struct mapped_device *md;
2915 2912
2916 md = container_of(kobj, struct mapped_device, kobj); 2913 md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
2917 2914
2918 if (test_bit(DMF_FREEING, &md->flags) || 2915 if (test_bit(DMF_FREEING, &md->flags) ||
2919 dm_deleting_md(md)) 2916 dm_deleting_md(md))
@@ -2923,13 +2920,6 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
2923 return md; 2920 return md;
2924} 2921}
2925 2922
2926struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
2927{
2928 struct mapped_device *md = container_of(kobj, struct mapped_device, kobj);
2929
2930 return &md->kobj_completion;
2931}
2932
2933int dm_suspended_md(struct mapped_device *md) 2923int dm_suspended_md(struct mapped_device *md)
2934{ 2924{
2935 return test_bit(DMF_SUSPENDED, &md->flags); 2925 return test_bit(DMF_SUSPENDED, &md->flags);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 1ab2028559ca..c4569f02f50f 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -16,6 +16,7 @@
16#include <linux/blkdev.h> 16#include <linux/blkdev.h>
17#include <linux/hdreg.h> 17#include <linux/hdreg.h>
18#include <linux/completion.h> 18#include <linux/completion.h>
19#include <linux/kobject.h>
19 20
20#include "dm-stats.h" 21#include "dm-stats.h"
21 22
@@ -149,11 +150,25 @@ void dm_interface_exit(void);
149/* 150/*
150 * sysfs interface 151 * sysfs interface
151 */ 152 */
153struct dm_kobject_holder {
154 struct kobject kobj;
155 struct completion completion;
156};
157
158static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
159{
160 return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
161}
162
152int dm_sysfs_init(struct mapped_device *md); 163int dm_sysfs_init(struct mapped_device *md);
153void dm_sysfs_exit(struct mapped_device *md); 164void dm_sysfs_exit(struct mapped_device *md);
154struct kobject *dm_kobject(struct mapped_device *md); 165struct kobject *dm_kobject(struct mapped_device *md);
155struct mapped_device *dm_get_from_kobject(struct kobject *kobj); 166struct mapped_device *dm_get_from_kobject(struct kobject *kobj);
156struct completion *dm_get_completion_from_kobject(struct kobject *kobj); 167
168/*
169 * The kobject helper
170 */
171void dm_kobject_release(struct kobject *kobj);
157 172
158/* 173/*
159 * Targets for linear and striped mappings 174 * Targets for linear and striped mappings