diff options
author | Mikulas Patocka <mpatocka@redhat.com> | 2014-01-13 19:37:54 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-02-13 16:48:02 -0500 |
commit | 4f66403630544a1e5ad3b041fe0dba9db16ff6d9 (patch) | |
tree | 8d04fbba988c181beb9241897161051118fb1456 | |
parent | b77d17778310cbe015bd1978a48b1e418b9370d1 (diff) |
dm sysfs: fix a module unload race
commit 2995fa78e423d7193f3b57835f6c1c75006a0315 upstream.
This reverts commit be35f48610 ("dm: wait until embedded kobject is
released before destroying a device") and provides an improved fix.
The kobject release code that calls the completion must be placed in a
non-module file, otherwise there is a module unload race (if the process
calling dm_kobject_release is preempted and the DM module unloaded after
the completion is triggered, but before dm_kobject_release returns).
To fix this race, this patch moves the completion code to dm-builtin.c
which is always compiled directly into the kernel if BLK_DEV_DM is
selected.
The patch introduces a new dm_kobject_holder structure, its purpose is
to keep the completion and kobject in one place, so that it can be
accessed from non-module code without the need to export the layout of
struct mapped_device to that code.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | drivers/md/Kconfig | 4 | ||||
-rw-r--r-- | drivers/md/Makefile | 1 | ||||
-rw-r--r-- | drivers/md/dm-builtin.c | 48 | ||||
-rw-r--r-- | drivers/md/dm-sysfs.c | 5 | ||||
-rw-r--r-- | drivers/md/dm.c | 26 | ||||
-rw-r--r-- | drivers/md/dm.h | 17 |
6 files changed, 74 insertions, 27 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 3bfc8f1da9fe..29cff90096ad 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -176,8 +176,12 @@ config MD_FAULTY | |||
176 | 176 | ||
177 | source "drivers/md/bcache/Kconfig" | 177 | source "drivers/md/bcache/Kconfig" |
178 | 178 | ||
179 | config BLK_DEV_DM_BUILTIN | ||
180 | boolean | ||
181 | |||
179 | config BLK_DEV_DM | 182 | config BLK_DEV_DM |
180 | tristate "Device mapper support" | 183 | tristate "Device mapper support" |
184 | select BLK_DEV_DM_BUILTIN | ||
181 | ---help--- | 185 | ---help--- |
182 | Device-mapper is a low level volume manager. It works by allowing | 186 | Device-mapper is a low level volume manager. It works by allowing |
183 | people to specify mappings for ranges of logical sectors. Various | 187 | people to specify mappings for ranges of logical sectors. Various |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 1439fd4ad9b1..3591a7292381 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
@@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o | |||
32 | obj-$(CONFIG_BCACHE) += bcache/ | 32 | obj-$(CONFIG_BCACHE) += bcache/ |
33 | obj-$(CONFIG_BLK_DEV_MD) += md-mod.o | 33 | obj-$(CONFIG_BLK_DEV_MD) += md-mod.o |
34 | obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o | 34 | obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o |
35 | obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o | ||
35 | obj-$(CONFIG_DM_BUFIO) += dm-bufio.o | 36 | obj-$(CONFIG_DM_BUFIO) += dm-bufio.o |
36 | obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o | 37 | obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o |
37 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o | 38 | obj-$(CONFIG_DM_CRYPT) += dm-crypt.o |
diff --git a/drivers/md/dm-builtin.c b/drivers/md/dm-builtin.c new file mode 100644 index 000000000000..6c9049c51b2b --- /dev/null +++ b/drivers/md/dm-builtin.c | |||
@@ -0,0 +1,48 @@ | |||
1 | #include "dm.h" | ||
2 | |||
3 | /* | ||
4 | * The kobject release method must not be placed in the module itself, | ||
5 | * otherwise we are subject to module unload races. | ||
6 | * | ||
7 | * The release method is called when the last reference to the kobject is | ||
8 | * dropped. It may be called by any other kernel code that drops the last | ||
9 | * reference. | ||
10 | * | ||
11 | * The release method suffers from module unload race. We may prevent the | ||
12 | * module from being unloaded at the start of the release method (using | ||
13 | * increased module reference count or synchronizing against the release | ||
14 | * method), however there is no way to prevent the module from being | ||
15 | * unloaded at the end of the release method. | ||
16 | * | ||
17 | * If this code were placed in the dm module, the following race may | ||
18 | * happen: | ||
19 | * 1. Some other process takes a reference to dm kobject | ||
20 | * 2. The user issues ioctl function to unload the dm device | ||
21 | * 3. dm_sysfs_exit calls kobject_put, however the object is not released | ||
22 | * because of the other reference taken at step 1 | ||
23 | * 4. dm_sysfs_exit waits on the completion | ||
24 | * 5. The other process that took the reference in step 1 drops it, | ||
25 | * dm_kobject_release is called from this process | ||
26 | * 6. dm_kobject_release calls complete() | ||
27 | * 7. a reschedule happens before dm_kobject_release returns | ||
28 | * 8. dm_sysfs_exit continues, the dm device is unloaded, module reference | ||
29 | * count is decremented | ||
30 | * 9. The user unloads the dm module | ||
31 | * 10. The other process that was rescheduled in step 7 continues to run, | ||
32 | * it is now executing code in unloaded module, so it crashes | ||
33 | * | ||
34 | * Note that if the process that takes the foreign reference to dm kobject | ||
35 | * has a low priority and the system is sufficiently loaded with | ||
36 | * higher-priority processes that prevent the low-priority process from | ||
37 | * being scheduled long enough, this bug may really happen. | ||
38 | * | ||
39 | * In order to fix this module unload race, we place the release method | ||
40 | * into a helper code that is compiled directly into the kernel. | ||
41 | */ | ||
42 | |||
43 | void dm_kobject_release(struct kobject *kobj) | ||
44 | { | ||
45 | complete(dm_get_completion_from_kobject(kobj)); | ||
46 | } | ||
47 | |||
48 | EXPORT_SYMBOL(dm_kobject_release); | ||
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c index e0cc5d6a9e46..c62c5ab6aed5 100644 --- a/drivers/md/dm-sysfs.c +++ b/drivers/md/dm-sysfs.c | |||
@@ -79,11 +79,6 @@ static const struct sysfs_ops dm_sysfs_ops = { | |||
79 | .show = dm_attr_show, | 79 | .show = dm_attr_show, |
80 | }; | 80 | }; |
81 | 81 | ||
82 | static void dm_kobject_release(struct kobject *kobj) | ||
83 | { | ||
84 | complete(dm_get_completion_from_kobject(kobj)); | ||
85 | } | ||
86 | |||
87 | /* | 82 | /* |
88 | * dm kobject is embedded in mapped_device structure | 83 | * dm kobject is embedded in mapped_device structure |
89 | * no need to define release function here | 84 | * no need to define release function here |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d2523fedca3c..204a59fd872f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -184,11 +184,8 @@ struct mapped_device { | |||
184 | /* forced geometry settings */ | 184 | /* forced geometry settings */ |
185 | struct hd_geometry geometry; | 185 | struct hd_geometry geometry; |
186 | 186 | ||
187 | /* sysfs handle */ | 187 | /* kobject and completion */ |
188 | struct kobject kobj; | 188 | struct dm_kobject_holder kobj_holder; |
189 | |||
190 | /* wait until the kobject is released */ | ||
191 | struct completion kobj_completion; | ||
192 | 189 | ||
193 | /* zero-length flush that will be cloned and submitted to targets */ | 190 | /* zero-length flush that will be cloned and submitted to targets */ |
194 | struct bio flush_bio; | 191 | struct bio flush_bio; |
@@ -1907,7 +1904,7 @@ static struct mapped_device *alloc_dev(int minor) | |||
1907 | init_waitqueue_head(&md->wait); | 1904 | init_waitqueue_head(&md->wait); |
1908 | INIT_WORK(&md->work, dm_wq_work); | 1905 | INIT_WORK(&md->work, dm_wq_work); |
1909 | init_waitqueue_head(&md->eventq); | 1906 | init_waitqueue_head(&md->eventq); |
1910 | init_completion(&md->kobj_completion); | 1907 | init_completion(&md->kobj_holder.completion); |
1911 | 1908 | ||
1912 | md->disk->major = _major; | 1909 | md->disk->major = _major; |
1913 | md->disk->first_minor = minor; | 1910 | md->disk->first_minor = minor; |
@@ -2739,20 +2736,14 @@ struct gendisk *dm_disk(struct mapped_device *md) | |||
2739 | 2736 | ||
2740 | struct kobject *dm_kobject(struct mapped_device *md) | 2737 | struct kobject *dm_kobject(struct mapped_device *md) |
2741 | { | 2738 | { |
2742 | return &md->kobj; | 2739 | return &md->kobj_holder.kobj; |
2743 | } | 2740 | } |
2744 | 2741 | ||
2745 | /* | ||
2746 | * struct mapped_device should not be exported outside of dm.c | ||
2747 | * so use this check to verify that kobj is part of md structure | ||
2748 | */ | ||
2749 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | 2742 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj) |
2750 | { | 2743 | { |
2751 | struct mapped_device *md; | 2744 | struct mapped_device *md; |
2752 | 2745 | ||
2753 | md = container_of(kobj, struct mapped_device, kobj); | 2746 | md = container_of(kobj, struct mapped_device, kobj_holder.kobj); |
2754 | if (&md->kobj != kobj) | ||
2755 | return NULL; | ||
2756 | 2747 | ||
2757 | if (test_bit(DMF_FREEING, &md->flags) || | 2748 | if (test_bit(DMF_FREEING, &md->flags) || |
2758 | dm_deleting_md(md)) | 2749 | dm_deleting_md(md)) |
@@ -2762,13 +2753,6 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) | |||
2762 | return md; | 2753 | return md; |
2763 | } | 2754 | } |
2764 | 2755 | ||
2765 | struct completion *dm_get_completion_from_kobject(struct kobject *kobj) | ||
2766 | { | ||
2767 | struct mapped_device *md = container_of(kobj, struct mapped_device, kobj); | ||
2768 | |||
2769 | return &md->kobj_completion; | ||
2770 | } | ||
2771 | |||
2772 | int dm_suspended_md(struct mapped_device *md) | 2756 | int dm_suspended_md(struct mapped_device *md) |
2773 | { | 2757 | { |
2774 | return test_bit(DMF_SUSPENDED, &md->flags); | 2758 | return test_bit(DMF_SUSPENDED, &md->flags); |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 66cfb7d8d157..9b3222f44835 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/blkdev.h> | 16 | #include <linux/blkdev.h> |
17 | #include <linux/hdreg.h> | 17 | #include <linux/hdreg.h> |
18 | #include <linux/completion.h> | 18 | #include <linux/completion.h> |
19 | #include <linux/kobject.h> | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | * Suspend feature flags | 22 | * Suspend feature flags |
@@ -126,11 +127,25 @@ void dm_interface_exit(void); | |||
126 | /* | 127 | /* |
127 | * sysfs interface | 128 | * sysfs interface |
128 | */ | 129 | */ |
130 | struct dm_kobject_holder { | ||
131 | struct kobject kobj; | ||
132 | struct completion completion; | ||
133 | }; | ||
134 | |||
135 | static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj) | ||
136 | { | ||
137 | return &container_of(kobj, struct dm_kobject_holder, kobj)->completion; | ||
138 | } | ||
139 | |||
129 | int dm_sysfs_init(struct mapped_device *md); | 140 | int dm_sysfs_init(struct mapped_device *md); |
130 | void dm_sysfs_exit(struct mapped_device *md); | 141 | void dm_sysfs_exit(struct mapped_device *md); |
131 | struct kobject *dm_kobject(struct mapped_device *md); | 142 | struct kobject *dm_kobject(struct mapped_device *md); |
132 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj); | 143 | struct mapped_device *dm_get_from_kobject(struct kobject *kobj); |
133 | struct completion *dm_get_completion_from_kobject(struct kobject *kobj); | 144 | |
145 | /* | ||
146 | * The kobject helper | ||
147 | */ | ||
148 | void dm_kobject_release(struct kobject *kobj); | ||
134 | 149 | ||
135 | /* | 150 | /* |
136 | * Targets for linear and striped mappings | 151 | * Targets for linear and striped mappings |