diff options
author | Tejun Heo <tj@kernel.org> | 2008-09-03 03:03:02 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2008-10-09 02:56:06 -0400 |
commit | e71bf0d0ee89e51b92776391c5634938236977d5 (patch) | |
tree | 9fc62352a40ad388deebdd8ed497cab926cf0470 /include/linux | |
parent | f331c0296f2a9fee0d396a70598b954062603015 (diff) |
block: fix disk->part[] dereferencing race
disk->part[] is protected by its matching bdev's lock. However,
non-critical accesses like collecting stats and printing out sysfs and
proc information used to be performed without any locking. As
partitions can come and go dynamically, partitions can go away
underneath those non-critical accesses. As some of those accesses are
writes, this theoretically can lead to silent corruption.
This patch fixes the race by protecting the partition array with RCU
and by using each partition's device reference count to hold partitions
while they are being accessed.
* Rename disk->part[] to disk->__part[] to make sure no one outside
genhd layer proper accesses it directly.
* Use RCU for disk->__part[] dereferencing.
* Implement disk_{get|put}_part(), which respectively take a reference
to a partition of a gendisk and drop it again.
* Iterators are implemented to help iterate through all partitions
safely.
* Functions that must be called under the RCU read lock are marked with
an _rcu suffix.
* Use disk_put_part() in __blkdev_put() instead of directly putting
the contained kobject.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/genhd.h | 53 |
1 files changed, 39 insertions, 14 deletions
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 0ff75329199..7fbba19e076 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/kdev_t.h> | 13 | #include <linux/kdev_t.h> |
14 | #include <linux/rcupdate.h> | ||
14 | 15 | ||
15 | #ifdef CONFIG_BLOCK | 16 | #ifdef CONFIG_BLOCK |
16 | 17 | ||
@@ -100,6 +101,7 @@ struct hd_struct { | |||
100 | #else | 101 | #else |
101 | struct disk_stats dkstats; | 102 | struct disk_stats dkstats; |
102 | #endif | 103 | #endif |
104 | struct rcu_head rcu_head; | ||
103 | }; | 105 | }; |
104 | 106 | ||
105 | #define GENHD_FL_REMOVABLE 1 | 107 | #define GENHD_FL_REMOVABLE 1 |
@@ -120,7 +122,14 @@ struct gendisk { | |||
120 | * disks that can't be partitioned. */ | 122 | * disks that can't be partitioned. */ |
121 | 123 | ||
122 | char disk_name[32]; /* name of major driver */ | 124 | char disk_name[32]; /* name of major driver */ |
123 | struct hd_struct **part; /* [indexed by minor - 1] */ | 125 | |
126 | /* Array of pointers to partitions indexed by partno - 1. | ||
127 | * Protected with matching bdev lock but stat and other | ||
128 | * non-critical accesses use RCU. Always access through | ||
129 | * helpers. | ||
130 | */ | ||
131 | struct hd_struct **__part; | ||
132 | |||
124 | struct block_device_operations *fops; | 133 | struct block_device_operations *fops; |
125 | struct request_queue *queue; | 134 | struct request_queue *queue; |
126 | void *private_data; | 135 | void *private_data; |
@@ -171,25 +180,41 @@ static inline dev_t part_devt(struct hd_struct *part) | |||
171 | return part->dev.devt; | 180 | return part->dev.devt; |
172 | } | 181 | } |
173 | 182 | ||
183 | extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); | ||
184 | |||
185 | static inline void disk_put_part(struct hd_struct *part) | ||
186 | { | ||
187 | if (likely(part)) | ||
188 | put_device(&part->dev); | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Smarter partition iterator without context limits. | ||
193 | */ | ||
194 | #define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ | ||
195 | #define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ | ||
196 | |||
197 | struct disk_part_iter { | ||
198 | struct gendisk *disk; | ||
199 | struct hd_struct *part; | ||
200 | int idx; | ||
201 | unsigned int flags; | ||
202 | }; | ||
203 | |||
204 | extern void disk_part_iter_init(struct disk_part_iter *piter, | ||
205 | struct gendisk *disk, unsigned int flags); | ||
206 | extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); | ||
207 | extern void disk_part_iter_exit(struct disk_part_iter *piter); | ||
208 | |||
209 | extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, | ||
210 | sector_t sector); | ||
211 | |||
174 | /* | 212 | /* |
175 | * Macros to operate on percpu disk statistics: | 213 | * Macros to operate on percpu disk statistics: |
176 | * | 214 | * |
177 | * The __ variants should only be called in critical sections. The full | 215 | * The __ variants should only be called in critical sections. The full |
178 | * variants disable/enable preemption. | 216 | * variants disable/enable preemption. |
179 | */ | 217 | */ |
180 | static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp, | ||
181 | sector_t sector) | ||
182 | { | ||
183 | struct hd_struct *part; | ||
184 | int i; | ||
185 | for (i = 0; i < disk_max_parts(gendiskp); i++) { | ||
186 | part = gendiskp->part[i]; | ||
187 | if (part && part->start_sect <= sector | ||
188 | && sector < part->start_sect + part->nr_sects) | ||
189 | return part; | ||
190 | } | ||
191 | return NULL; | ||
192 | } | ||
193 | 218 | ||
194 | #ifdef CONFIG_SMP | 219 | #ifdef CONFIG_SMP |
195 | #define __disk_stat_add(gendiskp, field, addnd) \ | 220 | #define __disk_stat_add(gendiskp, field, addnd) \ |