aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/genhd.h
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2008-09-03 03:03:02 -0400
committer: Jens Axboe <jens.axboe@oracle.com> 2008-10-09 02:56:06 -0400
commit: e71bf0d0ee89e51b92776391c5634938236977d5 (patch)
tree: 9fc62352a40ad388deebdd8ed497cab926cf0470 /include/linux/genhd.h
parent: f331c0296f2a9fee0d396a70598b954062603015 (diff)
block: fix disk->part[] dereferencing race
disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption.

This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions.

* Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly.
* Use RCU for disk->__part[] dereferencing.
* Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively.
* Iterators are implemented to help iterate through all partitions safely.
* Functions which require RCU readlock are marked with _rcu suffix.
* Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'include/linux/genhd.h')
-rw-r--r-- include/linux/genhd.h 53
1 files changed, 39 insertions, 14 deletions
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 0ff75329199c..7fbba19e076b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/kdev_t.h> 13#include <linux/kdev_t.h>
14#include <linux/rcupdate.h>
14 15
15#ifdef CONFIG_BLOCK 16#ifdef CONFIG_BLOCK
16 17
@@ -100,6 +101,7 @@ struct hd_struct {
100#else 101#else
101 struct disk_stats dkstats; 102 struct disk_stats dkstats;
102#endif 103#endif
104 struct rcu_head rcu_head;
103}; 105};
104 106
105#define GENHD_FL_REMOVABLE 1 107#define GENHD_FL_REMOVABLE 1
@@ -120,7 +122,14 @@ struct gendisk {
120 * disks that can't be partitioned. */ 122 * disks that can't be partitioned. */
121 123
122 char disk_name[32]; /* name of major driver */ 124 char disk_name[32]; /* name of major driver */
123 struct hd_struct **part; /* [indexed by minor - 1] */ 125
126 /* Array of pointers to partitions indexed by partno - 1.
127 * Protected with matching bdev lock but stat and other
128 * non-critical accesses use RCU. Always access through
129 * helpers.
130 */
131 struct hd_struct **__part;
132
124 struct block_device_operations *fops; 133 struct block_device_operations *fops;
125 struct request_queue *queue; 134 struct request_queue *queue;
126 void *private_data; 135 void *private_data;
@@ -171,25 +180,41 @@ static inline dev_t part_devt(struct hd_struct *part)
171 return part->dev.devt; 180 return part->dev.devt;
172} 181}
173 182
183extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno);
184
185static inline void disk_put_part(struct hd_struct *part)
186{
187 if (likely(part))
188 put_device(&part->dev);
189}
190
191/*
192 * Smarter partition iterator without context limits.
193 */
194#define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */
195#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */
196
197struct disk_part_iter {
198 struct gendisk *disk;
199 struct hd_struct *part;
200 int idx;
201 unsigned int flags;
202};
203
204extern void disk_part_iter_init(struct disk_part_iter *piter,
205 struct gendisk *disk, unsigned int flags);
206extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter);
207extern void disk_part_iter_exit(struct disk_part_iter *piter);
208
209extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
210 sector_t sector);
211
174/* 212/*
175 * Macros to operate on percpu disk statistics: 213 * Macros to operate on percpu disk statistics:
176 * 214 *
177 * The __ variants should only be called in critical sections. The full 215 * The __ variants should only be called in critical sections. The full
178 * variants disable/enable preemption. 216 * variants disable/enable preemption.
179 */ 217 */
180static inline struct hd_struct *disk_map_sector(struct gendisk *gendiskp,
181 sector_t sector)
182{
183 struct hd_struct *part;
184 int i;
185 for (i = 0; i < disk_max_parts(gendiskp); i++) {
186 part = gendiskp->part[i];
187 if (part && part->start_sect <= sector
188 && sector < part->start_sect + part->nr_sects)
189 return part;
190 }
191 return NULL;
192}
193 218
194#ifdef CONFIG_SMP 219#ifdef CONFIG_SMP
195#define __disk_stat_add(gendiskp, field, addnd) \ 220#define __disk_stat_add(gendiskp, field, addnd) \