aboutsummaryrefslogtreecommitdiffstats
path: root/block/genhd.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2008-09-03 03:03:02 -0400
committerJens Axboe <jens.axboe@oracle.com>2008-10-09 02:56:06 -0400
commite71bf0d0ee89e51b92776391c5634938236977d5 (patch)
tree9fc62352a40ad388deebdd8ed497cab926cf0470 /block/genhd.c
parentf331c0296f2a9fee0d396a70598b954062603015 (diff)
block: fix disk->part[] dereferencing race
disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely. * Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block/genhd.c')
-rw-r--r--block/genhd.c218
1 files changed, 187 insertions, 31 deletions
diff --git a/block/genhd.c b/block/genhd.c
index fa32d09fda24..b431d6543942 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -26,6 +26,158 @@ struct kobject *block_depr;
26 26
27static struct device_type disk_type; 27static struct device_type disk_type;
28 28
29/**
30 * disk_get_part - get partition
31 * @disk: disk to look partition from
32 * @partno: partition number
33 *
34 * Look for partition @partno from @disk. If found, increment
35 * reference count and return it.
36 *
37 * CONTEXT:
38 * Don't care.
39 *
40 * RETURNS:
41 * Pointer to the found partition on success, NULL if not found.
42 */
43struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
44{
45 struct hd_struct *part;
46
47 if (unlikely(partno < 1 || partno > disk_max_parts(disk)))
48 return NULL;
49 rcu_read_lock();
50 part = rcu_dereference(disk->__part[partno - 1]);
51 if (part)
52 get_device(&part->dev);
53 rcu_read_unlock();
54
55 return part;
56}
57EXPORT_SYMBOL_GPL(disk_get_part);
58
59/**
60 * disk_part_iter_init - initialize partition iterator
61 * @piter: iterator to initialize
62 * @disk: disk to iterate over
63 * @flags: DISK_PITER_* flags
64 *
65 * Initialize @piter so that it iterates over partitions of @disk.
66 *
67 * CONTEXT:
68 * Don't care.
69 */
70void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
71 unsigned int flags)
72{
73 piter->disk = disk;
74 piter->part = NULL;
75
76 if (flags & DISK_PITER_REVERSE)
77 piter->idx = disk_max_parts(piter->disk) - 1;
78 else
79 piter->idx = 0;
80
81 piter->flags = flags;
82}
83EXPORT_SYMBOL_GPL(disk_part_iter_init);
84
85/**
86 * disk_part_iter_next - proceed iterator to the next partition and return it
87 * @piter: iterator of interest
88 *
89 * Proceed @piter to the next partition and return it.
90 *
91 * CONTEXT:
92 * Don't care.
93 */
94struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
95{
96 int inc, end;
97
98 /* put the last partition */
99 disk_put_part(piter->part);
100 piter->part = NULL;
101
102 rcu_read_lock();
103
104 /* determine iteration parameters */
105 if (piter->flags & DISK_PITER_REVERSE) {
106 inc = -1;
107 end = -1;
108 } else {
109 inc = 1;
110 end = disk_max_parts(piter->disk);
111 }
112
113 /* iterate to the next partition */
114 for (; piter->idx != end; piter->idx += inc) {
115 struct hd_struct *part;
116
117 part = rcu_dereference(piter->disk->__part[piter->idx]);
118 if (!part)
119 continue;
120 if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
121 continue;
122
123 get_device(&part->dev);
124 piter->part = part;
125 piter->idx += inc;
126 break;
127 }
128
129 rcu_read_unlock();
130
131 return piter->part;
132}
133EXPORT_SYMBOL_GPL(disk_part_iter_next);
134
135/**
136 * disk_part_iter_exit - finish up partition iteration
137 * @piter: iter of interest
138 *
139 * Called when iteration is over. Cleans up @piter.
140 *
141 * CONTEXT:
142 * Don't care.
143 */
144void disk_part_iter_exit(struct disk_part_iter *piter)
145{
146 disk_put_part(piter->part);
147 piter->part = NULL;
148}
149EXPORT_SYMBOL_GPL(disk_part_iter_exit);
150
151/**
152 * disk_map_sector_rcu - map sector to partition
153 * @disk: gendisk of interest
154 * @sector: sector to map
155 *
156 * Find out which partition @sector maps to on @disk. This is
157 * primarily used for stats accounting.
158 *
159 * CONTEXT:
160 * RCU read locked. The returned partition pointer is valid only
161 * while preemption is disabled.
162 *
163 * RETURNS:
164 * Found partition on success, NULL if there's no matching partition.
165 */
166struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
167{
168 int i;
169
170 for (i = 0; i < disk_max_parts(disk); i++) {
171 struct hd_struct *part = rcu_dereference(disk->__part[i]);
172
173 if (part && part->start_sect <= sector &&
174 sector < part->start_sect + part->nr_sects)
175 return part;
176 }
177 return NULL;
178}
179EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
180
29/* 181/*
30 * Can be deleted altogether. Later. 182 * Can be deleted altogether. Later.
31 * 183 *
@@ -245,10 +397,12 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno)
245 if (partno == 0) 397 if (partno == 0)
246 devt = disk_devt(disk); 398 devt = disk_devt(disk);
247 else { 399 else {
248 struct hd_struct *part = disk->part[partno - 1]; 400 struct hd_struct *part;
249 401
402 part = disk_get_part(disk, partno);
250 if (part && part->nr_sects) 403 if (part && part->nr_sects)
251 devt = part_devt(part); 404 devt = part_devt(part);
405 disk_put_part(part);
252 } 406 }
253 407
254 if (likely(devt != MKDEV(0, 0))) 408 if (likely(devt != MKDEV(0, 0)))
@@ -270,8 +424,9 @@ void __init printk_all_partitions(void)
270 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 424 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
271 while ((dev = class_dev_iter_next(&iter))) { 425 while ((dev = class_dev_iter_next(&iter))) {
272 struct gendisk *disk = dev_to_disk(dev); 426 struct gendisk *disk = dev_to_disk(dev);
427 struct disk_part_iter piter;
428 struct hd_struct *part;
273 char buf[BDEVNAME_SIZE]; 429 char buf[BDEVNAME_SIZE];
274 int n;
275 430
276 /* 431 /*
277 * Don't show empty devices or things that have been 432 * Don't show empty devices or things that have been
@@ -298,16 +453,13 @@ void __init printk_all_partitions(void)
298 printk(" (driver?)\n"); 453 printk(" (driver?)\n");
299 454
300 /* now show the partitions */ 455 /* now show the partitions */
301 for (n = 0; n < disk_max_parts(disk); ++n) { 456 disk_part_iter_init(&piter, disk, 0);
302 struct hd_struct *part = disk->part[n]; 457 while ((part = disk_part_iter_next(&piter)))
303
304 if (!part || !part->nr_sects)
305 continue;
306 printk(" %02x%02x %10llu %s\n", 458 printk(" %02x%02x %10llu %s\n",
307 MAJOR(part_devt(part)), MINOR(part_devt(part)), 459 MAJOR(part_devt(part)), MINOR(part_devt(part)),
308 (unsigned long long)part->nr_sects >> 1, 460 (unsigned long long)part->nr_sects >> 1,
309 disk_name(disk, part->partno, buf)); 461 disk_name(disk, part->partno, buf));
310 } 462 disk_part_iter_exit(&piter);
311 } 463 }
312 class_dev_iter_exit(&iter); 464 class_dev_iter_exit(&iter);
313} 465}
@@ -371,7 +523,8 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
371static int show_partition(struct seq_file *seqf, void *v) 523static int show_partition(struct seq_file *seqf, void *v)
372{ 524{
373 struct gendisk *sgp = v; 525 struct gendisk *sgp = v;
374 int n; 526 struct disk_part_iter piter;
527 struct hd_struct *part;
375 char buf[BDEVNAME_SIZE]; 528 char buf[BDEVNAME_SIZE];
376 529
377 /* Don't show non-partitionable removeable devices or empty devices */ 530 /* Don't show non-partitionable removeable devices or empty devices */
@@ -386,17 +539,14 @@ static int show_partition(struct seq_file *seqf, void *v)
386 MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)), 539 MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)),
387 (unsigned long long)get_capacity(sgp) >> 1, 540 (unsigned long long)get_capacity(sgp) >> 1,
388 disk_name(sgp, 0, buf)); 541 disk_name(sgp, 0, buf));
389 for (n = 0; n < disk_max_parts(sgp); n++) { 542
390 struct hd_struct *part = sgp->part[n]; 543 disk_part_iter_init(&piter, sgp, 0);
391 if (!part) 544 while ((part = disk_part_iter_next(&piter)))
392 continue;
393 if (part->nr_sects == 0)
394 continue;
395 seq_printf(seqf, "%4d %4d %10llu %s\n", 545 seq_printf(seqf, "%4d %4d %10llu %s\n",
396 MAJOR(part_devt(part)), MINOR(part_devt(part)), 546 MAJOR(part_devt(part)), MINOR(part_devt(part)),
397 (unsigned long long)part->nr_sects >> 1, 547 (unsigned long long)part->nr_sects >> 1,
398 disk_name(sgp, part->partno, buf)); 548 disk_name(sgp, part->partno, buf));
399 } 549 disk_part_iter_exit(&piter);
400 550
401 return 0; 551 return 0;
402} 552}
@@ -571,7 +721,7 @@ static void disk_release(struct device *dev)
571 struct gendisk *disk = dev_to_disk(dev); 721 struct gendisk *disk = dev_to_disk(dev);
572 722
573 kfree(disk->random); 723 kfree(disk->random);
574 kfree(disk->part); 724 kfree(disk->__part);
575 free_disk_stats(disk); 725 free_disk_stats(disk);
576 kfree(disk); 726 kfree(disk);
577} 727}
@@ -596,8 +746,9 @@ static struct device_type disk_type = {
596static int diskstats_show(struct seq_file *seqf, void *v) 746static int diskstats_show(struct seq_file *seqf, void *v)
597{ 747{
598 struct gendisk *gp = v; 748 struct gendisk *gp = v;
749 struct disk_part_iter piter;
750 struct hd_struct *hd;
599 char buf[BDEVNAME_SIZE]; 751 char buf[BDEVNAME_SIZE];
600 int n;
601 752
602 /* 753 /*
603 if (&gp->dev.kobj.entry == block_class.devices.next) 754 if (&gp->dev.kobj.entry == block_class.devices.next)
@@ -624,12 +775,8 @@ static int diskstats_show(struct seq_file *seqf, void *v)
624 jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); 775 jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
625 776
626 /* now show all non-0 size partitions of it */ 777 /* now show all non-0 size partitions of it */
627 for (n = 0; n < disk_max_parts(gp); n++) { 778 disk_part_iter_init(&piter, gp, 0);
628 struct hd_struct *hd = gp->part[n]; 779 while ((hd = disk_part_iter_next(&piter))) {
629
630 if (!hd || !hd->nr_sects)
631 continue;
632
633 preempt_disable(); 780 preempt_disable();
634 part_round_stats(hd); 781 part_round_stats(hd);
635 preempt_enable(); 782 preempt_enable();
@@ -650,6 +797,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
650 jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 797 jiffies_to_msecs(part_stat_read(hd, time_in_queue))
651 ); 798 );
652 } 799 }
800 disk_part_iter_exit(&piter);
653 801
654 return 0; 802 return 0;
655} 803}
@@ -703,12 +851,16 @@ dev_t blk_lookup_devt(const char *name, int partno)
703 if (partno == 0) 851 if (partno == 0)
704 devt = disk_devt(disk); 852 devt = disk_devt(disk);
705 else { 853 else {
706 struct hd_struct *part = disk->part[partno - 1]; 854 struct hd_struct *part;
707 855
708 if (!part || !part->nr_sects) 856 part = disk_get_part(disk, partno);
857 if (!part || !part->nr_sects) {
858 disk_put_part(part);
709 continue; 859 continue;
860 }
710 861
711 devt = part_devt(part); 862 devt = part_devt(part);
863 disk_put_part(part);
712 } 864 }
713 break; 865 break;
714 } 866 }
@@ -735,9 +887,9 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
735 } 887 }
736 if (minors > 1) { 888 if (minors > 1) {
737 int size = (minors - 1) * sizeof(struct hd_struct *); 889 int size = (minors - 1) * sizeof(struct hd_struct *);
738 disk->part = kmalloc_node(size, 890 disk->__part = kmalloc_node(size,
739 GFP_KERNEL | __GFP_ZERO, node_id); 891 GFP_KERNEL | __GFP_ZERO, node_id);
740 if (!disk->part) { 892 if (!disk->__part) {
741 free_disk_stats(disk); 893 free_disk_stats(disk);
742 kfree(disk); 894 kfree(disk);
743 return NULL; 895 return NULL;
@@ -798,10 +950,14 @@ EXPORT_SYMBOL(set_device_ro);
798 950
799void set_disk_ro(struct gendisk *disk, int flag) 951void set_disk_ro(struct gendisk *disk, int flag)
800{ 952{
801 int i; 953 struct disk_part_iter piter;
954 struct hd_struct *part;
955
802 disk->policy = flag; 956 disk->policy = flag;
803 for (i = 0; i < disk_max_parts(disk); i++) 957 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
804 if (disk->part[i]) disk->part[i]->policy = flag; 958 while ((part = disk_part_iter_next(&piter)))
959 part->policy = flag;
960 disk_part_iter_exit(&piter);
805} 961}
806 962
807EXPORT_SYMBOL(set_disk_ro); 963EXPORT_SYMBOL(set_disk_ro);