aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org>, 2008-09-03 03:03:02 -0400
committer: Jens Axboe <jens.axboe@oracle.com>, 2008-10-09 02:56:06 -0400
commit: e71bf0d0ee89e51b92776391c5634938236977d5 (patch)
tree: 9fc62352a40ad388deebdd8ed497cab926cf0470 /fs
parent: f331c0296f2a9fee0d396a70598b954062603015 (diff)
block: fix disk->part[] dereferencing race
disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption.

This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions.

* Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly.
* Use RCU for disk->__part[] dereferencing.
* Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively.
* Iterators are implemented to help iterate through all partitions safely.
* Functions which require RCU readlock are marked with _rcu suffix.
* Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/block_dev.c 15
-rw-r--r-- fs/partitions/check.c 70
2 files changed, 53 insertions, 32 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 72e0a2887cb7..2f2873b9a041 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -929,6 +929,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
929{ 929{
930 struct module *owner = NULL; 930 struct module *owner = NULL;
931 struct gendisk *disk; 931 struct gendisk *disk;
932 struct hd_struct *part = NULL;
932 int ret; 933 int ret;
933 int partno; 934 int partno;
934 int perm = 0; 935 int perm = 0;
@@ -978,7 +979,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
978 if (bdev->bd_invalidated) 979 if (bdev->bd_invalidated)
979 rescan_partitions(disk, bdev); 980 rescan_partitions(disk, bdev);
980 } else { 981 } else {
981 struct hd_struct *p;
982 struct block_device *whole; 982 struct block_device *whole;
983 whole = bdget_disk(disk, 0); 983 whole = bdget_disk(disk, 0);
984 ret = -ENOMEM; 984 ret = -ENOMEM;
@@ -989,16 +989,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
989 if (ret) 989 if (ret)
990 goto out_first; 990 goto out_first;
991 bdev->bd_contains = whole; 991 bdev->bd_contains = whole;
992 p = disk->part[partno - 1]; 992 part = disk_get_part(disk, partno);
993 bdev->bd_inode->i_data.backing_dev_info = 993 bdev->bd_inode->i_data.backing_dev_info =
994 whole->bd_inode->i_data.backing_dev_info; 994 whole->bd_inode->i_data.backing_dev_info;
995 if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { 995 if (!(disk->flags & GENHD_FL_UP) ||
996 !part || !part->nr_sects) {
996 ret = -ENXIO; 997 ret = -ENXIO;
997 goto out_first; 998 goto out_first;
998 } 999 }
999 kobject_get(&p->dev.kobj); 1000 bdev->bd_part = part;
1000 bdev->bd_part = p; 1001 bd_set_size(bdev, (loff_t)part->nr_sects << 9);
1001 bd_set_size(bdev, (loff_t) p->nr_sects << 9);
1002 } 1002 }
1003 } else { 1003 } else {
1004 put_disk(disk); 1004 put_disk(disk);
@@ -1027,6 +1027,7 @@ out_first:
1027 __blkdev_put(bdev->bd_contains, 1); 1027 __blkdev_put(bdev->bd_contains, 1);
1028 bdev->bd_contains = NULL; 1028 bdev->bd_contains = NULL;
1029 put_disk(disk); 1029 put_disk(disk);
1030 disk_put_part(part);
1030 module_put(owner); 1031 module_put(owner);
1031out: 1032out:
1032 mutex_unlock(&bdev->bd_mutex); 1033 mutex_unlock(&bdev->bd_mutex);
@@ -1119,7 +1120,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
1119 module_put(owner); 1120 module_put(owner);
1120 1121
1121 if (bdev->bd_contains != bdev) { 1122 if (bdev->bd_contains != bdev) {
1122 kobject_put(&bdev->bd_part->dev.kobj); 1123 disk_put_part(bdev->bd_part);
1123 bdev->bd_part = NULL; 1124 bdev->bd_part = NULL;
1124 } 1125 }
1125 bdev->bd_disk = NULL; 1126 bdev->bd_disk = NULL;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e77fa144a07d..96c8bf41e455 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -314,19 +314,29 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
314 kobject_put(k); 314 kobject_put(k);
315} 315}
316 316
317static void delete_partition_rcu_cb(struct rcu_head *head)
318{
319 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
320
321 part->start_sect = 0;
322 part->nr_sects = 0;
323 part_stat_set_all(part, 0);
324 put_device(&part->dev);
325}
326
317void delete_partition(struct gendisk *disk, int partno) 327void delete_partition(struct gendisk *disk, int partno)
318{ 328{
319 struct hd_struct *p = disk->part[partno - 1]; 329 struct hd_struct *part;
320 330
321 if (!p) 331 part = disk->__part[partno-1];
332 if (!part)
322 return; 333 return;
323 disk->part[partno - 1] = NULL; 334
324 p->start_sect = 0; 335 rcu_assign_pointer(disk->__part[partno-1], NULL);
325 p->nr_sects = 0; 336 kobject_put(part->holder_dir);
326 part_stat_set_all(p, 0); 337 device_del(&part->dev);
327 kobject_put(p->holder_dir); 338
328 device_del(&p->dev); 339 call_rcu(&part->rcu_head, delete_partition_rcu_cb);
329 put_device(&p->dev);
330} 340}
331 341
332static ssize_t whole_disk_show(struct device *dev, 342static ssize_t whole_disk_show(struct device *dev,
@@ -343,7 +353,7 @@ int add_partition(struct gendisk *disk, int partno,
343 struct hd_struct *p; 353 struct hd_struct *p;
344 int err; 354 int err;
345 355
346 if (disk->part[partno - 1]) 356 if (disk->__part[partno - 1])
347 return -EBUSY; 357 return -EBUSY;
348 358
349 p = kzalloc(sizeof(*p), GFP_KERNEL); 359 p = kzalloc(sizeof(*p), GFP_KERNEL);
@@ -391,7 +401,8 @@ int add_partition(struct gendisk *disk, int partno,
391 } 401 }
392 402
393 /* everything is up and running, commence */ 403 /* everything is up and running, commence */
394 disk->part[partno - 1] = p; 404 INIT_RCU_HEAD(&p->rcu_head);
405 rcu_assign_pointer(disk->__part[partno - 1], p);
395 406
396 /* suppress uevent if the disk supresses it */ 407 /* suppress uevent if the disk supresses it */
397 if (!disk->dev.uevent_suppress) 408 if (!disk->dev.uevent_suppress)
@@ -414,9 +425,9 @@ out_put:
414void register_disk(struct gendisk *disk) 425void register_disk(struct gendisk *disk)
415{ 426{
416 struct block_device *bdev; 427 struct block_device *bdev;
428 struct disk_part_iter piter;
429 struct hd_struct *part;
417 char *s; 430 char *s;
418 int i;
419 struct hd_struct *p;
420 int err; 431 int err;
421 432
422 disk->dev.parent = disk->driverfs_dev; 433 disk->dev.parent = disk->driverfs_dev;
@@ -466,16 +477,16 @@ exit:
466 kobject_uevent(&disk->dev.kobj, KOBJ_ADD); 477 kobject_uevent(&disk->dev.kobj, KOBJ_ADD);
467 478
468 /* announce possible partitions */ 479 /* announce possible partitions */
469 for (i = 0; i < disk_max_parts(disk); i++) { 480 disk_part_iter_init(&piter, disk, 0);
470 p = disk->part[i]; 481 while ((part = disk_part_iter_next(&piter)))
471 if (!p || !p->nr_sects) 482 kobject_uevent(&part->dev.kobj, KOBJ_ADD);
472 continue; 483 disk_part_iter_exit(&piter);
473 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
474 }
475} 484}
476 485
477int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 486int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
478{ 487{
488 struct disk_part_iter piter;
489 struct hd_struct *part;
479 struct parsed_partitions *state; 490 struct parsed_partitions *state;
480 int p, res; 491 int p, res;
481 492
@@ -485,8 +496,12 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
485 if (res) 496 if (res)
486 return res; 497 return res;
487 bdev->bd_invalidated = 0; 498 bdev->bd_invalidated = 0;
488 for (p = 1; p <= disk_max_parts(disk); p++) 499
489 delete_partition(disk, p); 500 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
501 while ((part = disk_part_iter_next(&piter)))
502 delete_partition(disk, part->partno);
503 disk_part_iter_exit(&piter);
504
490 if (disk->fops->revalidate_disk) 505 if (disk->fops->revalidate_disk)
491 disk->fops->revalidate_disk(disk); 506 disk->fops->revalidate_disk(disk);
492 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 507 if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
@@ -545,13 +560,18 @@ EXPORT_SYMBOL(read_dev_sector);
545 560
546void del_gendisk(struct gendisk *disk) 561void del_gendisk(struct gendisk *disk)
547{ 562{
548 int p; 563 struct disk_part_iter piter;
564 struct hd_struct *part;
549 565
550 /* invalidate stuff */ 566 /* invalidate stuff */
551 for (p = disk_max_parts(disk); p > 0; p--) { 567 disk_part_iter_init(&piter, disk,
552 invalidate_partition(disk, p); 568 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
553 delete_partition(disk, p); 569 while ((part = disk_part_iter_next(&piter))) {
570 invalidate_partition(disk, part->partno);
571 delete_partition(disk, part->partno);
554 } 572 }
573 disk_part_iter_exit(&piter);
574
555 invalidate_partition(disk, 0); 575 invalidate_partition(disk, 0);
556 disk->capacity = 0; 576 disk->capacity = 0;
557 disk->flags &= ~GENHD_FL_UP; 577 disk->flags &= ~GENHD_FL_UP;