diff options
author | Tejun Heo <tj@kernel.org> | 2008-09-03 03:03:02 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2008-10-09 02:56:06 -0400 |
commit | e71bf0d0ee89e51b92776391c5634938236977d5 (patch) | |
tree | 9fc62352a40ad388deebdd8ed497cab926cf0470 /fs | |
parent | f331c0296f2a9fee0d396a70598b954062603015 (diff) |
block: fix disk->part[] dereferencing race
disk->part[] is protected by its matching bdev's lock. However,
non-critical accesses like collecting stats and printing out sysfs and
proc information used to be performed without any locking. As
partitions can come and go dynamically, partitions can go away
underneath those non-critical accesses. As some of those accesses are
writes, this theoretically can lead to silent corruption.
This patch fixes the race by using RCU to protect the partition array
and the device reference counter to hold partitions while they are in use.
* Rename disk->part[] to disk->__part[] to make sure no one outside
the genhd layer proper accesses it directly.
* Use RCU for disk->__part[] dereferencing.
* Implement disk_{get|put}_part(), which can be used to get and put
partitions from a gendisk, respectively.
* Iterators are implemented to help iterate through all partitions
safely.
* Functions which require the RCU read lock are marked with an _rcu suffix.
* Use disk_put_part() in __blkdev_put() instead of directly putting
the contained kobject.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/block_dev.c | 15 | ||||
-rw-r--r-- | fs/partitions/check.c | 70 |
2 files changed, 53 insertions, 32 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 72e0a2887cb7..2f2873b9a041 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -929,6 +929,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
929 | { | 929 | { |
930 | struct module *owner = NULL; | 930 | struct module *owner = NULL; |
931 | struct gendisk *disk; | 931 | struct gendisk *disk; |
932 | struct hd_struct *part = NULL; | ||
932 | int ret; | 933 | int ret; |
933 | int partno; | 934 | int partno; |
934 | int perm = 0; | 935 | int perm = 0; |
@@ -978,7 +979,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
978 | if (bdev->bd_invalidated) | 979 | if (bdev->bd_invalidated) |
979 | rescan_partitions(disk, bdev); | 980 | rescan_partitions(disk, bdev); |
980 | } else { | 981 | } else { |
981 | struct hd_struct *p; | ||
982 | struct block_device *whole; | 982 | struct block_device *whole; |
983 | whole = bdget_disk(disk, 0); | 983 | whole = bdget_disk(disk, 0); |
984 | ret = -ENOMEM; | 984 | ret = -ENOMEM; |
@@ -989,16 +989,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
989 | if (ret) | 989 | if (ret) |
990 | goto out_first; | 990 | goto out_first; |
991 | bdev->bd_contains = whole; | 991 | bdev->bd_contains = whole; |
992 | p = disk->part[partno - 1]; | 992 | part = disk_get_part(disk, partno); |
993 | bdev->bd_inode->i_data.backing_dev_info = | 993 | bdev->bd_inode->i_data.backing_dev_info = |
994 | whole->bd_inode->i_data.backing_dev_info; | 994 | whole->bd_inode->i_data.backing_dev_info; |
995 | if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { | 995 | if (!(disk->flags & GENHD_FL_UP) || |
996 | !part || !part->nr_sects) { | ||
996 | ret = -ENXIO; | 997 | ret = -ENXIO; |
997 | goto out_first; | 998 | goto out_first; |
998 | } | 999 | } |
999 | kobject_get(&p->dev.kobj); | 1000 | bdev->bd_part = part; |
1000 | bdev->bd_part = p; | 1001 | bd_set_size(bdev, (loff_t)part->nr_sects << 9); |
1001 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); | ||
1002 | } | 1002 | } |
1003 | } else { | 1003 | } else { |
1004 | put_disk(disk); | 1004 | put_disk(disk); |
@@ -1027,6 +1027,7 @@ out_first: | |||
1027 | __blkdev_put(bdev->bd_contains, 1); | 1027 | __blkdev_put(bdev->bd_contains, 1); |
1028 | bdev->bd_contains = NULL; | 1028 | bdev->bd_contains = NULL; |
1029 | put_disk(disk); | 1029 | put_disk(disk); |
1030 | disk_put_part(part); | ||
1030 | module_put(owner); | 1031 | module_put(owner); |
1031 | out: | 1032 | out: |
1032 | mutex_unlock(&bdev->bd_mutex); | 1033 | mutex_unlock(&bdev->bd_mutex); |
@@ -1119,7 +1120,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
1119 | module_put(owner); | 1120 | module_put(owner); |
1120 | 1121 | ||
1121 | if (bdev->bd_contains != bdev) { | 1122 | if (bdev->bd_contains != bdev) { |
1122 | kobject_put(&bdev->bd_part->dev.kobj); | 1123 | disk_put_part(bdev->bd_part); |
1123 | bdev->bd_part = NULL; | 1124 | bdev->bd_part = NULL; |
1124 | } | 1125 | } |
1125 | bdev->bd_disk = NULL; | 1126 | bdev->bd_disk = NULL; |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e77fa144a07d..96c8bf41e455 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -314,19 +314,29 @@ static inline void disk_sysfs_add_subdirs(struct gendisk *disk) | |||
314 | kobject_put(k); | 314 | kobject_put(k); |
315 | } | 315 | } |
316 | 316 | ||
317 | static void delete_partition_rcu_cb(struct rcu_head *head) | ||
318 | { | ||
319 | struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); | ||
320 | |||
321 | part->start_sect = 0; | ||
322 | part->nr_sects = 0; | ||
323 | part_stat_set_all(part, 0); | ||
324 | put_device(&part->dev); | ||
325 | } | ||
326 | |||
317 | void delete_partition(struct gendisk *disk, int partno) | 327 | void delete_partition(struct gendisk *disk, int partno) |
318 | { | 328 | { |
319 | struct hd_struct *p = disk->part[partno - 1]; | 329 | struct hd_struct *part; |
320 | 330 | ||
321 | if (!p) | 331 | part = disk->__part[partno-1]; |
332 | if (!part) | ||
322 | return; | 333 | return; |
323 | disk->part[partno - 1] = NULL; | 334 | |
324 | p->start_sect = 0; | 335 | rcu_assign_pointer(disk->__part[partno-1], NULL); |
325 | p->nr_sects = 0; | 336 | kobject_put(part->holder_dir); |
326 | part_stat_set_all(p, 0); | 337 | device_del(&part->dev); |
327 | kobject_put(p->holder_dir); | 338 | |
328 | device_del(&p->dev); | 339 | call_rcu(&part->rcu_head, delete_partition_rcu_cb); |
329 | put_device(&p->dev); | ||
330 | } | 340 | } |
331 | 341 | ||
332 | static ssize_t whole_disk_show(struct device *dev, | 342 | static ssize_t whole_disk_show(struct device *dev, |
@@ -343,7 +353,7 @@ int add_partition(struct gendisk *disk, int partno, | |||
343 | struct hd_struct *p; | 353 | struct hd_struct *p; |
344 | int err; | 354 | int err; |
345 | 355 | ||
346 | if (disk->part[partno - 1]) | 356 | if (disk->__part[partno - 1]) |
347 | return -EBUSY; | 357 | return -EBUSY; |
348 | 358 | ||
349 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 359 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
@@ -391,7 +401,8 @@ int add_partition(struct gendisk *disk, int partno, | |||
391 | } | 401 | } |
392 | 402 | ||
393 | /* everything is up and running, commence */ | 403 | /* everything is up and running, commence */ |
394 | disk->part[partno - 1] = p; | 404 | INIT_RCU_HEAD(&p->rcu_head); |
405 | rcu_assign_pointer(disk->__part[partno - 1], p); | ||
395 | 406 | ||
396 | /* suppress uevent if the disk supresses it */ | 407 | /* suppress uevent if the disk supresses it */ |
397 | if (!disk->dev.uevent_suppress) | 408 | if (!disk->dev.uevent_suppress) |
@@ -414,9 +425,9 @@ out_put: | |||
414 | void register_disk(struct gendisk *disk) | 425 | void register_disk(struct gendisk *disk) |
415 | { | 426 | { |
416 | struct block_device *bdev; | 427 | struct block_device *bdev; |
428 | struct disk_part_iter piter; | ||
429 | struct hd_struct *part; | ||
417 | char *s; | 430 | char *s; |
418 | int i; | ||
419 | struct hd_struct *p; | ||
420 | int err; | 431 | int err; |
421 | 432 | ||
422 | disk->dev.parent = disk->driverfs_dev; | 433 | disk->dev.parent = disk->driverfs_dev; |
@@ -466,16 +477,16 @@ exit: | |||
466 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); | 477 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); |
467 | 478 | ||
468 | /* announce possible partitions */ | 479 | /* announce possible partitions */ |
469 | for (i = 0; i < disk_max_parts(disk); i++) { | 480 | disk_part_iter_init(&piter, disk, 0); |
470 | p = disk->part[i]; | 481 | while ((part = disk_part_iter_next(&piter))) |
471 | if (!p || !p->nr_sects) | 482 | kobject_uevent(&part->dev.kobj, KOBJ_ADD); |
472 | continue; | 483 | disk_part_iter_exit(&piter); |
473 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | ||
474 | } | ||
475 | } | 484 | } |
476 | 485 | ||
477 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 486 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) |
478 | { | 487 | { |
488 | struct disk_part_iter piter; | ||
489 | struct hd_struct *part; | ||
479 | struct parsed_partitions *state; | 490 | struct parsed_partitions *state; |
480 | int p, res; | 491 | int p, res; |
481 | 492 | ||
@@ -485,8 +496,12 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
485 | if (res) | 496 | if (res) |
486 | return res; | 497 | return res; |
487 | bdev->bd_invalidated = 0; | 498 | bdev->bd_invalidated = 0; |
488 | for (p = 1; p <= disk_max_parts(disk); p++) | 499 | |
489 | delete_partition(disk, p); | 500 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); |
501 | while ((part = disk_part_iter_next(&piter))) | ||
502 | delete_partition(disk, part->partno); | ||
503 | disk_part_iter_exit(&piter); | ||
504 | |||
490 | if (disk->fops->revalidate_disk) | 505 | if (disk->fops->revalidate_disk) |
491 | disk->fops->revalidate_disk(disk); | 506 | disk->fops->revalidate_disk(disk); |
492 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) | 507 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) |
@@ -545,13 +560,18 @@ EXPORT_SYMBOL(read_dev_sector); | |||
545 | 560 | ||
546 | void del_gendisk(struct gendisk *disk) | 561 | void del_gendisk(struct gendisk *disk) |
547 | { | 562 | { |
548 | int p; | 563 | struct disk_part_iter piter; |
564 | struct hd_struct *part; | ||
549 | 565 | ||
550 | /* invalidate stuff */ | 566 | /* invalidate stuff */ |
551 | for (p = disk_max_parts(disk); p > 0; p--) { | 567 | disk_part_iter_init(&piter, disk, |
552 | invalidate_partition(disk, p); | 568 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); |
553 | delete_partition(disk, p); | 569 | while ((part = disk_part_iter_next(&piter))) { |
570 | invalidate_partition(disk, part->partno); | ||
571 | delete_partition(disk, part->partno); | ||
554 | } | 572 | } |
573 | disk_part_iter_exit(&piter); | ||
574 | |||
555 | invalidate_partition(disk, 0); | 575 | invalidate_partition(disk, 0); |
556 | disk->capacity = 0; | 576 | disk->capacity = 0; |
557 | disk->flags &= ~GENHD_FL_UP; | 577 | disk->flags &= ~GENHD_FL_UP; |