summaryrefslogtreecommitdiffstats
path: root/block/genhd.c
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz>, 2018-02-26 07:01:41 -0500
committer: Jens Axboe <axboe@kernel.dk>, 2018-02-26 11:48:42 -0500
commit: 56c0908c855afbb2bdda17c15d2879949a091ad3 (patch)
tree: 2129af80a6454ef5cb0c41a28137cd33092c20d0 /block/genhd.c
parent: 897366537fb65e87755b822360c230354c3fc73b (diff)
genhd: Fix BUG in blkdev_open()
When two blkdev_open() calls for a partition race with device removal
and recreation, we can hit BUG_ON(!bd_may_claim(bdev, whole, holder)) in
blkdev_open(). The race can happen as follows:

CPU0                            CPU1                            CPU2
                                                                del_gendisk()
                                                                  bdev_unhash_inode(part1);

blkdev_open(part1, O_EXCL)      blkdev_open(part1, O_EXCL)
  bdev = bd_acquire()             bdev = bd_acquire()
  blkdev_get(bdev)
    bd_start_claiming(bdev)
      - finds old inode 'whole'
      bd_prepare_to_claim() -> 0
                                                                  bdev_unhash_inode(whole);
                                                                <device removed>
                                                                <new device under same
                                                                 number created>
                                  blkdev_get(bdev);
                                    bd_start_claiming(bdev)
                                      - finds new inode 'whole'
                                      bd_prepare_to_claim()
                                        - this also succeeds as we have
                                          different 'whole' here...
                                        - bad things happen now as we
                                          have two exclusive openers of
                                          the same bdev

The problem here is that block device opens can see various intermediate
states while gendisk is shutting down and then being recreated.

We fix the problem by introducing new lookup_sem in gendisk that
synchronizes gendisk deletion with get_gendisk() and furthermore by
making sure that get_gendisk() does not return gendisk that is being (or
has been) deleted. This makes sure that once we ever manage to look up
newly created bdev inode, we are also guaranteed that following
get_gendisk() will either return failure (and we fail open) or it
returns gendisk for the new device and following bdget_disk() will
return new bdev inode (i.e., blkdev_open() follows the path as if it is
completely run after new device is created).

Reported-and-analyzed-by: Hou Tao <houtao1@huawei.com>
Tested-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/genhd.c')
-rw-r--r--  block/genhd.c  21
1 files changed, 20 insertions, 1 deletions
diff --git a/block/genhd.c b/block/genhd.c
index 4c0590434591..9656f9e9f99e 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -717,6 +717,11 @@ void del_gendisk(struct gendisk *disk)
717 blk_integrity_del(disk); 717 blk_integrity_del(disk);
718 disk_del_events(disk); 718 disk_del_events(disk);
719 719
720 /*
721 * Block lookups of the disk until all bdevs are unhashed and the
722 * disk is marked as dead (GENHD_FL_UP cleared).
723 */
724 down_write(&disk->lookup_sem);
720 /* invalidate stuff */ 725 /* invalidate stuff */
721 disk_part_iter_init(&piter, disk, 726 disk_part_iter_init(&piter, disk,
722 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); 727 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
@@ -731,6 +736,7 @@ void del_gendisk(struct gendisk *disk)
731 bdev_unhash_inode(disk_devt(disk)); 736 bdev_unhash_inode(disk_devt(disk));
732 set_capacity(disk, 0); 737 set_capacity(disk, 0);
733 disk->flags &= ~GENHD_FL_UP; 738 disk->flags &= ~GENHD_FL_UP;
739 up_write(&disk->lookup_sem);
734 740
735 if (!(disk->flags & GENHD_FL_HIDDEN)) 741 if (!(disk->flags & GENHD_FL_HIDDEN))
736 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 742 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -816,9 +822,21 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
816 spin_unlock_bh(&ext_devt_lock); 822 spin_unlock_bh(&ext_devt_lock);
817 } 823 }
818 824
819 if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) { 825 if (!disk)
826 return NULL;
827
828 /*
829 * Synchronize with del_gendisk() to not return disk that is being
830 * destroyed.
831 */
832 down_read(&disk->lookup_sem);
833 if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
834 !(disk->flags & GENHD_FL_UP))) {
835 up_read(&disk->lookup_sem);
820 put_disk_and_module(disk); 836 put_disk_and_module(disk);
821 disk = NULL; 837 disk = NULL;
838 } else {
839 up_read(&disk->lookup_sem);
822 } 840 }
823 return disk; 841 return disk;
824} 842}
@@ -1418,6 +1436,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
1418 kfree(disk); 1436 kfree(disk);
1419 return NULL; 1437 return NULL;
1420 } 1438 }
1439 init_rwsem(&disk->lookup_sem);
1421 disk->node_id = node_id; 1440 disk->node_id = node_id;
1422 if (disk_expand_part_tbl(disk, 0)) { 1441 if (disk_expand_part_tbl(disk, 0)) {
1423 free_part_stats(&disk->part0); 1442 free_part_stats(&disk->part0);