aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-10-17 07:42:43 -0400
committerJens Axboe <axboe@kernel.dk>2011-10-24 10:24:31 -0400
commitf992ae801a7dec34a4ed99a6598bbbbfb82af4fb (patch)
tree5c8892f67867cb09e9c3c046f881e56200926ff8
parente67b77c791ca2778198c9e7088f3266ed2da7a55 (diff)
block: make gendisk hold a reference to its queue
The following command sequence triggers an oops. # mount /dev/sdb1 /mnt # echo 1 > /sys/class/scsi_device/0\:0\:1\:0/device/delete # umount /mnt general protection fault: 0000 [#1] PREEMPT SMP CPU 2 Modules linked in: Pid: 791, comm: umount Not tainted 3.1.0-rc3-work+ #8 Bochs Bochs RIP: 0010:[<ffffffff810d0879>] [<ffffffff810d0879>] __lock_acquire+0x389/0x1d60 ... Call Trace: [<ffffffff810d2845>] lock_acquire+0x95/0x140 [<ffffffff81aed87b>] _raw_spin_lock+0x3b/0x50 [<ffffffff811573bc>] bdi_lock_two+0x5c/0x70 [<ffffffff811c2f6c>] bdev_inode_switch_bdi+0x4c/0xf0 [<ffffffff811c3fcb>] __blkdev_put+0x11b/0x1d0 [<ffffffff811c4010>] __blkdev_put+0x160/0x1d0 [<ffffffff811c40df>] blkdev_put+0x5f/0x190 [<ffffffff8118f18d>] kill_block_super+0x4d/0x80 [<ffffffff8118f4a5>] deactivate_locked_super+0x45/0x70 [<ffffffff8119003a>] deactivate_super+0x4a/0x70 [<ffffffff811ac4ad>] mntput_no_expire+0xed/0x130 [<ffffffff811acf2e>] sys_umount+0x7e/0x3a0 [<ffffffff81aeeeab>] system_call_fastpath+0x16/0x1b This is because bdev holds on to disk but disk doesn't pin the associated queue. If a SCSI device is removed while the device is still open, the sdev puts the base reference to the queue on release. When the bdev is finally released, the associated queue is already gone along with the bdi and bdev_inode_switch_bdi() ends up dereferencing already freed bdi. Even if it were not for this bug, disk not holding onto the associated queue is very unusual and error-prone. Fix it by making add_disk() take an extra reference to its queue and put it on disk_release() and ensuring that disk and its fops owner are put in that order after all accesses to the disk and queue are complete. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Jens Axboe <axboe@kernel.dk> Cc: stable@kernel.org Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/genhd.c8
-rw-r--r--fs/block_dev.c13
2 files changed, 16 insertions, 5 deletions
diff --git a/block/genhd.c b/block/genhd.c
index e2f67902dd02..d261b73b9744 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -611,6 +611,12 @@ void add_disk(struct gendisk *disk)
611 register_disk(disk); 611 register_disk(disk);
612 blk_register_queue(disk); 612 blk_register_queue(disk);
613 613
614 /*
615 * Take an extra ref on queue which will be put on disk_release()
616 * so that it sticks around as long as @disk is there.
617 */
618 WARN_ON_ONCE(blk_get_queue(disk->queue));
619
614 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, 620 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
615 "bdi"); 621 "bdi");
616 WARN_ON(retval); 622 WARN_ON(retval);
@@ -1095,6 +1101,8 @@ static void disk_release(struct device *dev)
1095 disk_replace_part_tbl(disk, NULL); 1101 disk_replace_part_tbl(disk, NULL);
1096 free_part_stats(&disk->part0); 1102 free_part_stats(&disk->part0);
1097 free_part_info(&disk->part0); 1103 free_part_info(&disk->part0);
1104 if (disk->queue)
1105 blk_put_queue(disk->queue);
1098 kfree(disk); 1106 kfree(disk);
1099} 1107}
1100struct class block_class = { 1108struct class block_class = {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 95f786ec7f08..1c44b8d54504 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1085,6 +1085,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1085static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) 1085static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1086{ 1086{
1087 struct gendisk *disk; 1087 struct gendisk *disk;
1088 struct module *owner;
1088 int ret; 1089 int ret;
1089 int partno; 1090 int partno;
1090 int perm = 0; 1091 int perm = 0;
@@ -1110,6 +1111,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1110 disk = get_gendisk(bdev->bd_dev, &partno); 1111 disk = get_gendisk(bdev->bd_dev, &partno);
1111 if (!disk) 1112 if (!disk)
1112 goto out; 1113 goto out;
1114 owner = disk->fops->owner;
1113 1115
1114 disk_block_events(disk); 1116 disk_block_events(disk);
1115 mutex_lock_nested(&bdev->bd_mutex, for_part); 1117 mutex_lock_nested(&bdev->bd_mutex, for_part);
@@ -1137,8 +1139,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1137 bdev->bd_disk = NULL; 1139 bdev->bd_disk = NULL;
1138 mutex_unlock(&bdev->bd_mutex); 1140 mutex_unlock(&bdev->bd_mutex);
1139 disk_unblock_events(disk); 1141 disk_unblock_events(disk);
1140 module_put(disk->fops->owner);
1141 put_disk(disk); 1142 put_disk(disk);
1143 module_put(owner);
1142 goto restart; 1144 goto restart;
1143 } 1145 }
1144 } 1146 }
@@ -1194,8 +1196,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1194 goto out_unlock_bdev; 1196 goto out_unlock_bdev;
1195 } 1197 }
1196 /* only one opener holds refs to the module and disk */ 1198 /* only one opener holds refs to the module and disk */
1197 module_put(disk->fops->owner);
1198 put_disk(disk); 1199 put_disk(disk);
1200 module_put(owner);
1199 } 1201 }
1200 bdev->bd_openers++; 1202 bdev->bd_openers++;
1201 if (for_part) 1203 if (for_part)
@@ -1215,8 +1217,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1215 out_unlock_bdev: 1217 out_unlock_bdev:
1216 mutex_unlock(&bdev->bd_mutex); 1218 mutex_unlock(&bdev->bd_mutex);
1217 disk_unblock_events(disk); 1219 disk_unblock_events(disk);
1218 module_put(disk->fops->owner);
1219 put_disk(disk); 1220 put_disk(disk);
1221 module_put(owner);
1220 out: 1222 out:
1221 bdput(bdev); 1223 bdput(bdev);
1222 1224
@@ -1442,14 +1444,15 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1442 if (!bdev->bd_openers) { 1444 if (!bdev->bd_openers) {
1443 struct module *owner = disk->fops->owner; 1445 struct module *owner = disk->fops->owner;
1444 1446
1445 put_disk(disk);
1446 module_put(owner);
1447 disk_put_part(bdev->bd_part); 1447 disk_put_part(bdev->bd_part);
1448 bdev->bd_part = NULL; 1448 bdev->bd_part = NULL;
1449 bdev->bd_disk = NULL; 1449 bdev->bd_disk = NULL;
1450 if (bdev != bdev->bd_contains) 1450 if (bdev != bdev->bd_contains)
1451 victim = bdev->bd_contains; 1451 victim = bdev->bd_contains;
1452 bdev->bd_contains = NULL; 1452 bdev->bd_contains = NULL;
1453
1454 put_disk(disk);
1455 module_put(owner);
1453 } 1456 }
1454 mutex_unlock(&bdev->bd_mutex); 1457 mutex_unlock(&bdev->bd_mutex);
1455 bdput(bdev); 1458 bdput(bdev);