aboutsummaryrefslogtreecommitdiffstats
path: root/fs/block_dev.c
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2010-11-13 05:55:17 -0500
committer: Tejun Heo <tj@kernel.org> 2010-11-13 05:55:17 -0500
commit: e525fd89d380c4a94c0d63913a1dd1a593ed25e7 (patch)
tree: d226ef40d3f99e42fcf272ad432585cbd641ebec /fs/block_dev.c
parent: e09b457bdb7e8d23fc54dcef0930ac697d8de895 (diff)
block: make blkdev_get/put() handle exclusive access
Over time, block layer has accumulated a set of APIs dealing with bdev open, close, claim and release. * blkdev_get/put() are the primary open and close functions. * bd_claim/release() deal with exclusive open. * open/close_bdev_exclusive() are combinations of open and claim and the other way around, respectively. * bd_link/unlink_disk_holder() to create and remove holder/slave symlinks. * open_by_devnum() wraps bdget() + blkdev_get(). The interface is a bit confusing and the decoupling of open and claim makes it impossible to properly guarantee exclusive access as in-kernel open + claim sequence can disturb the existing exclusive open even before the block layer knows the current open is for another exclusive access. Reorganize the interface such that, * blkdev_get() is extended to include exclusive access management. @holder argument is added and, if @FMODE_EXCL is specified, it will gain exclusive access atomically w.r.t. other exclusive accesses. * blkdev_put() is similarly extended. It now takes @mode argument and if @FMODE_EXCL is set, it releases an exclusive access. Also, when the last exclusive claim is released, the holder/slave symlinks are removed automatically. * bd_claim/release() and close_bdev_exclusive() are no longer necessary and either made static or removed. * bd_link_disk_holder() remains the same but bd_unlink_disk_holder() is no longer necessary and removed. * open_bdev_exclusive() becomes a simple wrapper around lookup_bdev() and blkdev_get(). It also has an unexpected extra bdev_read_only() test which probably should be moved into blkdev_get(). * open_by_devnum() is modified to take @holder argument and pass it to blkdev_get(). Most of bdev open/close operations are unified into blkdev_get/put() and most exclusive accesses are tested atomically at the open time (as they should be). This cleans up code and removes some, both valid and invalid, but unnecessary all the same, corner cases. 
open_bdev_exclusive() and open_by_devnum() can use further cleanup - rename to blkdev_get_by_path() and blkdev_get_by_devt() and drop special features. Well, let's leave them for another day. Most conversions are straight-forward. drbd conversion is a bit more involved as there was some reordering, but the logic should stay the same. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Neil Brown <neilb@suse.de> Acked-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> Acked-by: Mike Snitzer <snitzer@redhat.com> Acked-by: Philipp Reisner <philipp.reisner@linbit.com> Cc: Peter Osterlund <petero2@telia.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Jan Kara <jack@suse.cz> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <joel.becker@oracle.com> Cc: Alex Elder <aelder@sgi.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: dm-devel@redhat.com Cc: drbd-dev@lists.linbit.com Cc: Leo Chen <leochen@broadcom.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Dave Kleikamp <shaggy@linux.vnet.ibm.com> Cc: Joern Engel <joern@logfs.org> Cc: reiserfs-devel@vger.kernel.org Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r-- fs/block_dev.c 149
1 files changed, 50 insertions, 99 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9329068684d2..fc48912354d1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -660,7 +660,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
660 else if (bdev->bd_contains == bdev) 660 else if (bdev->bd_contains == bdev)
661 return true; /* is a whole device which isn't held */ 661 return true; /* is a whole device which isn't held */
662 662
663 else if (whole->bd_holder == bd_claim) 663 else if (whole->bd_holder == bd_may_claim)
664 return true; /* is a partition of a device that is being partitioned */ 664 return true; /* is a partition of a device that is being partitioned */
665 else if (whole->bd_holder != NULL) 665 else if (whole->bd_holder != NULL)
666 return false; /* is a partition of a held device */ 666 return false; /* is a partition of a held device */
@@ -807,10 +807,10 @@ static void __bd_claim(struct block_device *bdev, struct block_device *whole,
807{ 807{
808 /* note that for a whole device bd_holders 808 /* note that for a whole device bd_holders
809 * will be incremented twice, and bd_holder will 809 * will be incremented twice, and bd_holder will
810 * be set to bd_claim before being set to holder 810 * be set to bd_may_claim before being set to holder
811 */ 811 */
812 whole->bd_holders++; 812 whole->bd_holders++;
813 whole->bd_holder = bd_claim; 813 whole->bd_holder = bd_may_claim;
814 bdev->bd_holders++; 814 bdev->bd_holders++;
815 bdev->bd_holder = holder; 815 bdev->bd_holder = holder;
816} 816}
@@ -835,37 +835,7 @@ static void bd_finish_claiming(struct block_device *bdev,
835 __bd_abort_claiming(whole, holder); /* not actually an abort */ 835 __bd_abort_claiming(whole, holder); /* not actually an abort */
836} 836}
837 837
838/** 838static void bd_release(struct block_device *bdev)
839 * bd_claim - claim a block device
840 * @bdev: block device to claim
841 * @holder: holder trying to claim @bdev
842 *
843 * Try to claim @bdev which must have been opened successfully.
844 *
845 * CONTEXT:
846 * Might sleep.
847 *
848 * RETURNS:
849 * 0 if successful, -EBUSY if @bdev is already claimed.
850 */
851int bd_claim(struct block_device *bdev, void *holder)
852{
853 struct block_device *whole = bdev->bd_contains;
854 int res;
855
856 might_sleep();
857
858 spin_lock(&bdev_lock);
859 res = bd_prepare_to_claim(bdev, whole, holder);
860 if (res == 0)
861 __bd_claim(bdev, whole, holder);
862 spin_unlock(&bdev_lock);
863
864 return res;
865}
866EXPORT_SYMBOL(bd_claim);
867
868void bd_release(struct block_device *bdev)
869{ 839{
870 spin_lock(&bdev_lock); 840 spin_lock(&bdev_lock);
871 if (!--bdev->bd_contains->bd_holders) 841 if (!--bdev->bd_contains->bd_holders)
@@ -875,8 +845,6 @@ void bd_release(struct block_device *bdev)
875 spin_unlock(&bdev_lock); 845 spin_unlock(&bdev_lock);
876} 846}
877 847
878EXPORT_SYMBOL(bd_release);
879
880#ifdef CONFIG_SYSFS 848#ifdef CONFIG_SYSFS
881static int add_symlink(struct kobject *from, struct kobject *to) 849static int add_symlink(struct kobject *from, struct kobject *to)
882{ 850{
@@ -943,7 +911,7 @@ out_unlock:
943} 911}
944EXPORT_SYMBOL_GPL(bd_link_disk_holder); 912EXPORT_SYMBOL_GPL(bd_link_disk_holder);
945 913
946void bd_unlink_disk_holder(struct block_device *bdev) 914static void bd_unlink_disk_holder(struct block_device *bdev)
947{ 915{
948 struct gendisk *disk = bdev->bd_holder_disk; 916 struct gendisk *disk = bdev->bd_holder_disk;
949 917
@@ -954,7 +922,9 @@ void bd_unlink_disk_holder(struct block_device *bdev)
954 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 922 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
955 del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 923 del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
956} 924}
957EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); 925#else
926static inline void bd_unlink_disk_holder(struct block_device *bdev)
927{ }
958#endif 928#endif
959 929
960/* 930/*
@@ -964,12 +934,12 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
964 * to be used for internal purposes. If you ever need it - reconsider 934 * to be used for internal purposes. If you ever need it - reconsider
965 * your API. 935 * your API.
966 */ 936 */
967struct block_device *open_by_devnum(dev_t dev, fmode_t mode) 937struct block_device *open_by_devnum(dev_t dev, fmode_t mode, void *holder)
968{ 938{
969 struct block_device *bdev = bdget(dev); 939 struct block_device *bdev = bdget(dev);
970 int err = -ENOMEM; 940 int err = -ENOMEM;
971 if (bdev) 941 if (bdev)
972 err = blkdev_get(bdev, mode); 942 err = blkdev_get(bdev, mode, holder);
973 return err ? ERR_PTR(err) : bdev; 943 return err ? ERR_PTR(err) : bdev;
974} 944}
975 945
@@ -1235,17 +1205,37 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1235 return ret; 1205 return ret;
1236} 1206}
1237 1207
1238int blkdev_get(struct block_device *bdev, fmode_t mode) 1208int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1239{ 1209{
1240 return __blkdev_get(bdev, mode, 0); 1210 struct block_device *whole = NULL;
1211 int res;
1212
1213 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1214
1215 if ((mode & FMODE_EXCL) && holder) {
1216 whole = bd_start_claiming(bdev, holder);
1217 if (IS_ERR(whole)) {
1218 bdput(bdev);
1219 return PTR_ERR(whole);
1220 }
1221 }
1222
1223 res = __blkdev_get(bdev, mode, 0);
1224
1225 if (whole) {
1226 if (res == 0)
1227 bd_finish_claiming(bdev, whole, holder);
1228 else
1229 bd_abort_claiming(whole, holder);
1230 }
1231
1232 return res;
1241} 1233}
1242EXPORT_SYMBOL(blkdev_get); 1234EXPORT_SYMBOL(blkdev_get);
1243 1235
1244static int blkdev_open(struct inode * inode, struct file * filp) 1236static int blkdev_open(struct inode * inode, struct file * filp)
1245{ 1237{
1246 struct block_device *whole = NULL;
1247 struct block_device *bdev; 1238 struct block_device *bdev;
1248 int res;
1249 1239
1250 /* 1240 /*
1251 * Preserve backwards compatibility and allow large file access 1241 * Preserve backwards compatibility and allow large file access
@@ -1266,26 +1256,9 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1266 if (bdev == NULL) 1256 if (bdev == NULL)
1267 return -ENOMEM; 1257 return -ENOMEM;
1268 1258
1269 if (filp->f_mode & FMODE_EXCL) {
1270 whole = bd_start_claiming(bdev, filp);
1271 if (IS_ERR(whole)) {
1272 bdput(bdev);
1273 return PTR_ERR(whole);
1274 }
1275 }
1276
1277 filp->f_mapping = bdev->bd_inode->i_mapping; 1259 filp->f_mapping = bdev->bd_inode->i_mapping;
1278 1260
1279 res = blkdev_get(bdev, filp->f_mode); 1261 return blkdev_get(bdev, filp->f_mode, filp);
1280
1281 if (whole) {
1282 if (res == 0)
1283 bd_finish_claiming(bdev, whole, filp);
1284 else
1285 bd_abort_claiming(whole, filp);
1286 }
1287
1288 return res;
1289} 1262}
1290 1263
1291static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) 1264static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
@@ -1329,6 +1302,13 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1329 1302
1330int blkdev_put(struct block_device *bdev, fmode_t mode) 1303int blkdev_put(struct block_device *bdev, fmode_t mode)
1331{ 1304{
1305 if (mode & FMODE_EXCL) {
1306 mutex_lock(&bdev->bd_mutex);
1307 bd_release(bdev);
1308 if (!bdev->bd_holders)
1309 bd_unlink_disk_holder(bdev);
1310 mutex_unlock(&bdev->bd_mutex);
1311 }
1332 return __blkdev_put(bdev, mode, 0); 1312 return __blkdev_put(bdev, mode, 0);
1333} 1313}
1334EXPORT_SYMBOL(blkdev_put); 1314EXPORT_SYMBOL(blkdev_put);
@@ -1336,8 +1316,7 @@ EXPORT_SYMBOL(blkdev_put);
1336static int blkdev_close(struct inode * inode, struct file * filp) 1316static int blkdev_close(struct inode * inode, struct file * filp)
1337{ 1317{
1338 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1318 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1339 if (bdev->bd_holder == filp) 1319
1340 bd_release(bdev);
1341 return blkdev_put(bdev, filp->f_mode); 1320 return blkdev_put(bdev, filp->f_mode);
1342} 1321}
1343 1322
@@ -1494,55 +1473,27 @@ EXPORT_SYMBOL(lookup_bdev);
1494 */ 1473 */
1495struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) 1474struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1496{ 1475{
1497 struct block_device *bdev, *whole; 1476 struct block_device *bdev;
1498 int error; 1477 int error;
1499 1478
1500 bdev = lookup_bdev(path); 1479 bdev = lookup_bdev(path);
1501 if (IS_ERR(bdev)) 1480 if (IS_ERR(bdev))
1502 return bdev; 1481 return bdev;
1503 1482
1504 whole = bd_start_claiming(bdev, holder); 1483 error = blkdev_get(bdev, mode | FMODE_EXCL, holder);
1505 if (IS_ERR(whole)) {
1506 bdput(bdev);
1507 return whole;
1508 }
1509
1510 error = blkdev_get(bdev, mode);
1511 if (error) 1484 if (error)
1512 goto out_abort_claiming; 1485 return ERR_PTR(error);
1513 1486
1514 error = -EACCES; 1487 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1515 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) 1488 blkdev_put(bdev, mode);
1516 goto out_blkdev_put; 1489 return ERR_PTR(-EACCES);
1490 }
1517 1491
1518 bd_finish_claiming(bdev, whole, holder);
1519 return bdev; 1492 return bdev;
1520
1521out_blkdev_put:
1522 blkdev_put(bdev, mode);
1523out_abort_claiming:
1524 bd_abort_claiming(whole, holder);
1525 return ERR_PTR(error);
1526} 1493}
1527 1494
1528EXPORT_SYMBOL(open_bdev_exclusive); 1495EXPORT_SYMBOL(open_bdev_exclusive);
1529 1496
1530/**
1531 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
1532 *
1533 * @bdev: blockdevice to close
1534 * @mode: mode, must match that used to open.
1535 *
1536 * This is the counterpart to open_bdev_exclusive().
1537 */
1538void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
1539{
1540 bd_release(bdev);
1541 blkdev_put(bdev, mode);
1542}
1543
1544EXPORT_SYMBOL(close_bdev_exclusive);
1545
1546int __invalidate_device(struct block_device *bdev) 1497int __invalidate_device(struct block_device *bdev)
1547{ 1498{
1548 struct super_block *sb = get_super(bdev); 1499 struct super_block *sb = get_super(bdev);