aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2010-11-13 05:55:17 -0500
committerTejun Heo <tj@kernel.org>2010-11-13 05:55:17 -0500
commite525fd89d380c4a94c0d63913a1dd1a593ed25e7 (patch)
treed226ef40d3f99e42fcf272ad432585cbd641ebec /drivers/block/drbd
parente09b457bdb7e8d23fc54dcef0930ac697d8de895 (diff)
block: make blkdev_get/put() handle exclusive access
Over time, the block layer has accumulated a set of APIs dealing with bdev open, close, claim and release. * blkdev_get/put() are the primary open and close functions. * bd_claim/release() deal with exclusive open. * open/close_bdev_exclusive() are combinations of open and claim and the other way around, respectively. * bd_link/unlink_disk_holder() create and remove holder/slave symlinks. * open_by_devnum() wraps bdget() + blkdev_get(). The interface is a bit confusing and the decoupling of open and claim makes it impossible to properly guarantee exclusive access, as an in-kernel open + claim sequence can disturb the existing exclusive open even before the block layer knows the current open is for another exclusive access. Reorganize the interface such that, * blkdev_get() is extended to include exclusive access management. @holder argument is added and, if @FMODE_EXCL is specified, it will gain exclusive access atomically w.r.t. other exclusive accesses. * blkdev_put() is similarly extended. It now takes @mode argument and if @FMODE_EXCL is set, it releases an exclusive access. Also, when the last exclusive claim is released, the holder/slave symlinks are removed automatically. * bd_claim/release() and close_bdev_exclusive() are no longer necessary and either made static or removed. * bd_link_disk_holder() remains the same but bd_unlink_disk_holder() is no longer necessary and removed. * open_bdev_exclusive() becomes a simple wrapper around lookup_bdev() and blkdev_get(). It also has an unexpected extra bdev_read_only() test which probably should be moved into blkdev_get(). * open_by_devnum() is modified to take @holder argument and pass it to blkdev_get(). Most of the bdev open/close operations are unified into blkdev_get/put() and most exclusive accesses are tested atomically at open time (as they should be). This cleans up code and removes some, both valid and invalid, but unnecessary all the same, corner cases. 
open_bdev_exclusive() and open_by_devnum() can use further cleanup - rename to blkdev_get_by_path() and blkdev_get_by_devt() and drop special features. Well, let's leave them for another day. Most conversions are straight-forward. drbd conversion is a bit more involved as there was some reordering, but the logic should stay the same. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Neil Brown <neilb@suse.de> Acked-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> Acked-by: Mike Snitzer <snitzer@redhat.com> Acked-by: Philipp Reisner <philipp.reisner@linbit.com> Cc: Peter Osterlund <petero2@telia.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Jan Kara <jack@suse.cz> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <joel.becker@oracle.com> Cc: Alex Elder <aelder@sgi.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: dm-devel@redhat.com Cc: drbd-dev@lists.linbit.com Cc: Leo Chen <leochen@broadcom.com> Cc: Scott Branden <sbranden@broadcom.com> Cc: Chris Mason <chris.mason@oracle.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Dave Kleikamp <shaggy@linux.vnet.ibm.com> Cc: Joern Engel <joern@logfs.org> Cc: reiserfs-devel@vger.kernel.org Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_main.c7
-rw-r--r--drivers/block/drbd/drbd_nl.c103
3 files changed, 38 insertions, 74 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9bdcf4393c0a..0590b9f67ec6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -923,8 +923,6 @@ struct drbd_md {
923struct drbd_backing_dev { 923struct drbd_backing_dev {
924 struct block_device *backing_bdev; 924 struct block_device *backing_bdev;
925 struct block_device *md_bdev; 925 struct block_device *md_bdev;
926 struct file *lo_file;
927 struct file *md_file;
928 struct drbd_md md; 926 struct drbd_md md;
929 struct disk_conf dc; /* The user provided config... */ 927 struct disk_conf dc; /* The user provided config... */
930 sector_t known_size; /* last known size of that backing device */ 928 sector_t known_size; /* last known size of that backing device */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 25c7a73c5062..7ec1a82064a9 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3361,11 +3361,8 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
3361 if (ldev == NULL) 3361 if (ldev == NULL)
3362 return; 3362 return;
3363 3363
3364 bd_release(ldev->backing_bdev); 3364 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3365 bd_release(ldev->md_bdev); 3365 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3366
3367 fput(ldev->lo_file);
3368 fput(ldev->md_file);
3369 3366
3370 kfree(ldev); 3367 kfree(ldev);
3371} 3368}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 87925e97e613..fd0346090289 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -855,7 +855,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
855 sector_t max_possible_sectors; 855 sector_t max_possible_sectors;
856 sector_t min_md_device_sectors; 856 sector_t min_md_device_sectors;
857 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ 857 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
858 struct inode *inode, *inode2; 858 struct block_device *bdev;
859 struct lru_cache *resync_lru = NULL; 859 struct lru_cache *resync_lru = NULL;
860 union drbd_state ns, os; 860 union drbd_state ns, os;
861 unsigned int max_seg_s; 861 unsigned int max_seg_s;
@@ -902,46 +902,40 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
902 } 902 }
903 } 903 }
904 904
905 nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); 905 bdev = open_bdev_exclusive(nbc->dc.backing_dev,
906 if (IS_ERR(nbc->lo_file)) { 906 FMODE_READ | FMODE_WRITE, mdev);
907 if (IS_ERR(bdev)) {
907 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, 908 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
908 PTR_ERR(nbc->lo_file)); 909 PTR_ERR(bdev));
909 nbc->lo_file = NULL;
910 retcode = ERR_OPEN_DISK; 910 retcode = ERR_OPEN_DISK;
911 goto fail; 911 goto fail;
912 } 912 }
913 nbc->backing_bdev = bdev;
913 914
914 inode = nbc->lo_file->f_dentry->d_inode; 915 /*
915 916 * meta_dev_idx >= 0: external fixed size, possibly multiple
916 if (!S_ISBLK(inode->i_mode)) { 917 * drbd sharing one meta device. TODO in that case, paranoia
917 retcode = ERR_DISK_NOT_BDEV; 918 * check that [md_bdev, meta_dev_idx] is not yet used by some
918 goto fail; 919 * other drbd minor! (if you use drbd.conf + drbdadm, that
919 } 920 * should check it for you already; but if you don't, or
920 921 * someone fooled it, we need to double check here)
921 nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); 922 */
922 if (IS_ERR(nbc->md_file)) { 923 bdev = open_bdev_exclusive(nbc->dc.meta_dev,
924 FMODE_READ | FMODE_WRITE,
925 (nbc->dc.meta_dev_idx < 0) ?
926 (void *)mdev : (void *)drbd_m_holder);
927 if (IS_ERR(bdev)) {
923 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, 928 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
924 PTR_ERR(nbc->md_file)); 929 PTR_ERR(bdev));
925 nbc->md_file = NULL;
926 retcode = ERR_OPEN_MD_DISK; 930 retcode = ERR_OPEN_MD_DISK;
927 goto fail; 931 goto fail;
928 } 932 }
933 nbc->md_bdev = bdev;
929 934
930 inode2 = nbc->md_file->f_dentry->d_inode; 935 if ((nbc->backing_bdev == nbc->md_bdev) !=
931 936 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
932 if (!S_ISBLK(inode2->i_mode)) { 937 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
933 retcode = ERR_MD_NOT_BDEV; 938 retcode = ERR_MD_IDX_INVALID;
934 goto fail;
935 }
936
937 nbc->backing_bdev = inode->i_bdev;
938 if (bd_claim(nbc->backing_bdev, mdev)) {
939 printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
940 nbc->backing_bdev, mdev,
941 nbc->backing_bdev->bd_holder,
942 nbc->backing_bdev->bd_contains->bd_holder,
943 nbc->backing_bdev->bd_holders);
944 retcode = ERR_BDCLAIM_DISK;
945 goto fail; 939 goto fail;
946 } 940 }
947 941
@@ -950,28 +944,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
950 offsetof(struct bm_extent, lce)); 944 offsetof(struct bm_extent, lce));
951 if (!resync_lru) { 945 if (!resync_lru) {
952 retcode = ERR_NOMEM; 946 retcode = ERR_NOMEM;
953 goto release_bdev_fail; 947 goto fail;
954 }
955
956 /* meta_dev_idx >= 0: external fixed size,
957 * possibly multiple drbd sharing one meta device.
958 * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
959 * not yet used by some other drbd minor!
960 * (if you use drbd.conf + drbdadm,
961 * that should check it for you already; but if you don't, or someone
962 * fooled it, we need to double check here) */
963 nbc->md_bdev = inode2->i_bdev;
964 if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
965 : (void *) drbd_m_holder)) {
966 retcode = ERR_BDCLAIM_MD_DISK;
967 goto release_bdev_fail;
968 }
969
970 if ((nbc->backing_bdev == nbc->md_bdev) !=
971 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
972 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
973 retcode = ERR_MD_IDX_INVALID;
974 goto release_bdev2_fail;
975 } 948 }
976 949
977 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ 950 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
@@ -982,7 +955,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
982 (unsigned long long) drbd_get_max_capacity(nbc), 955 (unsigned long long) drbd_get_max_capacity(nbc),
983 (unsigned long long) nbc->dc.disk_size); 956 (unsigned long long) nbc->dc.disk_size);
984 retcode = ERR_DISK_TO_SMALL; 957 retcode = ERR_DISK_TO_SMALL;
985 goto release_bdev2_fail; 958 goto fail;
986 } 959 }
987 960
988 if (nbc->dc.meta_dev_idx < 0) { 961 if (nbc->dc.meta_dev_idx < 0) {
@@ -999,7 +972,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
999 dev_warn(DEV, "refusing attach: md-device too small, " 972 dev_warn(DEV, "refusing attach: md-device too small, "
1000 "at least %llu sectors needed for this meta-disk type\n", 973 "at least %llu sectors needed for this meta-disk type\n",
1001 (unsigned long long) min_md_device_sectors); 974 (unsigned long long) min_md_device_sectors);
1002 goto release_bdev2_fail; 975 goto fail;
1003 } 976 }
1004 977
1005 /* Make sure the new disk is big enough 978 /* Make sure the new disk is big enough
@@ -1007,7 +980,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1007 if (drbd_get_max_capacity(nbc) < 980 if (drbd_get_max_capacity(nbc) <
1008 drbd_get_capacity(mdev->this_bdev)) { 981 drbd_get_capacity(mdev->this_bdev)) {
1009 retcode = ERR_DISK_TO_SMALL; 982 retcode = ERR_DISK_TO_SMALL;
1010 goto release_bdev2_fail; 983 goto fail;
1011 } 984 }
1012 985
1013 nbc->known_size = drbd_get_capacity(nbc->backing_bdev); 986 nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
@@ -1030,7 +1003,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1030 retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); 1003 retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
1031 drbd_resume_io(mdev); 1004 drbd_resume_io(mdev);
1032 if (retcode < SS_SUCCESS) 1005 if (retcode < SS_SUCCESS)
1033 goto release_bdev2_fail; 1006 goto fail;
1034 1007
1035 if (!get_ldev_if_state(mdev, D_ATTACHING)) 1008 if (!get_ldev_if_state(mdev, D_ATTACHING))
1036 goto force_diskless; 1009 goto force_diskless;
@@ -1264,18 +1237,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1264 force_diskless: 1237 force_diskless:
1265 drbd_force_state(mdev, NS(disk, D_DISKLESS)); 1238 drbd_force_state(mdev, NS(disk, D_DISKLESS));
1266 drbd_md_sync(mdev); 1239 drbd_md_sync(mdev);
1267 release_bdev2_fail:
1268 if (nbc)
1269 bd_release(nbc->md_bdev);
1270 release_bdev_fail:
1271 if (nbc)
1272 bd_release(nbc->backing_bdev);
1273 fail: 1240 fail:
1274 if (nbc) { 1241 if (nbc) {
1275 if (nbc->lo_file) 1242 if (nbc->backing_bdev)
1276 fput(nbc->lo_file); 1243 blkdev_put(nbc->backing_bdev,
1277 if (nbc->md_file) 1244 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1278 fput(nbc->md_file); 1245 if (nbc->md_bdev)
1246 blkdev_put(nbc->md_bdev,
1247 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1279 kfree(nbc); 1248 kfree(nbc);
1280 } 1249 }
1281 lc_destroy(resync_lru); 1250 lc_destroy(resync_lru);