author	Ilya Dryomov <idryomov@gmail.com>	2017-04-13 06:17:39 -0400
committer	Ilya Dryomov <idryomov@gmail.com>	2017-05-04 03:19:24 -0400
commit	e010dd0ada619ed6d3411de7371fba12c1baa48b (patch)
tree	d7767fd111a68592b4e7fa6498b0c5179cac3e35 /drivers/block
parent	3b77faa0495abd07e94119681be8cc66af5e0a3b (diff)
rbd: exclusive map option
Support disabling automatic exclusive lock transfers to allow users to be in charge of which node should own the lock while being able to reuse exclusive lock's built-in blacklist/break-lock functionality.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
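Usage sketch (illustrative, not part of this commit; the pool/image names are made up and the exact CLI spelling is an assumption): the new option is passed alongside the other krbd map options, and the map fails with EROFS if the exclusive lock cannot be acquired, e.g. because another client holds it and will not release it:

    $ rbd map -o exclusive mypool/myimage    # requires the exclusive-lock image feature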
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/rbd.c	83
1 file changed, 73 insertions(+), 10 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8babb1a59a0a..3402ff7414c5 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -798,6 +798,7 @@ enum {
 	Opt_read_only,
 	Opt_read_write,
 	Opt_lock_on_read,
+	Opt_exclusive,
 	Opt_err
 };
 
@@ -810,6 +811,7 @@ static match_table_t rbd_opts_tokens = {
 	{Opt_read_write, "read_write"},
 	{Opt_read_write, "rw"},		/* Alternate spelling */
 	{Opt_lock_on_read, "lock_on_read"},
+	{Opt_exclusive, "exclusive"},
 	{Opt_err, NULL}
 };
 
@@ -817,11 +819,13 @@ struct rbd_options {
 	int	queue_depth;
 	bool	read_only;
 	bool	lock_on_read;
+	bool	exclusive;
 };
 
 #define RBD_QUEUE_DEPTH_DEFAULT	BLKDEV_MAX_RQ
 #define RBD_READ_ONLY_DEFAULT	false
 #define RBD_LOCK_ON_READ_DEFAULT	false
+#define RBD_EXCLUSIVE_DEFAULT	false
 
 static int parse_rbd_opts_token(char *c, void *private)
 {
@@ -860,6 +864,9 @@ static int parse_rbd_opts_token(char *c, void *private)
 	case Opt_lock_on_read:
 		rbd_opts->lock_on_read = true;
 		break;
+	case Opt_exclusive:
+		rbd_opts->exclusive = true;
+		break;
 	default:
 		/* libceph prints "bad option" msg */
 		return -EINVAL;
@@ -3440,6 +3447,18 @@ again:
 		ret = rbd_request_lock(rbd_dev);
 		if (ret == -ETIMEDOUT) {
 			goto again; /* treat this as a dead client */
+		} else if (ret == -EROFS) {
+			rbd_warn(rbd_dev, "peer will not release lock");
+			/*
+			 * If this is rbd_add_acquire_lock(), we want to fail
+			 * immediately -- reuse BLACKLISTED flag.  Otherwise we
+			 * want to block.
+			 */
+			if (!(rbd_dev->disk->flags & GENHD_FL_UP)) {
+				set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
+				/* wake "rbd map --exclusive" process */
+				wake_requests(rbd_dev, false);
+			}
 		} else if (ret < 0) {
 			rbd_warn(rbd_dev, "error requesting lock: %d", ret);
 			mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
@@ -3606,9 +3625,15 @@ static int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
 		result = 0;
 
 		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
-			dout("%s rbd_dev %p queueing unlock_work\n", __func__,
-			     rbd_dev);
-			queue_work(rbd_dev->task_wq, &rbd_dev->unlock_work);
+			if (!rbd_dev->opts->exclusive) {
+				dout("%s rbd_dev %p queueing unlock_work\n",
+				     __func__, rbd_dev);
+				queue_work(rbd_dev->task_wq,
+					   &rbd_dev->unlock_work);
+			} else {
+				/* refuse to release the lock */
+				result = -EROFS;
+			}
 		}
 	}
 
@@ -4073,8 +4098,14 @@ static void rbd_queue_workfn(struct work_struct *work)
 	if (must_be_locked) {
 		down_read(&rbd_dev->lock_rwsem);
 		if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
-		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
+		    !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+			if (rbd_dev->opts->exclusive) {
+				rbd_warn(rbd_dev, "exclusive lock required");
+				result = -EROFS;
+				goto err_unlock;
+			}
 			rbd_wait_state_locked(rbd_dev);
+		}
 		if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
 			result = -EBLACKLISTED;
 			goto err_unlock;
@@ -5640,6 +5671,7 @@ static int rbd_add_parse_args(const char *buf,
 	rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
 	rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
 	rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
+	rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
 
 	copts = ceph_parse_options(options, mon_addrs,
 					mon_addrs + mon_addrs_size - 1,
@@ -5698,6 +5730,33 @@ again:
 	return ret;
 }
 
+static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
+{
+	down_write(&rbd_dev->lock_rwsem);
+	if (__rbd_is_lock_owner(rbd_dev))
+		rbd_unlock(rbd_dev);
+	up_write(&rbd_dev->lock_rwsem);
+}
+
+static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
+{
+	if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
+		rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
+		return -EINVAL;
+	}
+
+	/* FIXME: "rbd map --exclusive" should be in interruptible */
+	down_read(&rbd_dev->lock_rwsem);
+	rbd_wait_state_locked(rbd_dev);
+	up_read(&rbd_dev->lock_rwsem);
+	if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+		rbd_warn(rbd_dev, "failed to acquire exclusive lock");
+		return -EROFS;
+	}
+
+	return 0;
+}
+
 /*
  * An rbd format 2 image has a unique identifier, distinct from the
  * name given to it by the user. Internally, that identifier is
@@ -6141,11 +6200,17 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	if (rc)
 		goto err_out_image_probe;
 
+	if (rbd_dev->opts->exclusive) {
+		rc = rbd_add_acquire_lock(rbd_dev);
+		if (rc)
+			goto err_out_device_setup;
+	}
+
 	/* Everything's ready. Announce the disk to the world. */
 
 	rc = device_add(&rbd_dev->dev);
 	if (rc)
-		goto err_out_device_setup;
+		goto err_out_image_lock;
 
 	add_disk(rbd_dev->disk);
 	/* see rbd_init_disk() */
@@ -6163,6 +6228,8 @@ out:
 	module_put(THIS_MODULE);
 	return rc;
 
+err_out_image_lock:
+	rbd_dev_image_unlock(rbd_dev);
 err_out_device_setup:
 	rbd_dev_device_release(rbd_dev);
 err_out_image_probe:
@@ -6286,11 +6353,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
 	spin_unlock(&rbd_dev_list_lock);
 	device_del(&rbd_dev->dev);
 
-	down_write(&rbd_dev->lock_rwsem);
-	if (__rbd_is_lock_owner(rbd_dev))
-		rbd_unlock(rbd_dev);
-	up_write(&rbd_dev->lock_rwsem);
-
+	rbd_dev_image_unlock(rbd_dev);
 	rbd_dev_device_release(rbd_dev);
 	rbd_dev_image_release(rbd_dev);
 	rbd_dev_destroy(rbd_dev);