aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-07-02 14:35:00 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-07-02 14:35:00 -0400
commit0c76c6ba246043bbc5c0f9620a0645ae78217421 (patch)
tree644a4db58706c4e97478951f0a3a0087ddf26e5e /drivers/block
parent8688d9540cc6e17df4cba71615e27f04e0378fe6 (diff)
parent5a60e87603c4c533492c515b7f62578189b03c9c (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil: "We have a pile of bug fixes from Ilya, including a few patches that sync up the CRUSH code with the latest from userspace. There is also a long series from Zheng that fixes various issues with snapshots, inline data, and directory fsync, some simplification and improvement in the cap release code, and a rework of the caching of directory contents. To top it off there are a few small fixes and cleanups from Benoit and Hong" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (40 commits) rbd: use GFP_NOIO in rbd_obj_request_create() crush: fix a bug in tree bucket decode libceph: Fix ceph_tcp_sendpage()'s more boolean usage libceph: Remove spurious kunmap() of the zero page rbd: queue_depth map option rbd: store rbd_options in rbd_device rbd: terminate rbd_opts_tokens with Opt_err ceph: fix ceph_writepages_start() rbd: bump queue_max_segments ceph: rework dcache readdir crush: sync up with userspace crush: fix crash from invalid 'take' argument ceph: switch some GFP_NOFS memory allocation to GFP_KERNEL ceph: pre-allocate data structure that tracks caps flushing ceph: re-send flushing caps (which are revoked) in reconnect stage ceph: send TID of the oldest pending caps flush to MDS ceph: track pending caps flushing globally ceph: track pending caps flushing accurately libceph: fix wrong name "Ceph filesystem for Linux" ceph: fix directory fsync ...
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/rbd.c111
1 file changed, 69 insertions, 42 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ec6c5c6e1ac9..d94529d5c8e9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -346,6 +346,7 @@ struct rbd_device {
346 struct rbd_image_header header; 346 struct rbd_image_header header;
347 unsigned long flags; /* possibly lock protected */ 347 unsigned long flags; /* possibly lock protected */
348 struct rbd_spec *spec; 348 struct rbd_spec *spec;
349 struct rbd_options *opts;
349 350
350 char *header_name; 351 char *header_name;
351 352
@@ -724,34 +725,36 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
724} 725}
725 726
726/* 727/*
727 * mount options 728 * (Per device) rbd map options
728 */ 729 */
729enum { 730enum {
731 Opt_queue_depth,
730 Opt_last_int, 732 Opt_last_int,
731 /* int args above */ 733 /* int args above */
732 Opt_last_string, 734 Opt_last_string,
733 /* string args above */ 735 /* string args above */
734 Opt_read_only, 736 Opt_read_only,
735 Opt_read_write, 737 Opt_read_write,
736 /* Boolean args above */ 738 Opt_err
737 Opt_last_bool,
738}; 739};
739 740
740static match_table_t rbd_opts_tokens = { 741static match_table_t rbd_opts_tokens = {
742 {Opt_queue_depth, "queue_depth=%d"},
741 /* int args above */ 743 /* int args above */
742 /* string args above */ 744 /* string args above */
743 {Opt_read_only, "read_only"}, 745 {Opt_read_only, "read_only"},
744 {Opt_read_only, "ro"}, /* Alternate spelling */ 746 {Opt_read_only, "ro"}, /* Alternate spelling */
745 {Opt_read_write, "read_write"}, 747 {Opt_read_write, "read_write"},
746 {Opt_read_write, "rw"}, /* Alternate spelling */ 748 {Opt_read_write, "rw"}, /* Alternate spelling */
747 /* Boolean args above */ 749 {Opt_err, NULL}
748 {-1, NULL}
749}; 750};
750 751
751struct rbd_options { 752struct rbd_options {
753 int queue_depth;
752 bool read_only; 754 bool read_only;
753}; 755};
754 756
757#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
755#define RBD_READ_ONLY_DEFAULT false 758#define RBD_READ_ONLY_DEFAULT false
756 759
757static int parse_rbd_opts_token(char *c, void *private) 760static int parse_rbd_opts_token(char *c, void *private)
@@ -761,27 +764,27 @@ static int parse_rbd_opts_token(char *c, void *private)
761 int token, intval, ret; 764 int token, intval, ret;
762 765
763 token = match_token(c, rbd_opts_tokens, argstr); 766 token = match_token(c, rbd_opts_tokens, argstr);
764 if (token < 0)
765 return -EINVAL;
766
767 if (token < Opt_last_int) { 767 if (token < Opt_last_int) {
768 ret = match_int(&argstr[0], &intval); 768 ret = match_int(&argstr[0], &intval);
769 if (ret < 0) { 769 if (ret < 0) {
770 pr_err("bad mount option arg (not int) " 770 pr_err("bad mount option arg (not int) at '%s'\n", c);
771 "at '%s'\n", c);
772 return ret; 771 return ret;
773 } 772 }
774 dout("got int token %d val %d\n", token, intval); 773 dout("got int token %d val %d\n", token, intval);
775 } else if (token > Opt_last_int && token < Opt_last_string) { 774 } else if (token > Opt_last_int && token < Opt_last_string) {
776 dout("got string token %d val %s\n", token, 775 dout("got string token %d val %s\n", token, argstr[0].from);
777 argstr[0].from);
778 } else if (token > Opt_last_string && token < Opt_last_bool) {
779 dout("got Boolean token %d\n", token);
780 } else { 776 } else {
781 dout("got token %d\n", token); 777 dout("got token %d\n", token);
782 } 778 }
783 779
784 switch (token) { 780 switch (token) {
781 case Opt_queue_depth:
782 if (intval < 1) {
783 pr_err("queue_depth out of range\n");
784 return -EINVAL;
785 }
786 rbd_opts->queue_depth = intval;
787 break;
785 case Opt_read_only: 788 case Opt_read_only:
786 rbd_opts->read_only = true; 789 rbd_opts->read_only = true;
787 break; 790 break;
@@ -789,9 +792,10 @@ static int parse_rbd_opts_token(char *c, void *private)
789 rbd_opts->read_only = false; 792 rbd_opts->read_only = false;
790 break; 793 break;
791 default: 794 default:
792 rbd_assert(false); 795 /* libceph prints "bad option" msg */
793 break; 796 return -EINVAL;
794 } 797 }
798
795 return 0; 799 return 0;
796} 800}
797 801
@@ -1563,22 +1567,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
1563/* 1567/*
1564 * Wait for an object request to complete. If interrupted, cancel the 1568 * Wait for an object request to complete. If interrupted, cancel the
1565 * underlying osd request. 1569 * underlying osd request.
1570 *
1571 * @timeout: in jiffies, 0 means "wait forever"
1566 */ 1572 */
1567static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) 1573static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request,
1574 unsigned long timeout)
1568{ 1575{
1569 int ret; 1576 long ret;
1570 1577
1571 dout("%s %p\n", __func__, obj_request); 1578 dout("%s %p\n", __func__, obj_request);
1572 1579 ret = wait_for_completion_interruptible_timeout(
1573 ret = wait_for_completion_interruptible(&obj_request->completion); 1580 &obj_request->completion,
1574 if (ret < 0) { 1581 ceph_timeout_jiffies(timeout));
1575 dout("%s %p interrupted\n", __func__, obj_request); 1582 if (ret <= 0) {
1583 if (ret == 0)
1584 ret = -ETIMEDOUT;
1576 rbd_obj_request_end(obj_request); 1585 rbd_obj_request_end(obj_request);
1577 return ret; 1586 } else {
1587 ret = 0;
1578 } 1588 }
1579 1589
1580 dout("%s %p done\n", __func__, obj_request); 1590 dout("%s %p ret %d\n", __func__, obj_request, (int)ret);
1581 return 0; 1591 return ret;
1592}
1593
1594static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
1595{
1596 return __rbd_obj_request_wait(obj_request, 0);
1597}
1598
1599static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request,
1600 unsigned long timeout)
1601{
1602 return __rbd_obj_request_wait(obj_request, timeout);
1582} 1603}
1583 1604
1584static void rbd_img_request_complete(struct rbd_img_request *img_request) 1605static void rbd_img_request_complete(struct rbd_img_request *img_request)
@@ -2001,11 +2022,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
2001 rbd_assert(obj_request_type_valid(type)); 2022 rbd_assert(obj_request_type_valid(type));
2002 2023
2003 size = strlen(object_name) + 1; 2024 size = strlen(object_name) + 1;
2004 name = kmalloc(size, GFP_KERNEL); 2025 name = kmalloc(size, GFP_NOIO);
2005 if (!name) 2026 if (!name)
2006 return NULL; 2027 return NULL;
2007 2028
2008 obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL); 2029 obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
2009 if (!obj_request) { 2030 if (!obj_request) {
2010 kfree(name); 2031 kfree(name);
2011 return NULL; 2032 return NULL;
@@ -2376,7 +2397,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
2376 } 2397 }
2377 2398
2378 if (opcode == CEPH_OSD_OP_DELETE) 2399 if (opcode == CEPH_OSD_OP_DELETE)
2379 osd_req_op_init(osd_request, num_ops, opcode); 2400 osd_req_op_init(osd_request, num_ops, opcode, 0);
2380 else 2401 else
2381 osd_req_op_extent_init(osd_request, num_ops, opcode, 2402 osd_req_op_extent_init(osd_request, num_ops, opcode,
2382 offset, length, 0, 0); 2403 offset, length, 0, 0);
@@ -2848,7 +2869,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
2848 goto out; 2869 goto out;
2849 stat_request->callback = rbd_img_obj_exists_callback; 2870 stat_request->callback = rbd_img_obj_exists_callback;
2850 2871
2851 osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT); 2872 osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
2852 osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0, 2873 osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
2853 false, false); 2874 false, false);
2854 rbd_osd_req_format_read(stat_request); 2875 rbd_osd_req_format_read(stat_request);
@@ -3122,6 +3143,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
3122 bool watch) 3143 bool watch)
3123{ 3144{
3124 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 3145 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
3146 struct ceph_options *opts = osdc->client->options;
3125 struct rbd_obj_request *obj_request; 3147 struct rbd_obj_request *obj_request;
3126 int ret; 3148 int ret;
3127 3149
@@ -3148,7 +3170,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
3148 if (ret) 3170 if (ret)
3149 goto out; 3171 goto out;
3150 3172
3151 ret = rbd_obj_request_wait(obj_request); 3173 ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout);
3152 if (ret) 3174 if (ret)
3153 goto out; 3175 goto out;
3154 3176
@@ -3750,10 +3772,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
3750 3772
3751 memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); 3773 memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
3752 rbd_dev->tag_set.ops = &rbd_mq_ops; 3774 rbd_dev->tag_set.ops = &rbd_mq_ops;
3753 rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ; 3775 rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
3754 rbd_dev->tag_set.numa_node = NUMA_NO_NODE; 3776 rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
3755 rbd_dev->tag_set.flags = 3777 rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
3756 BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
3757 rbd_dev->tag_set.nr_hw_queues = 1; 3778 rbd_dev->tag_set.nr_hw_queues = 1;
3758 rbd_dev->tag_set.cmd_size = sizeof(struct work_struct); 3779 rbd_dev->tag_set.cmd_size = sizeof(struct work_struct);
3759 3780
@@ -3773,6 +3794,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
3773 /* set io sizes to object size */ 3794 /* set io sizes to object size */
3774 segment_size = rbd_obj_bytes(&rbd_dev->header); 3795 segment_size = rbd_obj_bytes(&rbd_dev->header);
3775 blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); 3796 blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
3797 blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
3776 blk_queue_max_segment_size(q, segment_size); 3798 blk_queue_max_segment_size(q, segment_size);
3777 blk_queue_io_min(q, segment_size); 3799 blk_queue_io_min(q, segment_size);
3778 blk_queue_io_opt(q, segment_size); 3800 blk_queue_io_opt(q, segment_size);
@@ -4044,7 +4066,8 @@ static void rbd_spec_free(struct kref *kref)
4044} 4066}
4045 4067
4046static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, 4068static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
4047 struct rbd_spec *spec) 4069 struct rbd_spec *spec,
4070 struct rbd_options *opts)
4048{ 4071{
4049 struct rbd_device *rbd_dev; 4072 struct rbd_device *rbd_dev;
4050 4073
@@ -4058,8 +4081,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
4058 INIT_LIST_HEAD(&rbd_dev->node); 4081 INIT_LIST_HEAD(&rbd_dev->node);
4059 init_rwsem(&rbd_dev->header_rwsem); 4082 init_rwsem(&rbd_dev->header_rwsem);
4060 4083
4061 rbd_dev->spec = spec;
4062 rbd_dev->rbd_client = rbdc; 4084 rbd_dev->rbd_client = rbdc;
4085 rbd_dev->spec = spec;
4086 rbd_dev->opts = opts;
4063 4087
4064 /* Initialize the layout used for all rbd requests */ 4088 /* Initialize the layout used for all rbd requests */
4065 4089
@@ -4075,6 +4099,7 @@ static void rbd_dev_destroy(struct rbd_device *rbd_dev)
4075{ 4099{
4076 rbd_put_client(rbd_dev->rbd_client); 4100 rbd_put_client(rbd_dev->rbd_client);
4077 rbd_spec_put(rbd_dev->spec); 4101 rbd_spec_put(rbd_dev->spec);
4102 kfree(rbd_dev->opts);
4078 kfree(rbd_dev); 4103 kfree(rbd_dev);
4079} 4104}
4080 4105
@@ -4933,6 +4958,7 @@ static int rbd_add_parse_args(const char *buf,
4933 goto out_mem; 4958 goto out_mem;
4934 4959
4935 rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; 4960 rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
4961 rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
4936 4962
4937 copts = ceph_parse_options(options, mon_addrs, 4963 copts = ceph_parse_options(options, mon_addrs,
4938 mon_addrs + mon_addrs_size - 1, 4964 mon_addrs + mon_addrs_size - 1,
@@ -4963,8 +4989,8 @@ out_err:
4963 */ 4989 */
4964static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) 4990static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
4965{ 4991{
4992 struct ceph_options *opts = rbdc->client->options;
4966 u64 newest_epoch; 4993 u64 newest_epoch;
4967 unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
4968 int tries = 0; 4994 int tries = 0;
4969 int ret; 4995 int ret;
4970 4996
@@ -4979,7 +5005,8 @@ again:
4979 if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { 5005 if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
4980 ceph_monc_request_next_osdmap(&rbdc->client->monc); 5006 ceph_monc_request_next_osdmap(&rbdc->client->monc);
4981 (void) ceph_monc_wait_osdmap(&rbdc->client->monc, 5007 (void) ceph_monc_wait_osdmap(&rbdc->client->monc,
4982 newest_epoch, timeout); 5008 newest_epoch,
5009 opts->mount_timeout);
4983 goto again; 5010 goto again;
4984 } else { 5011 } else {
4985 /* the osdmap we have is new enough */ 5012 /* the osdmap we have is new enough */
@@ -5148,7 +5175,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
5148 rbdc = __rbd_get_client(rbd_dev->rbd_client); 5175 rbdc = __rbd_get_client(rbd_dev->rbd_client);
5149 5176
5150 ret = -ENOMEM; 5177 ret = -ENOMEM;
5151 parent = rbd_dev_create(rbdc, parent_spec); 5178 parent = rbd_dev_create(rbdc, parent_spec, NULL);
5152 if (!parent) 5179 if (!parent)
5153 goto out_err; 5180 goto out_err;
5154 5181
@@ -5394,9 +5421,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
5394 rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); 5421 rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
5395 if (rc < 0) 5422 if (rc < 0)
5396 goto err_out_module; 5423 goto err_out_module;
5397 read_only = rbd_opts->read_only;
5398 kfree(rbd_opts);
5399 rbd_opts = NULL; /* done with this */
5400 5424
5401 rbdc = rbd_get_client(ceph_opts); 5425 rbdc = rbd_get_client(ceph_opts);
5402 if (IS_ERR(rbdc)) { 5426 if (IS_ERR(rbdc)) {
@@ -5422,11 +5446,12 @@ static ssize_t do_rbd_add(struct bus_type *bus,
5422 goto err_out_client; 5446 goto err_out_client;
5423 } 5447 }
5424 5448
5425 rbd_dev = rbd_dev_create(rbdc, spec); 5449 rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
5426 if (!rbd_dev) 5450 if (!rbd_dev)
5427 goto err_out_client; 5451 goto err_out_client;
5428 rbdc = NULL; /* rbd_dev now owns this */ 5452 rbdc = NULL; /* rbd_dev now owns this */
5429 spec = NULL; /* rbd_dev now owns this */ 5453 spec = NULL; /* rbd_dev now owns this */
5454 rbd_opts = NULL; /* rbd_dev now owns this */
5430 5455
5431 rc = rbd_dev_image_probe(rbd_dev, true); 5456 rc = rbd_dev_image_probe(rbd_dev, true);
5432 if (rc < 0) 5457 if (rc < 0)
@@ -5434,6 +5459,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
5434 5459
5435 /* If we are mapping a snapshot it must be marked read-only */ 5460 /* If we are mapping a snapshot it must be marked read-only */
5436 5461
5462 read_only = rbd_dev->opts->read_only;
5437 if (rbd_dev->spec->snap_id != CEPH_NOSNAP) 5463 if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
5438 read_only = true; 5464 read_only = true;
5439 rbd_dev->mapping.read_only = read_only; 5465 rbd_dev->mapping.read_only = read_only;
@@ -5458,6 +5484,7 @@ err_out_client:
5458 rbd_put_client(rbdc); 5484 rbd_put_client(rbdc);
5459err_out_args: 5485err_out_args:
5460 rbd_spec_put(spec); 5486 rbd_spec_put(spec);
5487 kfree(rbd_opts);
5461err_out_module: 5488err_out_module:
5462 module_put(THIS_MODULE); 5489 module_put(THIS_MODULE);
5463 5490