aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ilya Dryomov <ilya.dryomov@inktank.com> 2013-12-13 08:28:57 -0500
committer: Ilya Dryomov <ilya.dryomov@inktank.com> 2013-12-31 13:31:59 -0500
commit: 9b60e70b3b6a8e4bc2d1b6d9f858a30e1cec496b (patch)
tree: a6aee3f149ccfe729586eeed02a20c463f0dbc24
parent: 92c76dc036e2139226e90851864d3e01e1db5dd8 (diff)
rbd: add support for single-major device number allocation scheme
Currently each rbd device is allocated its own major number, which leads to a hard limit of 230-250 images mapped at once. This commit adds support for a new single-major device number allocation scheme, which is hidden behind a new single_major boolean module parameter and is disabled by default for backwards compatibility reasons. (Old userspace cannot correctly unmap images mapped under single-major scheme and would essentially just unmap a random image, if that.)

    $ rbd showmapped
    id pool image snap device
    0  rbd  b100  -    /dev/rbd0
    1  rbd  b101  -    /dev/rbd1
    2  rbd  b102  -    /dev/rbd2
    3  rbd  b103  -    /dev/rbd3

Old scheme (modprobe rbd):

    $ ls -l /dev/rbd*
    brw-rw---- 1 root disk 253,   0 Dec 10 12:24 /dev/rbd0
    brw-rw---- 1 root disk 252,   0 Dec 10 12:28 /dev/rbd1
    brw-rw---- 1 root disk 252,   1 Dec 10 12:28 /dev/rbd1p1
    brw-rw---- 1 root disk 252,   2 Dec 10 12:28 /dev/rbd1p2
    brw-rw---- 1 root disk 252,   3 Dec 10 12:28 /dev/rbd1p3
    brw-rw---- 1 root disk 251,   0 Dec 10 12:28 /dev/rbd2
    brw-rw---- 1 root disk 251,   1 Dec 10 12:28 /dev/rbd2p1
    brw-rw---- 1 root disk 250,   0 Dec 10 12:24 /dev/rbd3

New scheme (modprobe rbd single_major=Y):

    $ ls -l /dev/rbd*
    brw-rw---- 1 root disk 253,   0 Dec 10 12:30 /dev/rbd0
    brw-rw---- 1 root disk 253, 256 Dec 10 12:30 /dev/rbd1
    brw-rw---- 1 root disk 253, 257 Dec 10 12:30 /dev/rbd1p1
    brw-rw---- 1 root disk 253, 258 Dec 10 12:30 /dev/rbd1p2
    brw-rw---- 1 root disk 253, 259 Dec 10 12:30 /dev/rbd1p3
    brw-rw---- 1 root disk 253, 512 Dec 10 12:30 /dev/rbd2
    brw-rw---- 1 root disk 253, 513 Dec 10 12:30 /dev/rbd2p1
    brw-rw---- 1 root disk 253, 768 Dec 10 12:30 /dev/rbd3

(major 253 was assigned dynamically at module load time)

The new limit is 4096 images mapped at once, and it comes from the fact that, as before, 256 minor numbers are reserved for each mapping. (A follow-up commit changes the number of minors reserved and the way we deal with partitions over that number.)
If single_major is set to true, two new sysfs interfaces show up: /sys/bus/rbd/{add,remove}_single_major. These are to be used instead of /sys/bus/rbd/{add,remove}, which are disabled for backwards compatibility reasons outlined above.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Alex Elder <elder@linaro.org>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r--  Documentation/ABI/testing/sysfs-bus-rbd   22
-rw-r--r--  drivers/block/rbd.c                      132
2 files changed, 134 insertions, 20 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
index 17b119c692da..501adc2a9ec7 100644
--- a/Documentation/ABI/testing/sysfs-bus-rbd
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -18,6 +18,28 @@ Removal of a device:
18 18
19 $ echo <dev-id> > /sys/bus/rbd/remove 19 $ echo <dev-id> > /sys/bus/rbd/remove
20 20
21What: /sys/bus/rbd/add_single_major
22Date: December 2013
23KernelVersion: 3.14
24Contact: Sage Weil <sage@inktank.com>
25Description: Available only if rbd module is inserted with single_major
26 parameter set to true.
27 Usage is the same as for /sys/bus/rbd/add. If present,
28 should be used instead of the latter: any attempts to use
29 /sys/bus/rbd/add if /sys/bus/rbd/add_single_major is
30 available will fail for backwards compatibility reasons.
31
32What: /sys/bus/rbd/remove_single_major
33Date: December 2013
34KernelVersion: 3.14
35Contact: Sage Weil <sage@inktank.com>
36Description: Available only if rbd module is inserted with single_major
37 parameter set to true.
38 Usage is the same as for /sys/bus/rbd/remove. If present,
39 should be used instead of the latter: any attempts to use
40 /sys/bus/rbd/remove if /sys/bus/rbd/remove_single_major is
41 available will fail for backwards compatibility reasons.
42
21Entries under /sys/bus/rbd/devices/<dev-id>/ 43Entries under /sys/bus/rbd/devices/<dev-id>/
22-------------------------------------------- 44--------------------------------------------
23 45
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 3fa18b0c5e4d..e5ddcb58e9a2 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -91,7 +91,7 @@ static int atomic_dec_return_safe(atomic_t *v)
91 91
92#define RBD_DRV_NAME "rbd" 92#define RBD_DRV_NAME "rbd"
93 93
94#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 94#define RBD_PART_SHIFT 8
95 95
96#define RBD_SNAP_DEV_NAME_PREFIX "snap_" 96#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
97#define RBD_MAX_SNAP_NAME_LEN \ 97#define RBD_MAX_SNAP_NAME_LEN \
@@ -387,8 +387,17 @@ static struct kmem_cache *rbd_img_request_cache;
387static struct kmem_cache *rbd_obj_request_cache; 387static struct kmem_cache *rbd_obj_request_cache;
388static struct kmem_cache *rbd_segment_name_cache; 388static struct kmem_cache *rbd_segment_name_cache;
389 389
390static int rbd_major;
390static DEFINE_IDA(rbd_dev_id_ida); 391static DEFINE_IDA(rbd_dev_id_ida);
391 392
393/*
394 * Default to false for now, as single-major requires >= 0.75 version of
395 * userspace rbd utility.
396 */
397static bool single_major = false;
398module_param(single_major, bool, S_IRUGO);
399MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: false)");
400
392static int rbd_img_request_submit(struct rbd_img_request *img_request); 401static int rbd_img_request_submit(struct rbd_img_request *img_request);
393 402
394static void rbd_dev_device_release(struct device *dev); 403static void rbd_dev_device_release(struct device *dev);
@@ -397,21 +406,44 @@ static ssize_t rbd_add(struct bus_type *bus, const char *buf,
397 size_t count); 406 size_t count);
398static ssize_t rbd_remove(struct bus_type *bus, const char *buf, 407static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
399 size_t count); 408 size_t count);
409static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
410 size_t count);
411static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
412 size_t count);
400static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping); 413static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
401static void rbd_spec_put(struct rbd_spec *spec); 414static void rbd_spec_put(struct rbd_spec *spec);
402 415
416static int rbd_dev_id_to_minor(int dev_id)
417{
418 return dev_id << RBD_PART_SHIFT;
419}
420
421static int minor_to_rbd_dev_id(int minor)
422{
423 return minor >> RBD_PART_SHIFT;
424}
425
403static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); 426static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
404static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); 427static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
428static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
429static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
405 430
406static struct attribute *rbd_bus_attrs[] = { 431static struct attribute *rbd_bus_attrs[] = {
407 &bus_attr_add.attr, 432 &bus_attr_add.attr,
408 &bus_attr_remove.attr, 433 &bus_attr_remove.attr,
434 &bus_attr_add_single_major.attr,
435 &bus_attr_remove_single_major.attr,
409 NULL, 436 NULL,
410}; 437};
411 438
412static umode_t rbd_bus_is_visible(struct kobject *kobj, 439static umode_t rbd_bus_is_visible(struct kobject *kobj,
413 struct attribute *attr, int index) 440 struct attribute *attr, int index)
414{ 441{
442 if (!single_major &&
443 (attr == &bus_attr_add_single_major.attr ||
444 attr == &bus_attr_remove_single_major.attr))
445 return 0;
446
415 return attr->mode; 447 return attr->mode;
416} 448}
417 449
@@ -3402,7 +3434,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
3402 u64 segment_size; 3434 u64 segment_size;
3403 3435
3404 /* create gendisk info */ 3436 /* create gendisk info */
3405 disk = alloc_disk(RBD_MINORS_PER_MAJOR); 3437 disk = alloc_disk(1 << RBD_PART_SHIFT);
3406 if (!disk) 3438 if (!disk)
3407 return -ENOMEM; 3439 return -ENOMEM;
3408 3440
@@ -4403,7 +4435,9 @@ static int rbd_dev_id_get(struct rbd_device *rbd_dev)
4403{ 4435{
4404 int new_dev_id; 4436 int new_dev_id;
4405 4437
4406 new_dev_id = ida_simple_get(&rbd_dev_id_ida, 0, 0, GFP_KERNEL); 4438 new_dev_id = ida_simple_get(&rbd_dev_id_ida,
4439 0, minor_to_rbd_dev_id(1 << MINORBITS),
4440 GFP_KERNEL);
4407 if (new_dev_id < 0) 4441 if (new_dev_id < 0)
4408 return new_dev_id; 4442 return new_dev_id;
4409 4443
@@ -4863,13 +4897,19 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
4863 < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); 4897 < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
4864 sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); 4898 sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
4865 4899
4866 /* Get our block major device number. */ 4900 /* Record our major and minor device numbers. */
4867 4901
4868 ret = register_blkdev(0, rbd_dev->name); 4902 if (!single_major) {
4869 if (ret < 0) 4903 ret = register_blkdev(0, rbd_dev->name);
4870 goto err_out_id; 4904 if (ret < 0)
4871 rbd_dev->major = ret; 4905 goto err_out_id;
4872 rbd_dev->minor = 0; 4906
4907 rbd_dev->major = ret;
4908 rbd_dev->minor = 0;
4909 } else {
4910 rbd_dev->major = rbd_major;
4911 rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id);
4912 }
4873 4913
4874 /* Set up the blkdev mapping. */ 4914 /* Set up the blkdev mapping. */
4875 4915
@@ -4901,7 +4941,8 @@ err_out_mapping:
4901err_out_disk: 4941err_out_disk:
4902 rbd_free_disk(rbd_dev); 4942 rbd_free_disk(rbd_dev);
4903err_out_blkdev: 4943err_out_blkdev:
4904 unregister_blkdev(rbd_dev->major, rbd_dev->name); 4944 if (!single_major)
4945 unregister_blkdev(rbd_dev->major, rbd_dev->name);
4905err_out_id: 4946err_out_id:
4906 rbd_dev_id_put(rbd_dev); 4947 rbd_dev_id_put(rbd_dev);
4907 rbd_dev_mapping_clear(rbd_dev); 4948 rbd_dev_mapping_clear(rbd_dev);
@@ -5022,9 +5063,9 @@ err_out_format:
5022 return ret; 5063 return ret;
5023} 5064}
5024 5065
5025static ssize_t rbd_add(struct bus_type *bus, 5066static ssize_t do_rbd_add(struct bus_type *bus,
5026 const char *buf, 5067 const char *buf,
5027 size_t count) 5068 size_t count)
5028{ 5069{
5029 struct rbd_device *rbd_dev = NULL; 5070 struct rbd_device *rbd_dev = NULL;
5030 struct ceph_options *ceph_opts = NULL; 5071 struct ceph_options *ceph_opts = NULL;
@@ -5106,6 +5147,23 @@ err_out_module:
5106 return (ssize_t)rc; 5147 return (ssize_t)rc;
5107} 5148}
5108 5149
5150static ssize_t rbd_add(struct bus_type *bus,
5151 const char *buf,
5152 size_t count)
5153{
5154 if (single_major)
5155 return -EINVAL;
5156
5157 return do_rbd_add(bus, buf, count);
5158}
5159
5160static ssize_t rbd_add_single_major(struct bus_type *bus,
5161 const char *buf,
5162 size_t count)
5163{
5164 return do_rbd_add(bus, buf, count);
5165}
5166
5109static void rbd_dev_device_release(struct device *dev) 5167static void rbd_dev_device_release(struct device *dev)
5110{ 5168{
5111 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 5169 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
@@ -5113,8 +5171,8 @@ static void rbd_dev_device_release(struct device *dev)
5113 rbd_free_disk(rbd_dev); 5171 rbd_free_disk(rbd_dev);
5114 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 5172 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
5115 rbd_dev_mapping_clear(rbd_dev); 5173 rbd_dev_mapping_clear(rbd_dev);
5116 unregister_blkdev(rbd_dev->major, rbd_dev->name); 5174 if (!single_major)
5117 rbd_dev->major = 0; 5175 unregister_blkdev(rbd_dev->major, rbd_dev->name);
5118 rbd_dev_id_put(rbd_dev); 5176 rbd_dev_id_put(rbd_dev);
5119 rbd_dev_mapping_clear(rbd_dev); 5177 rbd_dev_mapping_clear(rbd_dev);
5120} 5178}
@@ -5145,9 +5203,9 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
5145 } 5203 }
5146} 5204}
5147 5205
5148static ssize_t rbd_remove(struct bus_type *bus, 5206static ssize_t do_rbd_remove(struct bus_type *bus,
5149 const char *buf, 5207 const char *buf,
5150 size_t count) 5208 size_t count)
5151{ 5209{
5152 struct rbd_device *rbd_dev = NULL; 5210 struct rbd_device *rbd_dev = NULL;
5153 struct list_head *tmp; 5211 struct list_head *tmp;
@@ -5210,6 +5268,23 @@ static ssize_t rbd_remove(struct bus_type *bus,
5210 return count; 5268 return count;
5211} 5269}
5212 5270
5271static ssize_t rbd_remove(struct bus_type *bus,
5272 const char *buf,
5273 size_t count)
5274{
5275 if (single_major)
5276 return -EINVAL;
5277
5278 return do_rbd_remove(bus, buf, count);
5279}
5280
5281static ssize_t rbd_remove_single_major(struct bus_type *bus,
5282 const char *buf,
5283 size_t count)
5284{
5285 return do_rbd_remove(bus, buf, count);
5286}
5287
5213/* 5288/*
5214 * create control files in sysfs 5289 * create control files in sysfs
5215 * /sys/bus/rbd/... 5290 * /sys/bus/rbd/...
@@ -5298,13 +5373,28 @@ static int __init rbd_init(void)
5298 if (rc) 5373 if (rc)
5299 return rc; 5374 return rc;
5300 5375
5376 if (single_major) {
5377 rbd_major = register_blkdev(0, RBD_DRV_NAME);
5378 if (rbd_major < 0) {
5379 rc = rbd_major;
5380 goto err_out_slab;
5381 }
5382 }
5383
5301 rc = rbd_sysfs_init(); 5384 rc = rbd_sysfs_init();
5302 if (rc) 5385 if (rc)
5303 goto err_out_slab; 5386 goto err_out_blkdev;
5387
5388 if (single_major)
5389 pr_info("loaded (major %d)\n", rbd_major);
5390 else
5391 pr_info("loaded\n");
5304 5392
5305 pr_info("loaded\n");
5306 return 0; 5393 return 0;
5307 5394
5395err_out_blkdev:
5396 if (single_major)
5397 unregister_blkdev(rbd_major, RBD_DRV_NAME);
5308err_out_slab: 5398err_out_slab:
5309 rbd_slab_exit(); 5399 rbd_slab_exit();
5310 return rc; 5400 return rc;
@@ -5313,6 +5403,8 @@ err_out_slab:
5313static void __exit rbd_exit(void) 5403static void __exit rbd_exit(void)
5314{ 5404{
5315 rbd_sysfs_cleanup(); 5405 rbd_sysfs_cleanup();
5406 if (single_major)
5407 unregister_blkdev(rbd_major, RBD_DRV_NAME);
5316 rbd_slab_exit(); 5408 rbd_slab_exit();
5317} 5409}
5318 5410