diff options
author | Boaz Harrosh <boaz@plexistor.com> | 2015-01-07 11:07:56 -0500 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2015-01-13 23:59:03 -0500 |
commit | 937af5ecd0591e84ee54180fa97dcbe9bbe5fed6 (patch) | |
tree | 490017495a84e6291894b8b0561ab06d02f45805 /drivers/block | |
parent | d4119ee0e1aa2b74e5e367cbc915e79db7b9e271 (diff) |
brd: Fix all partitions BUGs
This patch fixes up brd's partition scheme, which now gets the best of
all worlds.
The MAIN fix here is that currently, if one fdisks some partitions,
a BAD bug will make all partitions point to the same start-end sector
range, i.e. 0 - brd_size, and an mkfs of any partition would trash the
partition table and the other partitions.
Another fix is that "mount -U uuid" will not work if part_show was not
specified, because of the GENHD_FL_SUPPRESS_PARTITION_INFO flag.
We now always load without it and remove the part_show parameter.
[We remove Dmitry's new module-param part_show; partitions are now
always shown]
So NOW the logic goes like this:
* max_part - Just says how many minors to reserve between ramX
devices. In any case, there can be as many partitions as requested.
If the minors between devices run out, then dynamic 259-major ids will
be allocated on the fly.
The default is now max_part=1, which means all partition devt(s)
will be from the dynamic (259) major-range.
(If persistent partition minors are needed, use max_part=X)
For example, with /dev/sdX max_part is hard-coded to 16.
* Creation of new devices on the fly still (and always) works:
mknod /path/devnod b 1 X
fdisk -l /path/devnod
Will create a new device if [X / max_part] was not already
created before. (Just as before)
Partitions on the dynamically created device will work as well.
The same logic applies with minors as with the pre-created ones.
TODO: dynamic growth of device size, so each device can have its
own size.
CC: Dmitry Monakhov <dmonakhov@openvz.org>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Boaz Harrosh <boaz@plexistor.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/brd.c | 100 |
1 files changed, 38 insertions, 62 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 89e90ec52f28..a7463c9595e7 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c | |||
@@ -438,19 +438,18 @@ static const struct block_device_operations brd_fops = { | |||
438 | /* | 438 | /* |
439 | * And now the modules code and kernel interface. | 439 | * And now the modules code and kernel interface. |
440 | */ | 440 | */ |
441 | static int rd_nr; | 441 | static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; |
442 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; | ||
443 | static int max_part; | ||
444 | static int part_shift; | ||
445 | static int part_show = 0; | ||
446 | module_param(rd_nr, int, S_IRUGO); | 442 | module_param(rd_nr, int, S_IRUGO); |
447 | MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); | 443 | MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); |
444 | |||
445 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; | ||
448 | module_param(rd_size, int, S_IRUGO); | 446 | module_param(rd_size, int, S_IRUGO); |
449 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); | 447 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); |
448 | |||
449 | static int max_part = 1; | ||
450 | module_param(max_part, int, S_IRUGO); | 450 | module_param(max_part, int, S_IRUGO); |
451 | MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk"); | 451 | MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices"); |
452 | module_param(part_show, int, S_IRUGO); | 452 | |
453 | MODULE_PARM_DESC(part_show, "Control RAM disk visibility in /proc/partitions"); | ||
454 | MODULE_LICENSE("GPL"); | 453 | MODULE_LICENSE("GPL"); |
455 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | 454 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); |
456 | MODULE_ALIAS("rd"); | 455 | MODULE_ALIAS("rd"); |
@@ -496,16 +495,15 @@ static struct brd_device *brd_alloc(int i) | |||
496 | brd->brd_queue->limits.discard_zeroes_data = 1; | 495 | brd->brd_queue->limits.discard_zeroes_data = 1; |
497 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); | 496 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); |
498 | 497 | ||
499 | disk = brd->brd_disk = alloc_disk(1 << part_shift); | 498 | disk = brd->brd_disk = alloc_disk(max_part); |
500 | if (!disk) | 499 | if (!disk) |
501 | goto out_free_queue; | 500 | goto out_free_queue; |
502 | disk->major = RAMDISK_MAJOR; | 501 | disk->major = RAMDISK_MAJOR; |
503 | disk->first_minor = i << part_shift; | 502 | disk->first_minor = i * max_part; |
504 | disk->fops = &brd_fops; | 503 | disk->fops = &brd_fops; |
505 | disk->private_data = brd; | 504 | disk->private_data = brd; |
506 | disk->queue = brd->brd_queue; | 505 | disk->queue = brd->brd_queue; |
507 | if (!part_show) | 506 | disk->flags = GENHD_FL_EXT_DEVT; |
508 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | ||
509 | sprintf(disk->disk_name, "ram%d", i); | 507 | sprintf(disk->disk_name, "ram%d", i); |
510 | set_capacity(disk, rd_size * 2); | 508 | set_capacity(disk, rd_size * 2); |
511 | 509 | ||
@@ -527,10 +525,11 @@ static void brd_free(struct brd_device *brd) | |||
527 | kfree(brd); | 525 | kfree(brd); |
528 | } | 526 | } |
529 | 527 | ||
530 | static struct brd_device *brd_init_one(int i) | 528 | static struct brd_device *brd_init_one(int i, bool *new) |
531 | { | 529 | { |
532 | struct brd_device *brd; | 530 | struct brd_device *brd; |
533 | 531 | ||
532 | *new = false; | ||
534 | list_for_each_entry(brd, &brd_devices, brd_list) { | 533 | list_for_each_entry(brd, &brd_devices, brd_list) { |
535 | if (brd->brd_number == i) | 534 | if (brd->brd_number == i) |
536 | goto out; | 535 | goto out; |
@@ -541,6 +540,7 @@ static struct brd_device *brd_init_one(int i) | |||
541 | add_disk(brd->brd_disk); | 540 | add_disk(brd->brd_disk); |
542 | list_add_tail(&brd->brd_list, &brd_devices); | 541 | list_add_tail(&brd->brd_list, &brd_devices); |
543 | } | 542 | } |
543 | *new = true; | ||
544 | out: | 544 | out: |
545 | return brd; | 545 | return brd; |
546 | } | 546 | } |
@@ -556,70 +556,46 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) | |||
556 | { | 556 | { |
557 | struct brd_device *brd; | 557 | struct brd_device *brd; |
558 | struct kobject *kobj; | 558 | struct kobject *kobj; |
559 | bool new; | ||
559 | 560 | ||
560 | mutex_lock(&brd_devices_mutex); | 561 | mutex_lock(&brd_devices_mutex); |
561 | brd = brd_init_one(MINOR(dev) >> part_shift); | 562 | brd = brd_init_one(MINOR(dev) / max_part, &new); |
562 | kobj = brd ? get_disk(brd->brd_disk) : NULL; | 563 | kobj = brd ? get_disk(brd->brd_disk) : NULL; |
563 | mutex_unlock(&brd_devices_mutex); | 564 | mutex_unlock(&brd_devices_mutex); |
564 | 565 | ||
565 | *part = 0; | 566 | if (new) |
567 | *part = 0; | ||
568 | |||
566 | return kobj; | 569 | return kobj; |
567 | } | 570 | } |
568 | 571 | ||
569 | static int __init brd_init(void) | 572 | static int __init brd_init(void) |
570 | { | 573 | { |
571 | int i, nr; | ||
572 | unsigned long range; | ||
573 | struct brd_device *brd, *next; | 574 | struct brd_device *brd, *next; |
575 | int i; | ||
574 | 576 | ||
575 | /* | 577 | /* |
576 | * brd module now has a feature to instantiate underlying device | 578 | * brd module now has a feature to instantiate underlying device |
577 | * structure on-demand, provided that there is an access dev node. | 579 | * structure on-demand, provided that there is an access dev node. |
578 | * However, this will not work well with user space tool that doesn't | ||
579 | * know about such "feature". In order to not break any existing | ||
580 | * tool, we do the following: | ||
581 | * | 580 | * |
582 | * (1) if rd_nr is specified, create that many upfront, and this | 581 | * (1) if rd_nr is specified, create that many upfront. else |
583 | * also becomes a hard limit. | 582 | * it defaults to CONFIG_BLK_DEV_RAM_COUNT |
584 | * (2) if rd_nr is not specified, create CONFIG_BLK_DEV_RAM_COUNT | 583 | * (2) User can further extend brd devices by create dev node themselves |
585 | * (default 16) rd device on module load, user can further | 584 | * and have kernel automatically instantiate actual device |
586 | * extend brd device by create dev node themselves and have | 585 | * on-demand. Example: |
587 | * kernel automatically instantiate actual device on-demand. | 586 | * mknod /path/devnod_name b 1 X # 1 is the rd major |
587 | * fdisk -l /path/devnod_name | ||
588 | * If (X / max_part) was not already created it will be created | ||
589 | * dynamically. | ||
588 | */ | 590 | */ |
589 | 591 | ||
590 | part_shift = 0; | ||
591 | if (max_part > 0) { | ||
592 | part_shift = fls(max_part); | ||
593 | |||
594 | /* | ||
595 | * Adjust max_part according to part_shift as it is exported | ||
596 | * to user space so that user can decide correct minor number | ||
597 | * if [s]he want to create more devices. | ||
598 | * | ||
599 | * Note that -1 is required because partition 0 is reserved | ||
600 | * for the whole disk. | ||
601 | */ | ||
602 | max_part = (1UL << part_shift) - 1; | ||
603 | } | ||
604 | |||
605 | if ((1UL << part_shift) > DISK_MAX_PARTS) | ||
606 | return -EINVAL; | ||
607 | |||
608 | if (rd_nr > 1UL << (MINORBITS - part_shift)) | ||
609 | return -EINVAL; | ||
610 | |||
611 | if (rd_nr) { | ||
612 | nr = rd_nr; | ||
613 | range = rd_nr << part_shift; | ||
614 | } else { | ||
615 | nr = CONFIG_BLK_DEV_RAM_COUNT; | ||
616 | range = 1UL << MINORBITS; | ||
617 | } | ||
618 | |||
619 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) | 592 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) |
620 | return -EIO; | 593 | return -EIO; |
621 | 594 | ||
622 | for (i = 0; i < nr; i++) { | 595 | if (unlikely(!max_part)) |
596 | max_part = 1; | ||
597 | |||
598 | for (i = 0; i < rd_nr; i++) { | ||
623 | brd = brd_alloc(i); | 599 | brd = brd_alloc(i); |
624 | if (!brd) | 600 | if (!brd) |
625 | goto out_free; | 601 | goto out_free; |
@@ -631,10 +607,10 @@ static int __init brd_init(void) | |||
631 | list_for_each_entry(brd, &brd_devices, brd_list) | 607 | list_for_each_entry(brd, &brd_devices, brd_list) |
632 | add_disk(brd->brd_disk); | 608 | add_disk(brd->brd_disk); |
633 | 609 | ||
634 | blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range, | 610 | blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, |
635 | THIS_MODULE, brd_probe, NULL, NULL); | 611 | THIS_MODULE, brd_probe, NULL, NULL); |
636 | 612 | ||
637 | printk(KERN_INFO "brd: module loaded\n"); | 613 | pr_info("brd: module loaded\n"); |
638 | return 0; | 614 | return 0; |
639 | 615 | ||
640 | out_free: | 616 | out_free: |
@@ -644,21 +620,21 @@ out_free: | |||
644 | } | 620 | } |
645 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | 621 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); |
646 | 622 | ||
623 | pr_info("brd: module NOT loaded !!!\n"); | ||
647 | return -ENOMEM; | 624 | return -ENOMEM; |
648 | } | 625 | } |
649 | 626 | ||
650 | static void __exit brd_exit(void) | 627 | static void __exit brd_exit(void) |
651 | { | 628 | { |
652 | unsigned long range; | ||
653 | struct brd_device *brd, *next; | 629 | struct brd_device *brd, *next; |
654 | 630 | ||
655 | range = rd_nr ? rd_nr << part_shift : 1UL << MINORBITS; | ||
656 | |||
657 | list_for_each_entry_safe(brd, next, &brd_devices, brd_list) | 631 | list_for_each_entry_safe(brd, next, &brd_devices, brd_list) |
658 | brd_del_one(brd); | 632 | brd_del_one(brd); |
659 | 633 | ||
660 | blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range); | 634 | blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS); |
661 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); | 635 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); |
636 | |||
637 | pr_info("brd: module unloaded\n"); | ||
662 | } | 638 | } |
663 | 639 | ||
664 | module_init(brd_init); | 640 | module_init(brd_init); |