diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2011-09-28 07:43:09 -0400 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2011-10-04 06:13:59 -0400 |
commit | d866d875f68fdeae63df334d291fe138dc636d96 (patch) | |
tree | 9606674db2311ab869640526ef245aaa7fbf4ea8 /fs | |
parent | eb507bc18969f63b8968034144fd69706c492516 (diff) |
ore/exofs: Change the type of the devices array (API change)
In the pNFS obj-LD the device table at the layout level needs
to point to a device_cache node, where it is possible and likely
that many layouts will point to the same device-nodes.
In Exofs we have a more orderly structure where we have a single
array of devices that repeats twice for a round-robin view of the
device table.
This patch moves to a model that can be used by the pNFS obj-LD,
where struct ore_components holds an array of ore_dev pointers.
(ore_dev is newly defined and contains a struct osd_dev *od
member.)
Each pointer in the array of pointers will point to a bigger
user-defined dev_struct, which can be accessed by use of the
container_of macro.
In Exofs, an __alloc_dev_table() function allocates the
ore_dev-pointers array as well as an exofs_dev array in one
allocation, and does the address arithmetic to set every pointer
correctly. It still keeps the twice-bigger table trick for the
inodes' round-robin view of the table.
The device table is always allocated dynamically, even in the
single-device case, so it is unconditionally freed at umount.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/exofs/exofs.h | 10 | ||||
-rw-r--r-- | fs/exofs/ore.c | 2 | ||||
-rw-r--r-- | fs/exofs/super.c | 99 |
3 files changed, 69 insertions, 42 deletions
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 3b2e0478f363..006fd6f33571 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -53,6 +53,10 @@ | |||
53 | /* u64 has problems with printk this will cast it to unsigned long long */ | 53 | /* u64 has problems with printk this will cast it to unsigned long long */ |
54 | #define _LLU(x) (unsigned long long)(x) | 54 | #define _LLU(x) (unsigned long long)(x) |
55 | 55 | ||
56 | struct exofs_dev { | ||
57 | struct ore_dev ored; | ||
58 | unsigned did; | ||
59 | }; | ||
56 | /* | 60 | /* |
57 | * our extension to the in-memory superblock | 61 | * our extension to the in-memory superblock |
58 | */ | 62 | */ |
@@ -69,7 +73,6 @@ struct exofs_sb_info { | |||
69 | struct ore_layout layout; /* Default files layout */ | 73 | struct ore_layout layout; /* Default files layout */ |
70 | struct ore_comp one_comp; /* id & cred of partition id=0*/ | 74 | struct ore_comp one_comp; /* id & cred of partition id=0*/ |
71 | struct ore_components oc; /* comps for the partition */ | 75 | struct ore_components oc; /* comps for the partition */ |
72 | struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ | ||
73 | }; | 76 | }; |
74 | 77 | ||
75 | /* | 78 | /* |
@@ -214,13 +217,14 @@ static inline void exofs_init_comps(struct ore_components *oc, | |||
214 | one_comp->obj.id = oid; | 217 | one_comp->obj.id = oid; |
215 | exofs_make_credential(one_comp->cred, &one_comp->obj); | 218 | exofs_make_credential(one_comp->cred, &one_comp->obj); |
216 | 219 | ||
217 | oc->numdevs = sbi->oc.numdevs; | 220 | oc->numdevs = sbi->layout.group_width * sbi->layout.mirrors_p1 * |
221 | sbi->layout.group_count; | ||
218 | oc->single_comp = EC_SINGLE_COMP; | 222 | oc->single_comp = EC_SINGLE_COMP; |
219 | oc->comps = one_comp; | 223 | oc->comps = one_comp; |
220 | 224 | ||
221 | /* Round robin device view of the table */ | 225 | /* Round robin device view of the table */ |
222 | first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs; | 226 | first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs; |
223 | oc->ods = sbi->oc.ods + first_dev; | 227 | oc->ods = &sbi->oc.ods[first_dev]; |
224 | } | 228 | } |
225 | 229 | ||
226 | #endif | 230 | #endif |
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index c2b0033a724b..a7d79257fc65 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
@@ -59,7 +59,7 @@ static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index) | |||
59 | 59 | ||
60 | static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) | 60 | static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) |
61 | { | 61 | { |
62 | return ios->oc->ods[index]; | 62 | return ore_comp_dev(ios->oc, index); |
63 | } | 63 | } |
64 | 64 | ||
65 | int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, | 65 | int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 90b4c526939f..bce3686f0aa0 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -431,17 +431,18 @@ static void _exofs_print_device(const char *msg, const char *dev_path, | |||
431 | 431 | ||
432 | static void exofs_free_sbi(struct exofs_sb_info *sbi) | 432 | static void exofs_free_sbi(struct exofs_sb_info *sbi) |
433 | { | 433 | { |
434 | while (sbi->oc.numdevs) { | 434 | unsigned numdevs = sbi->oc.numdevs; |
435 | int i = --sbi->oc.numdevs; | 435 | |
436 | struct osd_dev *od = sbi->oc.ods[i]; | 436 | while (numdevs) { |
437 | unsigned i = --numdevs; | ||
438 | struct osd_dev *od = ore_comp_dev(&sbi->oc, i); | ||
437 | 439 | ||
438 | if (od) { | 440 | if (od) { |
439 | sbi->oc.ods[i] = NULL; | 441 | ore_comp_set_dev(&sbi->oc, i, NULL); |
440 | osduld_put_device(od); | 442 | osduld_put_device(od); |
441 | } | 443 | } |
442 | } | 444 | } |
443 | if (sbi->oc.ods != sbi->_min_one_dev) | 445 | kfree(sbi->oc.ods); |
444 | kfree(sbi->oc.ods); | ||
445 | kfree(sbi); | 446 | kfree(sbi); |
446 | } | 447 | } |
447 | 448 | ||
@@ -468,7 +469,7 @@ static void exofs_put_super(struct super_block *sb) | |||
468 | msecs_to_jiffies(100)); | 469 | msecs_to_jiffies(100)); |
469 | } | 470 | } |
470 | 471 | ||
471 | _exofs_print_device("Unmounting", NULL, sbi->oc.ods[0], | 472 | _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0), |
472 | sbi->one_comp.obj.partition); | 473 | sbi->one_comp.obj.partition); |
473 | 474 | ||
474 | bdi_destroy(&sbi->bdi); | 475 | bdi_destroy(&sbi->bdi); |
@@ -592,12 +593,40 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, | |||
592 | return !(odi->systemid_len || odi->osdname_len); | 593 | return !(odi->systemid_len || odi->osdname_len); |
593 | } | 594 | } |
594 | 595 | ||
596 | int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, | ||
597 | struct exofs_dev **peds) | ||
598 | { | ||
599 | struct __alloc_ore_devs_and_exofs_devs { | ||
600 | /* Twice bigger table: See exofs_init_comps() and comment at | ||
601 | * exofs_read_lookup_dev_table() | ||
602 | */ | ||
603 | struct ore_dev *oreds[numdevs * 2 - 1]; | ||
604 | struct exofs_dev eds[numdevs]; | ||
605 | } *aoded; | ||
606 | struct exofs_dev *eds; | ||
607 | unsigned i; | ||
608 | |||
609 | aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); | ||
610 | if (unlikely(!aoded)) { | ||
611 | EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", | ||
612 | numdevs); | ||
613 | return -ENOMEM; | ||
614 | } | ||
615 | |||
616 | sbi->oc.ods = aoded->oreds; | ||
617 | *peds = eds = aoded->eds; | ||
618 | for (i = 0; i < numdevs; ++i) | ||
619 | aoded->oreds[i] = &eds[i].ored; | ||
620 | return 0; | ||
621 | } | ||
622 | |||
595 | static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | 623 | static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, |
596 | struct osd_dev *fscb_od, | 624 | struct osd_dev *fscb_od, |
597 | unsigned table_count) | 625 | unsigned table_count) |
598 | { | 626 | { |
599 | struct ore_comp comp; | 627 | struct ore_comp comp; |
600 | struct exofs_device_table *dt; | 628 | struct exofs_device_table *dt; |
629 | struct exofs_dev *eds; | ||
601 | unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + | 630 | unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + |
602 | sizeof(*dt); | 631 | sizeof(*dt); |
603 | unsigned numdevs, i; | 632 | unsigned numdevs, i; |
@@ -634,20 +663,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
634 | if (unlikely(ret)) | 663 | if (unlikely(ret)) |
635 | goto out; | 664 | goto out; |
636 | 665 | ||
637 | if (likely(numdevs > 1)) { | 666 | ret = __alloc_dev_table(sbi, numdevs, &eds); |
638 | unsigned size = numdevs * sizeof(sbi->oc.ods[0]); | 667 | if (unlikely(ret)) |
639 | 668 | goto out; | |
640 | /* Twice bigger table: See exofs_init_comps() and below | 669 | /* exofs round-robins the device table view according to inode |
641 | * comment | 670 | * number. We hold a: twice bigger table hence inodes can point |
642 | */ | 671 | * to any device and have a sequential view of the table |
643 | sbi->oc.ods = kzalloc(size + size - 1, GFP_KERNEL); | 672 | * starting at this device. See exofs_init_comps() |
644 | if (unlikely(!sbi->oc.ods)) { | 673 | */ |
645 | EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", | 674 | memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0], |
646 | numdevs); | 675 | (numdevs - 1) * sizeof(sbi->oc.ods[0])); |
647 | ret = -ENOMEM; | ||
648 | goto out; | ||
649 | } | ||
650 | } | ||
651 | 676 | ||
652 | for (i = 0; i < numdevs; i++) { | 677 | for (i = 0; i < numdevs; i++) { |
653 | struct exofs_fscb fscb; | 678 | struct exofs_fscb fscb; |
@@ -663,12 +688,15 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
663 | printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", | 688 | printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", |
664 | i, odi.osdname); | 689 | i, odi.osdname); |
665 | 690 | ||
691 | /* the exofs id is currently the table index */ | ||
692 | eds[i].did = i; | ||
693 | |||
666 | /* On all devices the device table is identical. The user can | 694 | /* On all devices the device table is identical. The user can |
667 | * specify any one of the participating devices on the command | 695 | * specify any one of the participating devices on the command |
668 | * line. We always keep them in device-table order. | 696 | * line. We always keep them in device-table order. |
669 | */ | 697 | */ |
670 | if (fscb_od && osduld_device_same(fscb_od, &odi)) { | 698 | if (fscb_od && osduld_device_same(fscb_od, &odi)) { |
671 | sbi->oc.ods[i] = fscb_od; | 699 | eds[i].ored.od = fscb_od; |
672 | ++sbi->oc.numdevs; | 700 | ++sbi->oc.numdevs; |
673 | fscb_od = NULL; | 701 | fscb_od = NULL; |
674 | continue; | 702 | continue; |
@@ -682,7 +710,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
682 | goto out; | 710 | goto out; |
683 | } | 711 | } |
684 | 712 | ||
685 | sbi->oc.ods[i] = od; | 713 | eds[i].ored.od = od; |
686 | ++sbi->oc.numdevs; | 714 | ++sbi->oc.numdevs; |
687 | 715 | ||
688 | /* Read the fscb of the other devices to make sure the FS | 716 | /* Read the fscb of the other devices to make sure the FS |
@@ -705,21 +733,10 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, | |||
705 | 733 | ||
706 | out: | 734 | out: |
707 | kfree(dt); | 735 | kfree(dt); |
708 | if (likely(!ret)) { | 736 | if (unlikely(fscb_od && !ret)) { |
709 | unsigned numdevs = sbi->oc.numdevs; | ||
710 | |||
711 | if (unlikely(fscb_od)) { | ||
712 | EXOFS_ERR("ERROR: Bad device-table container device not present\n"); | 737 | EXOFS_ERR("ERROR: Bad device-table container device not present\n"); |
713 | osduld_put_device(fscb_od); | 738 | osduld_put_device(fscb_od); |
714 | return -EINVAL; | 739 | return -EINVAL; |
715 | } | ||
716 | /* exofs round-robins the device table view according to inode | ||
717 | * number. We hold a: twice bigger table hence inodes can point | ||
718 | * to any device and have a sequential view of the table | ||
719 | * starting at this device. See exofs_init_comps() | ||
720 | */ | ||
721 | for (i = 0; i < numdevs - 1; ++i) | ||
722 | sbi->oc.ods[i + numdevs] = sbi->oc.ods[i]; | ||
723 | } | 740 | } |
724 | return ret; | 741 | return ret; |
725 | } | 742 | } |
@@ -773,7 +790,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
773 | sbi->oc.numdevs = 1; | 790 | sbi->oc.numdevs = 1; |
774 | sbi->oc.single_comp = EC_SINGLE_COMP; | 791 | sbi->oc.single_comp = EC_SINGLE_COMP; |
775 | sbi->oc.comps = &sbi->one_comp; | 792 | sbi->oc.comps = &sbi->one_comp; |
776 | sbi->oc.ods = sbi->_min_one_dev; | ||
777 | 793 | ||
778 | /* fill in some other data by hand */ | 794 | /* fill in some other data by hand */ |
779 | memset(sb->s_id, 0, sizeof(sb->s_id)); | 795 | memset(sb->s_id, 0, sizeof(sb->s_id)); |
@@ -822,7 +838,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
822 | if (unlikely(ret)) | 838 | if (unlikely(ret)) |
823 | goto free_sbi; | 839 | goto free_sbi; |
824 | } else { | 840 | } else { |
825 | sbi->oc.ods[0] = od; | 841 | struct exofs_dev *eds; |
842 | |||
843 | ret = __alloc_dev_table(sbi, 1, &eds); | ||
844 | if (unlikely(ret)) | ||
845 | goto free_sbi; | ||
846 | |||
847 | ore_comp_set_dev(&sbi->oc, 0, od); | ||
826 | } | 848 | } |
827 | 849 | ||
828 | __sbi_read_stats(sbi); | 850 | __sbi_read_stats(sbi); |
@@ -862,7 +884,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
862 | goto free_sbi; | 884 | goto free_sbi; |
863 | } | 885 | } |
864 | 886 | ||
865 | _exofs_print_device("Mounting", opts->dev_name, sbi->oc.ods[0], | 887 | _exofs_print_device("Mounting", opts->dev_name, |
888 | ore_comp_dev(&sbi->oc, 0), | ||
866 | sbi->one_comp.obj.partition); | 889 | sbi->one_comp.obj.partition); |
867 | return 0; | 890 | return 0; |
868 | 891 | ||