aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs
diff options
context:
space:
mode:
authorBoaz Harrosh <bharrosh@panasas.com>2011-09-28 07:43:09 -0400
committerBoaz Harrosh <bharrosh@panasas.com>2011-10-04 06:13:59 -0400
commitd866d875f68fdeae63df334d291fe138dc636d96 (patch)
tree9606674db2311ab869640526ef245aaa7fbf4ea8 /fs/exofs
parenteb507bc18969f63b8968034144fd69706c492516 (diff)
ore/exofs: Change the type of the devices array (API change)
In the pNFS obj-LD, the device table at the layout level needs to point to a device_cache node, where it is possible and likely that many layouts will point to the same device nodes. In Exofs we have a more orderly structure: a single array of devices that repeats twice, giving a round-robin view of the device table. This patch moves to a model that can also be used by the pNFS obj-LD, where struct ore_components holds an array of ore_dev pointers. (ore_dev is newly defined and contains a struct osd_dev *od member.) Each pointer in the array points into a bigger user-defined dev_struct, which can be reached with the container_of macro. In Exofs, an __alloc_dev_table() function allocates the ore_dev-pointers array together with an exofs_dev array in a single allocation, and does the address arithmetic needed to make everything point to the right place. It still keeps the double-sized-table trick for the inodes' round-robin view of the table. The device table is now always allocated dynamically, even in the single-device case, so it is unconditionally freed at umount. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs')
-rw-r--r--fs/exofs/exofs.h10
-rw-r--r--fs/exofs/ore.c2
-rw-r--r--fs/exofs/super.c99
3 files changed, 69 insertions, 42 deletions
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 3b2e0478f36..006fd6f3357 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -53,6 +53,10 @@
53/* u64 has problems with printk this will cast it to unsigned long long */ 53/* u64 has problems with printk this will cast it to unsigned long long */
54#define _LLU(x) (unsigned long long)(x) 54#define _LLU(x) (unsigned long long)(x)
55 55
56struct exofs_dev {
57 struct ore_dev ored;
58 unsigned did;
59};
56/* 60/*
57 * our extension to the in-memory superblock 61 * our extension to the in-memory superblock
58 */ 62 */
@@ -69,7 +73,6 @@ struct exofs_sb_info {
69 struct ore_layout layout; /* Default files layout */ 73 struct ore_layout layout; /* Default files layout */
70 struct ore_comp one_comp; /* id & cred of partition id=0*/ 74 struct ore_comp one_comp; /* id & cred of partition id=0*/
71 struct ore_components oc; /* comps for the partition */ 75 struct ore_components oc; /* comps for the partition */
72 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
73}; 76};
74 77
75/* 78/*
@@ -214,13 +217,14 @@ static inline void exofs_init_comps(struct ore_components *oc,
214 one_comp->obj.id = oid; 217 one_comp->obj.id = oid;
215 exofs_make_credential(one_comp->cred, &one_comp->obj); 218 exofs_make_credential(one_comp->cred, &one_comp->obj);
216 219
217 oc->numdevs = sbi->oc.numdevs; 220 oc->numdevs = sbi->layout.group_width * sbi->layout.mirrors_p1 *
221 sbi->layout.group_count;
218 oc->single_comp = EC_SINGLE_COMP; 222 oc->single_comp = EC_SINGLE_COMP;
219 oc->comps = one_comp; 223 oc->comps = one_comp;
220 224
221 /* Round robin device view of the table */ 225 /* Round robin device view of the table */
222 first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs; 226 first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs;
223 oc->ods = sbi->oc.ods + first_dev; 227 oc->ods = &sbi->oc.ods[first_dev];
224} 228}
225 229
226#endif 230#endif
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index c2b0033a724..a7d79257fc6 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -59,7 +59,7 @@ static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
59 59
60static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) 60static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
61{ 61{
62 return ios->oc->ods[index]; 62 return ore_comp_dev(ios->oc, index);
63} 63}
64 64
65int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, 65int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 90b4c526939..bce3686f0aa 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -431,17 +431,18 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
431 431
432static void exofs_free_sbi(struct exofs_sb_info *sbi) 432static void exofs_free_sbi(struct exofs_sb_info *sbi)
433{ 433{
434 while (sbi->oc.numdevs) { 434 unsigned numdevs = sbi->oc.numdevs;
435 int i = --sbi->oc.numdevs; 435
436 struct osd_dev *od = sbi->oc.ods[i]; 436 while (numdevs) {
437 unsigned i = --numdevs;
438 struct osd_dev *od = ore_comp_dev(&sbi->oc, i);
437 439
438 if (od) { 440 if (od) {
439 sbi->oc.ods[i] = NULL; 441 ore_comp_set_dev(&sbi->oc, i, NULL);
440 osduld_put_device(od); 442 osduld_put_device(od);
441 } 443 }
442 } 444 }
443 if (sbi->oc.ods != sbi->_min_one_dev) 445 kfree(sbi->oc.ods);
444 kfree(sbi->oc.ods);
445 kfree(sbi); 446 kfree(sbi);
446} 447}
447 448
@@ -468,7 +469,7 @@ static void exofs_put_super(struct super_block *sb)
468 msecs_to_jiffies(100)); 469 msecs_to_jiffies(100));
469 } 470 }
470 471
471 _exofs_print_device("Unmounting", NULL, sbi->oc.ods[0], 472 _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0),
472 sbi->one_comp.obj.partition); 473 sbi->one_comp.obj.partition);
473 474
474 bdi_destroy(&sbi->bdi); 475 bdi_destroy(&sbi->bdi);
@@ -592,12 +593,40 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
592 return !(odi->systemid_len || odi->osdname_len); 593 return !(odi->systemid_len || odi->osdname_len);
593} 594}
594 595
596int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
597 struct exofs_dev **peds)
598{
599 struct __alloc_ore_devs_and_exofs_devs {
600 /* Twice bigger table: See exofs_init_comps() and comment at
601 * exofs_read_lookup_dev_table()
602 */
603 struct ore_dev *oreds[numdevs * 2 - 1];
604 struct exofs_dev eds[numdevs];
605 } *aoded;
606 struct exofs_dev *eds;
607 unsigned i;
608
609 aoded = kzalloc(sizeof(*aoded), GFP_KERNEL);
610 if (unlikely(!aoded)) {
611 EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
612 numdevs);
613 return -ENOMEM;
614 }
615
616 sbi->oc.ods = aoded->oreds;
617 *peds = eds = aoded->eds;
618 for (i = 0; i < numdevs; ++i)
619 aoded->oreds[i] = &eds[i].ored;
620 return 0;
621}
622
595static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, 623static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
596 struct osd_dev *fscb_od, 624 struct osd_dev *fscb_od,
597 unsigned table_count) 625 unsigned table_count)
598{ 626{
599 struct ore_comp comp; 627 struct ore_comp comp;
600 struct exofs_device_table *dt; 628 struct exofs_device_table *dt;
629 struct exofs_dev *eds;
601 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + 630 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
602 sizeof(*dt); 631 sizeof(*dt);
603 unsigned numdevs, i; 632 unsigned numdevs, i;
@@ -634,20 +663,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
634 if (unlikely(ret)) 663 if (unlikely(ret))
635 goto out; 664 goto out;
636 665
637 if (likely(numdevs > 1)) { 666 ret = __alloc_dev_table(sbi, numdevs, &eds);
638 unsigned size = numdevs * sizeof(sbi->oc.ods[0]); 667 if (unlikely(ret))
639 668 goto out;
640 /* Twice bigger table: See exofs_init_comps() and below 669 /* exofs round-robins the device table view according to inode
641 * comment 670 * number. We hold a: twice bigger table hence inodes can point
642 */ 671 * to any device and have a sequential view of the table
643 sbi->oc.ods = kzalloc(size + size - 1, GFP_KERNEL); 672 * starting at this device. See exofs_init_comps()
644 if (unlikely(!sbi->oc.ods)) { 673 */
645 EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", 674 memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0],
646 numdevs); 675 (numdevs - 1) * sizeof(sbi->oc.ods[0]));
647 ret = -ENOMEM;
648 goto out;
649 }
650 }
651 676
652 for (i = 0; i < numdevs; i++) { 677 for (i = 0; i < numdevs; i++) {
653 struct exofs_fscb fscb; 678 struct exofs_fscb fscb;
@@ -663,12 +688,15 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
663 printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n", 688 printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
664 i, odi.osdname); 689 i, odi.osdname);
665 690
691 /* the exofs id is currently the table index */
692 eds[i].did = i;
693
666 /* On all devices the device table is identical. The user can 694 /* On all devices the device table is identical. The user can
667 * specify any one of the participating devices on the command 695 * specify any one of the participating devices on the command
668 * line. We always keep them in device-table order. 696 * line. We always keep them in device-table order.
669 */ 697 */
670 if (fscb_od && osduld_device_same(fscb_od, &odi)) { 698 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
671 sbi->oc.ods[i] = fscb_od; 699 eds[i].ored.od = fscb_od;
672 ++sbi->oc.numdevs; 700 ++sbi->oc.numdevs;
673 fscb_od = NULL; 701 fscb_od = NULL;
674 continue; 702 continue;
@@ -682,7 +710,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
682 goto out; 710 goto out;
683 } 711 }
684 712
685 sbi->oc.ods[i] = od; 713 eds[i].ored.od = od;
686 ++sbi->oc.numdevs; 714 ++sbi->oc.numdevs;
687 715
688 /* Read the fscb of the other devices to make sure the FS 716 /* Read the fscb of the other devices to make sure the FS
@@ -705,21 +733,10 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
705 733
706out: 734out:
707 kfree(dt); 735 kfree(dt);
708 if (likely(!ret)) { 736 if (unlikely(fscb_od && !ret)) {
709 unsigned numdevs = sbi->oc.numdevs;
710
711 if (unlikely(fscb_od)) {
712 EXOFS_ERR("ERROR: Bad device-table container device not present\n"); 737 EXOFS_ERR("ERROR: Bad device-table container device not present\n");
713 osduld_put_device(fscb_od); 738 osduld_put_device(fscb_od);
714 return -EINVAL; 739 return -EINVAL;
715 }
716 /* exofs round-robins the device table view according to inode
717 * number. We hold a: twice bigger table hence inodes can point
718 * to any device and have a sequential view of the table
719 * starting at this device. See exofs_init_comps()
720 */
721 for (i = 0; i < numdevs - 1; ++i)
722 sbi->oc.ods[i + numdevs] = sbi->oc.ods[i];
723 } 740 }
724 return ret; 741 return ret;
725} 742}
@@ -773,7 +790,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
773 sbi->oc.numdevs = 1; 790 sbi->oc.numdevs = 1;
774 sbi->oc.single_comp = EC_SINGLE_COMP; 791 sbi->oc.single_comp = EC_SINGLE_COMP;
775 sbi->oc.comps = &sbi->one_comp; 792 sbi->oc.comps = &sbi->one_comp;
776 sbi->oc.ods = sbi->_min_one_dev;
777 793
778 /* fill in some other data by hand */ 794 /* fill in some other data by hand */
779 memset(sb->s_id, 0, sizeof(sb->s_id)); 795 memset(sb->s_id, 0, sizeof(sb->s_id));
@@ -822,7 +838,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
822 if (unlikely(ret)) 838 if (unlikely(ret))
823 goto free_sbi; 839 goto free_sbi;
824 } else { 840 } else {
825 sbi->oc.ods[0] = od; 841 struct exofs_dev *eds;
842
843 ret = __alloc_dev_table(sbi, 1, &eds);
844 if (unlikely(ret))
845 goto free_sbi;
846
847 ore_comp_set_dev(&sbi->oc, 0, od);
826 } 848 }
827 849
828 __sbi_read_stats(sbi); 850 __sbi_read_stats(sbi);
@@ -862,7 +884,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
862 goto free_sbi; 884 goto free_sbi;
863 } 885 }
864 886
865 _exofs_print_device("Mounting", opts->dev_name, sbi->oc.ods[0], 887 _exofs_print_device("Mounting", opts->dev_name,
888 ore_comp_dev(&sbi->oc, 0),
866 sbi->one_comp.obj.partition); 889 sbi->one_comp.obj.partition);
867 return 0; 890 return 0;
868 891