aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs/super.c
diff options
context:
space:
mode:
authorBoaz Harrosh <bharrosh@panasas.com>2011-08-05 18:06:04 -0400
committerBoaz Harrosh <bharrosh@panasas.com>2011-08-06 22:35:32 -0400
commit9e9db45649eb5d3ee5622fdad741914ecf1016a0 (patch)
tree19ab9e1431e3d6535cef3f2cba6fcff12bb6ba6c /fs/exofs/super.c
parent85e44df4748670a1a7d8441b2d75843cdebc478a (diff)
exofs: ios: Move to a per inode components & device-table
Exofs raid engine was saving on memory space by having a single layout-info, single pid, and a single device-table, global to the filesystem, then passing a credential and object_id info at the io_state level, private for each inode. It would also devise this contraption of rotating the device table view for each inode->ino to spread out the device usage. This is not compatible with the pnfs-objects standard, which demands that each inode can have its own layout-info and device-table, and each object component its own pid, oid and creds. So: Bring exofs raid engine to be usable for generic pnfs-objects use by: * Define an exofs_comp structure that holds obj_id and credential info. * Break up exofs_layout struct to an exofs_components structure that holds a possible array of exofs_comp and the array of devices + the size of the arrays. * Add a "comps" parameter to get_io_state() that specifies the ids, creds and device array to use for each IO. This makes it possible to keep the layout global, but the device-table view, creds and IDs at the inode level. It only adds two 64bit members to each inode, since some of these members already existed in another form. * ios raid engine now accesses layout-info and comps-info through the passed pointers. Everything is pre-prepared by the caller for generic access of these structures and arrays. At the exofs Level: * Super block holds an exofs_components struct that holds the device array, previously in layout. The devices there are in device-table order. The device-array is twice as big and repeats the device-table twice so now each inode's device array can point to a random device and have a round-robin view of the table, making it compatible with previous exofs versions. * Each inode has an exofs_components struct that is initialized at load time, with its own view of the device table IDs and creds. When doing IO this gets passed to the io_state together with the layout. While performing this change,
bugs were found where credentials with the wrong IDs were used to access the different SB objects (super.c), as well as some dead code. It was never noticed because the target we use does not check the credentials. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs/super.c')
-rw-r--r--fs/exofs/super.c136
1 files changed, 70 insertions, 66 deletions
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 8783f3d33c4a..4d6877967c87 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -253,22 +253,6 @@ out:
253 return ret; 253 return ret;
254} 254}
255 255
256unsigned exofs_layout_od_id(struct exofs_layout *layout,
257 osd_id obj_no, unsigned layout_index)
258{
259/* switch (layout->lay_func) {
260 case LAYOUT_MOVING_WINDOW:
261 {*/
262 unsigned dev_mod = obj_no;
263
264 return (layout_index + dev_mod * layout->mirrors_p1) %
265 layout->s_numdevs;
266/* }
267 case LAYOUT_FUNC_IMPLICT:
268 return layout->devs[layout_index];
269 }*/
270}
271
272static const struct osd_attr g_attr_sb_stats = ATTR_DEF( 256static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
273 EXOFS_APAGE_SB_DATA, 257 EXOFS_APAGE_SB_DATA,
274 EXOFS_ATTR_SB_STATS, 258 EXOFS_ATTR_SB_STATS,
@@ -282,14 +266,12 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
282 struct exofs_io_state *ios; 266 struct exofs_io_state *ios;
283 int ret; 267 int ret;
284 268
285 ret = exofs_get_io_state(&sbi->layout, &ios); 269 ret = exofs_get_io_state(&sbi->layout, &sbi->comps, &ios);
286 if (unlikely(ret)) { 270 if (unlikely(ret)) {
287 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 271 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
288 return ret; 272 return ret;
289 } 273 }
290 274
291 ios->cred = sbi->s_cred;
292
293 ios->in_attr = attrs; 275 ios->in_attr = attrs;
294 ios->in_attr_len = ARRAY_SIZE(attrs); 276 ios->in_attr_len = ARRAY_SIZE(attrs);
295 277
@@ -339,7 +321,7 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
339 struct exofs_io_state *ios; 321 struct exofs_io_state *ios;
340 int ret; 322 int ret;
341 323
342 ret = exofs_get_io_state(&sbi->layout, &ios); 324 ret = exofs_get_io_state(&sbi->layout, &sbi->comps, &ios);
343 if (unlikely(ret)) { 325 if (unlikely(ret)) {
344 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 326 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
345 return ret; 327 return ret;
@@ -349,7 +331,7 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
349 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); 331 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
350 attrs[0].val_ptr = &sbi->s_ess; 332 attrs[0].val_ptr = &sbi->s_ess;
351 333
352 ios->cred = sbi->s_cred; 334
353 ios->done = stats_done; 335 ios->done = stats_done;
354 ios->private = sbi; 336 ios->private = sbi;
355 ios->out_attr = attrs; 337 ios->out_attr = attrs;
@@ -377,6 +359,8 @@ int exofs_sync_fs(struct super_block *sb, int wait)
377{ 359{
378 struct exofs_sb_info *sbi; 360 struct exofs_sb_info *sbi;
379 struct exofs_fscb *fscb; 361 struct exofs_fscb *fscb;
362 struct exofs_comp one_comp;
363 struct exofs_components comps;
380 struct exofs_io_state *ios; 364 struct exofs_io_state *ios;
381 int ret = -ENOMEM; 365 int ret = -ENOMEM;
382 366
@@ -393,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait)
393 * version). Otherwise the exofs_fscb is read-only from mkfs time. All 377 * version). Otherwise the exofs_fscb is read-only from mkfs time. All
394 * the writeable info is set in exofs_sbi_write_stats() above. 378 * the writeable info is set in exofs_sbi_write_stats() above.
395 */ 379 */
396 ret = exofs_get_io_state(&sbi->layout, &ios); 380
381 exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID);
382
383 ret = exofs_get_io_state(&sbi->layout, &comps, &ios);
397 if (unlikely(ret)) 384 if (unlikely(ret))
398 goto out; 385 goto out;
399 386
@@ -407,10 +394,8 @@ int exofs_sync_fs(struct super_block *sb, int wait)
407 fscb->s_newfs = 0; 394 fscb->s_newfs = 0;
408 fscb->s_version = EXOFS_FSCB_VER; 395 fscb->s_version = EXOFS_FSCB_VER;
409 396
410 ios->obj.id = EXOFS_SUPER_ID;
411 ios->offset = 0; 397 ios->offset = 0;
412 ios->kern_buff = fscb; 398 ios->kern_buff = fscb;
413 ios->cred = sbi->s_cred;
414 399
415 ret = exofs_sbi_write(ios); 400 ret = exofs_sbi_write(ios);
416 if (unlikely(ret)) 401 if (unlikely(ret))
@@ -446,17 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
446 431
447void exofs_free_sbi(struct exofs_sb_info *sbi) 432void exofs_free_sbi(struct exofs_sb_info *sbi)
448{ 433{
449 while (sbi->layout.s_numdevs) { 434 while (sbi->comps.numdevs) {
450 int i = --sbi->layout.s_numdevs; 435 int i = --sbi->comps.numdevs;
451 struct osd_dev *od = sbi->layout.s_ods[i]; 436 struct osd_dev *od = sbi->comps.ods[i];
452 437
453 if (od) { 438 if (od) {
454 sbi->layout.s_ods[i] = NULL; 439 sbi->comps.ods[i] = NULL;
455 osduld_put_device(od); 440 osduld_put_device(od);
456 } 441 }
457 } 442 }
458 if (sbi->layout.s_ods != sbi->_min_one_dev) 443 if (sbi->comps.ods != sbi->_min_one_dev)
459 kfree(sbi->layout.s_ods); 444 kfree(sbi->comps.ods);
460 kfree(sbi); 445 kfree(sbi);
461} 446}
462 447
@@ -483,8 +468,8 @@ static void exofs_put_super(struct super_block *sb)
483 msecs_to_jiffies(100)); 468 msecs_to_jiffies(100));
484 } 469 }
485 470
486 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], 471 _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0],
487 sbi->layout.s_pid); 472 sbi->one_comp.obj.partition);
488 473
489 bdi_destroy(&sbi->bdi); 474 bdi_destroy(&sbi->bdi);
490 exofs_free_sbi(sbi); 475 exofs_free_sbi(sbi);
@@ -624,8 +609,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
624 struct osd_dev *fscb_od, 609 struct osd_dev *fscb_od,
625 unsigned table_count) 610 unsigned table_count)
626{ 611{
627 struct osd_obj_id obj = {.partition = sbi->layout.s_pid, 612 struct exofs_comp comp;
628 .id = EXOFS_DEVTABLE_ID};
629 struct exofs_device_table *dt; 613 struct exofs_device_table *dt;
630 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + 614 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
631 sizeof(*dt); 615 sizeof(*dt);
@@ -639,8 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
639 return -ENOMEM; 623 return -ENOMEM;
640 } 624 }
641 625
642 sbi->layout.s_numdevs = 0; 626 sbi->comps.numdevs = 0;
643 ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); 627
628 comp.obj.partition = sbi->one_comp.obj.partition;
629 comp.obj.id = EXOFS_DEVTABLE_ID;
630 exofs_make_credential(comp.cred, &comp.obj);
631
632 ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt,
633 table_bytes);
644 if (unlikely(ret)) { 634 if (unlikely(ret)) {
645 EXOFS_ERR("ERROR: reading device table\n"); 635 EXOFS_ERR("ERROR: reading device table\n");
646 goto out; 636 goto out;
@@ -658,10 +648,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
658 goto out; 648 goto out;
659 649
660 if (likely(numdevs > 1)) { 650 if (likely(numdevs > 1)) {
661 unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); 651 unsigned size = numdevs * sizeof(sbi->comps.ods[0]);
662 652
663 sbi->layout.s_ods = kzalloc(size, GFP_KERNEL); 653 /* Twice bigger table: See exofs_init_comps() and below
664 if (unlikely(!sbi->layout.s_ods)) { 654 * comment
655 */
656 sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL);
657 if (unlikely(!sbi->comps.ods)) {
665 EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", 658 EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
666 numdevs); 659 numdevs);
667 ret = -ENOMEM; 660 ret = -ENOMEM;
@@ -688,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
688 * line. We always keep them in device-table order. 681 * line. We always keep them in device-table order.
689 */ 682 */
690 if (fscb_od && osduld_device_same(fscb_od, &odi)) { 683 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
691 sbi->layout.s_ods[i] = fscb_od; 684 sbi->comps.ods[i] = fscb_od;
692 ++sbi->layout.s_numdevs; 685 ++sbi->comps.numdevs;
693 fscb_od = NULL; 686 fscb_od = NULL;
694 continue; 687 continue;
695 } 688 }
@@ -702,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
702 goto out; 695 goto out;
703 } 696 }
704 697
705 sbi->layout.s_ods[i] = od; 698 sbi->comps.ods[i] = od;
706 ++sbi->layout.s_numdevs; 699 ++sbi->comps.numdevs;
707 700
708 /* Read the fscb of the other devices to make sure the FS 701 /* Read the fscb of the other devices to make sure the FS
709 * partition is there. 702 * partition is there.
710 */ 703 */
711 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, 704 ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb,
712 sizeof(fscb)); 705 sizeof(fscb));
713 if (unlikely(ret)) { 706 if (unlikely(ret)) {
714 EXOFS_ERR("ERROR: Malformed participating device " 707 EXOFS_ERR("ERROR: Malformed participating device "
@@ -725,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
725 718
726out: 719out:
727 kfree(dt); 720 kfree(dt);
728 if (unlikely(!ret && fscb_od)) { 721 if (likely(!ret)) {
729 EXOFS_ERR( 722 unsigned numdevs = sbi->comps.numdevs;
730 "ERROR: Bad device-table container device not present\n");
731 osduld_put_device(fscb_od);
732 ret = -EINVAL;
733 }
734 723
724 if (unlikely(fscb_od)) {
725 EXOFS_ERR("ERROR: Bad device-table container device not present\n");
726 osduld_put_device(fscb_od);
727 return -EINVAL;
728 }
729 /* exofs round-robins the device table view according to inode
730 * number. We hold a: twice bigger table hence inodes can point
731 * to any device and have a sequential view of the table
732 * starting at this device. See exofs_init_comps()
733 */
734 for (i = 0; i < numdevs - 1; ++i)
735 sbi->comps.ods[i + numdevs] = sbi->comps.ods[i];
736 }
735 return ret; 737 return ret;
736} 738}
737 739
@@ -745,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
745 struct exofs_sb_info *sbi; /*extended info */ 747 struct exofs_sb_info *sbi; /*extended info */
746 struct osd_dev *od; /* Master device */ 748 struct osd_dev *od; /* Master device */
747 struct exofs_fscb fscb; /*on-disk superblock info */ 749 struct exofs_fscb fscb; /*on-disk superblock info */
748 struct osd_obj_id obj; 750 struct exofs_comp comp;
749 unsigned table_count; 751 unsigned table_count;
750 int ret; 752 int ret;
751 753
@@ -776,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
776 sbi->layout.group_width = 1; 778 sbi->layout.group_width = 1;
777 sbi->layout.group_depth = -1; 779 sbi->layout.group_depth = -1;
778 sbi->layout.group_count = 1; 780 sbi->layout.group_count = 1;
779 sbi->layout.s_ods = sbi->_min_one_dev;
780 sbi->layout.s_numdevs = 1;
781 sbi->layout.s_pid = opts->pid;
782 sbi->s_timeout = opts->timeout; 781 sbi->s_timeout = opts->timeout;
783 782
783 sbi->one_comp.obj.partition = opts->pid;
784 sbi->one_comp.obj.id = 0;
785 exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
786 sbi->comps.numdevs = 1;
787 sbi->comps.single_comp = EC_SINGLE_COMP;
788 sbi->comps.comps = &sbi->one_comp;
789 sbi->comps.ods = sbi->_min_one_dev;
790
784 /* fill in some other data by hand */ 791 /* fill in some other data by hand */
785 memset(sb->s_id, 0, sizeof(sb->s_id)); 792 memset(sb->s_id, 0, sizeof(sb->s_id));
786 strcpy(sb->s_id, "exofs"); 793 strcpy(sb->s_id, "exofs");
@@ -791,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
791 sb->s_bdev = NULL; 798 sb->s_bdev = NULL;
792 sb->s_dev = 0; 799 sb->s_dev = 0;
793 800
794 obj.partition = sbi->layout.s_pid; 801 comp.obj.partition = sbi->one_comp.obj.partition;
795 obj.id = EXOFS_SUPER_ID; 802 comp.obj.id = EXOFS_SUPER_ID;
796 exofs_make_credential(sbi->s_cred, &obj); 803 exofs_make_credential(comp.cred, &comp.obj);
797 804
798 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); 805 ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb));
799 if (unlikely(ret)) 806 if (unlikely(ret))
800 goto free_sbi; 807 goto free_sbi;
801 808
@@ -828,7 +835,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
828 if (unlikely(ret)) 835 if (unlikely(ret))
829 goto free_sbi; 836 goto free_sbi;
830 } else { 837 } else {
831 sbi->layout.s_ods[0] = od; 838 sbi->comps.ods[0] = od;
832 } 839 }
833 840
834 __sbi_read_stats(sbi); 841 __sbi_read_stats(sbi);
@@ -868,13 +875,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
868 goto free_sbi; 875 goto free_sbi;
869 } 876 }
870 877
871 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], 878 _exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0],
872 sbi->layout.s_pid); 879 sbi->one_comp.obj.partition);
873 return 0; 880 return 0;
874 881
875free_sbi: 882free_sbi:
876 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 883 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
877 opts->dev_name, sbi->layout.s_pid, ret); 884 opts->dev_name, sbi->one_comp.obj.partition, ret);
878 exofs_free_sbi(sbi); 885 exofs_free_sbi(sbi);
879 return ret; 886 return ret;
880} 887}
@@ -915,17 +922,14 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
915 }; 922 };
916 uint64_t capacity = ULLONG_MAX; 923 uint64_t capacity = ULLONG_MAX;
917 uint64_t used = ULLONG_MAX; 924 uint64_t used = ULLONG_MAX;
918 uint8_t cred_a[OSD_CAP_LEN];
919 int ret; 925 int ret;
920 926
921 ret = exofs_get_io_state(&sbi->layout, &ios); 927 ret = exofs_get_io_state(&sbi->layout, &sbi->comps, &ios);
922 if (ret) { 928 if (ret) {
923 EXOFS_DBGMSG("exofs_get_io_state failed.\n"); 929 EXOFS_DBGMSG("exofs_get_io_state failed.\n");
924 return ret; 930 return ret;
925 } 931 }
926 932
927 exofs_make_credential(cred_a, &ios->obj);
928 ios->cred = sbi->s_cred;
929 ios->in_attr = attrs; 933 ios->in_attr = attrs;
930 ios->in_attr_len = ARRAY_SIZE(attrs); 934 ios->in_attr_len = ARRAY_SIZE(attrs);
931 935