aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/exofs/Kconfig.ore2
-rw-r--r--fs/exofs/ore.c100
-rw-r--r--fs/exofs/ore_raid.c56
-rw-r--r--fs/exofs/ore_raid.h21
4 files changed, 98 insertions, 81 deletions
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
index 1ca7fb7b6ba8..2daf2329c28d 100644
--- a/fs/exofs/Kconfig.ore
+++ b/fs/exofs/Kconfig.ore
@@ -9,4 +9,6 @@ config ORE
9 tristate 9 tristate
10 depends on EXOFS_FS || PNFS_OBJLAYOUT 10 depends on EXOFS_FS || PNFS_OBJLAYOUT
11 select ASYNC_XOR 11 select ASYNC_XOR
12 select RAID6_PQ
13 select ASYNC_PQ
12 default SCSI_OSD_ULD 14 default SCSI_OSD_ULD
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index dae884694bd9..cfc0205d62c4 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -58,9 +58,12 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
58 layout->parity = 1; 58 layout->parity = 1;
59 break; 59 break;
60 case PNFS_OSD_RAID_PQ: 60 case PNFS_OSD_RAID_PQ:
61 layout->parity = 2;
62 break;
61 case PNFS_OSD_RAID_4: 63 case PNFS_OSD_RAID_4:
62 default: 64 default:
63 ORE_ERR("Only RAID_0/5 for now\n"); 65 ORE_ERR("Only RAID_0/5/6 for now received-enum=%d\n",
66 layout->raid_algorithm);
64 return -EINVAL; 67 return -EINVAL;
65 } 68 }
66 if (0 != (layout->stripe_unit & ~PAGE_MASK)) { 69 if (0 != (layout->stripe_unit & ~PAGE_MASK)) {
@@ -112,6 +115,8 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
112 layout->max_io_length /= stripe_length; 115 layout->max_io_length /= stripe_length;
113 layout->max_io_length *= stripe_length; 116 layout->max_io_length *= stripe_length;
114 } 117 }
118 ORE_DBGMSG("max_io_length=0x%lx\n", layout->max_io_length);
119
115 return 0; 120 return 0;
116} 121}
117EXPORT_SYMBOL(ore_verify_layout); 122EXPORT_SYMBOL(ore_verify_layout);
@@ -545,21 +550,24 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
545 550
546 /* "H - (N * U)" is just "H % U" so it's bound to u32 */ 551 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
547 u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width; 552 u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
553 u32 first_dev = C - C % group_width;
548 554
549 div_u64_rem(file_offset, stripe_unit, &si->unit_off); 555 div_u64_rem(file_offset, stripe_unit, &si->unit_off);
550 556
551 si->obj_offset = si->unit_off + (N * stripe_unit) + 557 si->obj_offset = si->unit_off + (N * stripe_unit) +
552 (M * group_depth * stripe_unit); 558 (M * group_depth * stripe_unit);
559 si->cur_comp = C - first_dev;
560 si->cur_pg = si->unit_off / PAGE_SIZE;
553 561
554 if (parity) { 562 if (parity) {
555 u32 LCMdP = lcm(group_width, parity) / parity; 563 u32 LCMdP = lcm(group_width, parity) / parity;
556 /* R = N % LCMdP; */ 564 /* R = N % LCMdP; */
557 u32 RxP = (N % LCMdP) * parity; 565 u32 RxP = (N % LCMdP) * parity;
558 u32 first_dev = C - C % group_width;
559 566
560 si->par_dev = (group_width + group_width - parity - RxP) % 567 si->par_dev = (group_width + group_width - parity - RxP) %
561 group_width + first_dev; 568 group_width + first_dev;
562 si->dev = (group_width + C - RxP) % group_width + first_dev; 569 si->dev = (group_width + group_width + C - RxP) %
570 group_width + first_dev;
563 si->bytes_in_stripe = U; 571 si->bytes_in_stripe = U;
564 si->first_stripe_start = M * S + G * T + N * U; 572 si->first_stripe_start = M * S + G * T + N * U;
565 } else { 573 } else {
@@ -649,6 +657,43 @@ out: /* we fail the complete unit on an error eg don't advance
649 return ret; 657 return ret;
650} 658}
651 659
660static int _add_parity_units(struct ore_io_state *ios,
661 struct ore_striping_info *si,
662 unsigned dev, unsigned first_dev,
663 unsigned mirrors_p1, unsigned devs_in_group,
664 unsigned cur_len)
665{
666 unsigned do_parity;
667 int ret = 0;
668
669 for (do_parity = ios->layout->parity; do_parity; --do_parity) {
670 struct ore_per_dev_state *per_dev;
671
672 per_dev = &ios->per_dev[dev - first_dev];
673 if (!per_dev->length && !per_dev->offset) {
674 /* Only/always the parity unit of the first
675 * stripe will be empty. So this is a chance to
676 * initialize the per_dev info.
677 */
678 per_dev->dev = dev;
679 per_dev->offset = si->obj_offset - si->unit_off;
680 }
681
682 ret = _ore_add_parity_unit(ios, si, per_dev, cur_len,
683 do_parity == 1);
684 if (unlikely(ret))
685 break;
686
687 if (do_parity != 1) {
688 dev = ((dev + mirrors_p1) % devs_in_group) + first_dev;
689 si->cur_comp = (si->cur_comp + 1) %
690 ios->layout->group_width;
691 }
692 }
693
694 return ret;
695}
696
652static int _prepare_for_striping(struct ore_io_state *ios) 697static int _prepare_for_striping(struct ore_io_state *ios)
653{ 698{
654 struct ore_striping_info *si = &ios->si; 699 struct ore_striping_info *si = &ios->si;
@@ -658,7 +703,6 @@ static int _prepare_for_striping(struct ore_io_state *ios)
658 unsigned devs_in_group = group_width * mirrors_p1; 703 unsigned devs_in_group = group_width * mirrors_p1;
659 unsigned dev = si->dev; 704 unsigned dev = si->dev;
660 unsigned first_dev = dev - (dev % devs_in_group); 705 unsigned first_dev = dev - (dev % devs_in_group);
661 unsigned dev_order;
662 unsigned cur_pg = ios->pages_consumed; 706 unsigned cur_pg = ios->pages_consumed;
663 u64 length = ios->length; 707 u64 length = ios->length;
664 int ret = 0; 708 int ret = 0;
@@ -670,16 +714,13 @@ static int _prepare_for_striping(struct ore_io_state *ios)
670 714
671 BUG_ON(length > si->length); 715 BUG_ON(length > si->length);
672 716
673 dev_order = _dev_order(devs_in_group, mirrors_p1, si->par_dev, dev);
674 si->cur_comp = dev_order;
675 si->cur_pg = si->unit_off / PAGE_SIZE;
676
677 while (length) { 717 while (length) {
678 unsigned comp = dev - first_dev; 718 struct ore_per_dev_state *per_dev =
679 struct ore_per_dev_state *per_dev = &ios->per_dev[comp]; 719 &ios->per_dev[dev - first_dev];
680 unsigned cur_len, page_off = 0; 720 unsigned cur_len, page_off = 0;
681 721
682 if (!per_dev->length) { 722 if (!per_dev->length && !per_dev->offset) {
723 /* First time initialize the per_dev info. */
683 per_dev->dev = dev; 724 per_dev->dev = dev;
684 if (dev == si->dev) { 725 if (dev == si->dev) {
685 WARN_ON(dev == si->par_dev); 726 WARN_ON(dev == si->par_dev);
@@ -688,13 +729,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
688 page_off = si->unit_off & ~PAGE_MASK; 729 page_off = si->unit_off & ~PAGE_MASK;
689 BUG_ON(page_off && (page_off != ios->pgbase)); 730 BUG_ON(page_off && (page_off != ios->pgbase));
690 } else { 731 } else {
691 if (si->cur_comp > dev_order) 732 per_dev->offset = si->obj_offset - si->unit_off;
692 per_dev->offset =
693 si->obj_offset - si->unit_off;
694 else /* si->cur_comp < dev_order */
695 per_dev->offset =
696 si->obj_offset + stripe_unit -
697 si->unit_off;
698 cur_len = stripe_unit; 733 cur_len = stripe_unit;
699 } 734 }
700 } else { 735 } else {
@@ -708,11 +743,9 @@ static int _prepare_for_striping(struct ore_io_state *ios)
708 if (unlikely(ret)) 743 if (unlikely(ret))
709 goto out; 744 goto out;
710 745
711 dev += mirrors_p1;
712 dev = (dev % devs_in_group) + first_dev;
713
714 length -= cur_len; 746 length -= cur_len;
715 747
748 dev = ((dev + mirrors_p1) % devs_in_group) + first_dev;
716 si->cur_comp = (si->cur_comp + 1) % group_width; 749 si->cur_comp = (si->cur_comp + 1) % group_width;
717 if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) { 750 if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) {
718 if (!length && ios->sp2d) { 751 if (!length && ios->sp2d) {
@@ -720,23 +753,16 @@ static int _prepare_for_striping(struct ore_io_state *ios)
720 * stripe. then operate on parity dev. 753 * stripe. then operate on parity dev.
721 */ 754 */
722 dev = si->par_dev; 755 dev = si->par_dev;
723 } 756 /* If last stripe operate on parity comp */
724 if (ios->sp2d) 757 si->cur_comp = group_width - ios->layout->parity;
725 /* In writes cur_len just means if it's the
726 * last one. See _ore_add_parity_unit.
727 */
728 cur_len = length;
729 per_dev = &ios->per_dev[dev - first_dev];
730 if (!per_dev->length) {
731 /* Only/always the parity unit of the first
732 * stripe will be empty. So this is a chance to
733 * initialize the per_dev info.
734 */
735 per_dev->dev = dev;
736 per_dev->offset = si->obj_offset - si->unit_off;
737 } 758 }
738 759
739 ret = _ore_add_parity_unit(ios, si, per_dev, cur_len); 760 /* In writes cur_len just means if it's the
761 * last one. See _ore_add_parity_unit.
762 */
763 ret = _add_parity_units(ios, si, dev, first_dev,
764 mirrors_p1, devs_in_group,
765 ios->sp2d ? length : cur_len);
740 if (unlikely(ret)) 766 if (unlikely(ret))
741 goto out; 767 goto out;
742 768
@@ -747,6 +773,8 @@ static int _prepare_for_striping(struct ore_io_state *ios)
747 /* Next stripe, start fresh */ 773 /* Next stripe, start fresh */
748 si->cur_comp = 0; 774 si->cur_comp = 0;
749 si->cur_pg = 0; 775 si->cur_pg = 0;
776 si->obj_offset += cur_len;
777 si->unit_off = 0;
750 } 778 }
751 } 779 }
752out: 780out:
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 4e2c032ab8a1..7f20f25c232c 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -218,22 +218,28 @@ static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d)
218static void _gen_xor_unit(struct __stripe_pages_2d *sp2d) 218static void _gen_xor_unit(struct __stripe_pages_2d *sp2d)
219{ 219{
220 unsigned p; 220 unsigned p;
221 unsigned tx_flags = ASYNC_TX_ACK;
222
223 if (sp2d->parity == 1)
224 tx_flags |= ASYNC_TX_XOR_ZERO_DST;
225
221 for (p = 0; p < sp2d->pages_in_unit; p++) { 226 for (p = 0; p < sp2d->pages_in_unit; p++) {
222 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 227 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
223 228
224 if (!_1ps->write_count) 229 if (!_1ps->write_count)
225 continue; 230 continue;
226 231
227 init_async_submit(&_1ps->submit, 232 init_async_submit(&_1ps->submit, tx_flags,
228 ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK, 233 NULL, NULL, NULL, (addr_conv_t *)_1ps->scribble);
229 NULL, 234
230 NULL, NULL, 235 if (sp2d->parity == 1)
231 (addr_conv_t *)_1ps->scribble); 236 _1ps->tx = async_xor(_1ps->pages[sp2d->data_devs],
232 237 _1ps->pages, 0, sp2d->data_devs,
233 /* TODO: raid6 */ 238 PAGE_SIZE, &_1ps->submit);
234 _1ps->tx = async_xor(_1ps->pages[sp2d->data_devs], _1ps->pages, 239 else /* parity == 2 */
235 0, sp2d->data_devs, PAGE_SIZE, 240 _1ps->tx = async_gen_syndrome(_1ps->pages, 0,
236 &_1ps->submit); 241 sp2d->data_devs + sp2d->parity,
242 PAGE_SIZE, &_1ps->submit);
237 } 243 }
238 244
239 for (p = 0; p < sp2d->pages_in_unit; p++) { 245 for (p = 0; p < sp2d->pages_in_unit; p++) {
@@ -404,9 +410,8 @@ static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
404 410
405 ore_calc_stripe_info(ios->layout, *offset, 0, &si); 411 ore_calc_stripe_info(ios->layout, *offset, 0, &si);
406 412
407 p = si.unit_off / PAGE_SIZE; 413 p = si.cur_pg;
408 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, 414 c = si.cur_comp;
409 ios->layout->mirrors_p1, si.par_dev, si.dev);
410 page = ios->sp2d->_1p_stripes[p].pages[c]; 415 page = ios->sp2d->_1p_stripes[p].pages[c];
411 416
412 pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); 417 pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE);
@@ -534,9 +539,8 @@ static int _read_4_write_last_stripe(struct ore_io_state *ios)
534 goto read_it; 539 goto read_it;
535 540
536 ore_calc_stripe_info(ios->layout, offset, 0, &read_si); 541 ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
537 p = read_si.unit_off / PAGE_SIZE; 542 p = read_si.cur_pg;
538 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, 543 c = read_si.cur_comp;
539 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
540 544
541 if (min_p == sp2d->pages_in_unit) { 545 if (min_p == sp2d->pages_in_unit) {
542 /* Didn't do it yet */ 546 /* Didn't do it yet */
@@ -620,7 +624,7 @@ static int _read_4_write_execute(struct ore_io_state *ios)
620int _ore_add_parity_unit(struct ore_io_state *ios, 624int _ore_add_parity_unit(struct ore_io_state *ios,
621 struct ore_striping_info *si, 625 struct ore_striping_info *si,
622 struct ore_per_dev_state *per_dev, 626 struct ore_per_dev_state *per_dev,
623 unsigned cur_len) 627 unsigned cur_len, bool do_xor)
624{ 628{
625 if (ios->reading) { 629 if (ios->reading) {
626 if (per_dev->cur_sg >= ios->sgs_per_dev) { 630 if (per_dev->cur_sg >= ios->sgs_per_dev) {
@@ -640,17 +644,16 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
640 si->cur_pg = _sp2d_min_pg(sp2d); 644 si->cur_pg = _sp2d_min_pg(sp2d);
641 num_pages = _sp2d_max_pg(sp2d) + 1 - si->cur_pg; 645 num_pages = _sp2d_max_pg(sp2d) + 1 - si->cur_pg;
642 646
643 if (!cur_len) /* If last stripe operate on parity comp */
644 si->cur_comp = sp2d->data_devs;
645
646 if (!per_dev->length) { 647 if (!per_dev->length) {
647 per_dev->offset += si->cur_pg * PAGE_SIZE; 648 per_dev->offset += si->cur_pg * PAGE_SIZE;
648 /* If first stripe, Read in all read4write pages 649 /* If first stripe, Read in all read4write pages
649 * (if needed) before we calculate the first parity. 650 * (if needed) before we calculate the first parity.
650 */ 651 */
651 _read_4_write_first_stripe(ios); 652 if (do_xor)
653 _read_4_write_first_stripe(ios);
652 } 654 }
653 if (!cur_len) /* If last stripe r4w pages of last stripe */ 655 if (!cur_len && do_xor)
656 /* If last stripe r4w pages of last stripe */
654 _read_4_write_last_stripe(ios); 657 _read_4_write_last_stripe(ios);
655 _read_4_write_execute(ios); 658 _read_4_write_execute(ios);
656 659
@@ -662,7 +665,7 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
662 ++(ios->cur_par_page); 665 ++(ios->cur_par_page);
663 } 666 }
664 667
665 BUG_ON(si->cur_comp != sp2d->data_devs); 668 BUG_ON(si->cur_comp < sp2d->data_devs);
666 BUG_ON(si->cur_pg + num_pages > sp2d->pages_in_unit); 669 BUG_ON(si->cur_pg + num_pages > sp2d->pages_in_unit);
667 670
668 ret = _ore_add_stripe_unit(ios, &array_start, 0, pages, 671 ret = _ore_add_stripe_unit(ios, &array_start, 0, pages,
@@ -670,9 +673,10 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
670 if (unlikely(ret)) 673 if (unlikely(ret))
671 return ret; 674 return ret;
672 675
673 /* TODO: raid6 if (last_parity_dev) */ 676 if (do_xor) {
674 _gen_xor_unit(sp2d); 677 _gen_xor_unit(sp2d);
675 _sp2d_reset(sp2d, ios->r4w, ios->private); 678 _sp2d_reset(sp2d, ios->r4w, ios->private);
679 }
676 } 680 }
677 return 0; 681 return 0;
678} 682}
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h
index 2ffd2c3c6e46..cf6375d82129 100644
--- a/fs/exofs/ore_raid.h
+++ b/fs/exofs/ore_raid.h
@@ -31,24 +31,6 @@
31#define ORE_DBGMSG2(M...) do {} while (0) 31#define ORE_DBGMSG2(M...) do {} while (0)
32/* #define ORE_DBGMSG2 ORE_DBGMSG */ 32/* #define ORE_DBGMSG2 ORE_DBGMSG */
33 33
34/* Calculate the component order in a stripe. eg the logical data unit
35 * address within the stripe of @dev given the @par_dev of this stripe.
36 */
37static inline unsigned _dev_order(unsigned devs_in_group, unsigned mirrors_p1,
38 unsigned par_dev, unsigned dev)
39{
40 unsigned first_dev = dev - dev % devs_in_group;
41
42 dev -= first_dev;
43 par_dev -= first_dev;
44
45 if (devs_in_group == par_dev) /* The raid 0 case */
46 return dev / mirrors_p1;
47 /* raid4/5/6 case */
48 return ((devs_in_group + dev - par_dev - mirrors_p1) % devs_in_group) /
49 mirrors_p1;
50}
51
52/* ios_raid.c stuff needed by ios.c */ 34/* ios_raid.c stuff needed by ios.c */
53int _ore_post_alloc_raid_stuff(struct ore_io_state *ios); 35int _ore_post_alloc_raid_stuff(struct ore_io_state *ios);
54void _ore_free_raid_stuff(struct ore_io_state *ios); 36void _ore_free_raid_stuff(struct ore_io_state *ios);
@@ -56,7 +38,8 @@ void _ore_free_raid_stuff(struct ore_io_state *ios);
56void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len, 38void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len,
57 bool not_last); 39 bool not_last);
58int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si, 40int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si,
59 struct ore_per_dev_state *per_dev, unsigned cur_len); 41 struct ore_per_dev_state *per_dev, unsigned cur_len,
42 bool do_xor);
60void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d, 43void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d,
61 struct ore_striping_info *si, struct page *page); 44 struct ore_striping_info *si, struct page *page);
62static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d, 45static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d,