aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs
diff options
context:
space:
mode:
authorBoaz Harrosh <bharrosh@panasas.com>2013-11-21 10:58:08 -0500
committerBoaz Harrosh <bharrosh@panasas.com>2014-01-23 03:55:03 -0500
commitaad560b7f63b495f48a7232fd086c5913a676e6f (patch)
tree8287f6fac8cb1cb5d74b090fafe8a8f4531e5fdd /fs/exofs
parent5e01dc7b26d9f24f39abace5da98ccbd6a5ceb52 (diff)
ore: Fix wrong math in allocation of per device BIO
At IO preparation we calculate the max pages at each device and allocate a BIO per device of that size. The calculation was wrong for some unaligned corner-case offset/length combinations and would make prepare return with -ENOMEM. This would be bad for pnfs-objects, which would in that case do IO through the MDS, and fatal for exofs, where it would fail writes with EIO. Fix it by doing the proper math, which works in all cases. (I ran a test with all possible offset/length combinations this time round.) Also, when reading we do not need to allocate for the parity units since we jump over them. Also lower the max_io_length to take into account the parity pages, so as not to allocate BIOs bigger than PAGE_SIZE. CC: Stable Kernel <stable@vger.kernel.org> Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs')
-rw-r--r--fs/exofs/ore.c37
1 files changed, 25 insertions, 12 deletions
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index b74422888604..85cde3e76290 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -103,7 +103,7 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
103 103
104 layout->max_io_length = 104 layout->max_io_length =
105 (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) * 105 (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
106 layout->group_width; 106 (layout->group_width - layout->parity);
107 if (layout->parity) { 107 if (layout->parity) {
108 unsigned stripe_length = 108 unsigned stripe_length =
109 (layout->group_width - layout->parity) * 109 (layout->group_width - layout->parity) *
@@ -286,7 +286,8 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
286 if (length) { 286 if (length) {
287 ore_calc_stripe_info(layout, offset, length, &ios->si); 287 ore_calc_stripe_info(layout, offset, length, &ios->si);
288 ios->length = ios->si.length; 288 ios->length = ios->si.length;
289 ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE; 289 ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) +
290 ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
290 if (layout->parity) 291 if (layout->parity)
291 _ore_post_alloc_raid_stuff(ios); 292 _ore_post_alloc_raid_stuff(ios);
292 } 293 }
@@ -536,6 +537,7 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
536 u64 H = LmodS - G * T; 537 u64 H = LmodS - G * T;
537 538
538 u32 N = div_u64(H, U); 539 u32 N = div_u64(H, U);
540 u32 Nlast;
539 541
540 /* "H - (N * U)" is just "H % U" so it's bound to u32 */ 542 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
541 u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width; 543 u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
@@ -568,6 +570,10 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
568 si->length = T - H; 570 si->length = T - H;
569 if (si->length > length) 571 if (si->length > length)
570 si->length = length; 572 si->length = length;
573
574 Nlast = div_u64(H + si->length + U - 1, U);
575 si->maxdevUnits = Nlast - N;
576
571 si->M = M; 577 si->M = M;
572} 578}
573EXPORT_SYMBOL(ore_calc_stripe_info); 579EXPORT_SYMBOL(ore_calc_stripe_info);
@@ -583,13 +589,16 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
583 int ret; 589 int ret;
584 590
585 if (per_dev->bio == NULL) { 591 if (per_dev->bio == NULL) {
586 unsigned pages_in_stripe = ios->layout->group_width * 592 unsigned bio_size;
587 (ios->layout->stripe_unit / PAGE_SIZE); 593
588 unsigned nr_pages = ios->nr_pages * ios->layout->group_width / 594 if (!ios->reading) {
589 (ios->layout->group_width - 595 bio_size = ios->si.maxdevUnits;
590 ios->layout->parity); 596 } else {
591 unsigned bio_size = (nr_pages + pages_in_stripe) / 597 bio_size = (ios->si.maxdevUnits + 1) *
592 ios->layout->group_width; 598 (ios->layout->group_width - ios->layout->parity) /
599 ios->layout->group_width;
600 }
601 bio_size *= (ios->layout->stripe_unit / PAGE_SIZE);
593 602
594 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); 603 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
595 if (unlikely(!per_dev->bio)) { 604 if (unlikely(!per_dev->bio)) {
@@ -609,8 +618,12 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
609 added_len = bio_add_pc_page(q, per_dev->bio, pages[pg], 618 added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
610 pglen, pgbase); 619 pglen, pgbase);
611 if (unlikely(pglen != added_len)) { 620 if (unlikely(pglen != added_len)) {
612 ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n", 621 /* If bi_vcnt == bi_max then this is a SW BUG */
613 per_dev->bio->bi_vcnt); 622 ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x "
623 "bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n",
624 per_dev->bio->bi_vcnt,
625 per_dev->bio->bi_max_vecs,
626 BIO_MAX_PAGES_KMALLOC, cur_len);
614 ret = -ENOMEM; 627 ret = -ENOMEM;
615 goto out; 628 goto out;
616 } 629 }
@@ -1098,7 +1111,7 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
1098 size_attr->attr = g_attr_logical_length; 1111 size_attr->attr = g_attr_logical_length;
1099 size_attr->attr.val_ptr = &size_attr->newsize; 1112 size_attr->attr.val_ptr = &size_attr->newsize;
1100 1113
1101 ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", 1114 ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
1102 _LLU(oc->comps->obj.id), _LLU(obj_size), i); 1115 _LLU(oc->comps->obj.id), _LLU(obj_size), i);
1103 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, 1116 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
1104 &size_attr->attr); 1117 &size_attr->attr);