about summary refs log tree commit diff stats
path: root/fs/exofs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exofs')
-rw-r--r-- fs/exofs/Kconfig 11
-rw-r--r-- fs/exofs/Kconfig.ore 12
-rw-r--r-- fs/exofs/ore.c 8
-rw-r--r-- fs/exofs/ore_raid.c 78
4 files changed, 81 insertions, 28 deletions
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index da42f32c49be..86194b2f799d 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,14 +1,3 @@
1# Note ORE needs to "select ASYNC_XOR". So as not to force multiple selects
2# for every ORE user we do it like this. Any user should add itself here
3# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
4# selected here, and we default to "ON". So in effect it is like being
5# selected by any of the users.
6config ORE
7 tristate
8 depends on EXOFS_FS || PNFS_OBJLAYOUT
9 select ASYNC_XOR
10 default SCSI_OSD_ULD
11
12config EXOFS_FS 1config EXOFS_FS
13 tristate "exofs: OSD based file system support" 2 tristate "exofs: OSD based file system support"
14 depends on SCSI_OSD_ULD 3 depends on SCSI_OSD_ULD
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
new file mode 100644
index 000000000000..1ca7fb7b6ba8
--- /dev/null
+++ b/fs/exofs/Kconfig.ore
@@ -0,0 +1,12 @@
1# ORE - Objects Raid Engine (libore.ko)
2#
3# Note ORE needs to "select ASYNC_XOR". So as not to force multiple selects
4# for every ORE user we do it like this. Any user should add itself here
5# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
6# selected here, and we default to "ON". So in effect it is like being
7# selected by any of the users.
8config ORE
9 tristate
10 depends on EXOFS_FS || PNFS_OBJLAYOUT
11 select ASYNC_XOR
12 default SCSI_OSD_ULD
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index d271ad837202..49cf230554a2 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
266 266
267 /* first/last seg is split */ 267 /* first/last seg is split */
268 num_raid_units += layout->group_width; 268 num_raid_units += layout->group_width;
269 sgs_per_dev = div_u64(num_raid_units, data_devs); 269 sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
270 } else { 270 } else {
271 /* For Writes add parity pages array. */ 271 /* For Writes add parity pages array. */
272 max_par_pages = num_raid_units * pages_in_unit * 272 max_par_pages = num_raid_units * pages_in_unit *
@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
445 u64 residual = ios->reading ? 445 u64 residual = ios->reading ?
446 or->in.residual : or->out.residual; 446 or->in.residual : or->out.residual;
447 u64 offset = (ios->offset + ios->length) - residual; 447 u64 offset = (ios->offset + ios->length) - residual;
448 struct ore_dev *od = ios->oc->ods[ 448 unsigned dev = per_dev->dev - ios->oc->first_dev;
449 per_dev->dev - ios->oc->first_dev]; 449 struct ore_dev *od = ios->oc->ods[dev];
450 450
451 on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, 451 on_dev_error(ios, od, dev, osi.osd_err_pri,
452 offset, residual); 452 offset, residual);
453 } 453 }
454 if (osi.osd_err_pri >= acumulated_osd_err) { 454 if (osi.osd_err_pri >= acumulated_osd_err) {
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 29c47e5c4a86..d222c77cfa1b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios)
328/* @si contains info of the to-be-inserted page. Update of @si should be 328/* @si contains info of the to-be-inserted page. Update of @si should be
329 * maintained by caller. Specifically si->dev, si->obj_offset, ... 329 * maintained by caller. Specifically si->dev, si->obj_offset, ...
330 */ 330 */
331static int _add_to_read_4_write(struct ore_io_state *ios, 331static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
332 struct ore_striping_info *si, struct page *page) 332 struct page *page, unsigned pg_len)
333{ 333{
334 struct request_queue *q; 334 struct request_queue *q;
335 struct ore_per_dev_state *per_dev; 335 struct ore_per_dev_state *per_dev;
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios,
366 _ore_add_sg_seg(per_dev, gap, true); 366 _ore_add_sg_seg(per_dev, gap, true);
367 } 367 }
368 q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); 368 q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
369 added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); 369 added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
370 if (unlikely(added_len != PAGE_SIZE)) { 370 si->obj_offset % PAGE_SIZE);
371 if (unlikely(added_len != pg_len)) {
371 ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", 372 ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
372 per_dev->bio->bi_vcnt); 373 per_dev->bio->bi_vcnt);
373 return -ENOMEM; 374 return -ENOMEM;
374 } 375 }
375 376
376 per_dev->length += PAGE_SIZE; 377 per_dev->length += pg_len;
377 return 0; 378 return 0;
378} 379}
379 380
381/* read the beginning of an unaligned first page */
382static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
383{
384 struct ore_striping_info si;
385 unsigned pg_len;
386
387 ore_calc_stripe_info(ios->layout, ios->offset, 0, &si);
388
389 pg_len = si.obj_offset % PAGE_SIZE;
390 si.obj_offset -= pg_len;
391
392 ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
393 _LLU(si.obj_offset), pg_len, page->index, si.dev);
394
395 return _add_to_r4w(ios, &si, page, pg_len);
396}
397
398/* read the end of an incomplete last page */
399static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
400{
401 struct ore_striping_info si;
402 struct page *page;
403 unsigned pg_len, p, c;
404
405 ore_calc_stripe_info(ios->layout, *offset, 0, &si);
406
407 p = si.unit_off / PAGE_SIZE;
408 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
409 ios->layout->mirrors_p1, si.par_dev, si.dev);
410 page = ios->sp2d->_1p_stripes[p].pages[c];
411
412 pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE);
413 *offset += pg_len;
414
415 ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
416 p, c, _LLU(*offset), pg_len, si.dev, si.par_dev);
417
418 BUG_ON(!page);
419
420 return _add_to_r4w(ios, &si, page, pg_len);
421}
422
380static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) 423static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
381{ 424{
382 struct bio_vec *bv; 425 struct bio_vec *bv;
@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios)
444 struct page **pp = &_1ps->pages[c]; 487 struct page **pp = &_1ps->pages[c];
445 bool uptodate; 488 bool uptodate;
446 489
447 if (*pp) 490 if (*pp) {
491 if (ios->offset % PAGE_SIZE)
492 /* Read the remainder of the page */
493 _add_to_r4w_first_page(ios, *pp);
448 /* to-be-written pages start here */ 494 /* to-be-written pages start here */
449 goto read_last_stripe; 495 goto read_last_stripe;
496 }
450 497
451 *pp = ios->r4w->get_page(ios->private, offset, 498 *pp = ios->r4w->get_page(ios->private, offset,
452 &uptodate); 499 &uptodate);
@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios)
454 return -ENOMEM; 501 return -ENOMEM;
455 502
456 if (!uptodate) 503 if (!uptodate)
457 _add_to_read_4_write(ios, &read_si, *pp); 504 _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
458 505
459 /* Mark read-pages to be cache_released */ 506 /* Mark read-pages to be cache_released */
460 _1ps->page_is_read[c] = true; 507 _1ps->page_is_read[c] = true;
@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios)
465 } 512 }
466 513
467read_last_stripe: 514read_last_stripe:
468 offset = ios->offset + (ios->length + PAGE_SIZE - 1) / 515 offset = ios->offset + ios->length;
469 PAGE_SIZE * PAGE_SIZE; 516 if (offset % PAGE_SIZE)
517 _add_to_r4w_last_page(ios, &offset);
518 /* offset will be aligned to next page */
519
470 last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) 520 last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
471 * bytes_in_stripe; 521 * bytes_in_stripe;
472 if (offset == last_stripe_end) /* Optimize for the aligned case */ 522 if (offset == last_stripe_end) /* Optimize for the aligned case */
@@ -503,7 +553,7 @@ read_last_stripe:
503 /* Mark read-pages to be cache_released */ 553 /* Mark read-pages to be cache_released */
504 _1ps->page_is_read[c] = true; 554 _1ps->page_is_read[c] = true;
505 if (!uptodate) 555 if (!uptodate)
506 _add_to_read_4_write(ios, &read_si, page); 556 _add_to_r4w(ios, &read_si, page, PAGE_SIZE);
507 } 557 }
508 558
509 offset += PAGE_SIZE; 559 offset += PAGE_SIZE;
@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
551 unsigned cur_len) 601 unsigned cur_len)
552{ 602{
553 if (ios->reading) { 603 if (ios->reading) {
554 BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); 604 if (per_dev->cur_sg >= ios->sgs_per_dev) {
605 ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" ,
606 per_dev->cur_sg, ios->sgs_per_dev);
607 return -ENOMEM;
608 }
555 _ore_add_sg_seg(per_dev, cur_len, true); 609 _ore_add_sg_seg(per_dev, cur_len, true);
556 } else { 610 } else {
557 struct __stripe_pages_2d *sp2d = ios->sp2d; 611 struct __stripe_pages_2d *sp2d = ios->sp2d;
@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
612 return -ENOMEM; 666 return -ENOMEM;
613 } 667 }
614 668
615 BUG_ON(ios->offset % PAGE_SIZE);
616
617 /* Round io down to last full strip */ 669 /* Round io down to last full strip */
618 first_stripe = div_u64(ios->offset, stripe_size); 670 first_stripe = div_u64(ios->offset, stripe_size);
619 last_stripe = div_u64(ios->offset + ios->length, stripe_size); 671 last_stripe = div_u64(ios->offset + ios->length, stripe_size);