diff options
Diffstat (limited to 'fs/exofs')
-rw-r--r-- | fs/exofs/Kconfig | 11 | ||||
-rw-r--r-- | fs/exofs/Kconfig.ore | 12 | ||||
-rw-r--r-- | fs/exofs/ore.c | 8 | ||||
-rw-r--r-- | fs/exofs/ore_raid.c | 78 |
4 files changed, 81 insertions, 28 deletions
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index da42f32c49be..86194b2f799d 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,14 +1,3 @@ | |||
1 | # Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects | ||
2 | # for every ORE user we do it like this. Any user should add itself here | ||
3 | # at the "depends on EXOFS_FS || ..." with an ||. The dependencies are | ||
4 | # selected here, and we default to "ON". So in effect it is like been | ||
5 | # selected by any of the users. | ||
6 | config ORE | ||
7 | tristate | ||
8 | depends on EXOFS_FS || PNFS_OBJLAYOUT | ||
9 | select ASYNC_XOR | ||
10 | default SCSI_OSD_ULD | ||
11 | |||
12 | config EXOFS_FS | 1 | config EXOFS_FS |
13 | tristate "exofs: OSD based file system support" | 2 | tristate "exofs: OSD based file system support" |
14 | depends on SCSI_OSD_ULD | 3 | depends on SCSI_OSD_ULD |
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
new file mode 100644
index 000000000000..1ca7fb7b6ba8
--- /dev/null
+++ b/fs/exofs/Kconfig.ore
@@ -0,0 +1,12 @@ | |||
1 | # ORE - Objects Raid Engine (libore.ko) | ||
2 | # | ||
3 | # Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects | ||
4 | # for every ORE user we do it like this. Any user should add itself here | ||
5 | # at the "depends on EXOFS_FS || ..." with an ||. The dependencies are | ||
6 | # selected here, and we default to "ON". So in effect it is like been | ||
7 | # selected by any of the users. | ||
8 | config ORE | ||
9 | tristate | ||
10 | depends on EXOFS_FS || PNFS_OBJLAYOUT | ||
11 | select ASYNC_XOR | ||
12 | default SCSI_OSD_ULD | ||
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index d271ad837202..49cf230554a2 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, | |||
266 | 266 | ||
267 | /* first/last seg is split */ | 267 | /* first/last seg is split */ |
268 | num_raid_units += layout->group_width; | 268 | num_raid_units += layout->group_width; |
269 | sgs_per_dev = div_u64(num_raid_units, data_devs); | 269 | sgs_per_dev = div_u64(num_raid_units, data_devs) + 2; |
270 | } else { | 270 | } else { |
271 | /* For Writes add parity pages array. */ | 271 | /* For Writes add parity pages array. */ |
272 | max_par_pages = num_raid_units * pages_in_unit * | 272 | max_par_pages = num_raid_units * pages_in_unit * |
@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error) | |||
445 | u64 residual = ios->reading ? | 445 | u64 residual = ios->reading ? |
446 | or->in.residual : or->out.residual; | 446 | or->in.residual : or->out.residual; |
447 | u64 offset = (ios->offset + ios->length) - residual; | 447 | u64 offset = (ios->offset + ios->length) - residual; |
448 | struct ore_dev *od = ios->oc->ods[ | 448 | unsigned dev = per_dev->dev - ios->oc->first_dev; |
449 | per_dev->dev - ios->oc->first_dev]; | 449 | struct ore_dev *od = ios->oc->ods[dev]; |
450 | 450 | ||
451 | on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, | 451 | on_dev_error(ios, od, dev, osi.osd_err_pri, |
452 | offset, residual); | 452 | offset, residual); |
453 | } | 453 | } |
454 | if (osi.osd_err_pri >= acumulated_osd_err) { | 454 | if (osi.osd_err_pri >= acumulated_osd_err) { |
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 29c47e5c4a86..d222c77cfa1b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios) | |||
328 | /* @si contains info of the to-be-inserted page. Update of @si should be | 328 | /* @si contains info of the to-be-inserted page. Update of @si should be |
329 | * maintained by caller. Specificaly si->dev, si->obj_offset, ... | 329 | * maintained by caller. Specificaly si->dev, si->obj_offset, ... |
330 | */ | 330 | */ |
331 | static int _add_to_read_4_write(struct ore_io_state *ios, | 331 | static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si, |
332 | struct ore_striping_info *si, struct page *page) | 332 | struct page *page, unsigned pg_len) |
333 | { | 333 | { |
334 | struct request_queue *q; | 334 | struct request_queue *q; |
335 | struct ore_per_dev_state *per_dev; | 335 | struct ore_per_dev_state *per_dev; |
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios, | |||
366 | _ore_add_sg_seg(per_dev, gap, true); | 366 | _ore_add_sg_seg(per_dev, gap, true); |
367 | } | 367 | } |
368 | q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); | 368 | q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); |
369 | added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); | 369 | added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len, |
370 | if (unlikely(added_len != PAGE_SIZE)) { | 370 | si->obj_offset % PAGE_SIZE); |
371 | if (unlikely(added_len != pg_len)) { | ||
371 | ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", | 372 | ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", |
372 | per_dev->bio->bi_vcnt); | 373 | per_dev->bio->bi_vcnt); |
373 | return -ENOMEM; | 374 | return -ENOMEM; |
374 | } | 375 | } |
375 | 376 | ||
376 | per_dev->length += PAGE_SIZE; | 377 | per_dev->length += pg_len; |
377 | return 0; | 378 | return 0; |
378 | } | 379 | } |
379 | 380 | ||
381 | /* read the beginning of an unaligned first page */ | ||
382 | static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page) | ||
383 | { | ||
384 | struct ore_striping_info si; | ||
385 | unsigned pg_len; | ||
386 | |||
387 | ore_calc_stripe_info(ios->layout, ios->offset, 0, &si); | ||
388 | |||
389 | pg_len = si.obj_offset % PAGE_SIZE; | ||
390 | si.obj_offset -= pg_len; | ||
391 | |||
392 | ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n", | ||
393 | _LLU(si.obj_offset), pg_len, page->index, si.dev); | ||
394 | |||
395 | return _add_to_r4w(ios, &si, page, pg_len); | ||
396 | } | ||
397 | |||
398 | /* read the end of an incomplete last page */ | ||
399 | static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset) | ||
400 | { | ||
401 | struct ore_striping_info si; | ||
402 | struct page *page; | ||
403 | unsigned pg_len, p, c; | ||
404 | |||
405 | ore_calc_stripe_info(ios->layout, *offset, 0, &si); | ||
406 | |||
407 | p = si.unit_off / PAGE_SIZE; | ||
408 | c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, | ||
409 | ios->layout->mirrors_p1, si.par_dev, si.dev); | ||
410 | page = ios->sp2d->_1p_stripes[p].pages[c]; | ||
411 | |||
412 | pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); | ||
413 | *offset += pg_len; | ||
414 | |||
415 | ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n", | ||
416 | p, c, _LLU(*offset), pg_len, si.dev, si.par_dev); | ||
417 | |||
418 | BUG_ON(!page); | ||
419 | |||
420 | return _add_to_r4w(ios, &si, page, pg_len); | ||
421 | } | ||
422 | |||
380 | static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) | 423 | static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) |
381 | { | 424 | { |
382 | struct bio_vec *bv; | 425 | struct bio_vec *bv; |
@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios) | |||
444 | struct page **pp = &_1ps->pages[c]; | 487 | struct page **pp = &_1ps->pages[c]; |
445 | bool uptodate; | 488 | bool uptodate; |
446 | 489 | ||
447 | if (*pp) | 490 | if (*pp) { |
491 | if (ios->offset % PAGE_SIZE) | ||
492 | /* Read the remainder of the page */ | ||
493 | _add_to_r4w_first_page(ios, *pp); | ||
448 | /* to-be-written pages start here */ | 494 | /* to-be-written pages start here */ |
449 | goto read_last_stripe; | 495 | goto read_last_stripe; |
496 | } | ||
450 | 497 | ||
451 | *pp = ios->r4w->get_page(ios->private, offset, | 498 | *pp = ios->r4w->get_page(ios->private, offset, |
452 | &uptodate); | 499 | &uptodate); |
@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios) | |||
454 | return -ENOMEM; | 501 | return -ENOMEM; |
455 | 502 | ||
456 | if (!uptodate) | 503 | if (!uptodate) |
457 | _add_to_read_4_write(ios, &read_si, *pp); | 504 | _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE); |
458 | 505 | ||
459 | /* Mark read-pages to be cache_released */ | 506 | /* Mark read-pages to be cache_released */ |
460 | _1ps->page_is_read[c] = true; | 507 | _1ps->page_is_read[c] = true; |
@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios) | |||
465 | } | 512 | } |
466 | 513 | ||
467 | read_last_stripe: | 514 | read_last_stripe: |
468 | offset = ios->offset + (ios->length + PAGE_SIZE - 1) / | 515 | offset = ios->offset + ios->length; |
469 | PAGE_SIZE * PAGE_SIZE; | 516 | if (offset % PAGE_SIZE) |
517 | _add_to_r4w_last_page(ios, &offset); | ||
518 | /* offset will be aligned to next page */ | ||
519 | |||
470 | last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) | 520 | last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) |
471 | * bytes_in_stripe; | 521 | * bytes_in_stripe; |
472 | if (offset == last_stripe_end) /* Optimize for the aligned case */ | 522 | if (offset == last_stripe_end) /* Optimize for the aligned case */ |
@@ -503,7 +553,7 @@ read_last_stripe: | |||
503 | /* Mark read-pages to be cache_released */ | 553 | /* Mark read-pages to be cache_released */ |
504 | _1ps->page_is_read[c] = true; | 554 | _1ps->page_is_read[c] = true; |
505 | if (!uptodate) | 555 | if (!uptodate) |
506 | _add_to_read_4_write(ios, &read_si, page); | 556 | _add_to_r4w(ios, &read_si, page, PAGE_SIZE); |
507 | } | 557 | } |
508 | 558 | ||
509 | offset += PAGE_SIZE; | 559 | offset += PAGE_SIZE; |
@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, | |||
551 | unsigned cur_len) | 601 | unsigned cur_len) |
552 | { | 602 | { |
553 | if (ios->reading) { | 603 | if (ios->reading) { |
554 | BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); | 604 | if (per_dev->cur_sg >= ios->sgs_per_dev) { |
605 | ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" , | ||
606 | per_dev->cur_sg, ios->sgs_per_dev); | ||
607 | return -ENOMEM; | ||
608 | } | ||
555 | _ore_add_sg_seg(per_dev, cur_len, true); | 609 | _ore_add_sg_seg(per_dev, cur_len, true); |
556 | } else { | 610 | } else { |
557 | struct __stripe_pages_2d *sp2d = ios->sp2d; | 611 | struct __stripe_pages_2d *sp2d = ios->sp2d; |
@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) | |||
612 | return -ENOMEM; | 666 | return -ENOMEM; |
613 | } | 667 | } |
614 | 668 | ||
615 | BUG_ON(ios->offset % PAGE_SIZE); | ||
616 | |||
617 | /* Round io down to last full strip */ | 669 | /* Round io down to last full strip */ |
618 | first_stripe = div_u64(ios->offset, stripe_size); | 670 | first_stripe = div_u64(ios->offset, stripe_size); |
619 | last_stripe = div_u64(ios->offset + ios->length, stripe_size); | 671 | last_stripe = div_u64(ios->offset + ios->length, stripe_size); |