diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-09 15:51:01 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-09 15:51:01 -0500 |
| commit | 9e203936eac786f9268d6a13e6442d2accef1829 (patch) | |
| tree | 1da906e1c099b3afacf3b358b8981931c4da45fd /fs/exofs | |
| parent | 993ecff81aa6d50d694704de2647a5529ff2f166 (diff) | |
| parent | 724577ca355795b0a25c93ccbeee927871ca1a77 (diff) | |
Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
* 'for-linus' of git://git.open-osd.org/linux-open-osd:
ore: Must support none-PAGE-aligned IO
ore: fix BUG_ON, too few sgs when reading
ore: Fix crash in case of an IO error.
ore: FIX breakage when MISC_FILESYSTEMS is not set
Diffstat (limited to 'fs/exofs')
| -rw-r--r-- | fs/exofs/Kconfig | 11 | ||||
| -rw-r--r-- | fs/exofs/Kconfig.ore | 12 | ||||
| -rw-r--r-- | fs/exofs/ore.c | 8 | ||||
| -rw-r--r-- | fs/exofs/ore_raid.c | 78 |
4 files changed, 81 insertions, 28 deletions
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index da42f32c49be..86194b2f799d 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
| @@ -1,14 +1,3 @@ | |||
| 1 | # Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects | ||
| 2 | # for every ORE user we do it like this. Any user should add itself here | ||
| 3 | # at the "depends on EXOFS_FS || ..." with an ||. The dependencies are | ||
| 4 | # selected here, and we default to "ON". So in effect it is like been | ||
| 5 | # selected by any of the users. | ||
| 6 | config ORE | ||
| 7 | tristate | ||
| 8 | depends on EXOFS_FS || PNFS_OBJLAYOUT | ||
| 9 | select ASYNC_XOR | ||
| 10 | default SCSI_OSD_ULD | ||
| 11 | |||
| 12 | config EXOFS_FS | 1 | config EXOFS_FS |
| 13 | tristate "exofs: OSD based file system support" | 2 | tristate "exofs: OSD based file system support" |
| 14 | depends on SCSI_OSD_ULD | 3 | depends on SCSI_OSD_ULD |
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
new file mode 100644
index 000000000000..1ca7fb7b6ba8
--- /dev/null
+++ b/fs/exofs/Kconfig.ore
| @@ -0,0 +1,12 @@ | |||
| 1 | # ORE - Objects Raid Engine (libore.ko) | ||
| 2 | # | ||
| 3 | # Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects | ||
| 4 | # for every ORE user we do it like this. Any user should add itself here | ||
| 5 | # at the "depends on EXOFS_FS || ..." with an ||. The dependencies are | ||
| 6 | # selected here, and we default to "ON". So in effect it is like been | ||
| 7 | # selected by any of the users. | ||
| 8 | config ORE | ||
| 9 | tristate | ||
| 10 | depends on EXOFS_FS || PNFS_OBJLAYOUT | ||
| 11 | select ASYNC_XOR | ||
| 12 | default SCSI_OSD_ULD | ||
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index d271ad837202..49cf230554a2 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
| @@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, | |||
| 266 | 266 | ||
| 267 | /* first/last seg is split */ | 267 | /* first/last seg is split */ |
| 268 | num_raid_units += layout->group_width; | 268 | num_raid_units += layout->group_width; |
| 269 | sgs_per_dev = div_u64(num_raid_units, data_devs); | 269 | sgs_per_dev = div_u64(num_raid_units, data_devs) + 2; |
| 270 | } else { | 270 | } else { |
| 271 | /* For Writes add parity pages array. */ | 271 | /* For Writes add parity pages array. */ |
| 272 | max_par_pages = num_raid_units * pages_in_unit * | 272 | max_par_pages = num_raid_units * pages_in_unit * |
| @@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error) | |||
| 445 | u64 residual = ios->reading ? | 445 | u64 residual = ios->reading ? |
| 446 | or->in.residual : or->out.residual; | 446 | or->in.residual : or->out.residual; |
| 447 | u64 offset = (ios->offset + ios->length) - residual; | 447 | u64 offset = (ios->offset + ios->length) - residual; |
| 448 | struct ore_dev *od = ios->oc->ods[ | 448 | unsigned dev = per_dev->dev - ios->oc->first_dev; |
| 449 | per_dev->dev - ios->oc->first_dev]; | 449 | struct ore_dev *od = ios->oc->ods[dev]; |
| 450 | 450 | ||
| 451 | on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, | 451 | on_dev_error(ios, od, dev, osi.osd_err_pri, |
| 452 | offset, residual); | 452 | offset, residual); |
| 453 | } | 453 | } |
| 454 | if (osi.osd_err_pri >= acumulated_osd_err) { | 454 | if (osi.osd_err_pri >= acumulated_osd_err) { |
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 29c47e5c4a86..d222c77cfa1b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
| @@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios) | |||
| 328 | /* @si contains info of the to-be-inserted page. Update of @si should be | 328 | /* @si contains info of the to-be-inserted page. Update of @si should be |
| 329 | * maintained by caller. Specificaly si->dev, si->obj_offset, ... | 329 | * maintained by caller. Specificaly si->dev, si->obj_offset, ... |
| 330 | */ | 330 | */ |
| 331 | static int _add_to_read_4_write(struct ore_io_state *ios, | 331 | static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si, |
| 332 | struct ore_striping_info *si, struct page *page) | 332 | struct page *page, unsigned pg_len) |
| 333 | { | 333 | { |
| 334 | struct request_queue *q; | 334 | struct request_queue *q; |
| 335 | struct ore_per_dev_state *per_dev; | 335 | struct ore_per_dev_state *per_dev; |
| @@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios, | |||
| 366 | _ore_add_sg_seg(per_dev, gap, true); | 366 | _ore_add_sg_seg(per_dev, gap, true); |
| 367 | } | 367 | } |
| 368 | q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); | 368 | q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); |
| 369 | added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); | 369 | added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len, |
| 370 | if (unlikely(added_len != PAGE_SIZE)) { | 370 | si->obj_offset % PAGE_SIZE); |
| 371 | if (unlikely(added_len != pg_len)) { | ||
| 371 | ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", | 372 | ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", |
| 372 | per_dev->bio->bi_vcnt); | 373 | per_dev->bio->bi_vcnt); |
| 373 | return -ENOMEM; | 374 | return -ENOMEM; |
| 374 | } | 375 | } |
| 375 | 376 | ||
| 376 | per_dev->length += PAGE_SIZE; | 377 | per_dev->length += pg_len; |
| 377 | return 0; | 378 | return 0; |
| 378 | } | 379 | } |
| 379 | 380 | ||
| 381 | /* read the beginning of an unaligned first page */ | ||
| 382 | static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page) | ||
| 383 | { | ||
| 384 | struct ore_striping_info si; | ||
| 385 | unsigned pg_len; | ||
| 386 | |||
| 387 | ore_calc_stripe_info(ios->layout, ios->offset, 0, &si); | ||
| 388 | |||
| 389 | pg_len = si.obj_offset % PAGE_SIZE; | ||
| 390 | si.obj_offset -= pg_len; | ||
| 391 | |||
| 392 | ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n", | ||
| 393 | _LLU(si.obj_offset), pg_len, page->index, si.dev); | ||
| 394 | |||
| 395 | return _add_to_r4w(ios, &si, page, pg_len); | ||
| 396 | } | ||
| 397 | |||
| 398 | /* read the end of an incomplete last page */ | ||
| 399 | static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset) | ||
| 400 | { | ||
| 401 | struct ore_striping_info si; | ||
| 402 | struct page *page; | ||
| 403 | unsigned pg_len, p, c; | ||
| 404 | |||
| 405 | ore_calc_stripe_info(ios->layout, *offset, 0, &si); | ||
| 406 | |||
| 407 | p = si.unit_off / PAGE_SIZE; | ||
| 408 | c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, | ||
| 409 | ios->layout->mirrors_p1, si.par_dev, si.dev); | ||
| 410 | page = ios->sp2d->_1p_stripes[p].pages[c]; | ||
| 411 | |||
| 412 | pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); | ||
| 413 | *offset += pg_len; | ||
| 414 | |||
| 415 | ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n", | ||
| 416 | p, c, _LLU(*offset), pg_len, si.dev, si.par_dev); | ||
| 417 | |||
| 418 | BUG_ON(!page); | ||
| 419 | |||
| 420 | return _add_to_r4w(ios, &si, page, pg_len); | ||
| 421 | } | ||
| 422 | |||
| 380 | static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) | 423 | static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) |
| 381 | { | 424 | { |
| 382 | struct bio_vec *bv; | 425 | struct bio_vec *bv; |
| @@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios) | |||
| 444 | struct page **pp = &_1ps->pages[c]; | 487 | struct page **pp = &_1ps->pages[c]; |
| 445 | bool uptodate; | 488 | bool uptodate; |
| 446 | 489 | ||
| 447 | if (*pp) | 490 | if (*pp) { |
| 491 | if (ios->offset % PAGE_SIZE) | ||
| 492 | /* Read the remainder of the page */ | ||
| 493 | _add_to_r4w_first_page(ios, *pp); | ||
| 448 | /* to-be-written pages start here */ | 494 | /* to-be-written pages start here */ |
| 449 | goto read_last_stripe; | 495 | goto read_last_stripe; |
| 496 | } | ||
| 450 | 497 | ||
| 451 | *pp = ios->r4w->get_page(ios->private, offset, | 498 | *pp = ios->r4w->get_page(ios->private, offset, |
| 452 | &uptodate); | 499 | &uptodate); |
| @@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios) | |||
| 454 | return -ENOMEM; | 501 | return -ENOMEM; |
| 455 | 502 | ||
| 456 | if (!uptodate) | 503 | if (!uptodate) |
| 457 | _add_to_read_4_write(ios, &read_si, *pp); | 504 | _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE); |
| 458 | 505 | ||
| 459 | /* Mark read-pages to be cache_released */ | 506 | /* Mark read-pages to be cache_released */ |
| 460 | _1ps->page_is_read[c] = true; | 507 | _1ps->page_is_read[c] = true; |
| @@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios) | |||
| 465 | } | 512 | } |
| 466 | 513 | ||
| 467 | read_last_stripe: | 514 | read_last_stripe: |
| 468 | offset = ios->offset + (ios->length + PAGE_SIZE - 1) / | 515 | offset = ios->offset + ios->length; |
| 469 | PAGE_SIZE * PAGE_SIZE; | 516 | if (offset % PAGE_SIZE) |
| 517 | _add_to_r4w_last_page(ios, &offset); | ||
| 518 | /* offset will be aligned to next page */ | ||
| 519 | |||
| 470 | last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) | 520 | last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) |
| 471 | * bytes_in_stripe; | 521 | * bytes_in_stripe; |
| 472 | if (offset == last_stripe_end) /* Optimize for the aligned case */ | 522 | if (offset == last_stripe_end) /* Optimize for the aligned case */ |
| @@ -503,7 +553,7 @@ read_last_stripe: | |||
| 503 | /* Mark read-pages to be cache_released */ | 553 | /* Mark read-pages to be cache_released */ |
| 504 | _1ps->page_is_read[c] = true; | 554 | _1ps->page_is_read[c] = true; |
| 505 | if (!uptodate) | 555 | if (!uptodate) |
| 506 | _add_to_read_4_write(ios, &read_si, page); | 556 | _add_to_r4w(ios, &read_si, page, PAGE_SIZE); |
| 507 | } | 557 | } |
| 508 | 558 | ||
| 509 | offset += PAGE_SIZE; | 559 | offset += PAGE_SIZE; |
| @@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, | |||
| 551 | unsigned cur_len) | 601 | unsigned cur_len) |
| 552 | { | 602 | { |
| 553 | if (ios->reading) { | 603 | if (ios->reading) { |
| 554 | BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); | 604 | if (per_dev->cur_sg >= ios->sgs_per_dev) { |
| 605 | ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" , | ||
| 606 | per_dev->cur_sg, ios->sgs_per_dev); | ||
| 607 | return -ENOMEM; | ||
| 608 | } | ||
| 555 | _ore_add_sg_seg(per_dev, cur_len, true); | 609 | _ore_add_sg_seg(per_dev, cur_len, true); |
| 556 | } else { | 610 | } else { |
| 557 | struct __stripe_pages_2d *sp2d = ios->sp2d; | 611 | struct __stripe_pages_2d *sp2d = ios->sp2d; |
| @@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) | |||
| 612 | return -ENOMEM; | 666 | return -ENOMEM; |
| 613 | } | 667 | } |
| 614 | 668 | ||
| 615 | BUG_ON(ios->offset % PAGE_SIZE); | ||
| 616 | |||
| 617 | /* Round io down to last full strip */ | 669 | /* Round io down to last full strip */ |
| 618 | first_stripe = div_u64(ios->offset, stripe_size); | 670 | first_stripe = div_u64(ios->offset, stripe_size); |
| 619 | last_stripe = div_u64(ios->offset + ios->length, stripe_size); | 671 | last_stripe = div_u64(ios->offset + ios->length, stripe_size); |
