aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-20 14:43:53 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-20 14:43:53 -0400
commitce9f8d6b39b0d8f62092477b8d96a835860fb45a (patch)
treef35de5d7aa8f0d8e711ac3027e23cf555a722c37 /fs
parent17934162875ef93fefbf5df9dfc46b7c07828fd1 (diff)
parentc999ff68029ebd0f56ccae75444f640f6d5a27d2 (diff)
Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
Pull pnfs/ore fixes from Boaz Harrosh: "These are catastrophic fixes to the pnfs objects-layout that were just discovered. They are also destined for @stable. I have found these and worked on them at around RC1 time but unfortunately went to the hospital for kidney stones and had a very slow recovery. I refrained from sending them as is, before proper testing, and surly I have found a bug just yesterday. So now they are all well tested, and have my sign-off. Other then fixing the problem at hand, and assuming there are no bugs at the new code, there is low risk to any surrounding code. And in anyway they affect only these paths that are now broken. That is RAID5 in pnfs objects-layout code. It does also affect exofs (which was not broken) but I have tested exofs and it is lower priority then objects-layout because no one is using exofs, but objects-layout has lots of users." * 'for-linus' of git://git.open-osd.org/linux-open-osd: pnfs-obj: Fix __r4w_get_page when offset is beyond i_size pnfs-obj: don't leak objio_state if ore_write/read fails ore: Unlock r4w pages in exact reverse order of locking ore: Remove support of partial IO request (NFS crash) ore: Fix NFS crash by supporting any unaligned RAID IO
Diffstat (limited to 'fs')
-rw-r--r--fs/exofs/ore.c8
-rw-r--r--fs/exofs/ore_raid.c91
-rw-r--r--fs/nfs/objlayout/objio_osd.c25
3 files changed, 69 insertions, 55 deletions
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 49cf230554a..24a49d47e93 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
735out: 735out:
736 ios->numdevs = devs_in_group; 736 ios->numdevs = devs_in_group;
737 ios->pages_consumed = cur_pg; 737 ios->pages_consumed = cur_pg;
738 if (unlikely(ret)) { 738 return ret;
739 if (length == ios->length)
740 return ret;
741 else
742 ios->length -= length;
743 }
744 return 0;
745} 739}
746 740
747int ore_create(struct ore_io_state *ios) 741int ore_create(struct ore_io_state *ios)
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d222c77cfa1..5f376d14fdc 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
144{ 144{
145 unsigned data_devs = sp2d->data_devs; 145 unsigned data_devs = sp2d->data_devs;
146 unsigned group_width = data_devs + sp2d->parity; 146 unsigned group_width = data_devs + sp2d->parity;
147 unsigned p; 147 int p, c;
148 148
149 if (!sp2d->needed) 149 if (!sp2d->needed)
150 return; 150 return;
151 151
152 for (p = 0; p < sp2d->pages_in_unit; p++) { 152 for (c = data_devs - 1; c >= 0; --c)
153 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 153 for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
154 154 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
155 if (_1ps->write_count < group_width) {
156 unsigned c;
157 155
158 for (c = 0; c < data_devs; c++) 156 if (_1ps->page_is_read[c]) {
159 if (_1ps->page_is_read[c]) { 157 struct page *page = _1ps->pages[c];
160 struct page *page = _1ps->pages[c];
161 158
162 r4w->put_page(priv, page); 159 r4w->put_page(priv, page);
163 _1ps->page_is_read[c] = false; 160 _1ps->page_is_read[c] = false;
164 } 161 }
165 } 162 }
166 163
164 for (p = 0; p < sp2d->pages_in_unit; p++) {
165 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
166
167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); 167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
168 _1ps->write_count = 0; 168 _1ps->write_count = 0;
169 _1ps->tx = NULL; 169 _1ps->tx = NULL;
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
461 * ios->sp2d[p][*], xor is calculated the same way. These pages are 461 * ios->sp2d[p][*], xor is calculated the same way. These pages are
462 * allocated/freed and don't go through cache 462 * allocated/freed and don't go through cache
463 */ 463 */
464static int _read_4_write(struct ore_io_state *ios) 464static int _read_4_write_first_stripe(struct ore_io_state *ios)
465{ 465{
466 struct ore_io_state *ios_read;
467 struct ore_striping_info read_si; 466 struct ore_striping_info read_si;
468 struct __stripe_pages_2d *sp2d = ios->sp2d; 467 struct __stripe_pages_2d *sp2d = ios->sp2d;
469 u64 offset = ios->si.first_stripe_start; 468 u64 offset = ios->si.first_stripe_start;
470 u64 last_stripe_end; 469 unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
471 unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
472 unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
473 int ret;
474 470
475 if (offset == ios->offset) /* Go to start collect $200 */ 471 if (offset == ios->offset) /* Go to start collect $200 */
476 goto read_last_stripe; 472 goto read_last_stripe;
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
478 min_p = _sp2d_min_pg(sp2d); 474 min_p = _sp2d_min_pg(sp2d);
479 max_p = _sp2d_max_pg(sp2d); 475 max_p = _sp2d_max_pg(sp2d);
480 476
477 ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
478 offset, ios->offset, min_p, max_p);
479
481 for (c = 0; ; c++) { 480 for (c = 0; ; c++) {
482 ore_calc_stripe_info(ios->layout, offset, 0, &read_si); 481 ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
483 read_si.obj_offset += min_p * PAGE_SIZE; 482 read_si.obj_offset += min_p * PAGE_SIZE;
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
512 } 511 }
513 512
514read_last_stripe: 513read_last_stripe:
514 return 0;
515}
516
517static int _read_4_write_last_stripe(struct ore_io_state *ios)
518{
519 struct ore_striping_info read_si;
520 struct __stripe_pages_2d *sp2d = ios->sp2d;
521 u64 offset;
522 u64 last_stripe_end;
523 unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
524 unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
525
515 offset = ios->offset + ios->length; 526 offset = ios->offset + ios->length;
516 if (offset % PAGE_SIZE) 527 if (offset % PAGE_SIZE)
517 _add_to_r4w_last_page(ios, &offset); 528 _add_to_r4w_last_page(ios, &offset);
@@ -527,15 +538,15 @@ read_last_stripe:
527 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, 538 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
528 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); 539 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
529 540
530 BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
531 /* unaligned IO must be within a single stripe */
532
533 if (min_p == sp2d->pages_in_unit) { 541 if (min_p == sp2d->pages_in_unit) {
534 /* Didn't do it yet */ 542 /* Didn't do it yet */
535 min_p = _sp2d_min_pg(sp2d); 543 min_p = _sp2d_min_pg(sp2d);
536 max_p = _sp2d_max_pg(sp2d); 544 max_p = _sp2d_max_pg(sp2d);
537 } 545 }
538 546
547 ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
548 offset, last_stripe_end, min_p, max_p);
549
539 while (offset < last_stripe_end) { 550 while (offset < last_stripe_end) {
540 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 551 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
541 552
@@ -568,6 +579,15 @@ read_last_stripe:
568 } 579 }
569 580
570read_it: 581read_it:
582 return 0;
583}
584
585static int _read_4_write_execute(struct ore_io_state *ios)
586{
587 struct ore_io_state *ios_read;
588 unsigned i;
589 int ret;
590
571 ios_read = ios->ios_read_4_write; 591 ios_read = ios->ios_read_4_write;
572 if (!ios_read) 592 if (!ios_read)
573 return 0; 593 return 0;
@@ -591,6 +611,8 @@ read_it:
591 } 611 }
592 612
593 _mark_read4write_pages_uptodate(ios_read, ret); 613 _mark_read4write_pages_uptodate(ios_read, ret);
614 ore_put_io_state(ios_read);
615 ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
594 return 0; 616 return 0;
595} 617}
596 618
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
626 /* If first stripe, Read in all read4write pages 648 /* If first stripe, Read in all read4write pages
627 * (if needed) before we calculate the first parity. 649 * (if needed) before we calculate the first parity.
628 */ 650 */
629 _read_4_write(ios); 651 _read_4_write_first_stripe(ios);
630 } 652 }
653 if (!cur_len) /* If last stripe r4w pages of last stripe */
654 _read_4_write_last_stripe(ios);
655 _read_4_write_execute(ios);
631 656
632 for (i = 0; i < num_pages; i++) { 657 for (i = 0; i < num_pages; i++) {
633 pages[i] = _raid_page_alloc(); 658 pages[i] = _raid_page_alloc();
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
654 679
655int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) 680int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
656{ 681{
657 struct ore_layout *layout = ios->layout;
658
659 if (ios->parity_pages) { 682 if (ios->parity_pages) {
683 struct ore_layout *layout = ios->layout;
660 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; 684 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
661 unsigned stripe_size = ios->si.bytes_in_stripe;
662 u64 last_stripe, first_stripe;
663 685
664 if (_sp2d_alloc(pages_in_unit, layout->group_width, 686 if (_sp2d_alloc(pages_in_unit, layout->group_width,
665 layout->parity, &ios->sp2d)) { 687 layout->parity, &ios->sp2d)) {
666 return -ENOMEM; 688 return -ENOMEM;
667 } 689 }
668
669 /* Round io down to last full strip */
670 first_stripe = div_u64(ios->offset, stripe_size);
671 last_stripe = div_u64(ios->offset + ios->length, stripe_size);
672
673 /* If an IO spans more then a single stripe it must end at
674 * a stripe boundary. The reminder at the end is pushed into the
675 * next IO.
676 */
677 if (last_stripe != first_stripe) {
678 ios->length = last_stripe * stripe_size - ios->offset;
679
680 BUG_ON(!ios->length);
681 ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
682 PAGE_SIZE;
683 ios->si.length = ios->length; /*make it consistent */
684 }
685 } 690 }
686 return 0; 691 return 0;
687} 692}
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index b47277baeba..f50d3e8d6f2 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
454 objios->ios->done = _read_done; 454 objios->ios->done = _read_done;
455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
456 rdata->args.offset, rdata->args.count); 456 rdata->args.offset, rdata->args.count);
457 return ore_read(objios->ios); 457 ret = ore_read(objios->ios);
458 if (unlikely(ret))
459 objio_free_result(&objios->oir);
460 return ret;
458} 461}
459 462
460/* 463/*
@@ -486,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
486 struct nfs_write_data *wdata = objios->oir.rpcdata; 489 struct nfs_write_data *wdata = objios->oir.rpcdata;
487 struct address_space *mapping = wdata->header->inode->i_mapping; 490 struct address_space *mapping = wdata->header->inode->i_mapping;
488 pgoff_t index = offset / PAGE_SIZE; 491 pgoff_t index = offset / PAGE_SIZE;
489 struct page *page = find_get_page(mapping, index); 492 struct page *page;
493 loff_t i_size = i_size_read(wdata->header->inode);
494
495 if (offset >= i_size) {
496 *uptodate = true;
497 dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
498 return ZERO_PAGE(0);
499 }
490 500
501 page = find_get_page(mapping, index);
491 if (!page) { 502 if (!page) {
492 page = find_or_create_page(mapping, index, GFP_NOFS); 503 page = find_or_create_page(mapping, index, GFP_NOFS);
493 if (unlikely(!page)) { 504 if (unlikely(!page)) {
@@ -507,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
507 518
508static void __r4w_put_page(void *priv, struct page *page) 519static void __r4w_put_page(void *priv, struct page *page)
509{ 520{
510 dprintk("%s: index=0x%lx\n", __func__, page->index); 521 dprintk("%s: index=0x%lx\n", __func__,
511 page_cache_release(page); 522 (page == ZERO_PAGE(0)) ? -1UL : page->index);
523 if (ZERO_PAGE(0) != page)
524 page_cache_release(page);
512 return; 525 return;
513} 526}
514 527
@@ -539,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
539 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 552 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
540 wdata->args.offset, wdata->args.count); 553 wdata->args.offset, wdata->args.count);
541 ret = ore_write(objios->ios); 554 ret = ore_write(objios->ios);
542 if (unlikely(ret)) 555 if (unlikely(ret)) {
556 objio_free_result(&objios->oir);
543 return ret; 557 return ret;
558 }
544 559
545 if (objios->sync) 560 if (objios->sync)
546 _write_done(objios->ios, objios); 561 _write_done(objios->ios, objios);