diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2011-10-12 09:42:07 -0400 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2011-10-24 20:22:28 -0400 |
commit | dd296619974c50c46c67e58f355a7e85ef3f0c01 (patch) | |
tree | 516eb00e50add1fe1cce74e8967857af5d41083e | |
parent | 769ba8d92025fa390f3097e658b8ed6e032d68e9 (diff) |
exofs: Support for RAID5 read-4-write interface.
The ORE needs an r4w_get_page/r4w_put_page API supplied
by the filesystem so it can get cache pages to read into when
writing partial stripes.
Also, I commented out and NULLed the .writepage (singular)
vector, because it gives a terrible write pattern to RAID
and is apparently not needed. Even in OOM conditions the
system copes (even better) without it.
TODO: How can write_cache_pages() be told to start at,
or include, a certain page?
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
-rw-r--r-- | fs/exofs/inode.c | 61 |
1 files changed, 59 insertions, 2 deletions
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 86c0ac87b8e3..3e5f3a6be90a 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -63,6 +63,7 @@ struct page_collect { | |||
63 | bool read_4_write; /* This means two things: that the read is sync | 63 | bool read_4_write; /* This means two things: that the read is sync |
64 | * And the pages should not be unlocked. | 64 | * And the pages should not be unlocked. |
65 | */ | 65 | */ |
66 | struct page *that_locked_page; | ||
66 | }; | 67 | }; |
67 | 68 | ||
68 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | 69 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
@@ -81,6 +82,7 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | |||
81 | pcol->length = 0; | 82 | pcol->length = 0; |
82 | pcol->pg_first = -1; | 83 | pcol->pg_first = -1; |
83 | pcol->read_4_write = false; | 84 | pcol->read_4_write = false; |
85 | pcol->that_locked_page = NULL; | ||
84 | } | 86 | } |
85 | 87 | ||
86 | static void _pcol_reset(struct page_collect *pcol) | 88 | static void _pcol_reset(struct page_collect *pcol) |
@@ -93,6 +95,7 @@ static void _pcol_reset(struct page_collect *pcol) | |||
93 | pcol->length = 0; | 95 | pcol->length = 0; |
94 | pcol->pg_first = -1; | 96 | pcol->pg_first = -1; |
95 | pcol->ios = NULL; | 97 | pcol->ios = NULL; |
98 | pcol->that_locked_page = NULL; | ||
96 | 99 | ||
97 | /* this is probably the end of the loop but in writes | 100 | /* this is probably the end of the loop but in writes |
98 | * it might not end here. don't be left with nothing | 101 | * it might not end here. don't be left with nothing |
@@ -391,6 +394,8 @@ static int readpage_strip(void *data, struct page *page) | |||
391 | EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, | 394 | EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, |
392 | page->index); | 395 | page->index); |
393 | 396 | ||
397 | pcol->that_locked_page = page; | ||
398 | |||
394 | if (page->index < end_index) | 399 | if (page->index < end_index) |
395 | len = PAGE_CACHE_SIZE; | 400 | len = PAGE_CACHE_SIZE; |
396 | else if (page->index == end_index) | 401 | else if (page->index == end_index) |
@@ -560,6 +565,56 @@ static void writepages_done(struct ore_io_state *ios, void *p) | |||
560 | EXOFS_DBGMSG2("writepages_done END\n"); | 565 | EXOFS_DBGMSG2("writepages_done END\n"); |
561 | } | 566 | } |
562 | 567 | ||
568 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) | ||
569 | { | ||
570 | struct page_collect *pcol = priv; | ||
571 | pgoff_t index = offset / PAGE_SIZE; | ||
572 | |||
573 | if (!pcol->that_locked_page || | ||
574 | (pcol->that_locked_page->index != index)) { | ||
575 | struct page *page = find_get_page(pcol->inode->i_mapping, index); | ||
576 | |||
577 | if (!page) { | ||
578 | page = find_or_create_page(pcol->inode->i_mapping, | ||
579 | index, GFP_NOFS); | ||
580 | if (unlikely(!page)) { | ||
581 | EXOFS_DBGMSG("grab_cache_page Failed " | ||
582 | "index=0x%llx\n", _LLU(index)); | ||
583 | return NULL; | ||
584 | } | ||
585 | unlock_page(page); | ||
586 | } | ||
587 | if (PageDirty(page) || PageWriteback(page)) | ||
588 | *uptodate = true; | ||
589 | else | ||
590 | *uptodate = PageUptodate(page); | ||
591 | EXOFS_DBGMSG("index=0x%lx uptodate=%d\n", index, *uptodate); | ||
592 | return page; | ||
593 | } else { | ||
594 | EXOFS_DBGMSG("YES that_locked_page index=0x%lx\n", | ||
595 | pcol->that_locked_page->index); | ||
596 | *uptodate = true; | ||
597 | return pcol->that_locked_page; | ||
598 | } | ||
599 | } | ||
600 | |||
601 | static void __r4w_put_page(void *priv, struct page *page) | ||
602 | { | ||
603 | struct page_collect *pcol = priv; | ||
604 | |||
605 | if (pcol->that_locked_page != page) { | ||
606 | EXOFS_DBGMSG("index=0x%lx\n", page->index); | ||
607 | page_cache_release(page); | ||
608 | return; | ||
609 | } | ||
610 | EXOFS_DBGMSG("that_locked_page index=0x%lx\n", page->index); | ||
611 | } | ||
612 | |||
613 | static const struct _ore_r4w_op _r4w_op = { | ||
614 | .get_page = &__r4w_get_page, | ||
615 | .put_page = &__r4w_put_page, | ||
616 | }; | ||
617 | |||
563 | static int write_exec(struct page_collect *pcol) | 618 | static int write_exec(struct page_collect *pcol) |
564 | { | 619 | { |
565 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 620 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
@@ -589,6 +644,7 @@ static int write_exec(struct page_collect *pcol) | |||
589 | ios = pcol->ios; | 644 | ios = pcol->ios; |
590 | ios->pages = pcol_copy->pages; | 645 | ios->pages = pcol_copy->pages; |
591 | ios->done = writepages_done; | 646 | ios->done = writepages_done; |
647 | ios->r4w = &_r4w_op; | ||
592 | ios->private = pcol_copy; | 648 | ios->private = pcol_copy; |
593 | 649 | ||
594 | /* pages ownership was passed to pcol_copy */ | 650 | /* pages ownership was passed to pcol_copy */ |
@@ -773,6 +829,7 @@ static int exofs_writepages(struct address_space *mapping, | |||
773 | return 0; | 829 | return 0; |
774 | } | 830 | } |
775 | 831 | ||
832 | /* | ||
776 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) | 833 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) |
777 | { | 834 | { |
778 | struct page_collect pcol; | 835 | struct page_collect pcol; |
@@ -788,7 +845,7 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc) | |||
788 | 845 | ||
789 | return write_exec(&pcol); | 846 | return write_exec(&pcol); |
790 | } | 847 | } |
791 | 848 | */ | |
792 | /* i_mutex held using inode->i_size directly */ | 849 | /* i_mutex held using inode->i_size directly */ |
793 | static void _write_failed(struct inode *inode, loff_t to) | 850 | static void _write_failed(struct inode *inode, loff_t to) |
794 | { | 851 | { |
@@ -894,7 +951,7 @@ static void exofs_invalidatepage(struct page *page, unsigned long offset) | |||
894 | const struct address_space_operations exofs_aops = { | 951 | const struct address_space_operations exofs_aops = { |
895 | .readpage = exofs_readpage, | 952 | .readpage = exofs_readpage, |
896 | .readpages = exofs_readpages, | 953 | .readpages = exofs_readpages, |
897 | .writepage = exofs_writepage, | 954 | .writepage = NULL, |
898 | .writepages = exofs_writepages, | 955 | .writepages = exofs_writepages, |
899 | .write_begin = exofs_write_begin_export, | 956 | .write_begin = exofs_write_begin_export, |
900 | .write_end = exofs_write_end, | 957 | .write_end = exofs_write_end, |