author     Boaz Harrosh <bharrosh@panasas.com>    2010-01-28 11:24:06 -0500
committer  Boaz Harrosh <bharrosh@panasas.com>    2010-02-28 06:44:42 -0500
commit     86093aaff5be5b214613eb60553e236bdb389c84 (patch)
tree       64993f3fff8b60408441e8912aa5690346108492 /fs/exofs/inode.c
parent     5d952b8391692553c31e620a92d6e09262a9a307 (diff)
exofs: convert io_state to use pages array instead of bio at input
* inode.c operations are full-page based, not actually true
  scatter-gather
* Lets us use more pages at once, up to 512 (from 249) on 64-bit
  (see the arithmetic sketch after this list)
* Brings us much closer to being able to use exofs's io_state engine
  from the objlayout driver (once I decide where to put the common code)
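
The 512-versus-249 numbers follow from the two limits this patch touches
(BIO_MAX_PAGES_KMALLOC and the new MAX_PAGES_KMALLOC, defined in the first
hunk below). A back-of-the-envelope check, using struct sizes that are
assumptions for a typical 64-bit build of that era, not values taken from
any particular tree:

#include <stdio.h>

/* Assumed 64-bit sizes, for illustration only. */
#define ASSUMED_PAGE_SIZE     4096u
#define ASSUMED_PAGE_PTR_SIZE 8u    /* sizeof(struct page *)               */
#define ASSUMED_BIO_VEC_SIZE  16u   /* page pointer + length + offset      */
#define ASSUMED_BIO_SIZE      104u  /* rough size of struct bio back then  */

int main(void)
{
	/* Old limit: bio_vecs that fit in one kmalloc'd page next to the
	 * struct bio header itself -> (4096 - 104) / 16 = 249.
	 */
	unsigned bio_max_pages_kmalloc =
		(ASSUMED_PAGE_SIZE - ASSUMED_BIO_SIZE) / ASSUMED_BIO_VEC_SIZE;

	/* New limit: page pointers that fit in one page -> 4096 / 8 = 512. */
	unsigned max_pages_kmalloc = ASSUMED_PAGE_SIZE / ASSUMED_PAGE_PTR_SIZE;

	printf("bio-based limit: %u pages\n", bio_max_pages_kmalloc);
	printf("pages[] limit:   %u pages\n", max_pages_kmalloc);
	return 0;
}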
After the RAID0 patch, the outer (input) bio was never used as a bio; it
was simply a page carrier into the RAID engine. Even in the simple
mirror/single-device arrangement, the page info was copied into a second
bio. It is now easier to just pass a pages array into the io_state and
prepare the bio(s) once.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
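
To make the hand-off described above concrete, here is a minimal sketch of
the new pattern: the collector fills a plain page array, and the whole
array is handed to the io engine in one shot, which can then build any
per-device bio(s) itself. All type and function names here are stand-ins,
not the real exofs API; only the field names (pages, nr_pages, length,
offset) mirror the patch.

struct page;                          /* opaque here, as in the kernel */

struct io_state_sketch {              /* stand-in for exofs_io_state   */
	struct page **pages;          /* input is now a plain array    */
	unsigned nr_pages;
	unsigned long length;
	long long offset;
};

struct page_collect_sketch {          /* stand-in for page_collect     */
	struct page **pages;          /* was: struct bio *bio          */
	unsigned alloc_pages;         /* capacity of pages[]           */
	unsigned nr_pages;            /* pages collected so far        */
	unsigned long length;
	long long pg_first;
};

/* Collect one full page; the caller flushes and retries when the
 * preallocated array is full (mirrors pcol_add_page() after the patch).
 */
int add_page_sketch(struct page_collect_sketch *pcol, struct page *page,
		    unsigned len)
{
	if (pcol->nr_pages >= pcol->alloc_pages)
		return -1;            /* out of room, like -ENOMEM */
	pcol->pages[pcol->nr_pages++] = page;
	pcol->length += len;
	return 0;
}

/* Hand the whole collection to the io engine in one shot; bio(s) are
 * prepared later, inside the engine, per target device (mirrors
 * read_exec()/write_exec() after the patch; 12 = assumed PAGE_SHIFT
 * for 4K pages).
 */
void exec_sketch(struct io_state_sketch *ios, struct page_collect_sketch *pcol)
{
	ios->pages = pcol->pages;
	ios->nr_pages = pcol->nr_pages;
	ios->length = pcol->length;
	ios->offset = pcol->pg_first << 12;
	/* ... submit ios, free the array when the io completes ... */
}

The point of the change is that a page array is device-agnostic, so the
same collection can feed a mirror or a RAID0 stripe without being copied
into per-device bios first.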
Diffstat (limited to 'fs/exofs/inode.c')
-rw-r--r--   fs/exofs/inode.c   81
1 file changed, 41 insertions(+), 40 deletions(-)
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 2b3163ea56eb..6ca0b0117f04 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -41,16 +41,18 @@
 
 enum { BIO_MAX_PAGES_KMALLOC =
 	(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
+	MAX_PAGES_KMALLOC =
+	PAGE_SIZE / sizeof(struct page *),
 };
 
 struct page_collect {
 	struct exofs_sb_info *sbi;
-	struct request_queue *req_q;
 	struct inode *inode;
 	unsigned expected_pages;
 	struct exofs_io_state *ios;
 
-	struct bio *bio;
+	struct page **pages;
+	unsigned alloc_pages;
 	unsigned nr_pages;
 	unsigned long length;
 	loff_t pg_first; /* keep 64bit also in 32-arches */
@@ -62,15 +64,12 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 
 	pcol->sbi = sbi;
-	/* Create master bios on first Q, later on cloning, each clone will be
-	 * allocated on it's destination Q
-	 */
-	pcol->req_q = osd_request_queue(sbi->layout.s_ods[0]);
 	pcol->inode = inode;
 	pcol->expected_pages = expected_pages;
 
 	pcol->ios = NULL;
-	pcol->bio = NULL;
+	pcol->pages = NULL;
+	pcol->alloc_pages = 0;
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
@@ -80,7 +79,8 @@ static void _pcol_reset(struct page_collect *pcol)
 {
 	pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
 
-	pcol->bio = NULL;
+	pcol->pages = NULL;
+	pcol->alloc_pages = 0;
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
@@ -90,13 +90,13 @@ static void _pcol_reset(struct page_collect *pcol)
 	 * it might not end here. don't be left with nothing
 	 */
 	if (!pcol->expected_pages)
-		pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
+		pcol->expected_pages = MAX_PAGES_KMALLOC;
 }
 
 static int pcol_try_alloc(struct page_collect *pcol)
 {
-	int pages = min_t(unsigned, pcol->expected_pages,
-			  BIO_MAX_PAGES_KMALLOC);
+	unsigned pages = min_t(unsigned, pcol->expected_pages,
+			       MAX_PAGES_KMALLOC);
 
 	if (!pcol->ios) { /* First time allocate io_state */
 		int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -105,23 +105,28 @@ static int pcol_try_alloc(struct page_collect *pcol)
 			return ret;
 	}
 
+	/* TODO: easily support bio chaining */
+	pages = min_t(unsigned, pages,
+		      pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
+
 	for (; pages; pages >>= 1) {
-		pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
-		if (likely(pcol->bio))
+		pcol->pages = kmalloc(pages * sizeof(struct page *),
+				      GFP_KERNEL);
+		if (likely(pcol->pages)) {
+			pcol->alloc_pages = pages;
 			return 0;
+		}
 	}
 
-	EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
+	EXOFS_ERR("Failed to kmalloc expected_pages=%u\n",
 		  pcol->expected_pages);
 	return -ENOMEM;
 }
 
 static void pcol_free(struct page_collect *pcol)
 {
-	if (pcol->bio) {
-		bio_put(pcol->bio);
-		pcol->bio = NULL;
-	}
+	kfree(pcol->pages);
+	pcol->pages = NULL;
 
 	if (pcol->ios) {
 		exofs_put_io_state(pcol->ios);
@@ -132,11 +137,10 @@ static void pcol_free(struct page_collect *pcol)
 static int pcol_add_page(struct page_collect *pcol, struct page *page,
 			 unsigned len)
 {
-	int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0);
-	if (unlikely(len != added_len))
+	if (unlikely(pcol->nr_pages >= pcol->alloc_pages))
 		return -ENOMEM;
 
-	++pcol->nr_pages;
+	pcol->pages[pcol->nr_pages++] = page;
 	pcol->length += len;
 	return 0;
 }
@@ -181,7 +185,6 @@ static void update_write_page(struct page *page, int ret)
  */
 static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 {
-	struct bio_vec *bvec;
 	int i;
 	u64 resid;
 	u64 good_bytes;
@@ -198,8 +201,8 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
 		     pcol->nr_pages);
 
-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];
 		struct inode *inode = page->mapping->host;
 		int page_stat;
 
@@ -218,7 +221,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 		ret = update_read_page(page, page_stat);
 		if (do_unlock)
 			unlock_page(page);
-		length += bvec->bv_len;
+		length += PAGE_SIZE;
 	}
 
 	pcol_free(pcol);
@@ -238,11 +241,10 @@ static void readpages_done(struct exofs_io_state *ios, void *p)
 
 static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
 {
-	struct bio_vec *bvec;
 	int i;
 
-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];
 
 		if (rw == READ)
 			update_read_page(page, ret);
@@ -260,13 +262,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
-	if (!pcol->bio)
+	if (!pcol->pages)
 		return 0;
 
 	/* see comment in _readpage() about sync reads */
 	WARN_ON(is_sync && (pcol->nr_pages != 1));
 
-	ios->bio = pcol->bio;
+	ios->pages = pcol->pages;
+	ios->nr_pages = pcol->nr_pages;
 	ios->length = pcol->length;
 	ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
 
@@ -366,7 +369,7 @@ try_again:
 		goto try_again;
 	}
 
-	if (!pcol->bio) {
+	if (!pcol->pages) {
 		ret = pcol_try_alloc(pcol);
 		if (unlikely(ret))
 			goto fail;
@@ -448,7 +451,6 @@ static int exofs_readpage(struct file *file, struct page *page)
 static void writepages_done(struct exofs_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
-	struct bio_vec *bvec;
 	int i;
 	u64 resid;
 	u64 good_bytes;
@@ -467,8 +469,8 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
 		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
 		     pcol->nr_pages);
 
-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];
 		struct inode *inode = page->mapping->host;
 		int page_stat;
 
@@ -485,7 +487,7 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
 		EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n",
 			      inode->i_ino, page->index, page_stat);
 
-		length += bvec->bv_len;
+		length += PAGE_SIZE;
 	}
 
 	pcol_free(pcol);
@@ -500,7 +502,7 @@ static int write_exec(struct page_collect *pcol)
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
-	if (!pcol->bio)
+	if (!pcol->pages)
 		return 0;
 
 	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -512,9 +514,8 @@ static int write_exec(struct page_collect *pcol)
 
 	*pcol_copy = *pcol;
 
-	pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
-
-	ios->bio = pcol_copy->bio;
+	ios->pages = pcol_copy->pages;
+	ios->nr_pages = pcol_copy->nr_pages;
 	ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
 	ios->length = pcol_copy->length;
 	ios->done = writepages_done;
@@ -605,7 +606,7 @@ try_again:
 		goto try_again;
 	}
 
-	if (!pcol->bio) {
+	if (!pcol->pages) {
 		ret = pcol_try_alloc(pcol);
 		if (unlikely(ret))
 			goto fail;