aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Boaz Harrosh <bharrosh@panasas.com> 2010-01-28 11:24:06 -0500
committer Boaz Harrosh <bharrosh@panasas.com> 2010-02-28 06:44:42 -0500
commit 86093aaff5be5b214613eb60553e236bdb389c84 (patch)
tree 64993f3fff8b60408441e8912aa5690346108492
parent 5d952b8391692553c31e620a92d6e09262a9a307 (diff)
exofs: convert io_state to use pages array instead of bio at input
* inode.c operations are full-pages based, and not actually true scatter-gather * Lets us use more pages at once, up to 512 (from 249) in 64 bit * Brings us much much closer to being able to use exofs's io_state engine from objlayout driver. (Once I decide where to put the common code) After RAID0 patch the outer (input) bio was never used as a bio, but was simply a page carrier into the raid engine. Even in the simple mirror/single-dev arrangement pages info was copied into a second bio. It is now easier to just pass a pages array into the io_state and prepare bio(s) once. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
-rw-r--r-- fs/exofs/exofs.h 5
-rw-r--r-- fs/exofs/inode.c 81
-rw-r--r-- fs/exofs/ios.c 46
3 files changed, 71 insertions, 61 deletions
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 0d8a34b21ae1..acfebd36de83 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -128,7 +128,10 @@ struct exofs_io_state {
128 loff_t offset; 128 loff_t offset;
129 unsigned long length; 129 unsigned long length;
130 void *kern_buff; 130 void *kern_buff;
131 struct bio *bio; 131
132 struct page **pages;
133 unsigned nr_pages;
134 unsigned pgbase;
132 135
133 /* Attributes */ 136 /* Attributes */
134 unsigned in_attr_len; 137 unsigned in_attr_len;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 2b3163ea56eb..6ca0b0117f04 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -41,16 +41,18 @@
41 41
42enum { BIO_MAX_PAGES_KMALLOC = 42enum { BIO_MAX_PAGES_KMALLOC =
43 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), 43 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
44 MAX_PAGES_KMALLOC =
45 PAGE_SIZE / sizeof(struct page *),
44}; 46};
45 47
46struct page_collect { 48struct page_collect {
47 struct exofs_sb_info *sbi; 49 struct exofs_sb_info *sbi;
48 struct request_queue *req_q;
49 struct inode *inode; 50 struct inode *inode;
50 unsigned expected_pages; 51 unsigned expected_pages;
51 struct exofs_io_state *ios; 52 struct exofs_io_state *ios;
52 53
53 struct bio *bio; 54 struct page **pages;
55 unsigned alloc_pages;
54 unsigned nr_pages; 56 unsigned nr_pages;
55 unsigned long length; 57 unsigned long length;
56 loff_t pg_first; /* keep 64bit also in 32-arches */ 58 loff_t pg_first; /* keep 64bit also in 32-arches */
@@ -62,15 +64,12 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
62 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 64 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
63 65
64 pcol->sbi = sbi; 66 pcol->sbi = sbi;
65 /* Create master bios on first Q, later on cloning, each clone will be
66 * allocated on it's destination Q
67 */
68 pcol->req_q = osd_request_queue(sbi->layout.s_ods[0]);
69 pcol->inode = inode; 67 pcol->inode = inode;
70 pcol->expected_pages = expected_pages; 68 pcol->expected_pages = expected_pages;
71 69
72 pcol->ios = NULL; 70 pcol->ios = NULL;
73 pcol->bio = NULL; 71 pcol->pages = NULL;
72 pcol->alloc_pages = 0;
74 pcol->nr_pages = 0; 73 pcol->nr_pages = 0;
75 pcol->length = 0; 74 pcol->length = 0;
76 pcol->pg_first = -1; 75 pcol->pg_first = -1;
@@ -80,7 +79,8 @@ static void _pcol_reset(struct page_collect *pcol)
80{ 79{
81 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); 80 pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
82 81
83 pcol->bio = NULL; 82 pcol->pages = NULL;
83 pcol->alloc_pages = 0;
84 pcol->nr_pages = 0; 84 pcol->nr_pages = 0;
85 pcol->length = 0; 85 pcol->length = 0;
86 pcol->pg_first = -1; 86 pcol->pg_first = -1;
@@ -90,13 +90,13 @@ static void _pcol_reset(struct page_collect *pcol)
90 * it might not end here. don't be left with nothing 90 * it might not end here. don't be left with nothing
91 */ 91 */
92 if (!pcol->expected_pages) 92 if (!pcol->expected_pages)
93 pcol->expected_pages = BIO_MAX_PAGES_KMALLOC; 93 pcol->expected_pages = MAX_PAGES_KMALLOC;
94} 94}
95 95
96static int pcol_try_alloc(struct page_collect *pcol) 96static int pcol_try_alloc(struct page_collect *pcol)
97{ 97{
98 int pages = min_t(unsigned, pcol->expected_pages, 98 unsigned pages = min_t(unsigned, pcol->expected_pages,
99 BIO_MAX_PAGES_KMALLOC); 99 MAX_PAGES_KMALLOC);
100 100
101 if (!pcol->ios) { /* First time allocate io_state */ 101 if (!pcol->ios) { /* First time allocate io_state */
102 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); 102 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -105,23 +105,28 @@ static int pcol_try_alloc(struct page_collect *pcol)
105 return ret; 105 return ret;
106 } 106 }
107 107
108 /* TODO: easily support bio chaining */
109 pages = min_t(unsigned, pages,
110 pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
111
108 for (; pages; pages >>= 1) { 112 for (; pages; pages >>= 1) {
109 pcol->bio = bio_kmalloc(GFP_KERNEL, pages); 113 pcol->pages = kmalloc(pages * sizeof(struct page *),
110 if (likely(pcol->bio)) 114 GFP_KERNEL);
115 if (likely(pcol->pages)) {
116 pcol->alloc_pages = pages;
111 return 0; 117 return 0;
118 }
112 } 119 }
113 120
114 EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n", 121 EXOFS_ERR("Failed to kmalloc expected_pages=%u\n",
115 pcol->expected_pages); 122 pcol->expected_pages);
116 return -ENOMEM; 123 return -ENOMEM;
117} 124}
118 125
119static void pcol_free(struct page_collect *pcol) 126static void pcol_free(struct page_collect *pcol)
120{ 127{
121 if (pcol->bio) { 128 kfree(pcol->pages);
122 bio_put(pcol->bio); 129 pcol->pages = NULL;
123 pcol->bio = NULL;
124 }
125 130
126 if (pcol->ios) { 131 if (pcol->ios) {
127 exofs_put_io_state(pcol->ios); 132 exofs_put_io_state(pcol->ios);
@@ -132,11 +137,10 @@ static void pcol_free(struct page_collect *pcol)
132static int pcol_add_page(struct page_collect *pcol, struct page *page, 137static int pcol_add_page(struct page_collect *pcol, struct page *page,
133 unsigned len) 138 unsigned len)
134{ 139{
135 int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); 140 if (unlikely(pcol->nr_pages >= pcol->alloc_pages))
136 if (unlikely(len != added_len))
137 return -ENOMEM; 141 return -ENOMEM;
138 142
139 ++pcol->nr_pages; 143 pcol->pages[pcol->nr_pages++] = page;
140 pcol->length += len; 144 pcol->length += len;
141 return 0; 145 return 0;
142} 146}
@@ -181,7 +185,6 @@ static void update_write_page(struct page *page, int ret)
181 */ 185 */
182static int __readpages_done(struct page_collect *pcol, bool do_unlock) 186static int __readpages_done(struct page_collect *pcol, bool do_unlock)
183{ 187{
184 struct bio_vec *bvec;
185 int i; 188 int i;
186 u64 resid; 189 u64 resid;
187 u64 good_bytes; 190 u64 good_bytes;
@@ -198,8 +201,8 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
198 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 201 pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
199 pcol->nr_pages); 202 pcol->nr_pages);
200 203
201 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 204 for (i = 0; i < pcol->nr_pages; i++) {
202 struct page *page = bvec->bv_page; 205 struct page *page = pcol->pages[i];
203 struct inode *inode = page->mapping->host; 206 struct inode *inode = page->mapping->host;
204 int page_stat; 207 int page_stat;
205 208
@@ -218,7 +221,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock)
218 ret = update_read_page(page, page_stat); 221 ret = update_read_page(page, page_stat);
219 if (do_unlock) 222 if (do_unlock)
220 unlock_page(page); 223 unlock_page(page);
221 length += bvec->bv_len; 224 length += PAGE_SIZE;
222 } 225 }
223 226
224 pcol_free(pcol); 227 pcol_free(pcol);
@@ -238,11 +241,10 @@ static void readpages_done(struct exofs_io_state *ios, void *p)
238 241
239static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 242static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
240{ 243{
241 struct bio_vec *bvec;
242 int i; 244 int i;
243 245
244 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 246 for (i = 0; i < pcol->nr_pages; i++) {
245 struct page *page = bvec->bv_page; 247 struct page *page = pcol->pages[i];
246 248
247 if (rw == READ) 249 if (rw == READ)
248 update_read_page(page, ret); 250 update_read_page(page, ret);
@@ -260,13 +262,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
260 struct page_collect *pcol_copy = NULL; 262 struct page_collect *pcol_copy = NULL;
261 int ret; 263 int ret;
262 264
263 if (!pcol->bio) 265 if (!pcol->pages)
264 return 0; 266 return 0;
265 267
266 /* see comment in _readpage() about sync reads */ 268 /* see comment in _readpage() about sync reads */
267 WARN_ON(is_sync && (pcol->nr_pages != 1)); 269 WARN_ON(is_sync && (pcol->nr_pages != 1));
268 270
269 ios->bio = pcol->bio; 271 ios->pages = pcol->pages;
272 ios->nr_pages = pcol->nr_pages;
270 ios->length = pcol->length; 273 ios->length = pcol->length;
271 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; 274 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
272 275
@@ -366,7 +369,7 @@ try_again:
366 goto try_again; 369 goto try_again;
367 } 370 }
368 371
369 if (!pcol->bio) { 372 if (!pcol->pages) {
370 ret = pcol_try_alloc(pcol); 373 ret = pcol_try_alloc(pcol);
371 if (unlikely(ret)) 374 if (unlikely(ret))
372 goto fail; 375 goto fail;
@@ -448,7 +451,6 @@ static int exofs_readpage(struct file *file, struct page *page)
448static void writepages_done(struct exofs_io_state *ios, void *p) 451static void writepages_done(struct exofs_io_state *ios, void *p)
449{ 452{
450 struct page_collect *pcol = p; 453 struct page_collect *pcol = p;
451 struct bio_vec *bvec;
452 int i; 454 int i;
453 u64 resid; 455 u64 resid;
454 u64 good_bytes; 456 u64 good_bytes;
@@ -467,8 +469,8 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
467 pcol->inode->i_ino, _LLU(good_bytes), pcol->length, 469 pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
468 pcol->nr_pages); 470 pcol->nr_pages);
469 471
470 __bio_for_each_segment(bvec, pcol->bio, i, 0) { 472 for (i = 0; i < pcol->nr_pages; i++) {
471 struct page *page = bvec->bv_page; 473 struct page *page = pcol->pages[i];
472 struct inode *inode = page->mapping->host; 474 struct inode *inode = page->mapping->host;
473 int page_stat; 475 int page_stat;
474 476
@@ -485,7 +487,7 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
485 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", 487 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n",
486 inode->i_ino, page->index, page_stat); 488 inode->i_ino, page->index, page_stat);
487 489
488 length += bvec->bv_len; 490 length += PAGE_SIZE;
489 } 491 }
490 492
491 pcol_free(pcol); 493 pcol_free(pcol);
@@ -500,7 +502,7 @@ static int write_exec(struct page_collect *pcol)
500 struct page_collect *pcol_copy = NULL; 502 struct page_collect *pcol_copy = NULL;
501 int ret; 503 int ret;
502 504
503 if (!pcol->bio) 505 if (!pcol->pages)
504 return 0; 506 return 0;
505 507
506 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 508 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -512,9 +514,8 @@ static int write_exec(struct page_collect *pcol)
512 514
513 *pcol_copy = *pcol; 515 *pcol_copy = *pcol;
514 516
515 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ 517 ios->pages = pcol_copy->pages;
516 518 ios->nr_pages = pcol_copy->nr_pages;
517 ios->bio = pcol_copy->bio;
518 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; 519 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
519 ios->length = pcol_copy->length; 520 ios->length = pcol_copy->length;
520 ios->done = writepages_done; 521 ios->done = writepages_done;
@@ -605,7 +606,7 @@ try_again:
605 goto try_again; 606 goto try_again;
606 } 607 }
607 608
608 if (!pcol->bio) { 609 if (!pcol->pages) {
609 ret = pcol_try_alloc(pcol); 610 ret = pcol_try_alloc(pcol);
610 if (unlikely(ret)) 611 if (unlikely(ret))
611 goto fail; 612 goto fail;
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 6e446b2670b9..263052c77f41 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -283,10 +283,11 @@ static void _offset_dev_unit_off(struct exofs_io_state *ios, u64 file_offset,
283 *dev = stripe_mod / stripe_unit * ios->layout->mirrors_p1; 283 *dev = stripe_mod / stripe_unit * ios->layout->mirrors_p1;
284} 284}
285 285
286static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec, 286static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
287 struct exofs_per_dev_state *per_dev, int cur_len) 287 unsigned pgbase, struct exofs_per_dev_state *per_dev,
288 int cur_len)
288{ 289{
289 unsigned bv = *cur_bvec; 290 unsigned pg = *cur_pg;
290 struct request_queue *q = 291 struct request_queue *q =
291 osd_request_queue(exofs_ios_od(ios, per_dev->dev)); 292 osd_request_queue(exofs_ios_od(ios, per_dev->dev));
292 293
@@ -295,7 +296,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec,
295 if (per_dev->bio == NULL) { 296 if (per_dev->bio == NULL) {
296 unsigned pages_in_stripe = ios->layout->group_width * 297 unsigned pages_in_stripe = ios->layout->group_width *
297 (ios->layout->stripe_unit / PAGE_SIZE); 298 (ios->layout->stripe_unit / PAGE_SIZE);
298 unsigned bio_size = (ios->bio->bi_vcnt + pages_in_stripe) / 299 unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
299 ios->layout->group_width; 300 ios->layout->group_width;
300 301
301 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); 302 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
@@ -307,21 +308,22 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec,
307 } 308 }
308 309
309 while (cur_len > 0) { 310 while (cur_len > 0) {
310 int added_len; 311 unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
311 struct bio_vec *bvec = &ios->bio->bi_io_vec[bv]; 312 unsigned added_len;
312 313
313 BUG_ON(ios->bio->bi_vcnt <= bv); 314 BUG_ON(ios->nr_pages <= pg);
314 cur_len -= bvec->bv_len; 315 cur_len -= pglen;
315 316
316 added_len = bio_add_pc_page(q, per_dev->bio, bvec->bv_page, 317 added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
317 bvec->bv_len, bvec->bv_offset); 318 pglen, pgbase);
318 if (unlikely(bvec->bv_len != added_len)) 319 if (unlikely(pglen != added_len))
319 return -ENOMEM; 320 return -ENOMEM;
320 ++bv; 321 pgbase = 0;
322 ++pg;
321 } 323 }
322 BUG_ON(cur_len); 324 BUG_ON(cur_len);
323 325
324 *cur_bvec = bv; 326 *cur_pg = pg;
325 return 0; 327 return 0;
326} 328}
327 329
@@ -332,10 +334,10 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
332 unsigned stripe_unit = ios->layout->stripe_unit; 334 unsigned stripe_unit = ios->layout->stripe_unit;
333 unsigned comp = 0; 335 unsigned comp = 0;
334 unsigned stripes = 0; 336 unsigned stripes = 0;
335 unsigned cur_bvec = 0; 337 unsigned cur_pg = 0;
336 int ret; 338 int ret = 0;
337 339
338 if (!ios->bio) { 340 if (!ios->pages) {
339 if (ios->kern_buff) { 341 if (ios->kern_buff) {
340 struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; 342 struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
341 unsigned unit_off; 343 unsigned unit_off;
@@ -352,7 +354,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
352 354
353 while (length) { 355 while (length) {
354 struct exofs_per_dev_state *per_dev = &ios->per_dev[comp]; 356 struct exofs_per_dev_state *per_dev = &ios->per_dev[comp];
355 unsigned cur_len; 357 unsigned cur_len, page_off;
356 358
357 if (!per_dev->length) { 359 if (!per_dev->length) {
358 unsigned unit_off; 360 unsigned unit_off;
@@ -362,11 +364,15 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
362 stripes++; 364 stripes++;
363 cur_len = min_t(u64, stripe_unit - unit_off, length); 365 cur_len = min_t(u64, stripe_unit - unit_off, length);
364 offset += cur_len; 366 offset += cur_len;
367 page_off = unit_off & ~PAGE_MASK;
368 BUG_ON(page_off != ios->pgbase);
365 } else { 369 } else {
366 cur_len = min_t(u64, stripe_unit, length); 370 cur_len = min_t(u64, stripe_unit, length);
371 page_off = 0;
367 } 372 }
368 373
369 ret = _add_stripe_unit(ios, &cur_bvec, per_dev, cur_len); 374 ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
375 cur_len);
370 if (unlikely(ret)) 376 if (unlikely(ret))
371 goto out; 377 goto out;
372 378
@@ -448,7 +454,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
448 per_dev->or = or; 454 per_dev->or = or;
449 per_dev->offset = master_dev->offset; 455 per_dev->offset = master_dev->offset;
450 456
451 if (ios->bio) { 457 if (ios->pages) {
452 struct bio *bio; 458 struct bio *bio;
453 459
454 if (per_dev != master_dev) { 460 if (per_dev != master_dev) {
@@ -541,7 +547,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
541 } 547 }
542 per_dev->or = or; 548 per_dev->or = or;
543 549
544 if (ios->bio) { 550 if (ios->pages) {
545 osd_req_read(or, &ios->obj, per_dev->offset, 551 osd_req_read(or, &ios->obj, per_dev->offset,
546 per_dev->bio, per_dev->length); 552 per_dev->bio, per_dev->length);
547 EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" 553 EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"