aboutsummaryrefslogtreecommitdiffstats
path: root/fs/direct-io.c
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2011-08-02 00:38:03 -0400
committerroot <root@serles.lst.de>2011-10-28 08:58:56 -0400
commiteb28be2b4c0a0608e54f0a8fc237372c674eb7d0 (patch)
tree679aaf28fc8ed145aff89b6d4e4bb1c7ddee6318 /fs/direct-io.c
parent62a3ddef6181d7d932c565d97552d2f7b9ab4d28 (diff)
direct-io: separate fields only used in the submission path from struct dio
This large, but largely mechanical, patch moves all fields in struct dio that are only used in the submission path into a separate on-stack data structure. This has the advantage that the memory is very likely cache hot, which is not guaranteed for memory fresh out of kmalloc. This also gives gcc more optimization potential because it can more easily determine that there are no external aliases for these variables. The sdio setup is now an initialization instead of a memset. This allows gcc to break sdio into individual fields and optimize away unnecessary zeroing (after all the functions are inlined). Signed-off-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jeff Moyer <jmoyer@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r--fs/direct-io.c389
1 files changed, 201 insertions, 188 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 44a360ca8046..02ccf766903c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -55,13 +55,10 @@
55 * blocksize. 55 * blocksize.
56 */ 56 */
57 57
58struct dio { 58/* dio_state only used in the submission path */
59 /* BIO submission state */ 59
60struct dio_submit {
60 struct bio *bio; /* bio under assembly */ 61 struct bio *bio; /* bio under assembly */
61 struct inode *inode;
62 int rw;
63 loff_t i_size; /* i_size when submitted */
64 int flags; /* doesn't change */
65 unsigned blkbits; /* doesn't change */ 62 unsigned blkbits; /* doesn't change */
66 unsigned blkfactor; /* When we're using an alignment which 63 unsigned blkfactor; /* When we're using an alignment which
67 is finer than the filesystem's soft 64 is finer than the filesystem's soft
@@ -81,13 +78,12 @@ struct dio {
81 int boundary; /* prev block is at a boundary */ 78 int boundary; /* prev block is at a boundary */
82 int reap_counter; /* rate limit reaping */ 79 int reap_counter; /* rate limit reaping */
83 get_block_t *get_block; /* block mapping function */ 80 get_block_t *get_block; /* block mapping function */
84 dio_iodone_t *end_io; /* IO completion function */
85 dio_submit_t *submit_io; /* IO submition function */ 81 dio_submit_t *submit_io; /* IO submition function */
82
86 loff_t logical_offset_in_bio; /* current first logical block in bio */ 83 loff_t logical_offset_in_bio; /* current first logical block in bio */
87 sector_t final_block_in_bio; /* current final block in bio + 1 */ 84 sector_t final_block_in_bio; /* current final block in bio + 1 */
88 sector_t next_block_for_io; /* next block to be put under IO, 85 sector_t next_block_for_io; /* next block to be put under IO,
89 in dio_blocks units */ 86 in dio_blocks units */
90 struct buffer_head map_bh; /* last get_block() result */
91 87
92 /* 88 /*
93 * Deferred addition of a page to the dio. These variables are 89 * Deferred addition of a page to the dio. These variables are
@@ -100,18 +96,6 @@ struct dio {
100 sector_t cur_page_block; /* Where it starts */ 96 sector_t cur_page_block; /* Where it starts */
101 loff_t cur_page_fs_offset; /* Offset in file */ 97 loff_t cur_page_fs_offset; /* Offset in file */
102 98
103 /* BIO completion state */
104 spinlock_t bio_lock; /* protects BIO fields below */
105 unsigned long refcount; /* direct_io_worker() and bios */
106 struct bio *bio_list; /* singly linked via bi_private */
107 struct task_struct *waiter; /* waiting task (NULL if none) */
108
109 /* AIO related stuff */
110 struct kiocb *iocb; /* kiocb */
111 int is_async; /* is IO async ? */
112 int io_error; /* IO error in completion path */
113 ssize_t result; /* IO result */
114
115 /* 99 /*
116 * Page fetching state. These variables belong to dio_refill_pages(). 100 * Page fetching state. These variables belong to dio_refill_pages().
117 */ 101 */
@@ -125,6 +109,30 @@ struct dio {
125 */ 109 */
126 unsigned head; /* next page to process */ 110 unsigned head; /* next page to process */
127 unsigned tail; /* last valid page + 1 */ 111 unsigned tail; /* last valid page + 1 */
112};
113
114/* dio_state communicated between submission path and end_io */
115struct dio {
116 int flags; /* doesn't change */
117 struct inode *inode;
118 int rw;
119 loff_t i_size; /* i_size when submitted */
120 dio_iodone_t *end_io; /* IO completion function */
121 struct buffer_head map_bh; /* last get_block() result */
122
123
124 /* BIO completion state */
125 spinlock_t bio_lock; /* protects BIO fields below */
126 unsigned long refcount; /* direct_io_worker() and bios */
127 struct bio *bio_list; /* singly linked via bi_private */
128 struct task_struct *waiter; /* waiting task (NULL if none) */
129
130 /* AIO related stuff */
131 struct kiocb *iocb; /* kiocb */
132 int is_async; /* is IO async ? */
133 int io_error; /* IO error in completion path */
134 ssize_t result; /* IO result */
135
128 int page_errors; /* errno from get_user_pages() */ 136 int page_errors; /* errno from get_user_pages() */
129 137
130 /* 138 /*
@@ -182,27 +190,27 @@ EXPORT_SYMBOL_GPL(inode_dio_done);
182/* 190/*
183 * How many pages are in the queue? 191 * How many pages are in the queue?
184 */ 192 */
185static inline unsigned dio_pages_present(struct dio *dio) 193static inline unsigned dio_pages_present(struct dio_submit *sdio)
186{ 194{
187 return dio->tail - dio->head; 195 return sdio->tail - sdio->head;
188} 196}
189 197
190/* 198/*
191 * Go grab and pin some userspace pages. Typically we'll get 64 at a time. 199 * Go grab and pin some userspace pages. Typically we'll get 64 at a time.
192 */ 200 */
193static int dio_refill_pages(struct dio *dio) 201static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
194{ 202{
195 int ret; 203 int ret;
196 int nr_pages; 204 int nr_pages;
197 205
198 nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES); 206 nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
199 ret = get_user_pages_fast( 207 ret = get_user_pages_fast(
200 dio->curr_user_address, /* Where from? */ 208 sdio->curr_user_address, /* Where from? */
201 nr_pages, /* How many pages? */ 209 nr_pages, /* How many pages? */
202 dio->rw == READ, /* Write to memory? */ 210 dio->rw == READ, /* Write to memory? */
203 &dio->pages[0]); /* Put results here */ 211 &dio->pages[0]); /* Put results here */
204 212
205 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { 213 if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
206 struct page *page = ZERO_PAGE(0); 214 struct page *page = ZERO_PAGE(0);
207 /* 215 /*
208 * A memory fault, but the filesystem has some outstanding 216 * A memory fault, but the filesystem has some outstanding
@@ -213,17 +221,17 @@ static int dio_refill_pages(struct dio *dio)
213 dio->page_errors = ret; 221 dio->page_errors = ret;
214 page_cache_get(page); 222 page_cache_get(page);
215 dio->pages[0] = page; 223 dio->pages[0] = page;
216 dio->head = 0; 224 sdio->head = 0;
217 dio->tail = 1; 225 sdio->tail = 1;
218 ret = 0; 226 ret = 0;
219 goto out; 227 goto out;
220 } 228 }
221 229
222 if (ret >= 0) { 230 if (ret >= 0) {
223 dio->curr_user_address += ret * PAGE_SIZE; 231 sdio->curr_user_address += ret * PAGE_SIZE;
224 dio->curr_page += ret; 232 sdio->curr_page += ret;
225 dio->head = 0; 233 sdio->head = 0;
226 dio->tail = ret; 234 sdio->tail = ret;
227 ret = 0; 235 ret = 0;
228 } 236 }
229out: 237out:
@@ -236,17 +244,17 @@ out:
236 * decent number of pages, less frequently. To provide nicer use of the 244 * decent number of pages, less frequently. To provide nicer use of the
237 * L1 cache. 245 * L1 cache.
238 */ 246 */
239static struct page *dio_get_page(struct dio *dio) 247static struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio)
240{ 248{
241 if (dio_pages_present(dio) == 0) { 249 if (dio_pages_present(sdio) == 0) {
242 int ret; 250 int ret;
243 251
244 ret = dio_refill_pages(dio); 252 ret = dio_refill_pages(dio, sdio);
245 if (ret) 253 if (ret)
246 return ERR_PTR(ret); 254 return ERR_PTR(ret);
247 BUG_ON(dio_pages_present(dio) == 0); 255 BUG_ON(dio_pages_present(sdio) == 0);
248 } 256 }
249 return dio->pages[dio->head++]; 257 return dio->pages[sdio->head++];
250} 258}
251 259
252/** 260/**
@@ -368,8 +376,9 @@ void dio_end_io(struct bio *bio, int error)
368EXPORT_SYMBOL_GPL(dio_end_io); 376EXPORT_SYMBOL_GPL(dio_end_io);
369 377
370static void 378static void
371dio_bio_alloc(struct dio *dio, struct block_device *bdev, 379dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
372 sector_t first_sector, int nr_vecs) 380 struct block_device *bdev,
381 sector_t first_sector, int nr_vecs)
373{ 382{
374 struct bio *bio; 383 struct bio *bio;
375 384
@@ -386,8 +395,8 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
386 else 395 else
387 bio->bi_end_io = dio_bio_end_io; 396 bio->bi_end_io = dio_bio_end_io;
388 397
389 dio->bio = bio; 398 sdio->bio = bio;
390 dio->logical_offset_in_bio = dio->cur_page_fs_offset; 399 sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
391} 400}
392 401
393/* 402/*
@@ -397,9 +406,9 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
397 * 406 *
398 * bios hold a dio reference between submit_bio and ->end_io. 407 * bios hold a dio reference between submit_bio and ->end_io.
399 */ 408 */
400static void dio_bio_submit(struct dio *dio) 409static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
401{ 410{
402 struct bio *bio = dio->bio; 411 struct bio *bio = sdio->bio;
403 unsigned long flags; 412 unsigned long flags;
404 413
405 bio->bi_private = dio; 414 bio->bi_private = dio;
@@ -411,24 +420,24 @@ static void dio_bio_submit(struct dio *dio)
411 if (dio->is_async && dio->rw == READ) 420 if (dio->is_async && dio->rw == READ)
412 bio_set_pages_dirty(bio); 421 bio_set_pages_dirty(bio);
413 422
414 if (dio->submit_io) 423 if (sdio->submit_io)
415 dio->submit_io(dio->rw, bio, dio->inode, 424 sdio->submit_io(dio->rw, bio, dio->inode,
416 dio->logical_offset_in_bio); 425 sdio->logical_offset_in_bio);
417 else 426 else
418 submit_bio(dio->rw, bio); 427 submit_bio(dio->rw, bio);
419 428
420 dio->bio = NULL; 429 sdio->bio = NULL;
421 dio->boundary = 0; 430 sdio->boundary = 0;
422 dio->logical_offset_in_bio = 0; 431 sdio->logical_offset_in_bio = 0;
423} 432}
424 433
425/* 434/*
426 * Release any resources in case of a failure 435 * Release any resources in case of a failure
427 */ 436 */
428static void dio_cleanup(struct dio *dio) 437static void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
429{ 438{
430 while (dio_pages_present(dio)) 439 while (dio_pages_present(sdio))
431 page_cache_release(dio_get_page(dio)); 440 page_cache_release(dio_get_page(dio, sdio));
432} 441}
433 442
434/* 443/*
@@ -518,11 +527,11 @@ static void dio_await_completion(struct dio *dio)
518 * 527 *
519 * This also helps to limit the peak amount of pinned userspace memory. 528 * This also helps to limit the peak amount of pinned userspace memory.
520 */ 529 */
521static int dio_bio_reap(struct dio *dio) 530static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
522{ 531{
523 int ret = 0; 532 int ret = 0;
524 533
525 if (dio->reap_counter++ >= 64) { 534 if (sdio->reap_counter++ >= 64) {
526 while (dio->bio_list) { 535 while (dio->bio_list) {
527 unsigned long flags; 536 unsigned long flags;
528 struct bio *bio; 537 struct bio *bio;
@@ -536,14 +545,14 @@ static int dio_bio_reap(struct dio *dio)
536 if (ret == 0) 545 if (ret == 0)
537 ret = ret2; 546 ret = ret2;
538 } 547 }
539 dio->reap_counter = 0; 548 sdio->reap_counter = 0;
540 } 549 }
541 return ret; 550 return ret;
542} 551}
543 552
544/* 553/*
545 * Call into the fs to map some more disk blocks. We record the current number 554 * Call into the fs to map some more disk blocks. We record the current number
546 * of available blocks at dio->blocks_available. These are in units of the 555 * of available blocks at sdio->blocks_available. These are in units of the
547 * fs blocksize, (1 << inode->i_blkbits). 556 * fs blocksize, (1 << inode->i_blkbits).
548 * 557 *
549 * The fs is allowed to map lots of blocks at once. If it wants to do that, 558 * The fs is allowed to map lots of blocks at once. If it wants to do that,
@@ -564,7 +573,7 @@ static int dio_bio_reap(struct dio *dio)
564 * buffer_mapped(). However the direct-io code will only process holes one 573 * buffer_mapped(). However the direct-io code will only process holes one
565 * block at a time - it will repeatedly call get_block() as it walks the hole. 574 * block at a time - it will repeatedly call get_block() as it walks the hole.
566 */ 575 */
567static int get_more_blocks(struct dio *dio) 576static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
568{ 577{
569 int ret; 578 int ret;
570 struct buffer_head *map_bh = &dio->map_bh; 579 struct buffer_head *map_bh = &dio->map_bh;
@@ -580,11 +589,11 @@ static int get_more_blocks(struct dio *dio)
580 */ 589 */
581 ret = dio->page_errors; 590 ret = dio->page_errors;
582 if (ret == 0) { 591 if (ret == 0) {
583 BUG_ON(dio->block_in_file >= dio->final_block_in_request); 592 BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
584 fs_startblk = dio->block_in_file >> dio->blkfactor; 593 fs_startblk = sdio->block_in_file >> sdio->blkfactor;
585 dio_count = dio->final_block_in_request - dio->block_in_file; 594 dio_count = sdio->final_block_in_request - sdio->block_in_file;
586 fs_count = dio_count >> dio->blkfactor; 595 fs_count = dio_count >> sdio->blkfactor;
587 blkmask = (1 << dio->blkfactor) - 1; 596 blkmask = (1 << sdio->blkfactor) - 1;
588 if (dio_count & blkmask) 597 if (dio_count & blkmask)
589 fs_count++; 598 fs_count++;
590 599
@@ -604,12 +613,12 @@ static int get_more_blocks(struct dio *dio)
604 */ 613 */
605 create = dio->rw & WRITE; 614 create = dio->rw & WRITE;
606 if (dio->flags & DIO_SKIP_HOLES) { 615 if (dio->flags & DIO_SKIP_HOLES) {
607 if (dio->block_in_file < (i_size_read(dio->inode) >> 616 if (sdio->block_in_file < (i_size_read(dio->inode) >>
608 dio->blkbits)) 617 sdio->blkbits))
609 create = 0; 618 create = 0;
610 } 619 }
611 620
612 ret = (*dio->get_block)(dio->inode, fs_startblk, 621 ret = (*sdio->get_block)(dio->inode, fs_startblk,
613 map_bh, create); 622 map_bh, create);
614 } 623 }
615 return ret; 624 return ret;
@@ -618,20 +627,21 @@ static int get_more_blocks(struct dio *dio)
618/* 627/*
619 * There is no bio. Make one now. 628 * There is no bio. Make one now.
620 */ 629 */
621static int dio_new_bio(struct dio *dio, sector_t start_sector) 630static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
631 sector_t start_sector)
622{ 632{
623 sector_t sector; 633 sector_t sector;
624 int ret, nr_pages; 634 int ret, nr_pages;
625 635
626 ret = dio_bio_reap(dio); 636 ret = dio_bio_reap(dio, sdio);
627 if (ret) 637 if (ret)
628 goto out; 638 goto out;
629 sector = start_sector << (dio->blkbits - 9); 639 sector = start_sector << (sdio->blkbits - 9);
630 nr_pages = min(dio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev)); 640 nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev));
631 nr_pages = min(nr_pages, BIO_MAX_PAGES); 641 nr_pages = min(nr_pages, BIO_MAX_PAGES);
632 BUG_ON(nr_pages <= 0); 642 BUG_ON(nr_pages <= 0);
633 dio_bio_alloc(dio, dio->map_bh.b_bdev, sector, nr_pages); 643 dio_bio_alloc(dio, sdio, dio->map_bh.b_bdev, sector, nr_pages);
634 dio->boundary = 0; 644 sdio->boundary = 0;
635out: 645out:
636 return ret; 646 return ret;
637} 647}
@@ -643,21 +653,21 @@ out:
643 * 653 *
644 * Return zero on success. Non-zero means the caller needs to start a new BIO. 654 * Return zero on success. Non-zero means the caller needs to start a new BIO.
645 */ 655 */
646static int dio_bio_add_page(struct dio *dio) 656static int dio_bio_add_page(struct dio_submit *sdio)
647{ 657{
648 int ret; 658 int ret;
649 659
650 ret = bio_add_page(dio->bio, dio->cur_page, 660 ret = bio_add_page(sdio->bio, sdio->cur_page,
651 dio->cur_page_len, dio->cur_page_offset); 661 sdio->cur_page_len, sdio->cur_page_offset);
652 if (ret == dio->cur_page_len) { 662 if (ret == sdio->cur_page_len) {
653 /* 663 /*
654 * Decrement count only, if we are done with this page 664 * Decrement count only, if we are done with this page
655 */ 665 */
656 if ((dio->cur_page_len + dio->cur_page_offset) == PAGE_SIZE) 666 if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
657 dio->pages_in_io--; 667 sdio->pages_in_io--;
658 page_cache_get(dio->cur_page); 668 page_cache_get(sdio->cur_page);
659 dio->final_block_in_bio = dio->cur_page_block + 669 sdio->final_block_in_bio = sdio->cur_page_block +
660 (dio->cur_page_len >> dio->blkbits); 670 (sdio->cur_page_len >> sdio->blkbits);
661 ret = 0; 671 ret = 0;
662 } else { 672 } else {
663 ret = 1; 673 ret = 1;
@@ -675,14 +685,14 @@ static int dio_bio_add_page(struct dio *dio)
675 * The caller of this function is responsible for removing cur_page from the 685 * The caller of this function is responsible for removing cur_page from the
676 * dio, and for dropping the refcount which came from that presence. 686 * dio, and for dropping the refcount which came from that presence.
677 */ 687 */
678static int dio_send_cur_page(struct dio *dio) 688static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio)
679{ 689{
680 int ret = 0; 690 int ret = 0;
681 691
682 if (dio->bio) { 692 if (sdio->bio) {
683 loff_t cur_offset = dio->cur_page_fs_offset; 693 loff_t cur_offset = sdio->cur_page_fs_offset;
684 loff_t bio_next_offset = dio->logical_offset_in_bio + 694 loff_t bio_next_offset = sdio->logical_offset_in_bio +
685 dio->bio->bi_size; 695 sdio->bio->bi_size;
686 696
687 /* 697 /*
688 * See whether this new request is contiguous with the old. 698 * See whether this new request is contiguous with the old.
@@ -698,28 +708,28 @@ static int dio_send_cur_page(struct dio *dio)
698 * be the next logical offset in the bio, submit the bio we 708 * be the next logical offset in the bio, submit the bio we
699 * have. 709 * have.
700 */ 710 */
701 if (dio->final_block_in_bio != dio->cur_page_block || 711 if (sdio->final_block_in_bio != sdio->cur_page_block ||
702 cur_offset != bio_next_offset) 712 cur_offset != bio_next_offset)
703 dio_bio_submit(dio); 713 dio_bio_submit(dio, sdio);
704 /* 714 /*
705 * Submit now if the underlying fs is about to perform a 715 * Submit now if the underlying fs is about to perform a
706 * metadata read 716 * metadata read
707 */ 717 */
708 else if (dio->boundary) 718 else if (sdio->boundary)
709 dio_bio_submit(dio); 719 dio_bio_submit(dio, sdio);
710 } 720 }
711 721
712 if (dio->bio == NULL) { 722 if (sdio->bio == NULL) {
713 ret = dio_new_bio(dio, dio->cur_page_block); 723 ret = dio_new_bio(dio, sdio, sdio->cur_page_block);
714 if (ret) 724 if (ret)
715 goto out; 725 goto out;
716 } 726 }
717 727
718 if (dio_bio_add_page(dio) != 0) { 728 if (dio_bio_add_page(sdio) != 0) {
719 dio_bio_submit(dio); 729 dio_bio_submit(dio, sdio);
720 ret = dio_new_bio(dio, dio->cur_page_block); 730 ret = dio_new_bio(dio, sdio, sdio->cur_page_block);
721 if (ret == 0) { 731 if (ret == 0) {
722 ret = dio_bio_add_page(dio); 732 ret = dio_bio_add_page(sdio);
723 BUG_ON(ret != 0); 733 BUG_ON(ret != 0);
724 } 734 }
725 } 735 }
@@ -745,7 +755,7 @@ out:
745 * page to the dio instead. 755 * page to the dio instead.
746 */ 756 */
747static int 757static int
748submit_page_section(struct dio *dio, struct page *page, 758submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
749 unsigned offset, unsigned len, sector_t blocknr) 759 unsigned offset, unsigned len, sector_t blocknr)
750{ 760{
751 int ret = 0; 761 int ret = 0;
@@ -760,20 +770,20 @@ submit_page_section(struct dio *dio, struct page *page,
760 /* 770 /*
761 * Can we just grow the current page's presence in the dio? 771 * Can we just grow the current page's presence in the dio?
762 */ 772 */
763 if ( (dio->cur_page == page) && 773 if (sdio->cur_page == page &&
764 (dio->cur_page_offset + dio->cur_page_len == offset) && 774 sdio->cur_page_offset + sdio->cur_page_len == offset &&
765 (dio->cur_page_block + 775 sdio->cur_page_block +
766 (dio->cur_page_len >> dio->blkbits) == blocknr)) { 776 (sdio->cur_page_len >> sdio->blkbits) == blocknr) {
767 dio->cur_page_len += len; 777 sdio->cur_page_len += len;
768 778
769 /* 779 /*
770 * If dio->boundary then we want to schedule the IO now to 780 * If sdio->boundary then we want to schedule the IO now to
771 * avoid metadata seeks. 781 * avoid metadata seeks.
772 */ 782 */
773 if (dio->boundary) { 783 if (sdio->boundary) {
774 ret = dio_send_cur_page(dio); 784 ret = dio_send_cur_page(dio, sdio);
775 page_cache_release(dio->cur_page); 785 page_cache_release(sdio->cur_page);
776 dio->cur_page = NULL; 786 sdio->cur_page = NULL;
777 } 787 }
778 goto out; 788 goto out;
779 } 789 }
@@ -781,20 +791,20 @@ submit_page_section(struct dio *dio, struct page *page,
781 /* 791 /*
782 * If there's a deferred page already there then send it. 792 * If there's a deferred page already there then send it.
783 */ 793 */
784 if (dio->cur_page) { 794 if (sdio->cur_page) {
785 ret = dio_send_cur_page(dio); 795 ret = dio_send_cur_page(dio, sdio);
786 page_cache_release(dio->cur_page); 796 page_cache_release(sdio->cur_page);
787 dio->cur_page = NULL; 797 sdio->cur_page = NULL;
788 if (ret) 798 if (ret)
789 goto out; 799 goto out;
790 } 800 }
791 801
792 page_cache_get(page); /* It is in dio */ 802 page_cache_get(page); /* It is in dio */
793 dio->cur_page = page; 803 sdio->cur_page = page;
794 dio->cur_page_offset = offset; 804 sdio->cur_page_offset = offset;
795 dio->cur_page_len = len; 805 sdio->cur_page_len = len;
796 dio->cur_page_block = blocknr; 806 sdio->cur_page_block = blocknr;
797 dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; 807 sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits;
798out: 808out:
799 return ret; 809 return ret;
800} 810}
@@ -826,19 +836,19 @@ static void clean_blockdev_aliases(struct dio *dio)
826 * `end' is zero if we're doing the start of the IO, 1 at the end of the 836 * `end' is zero if we're doing the start of the IO, 1 at the end of the
827 * IO. 837 * IO.
828 */ 838 */
829static void dio_zero_block(struct dio *dio, int end) 839static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
830{ 840{
831 unsigned dio_blocks_per_fs_block; 841 unsigned dio_blocks_per_fs_block;
832 unsigned this_chunk_blocks; /* In dio_blocks */ 842 unsigned this_chunk_blocks; /* In dio_blocks */
833 unsigned this_chunk_bytes; 843 unsigned this_chunk_bytes;
834 struct page *page; 844 struct page *page;
835 845
836 dio->start_zero_done = 1; 846 sdio->start_zero_done = 1;
837 if (!dio->blkfactor || !buffer_new(&dio->map_bh)) 847 if (!sdio->blkfactor || !buffer_new(&dio->map_bh))
838 return; 848 return;
839 849
840 dio_blocks_per_fs_block = 1 << dio->blkfactor; 850 dio_blocks_per_fs_block = 1 << sdio->blkfactor;
841 this_chunk_blocks = dio->block_in_file & (dio_blocks_per_fs_block - 1); 851 this_chunk_blocks = sdio->block_in_file & (dio_blocks_per_fs_block - 1);
842 852
843 if (!this_chunk_blocks) 853 if (!this_chunk_blocks)
844 return; 854 return;
@@ -850,14 +860,14 @@ static void dio_zero_block(struct dio *dio, int end)
850 if (end) 860 if (end)
851 this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks; 861 this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks;
852 862
853 this_chunk_bytes = this_chunk_blocks << dio->blkbits; 863 this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
854 864
855 page = ZERO_PAGE(0); 865 page = ZERO_PAGE(0);
856 if (submit_page_section(dio, page, 0, this_chunk_bytes, 866 if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
857 dio->next_block_for_io)) 867 sdio->next_block_for_io))
858 return; 868 return;
859 869
860 dio->next_block_for_io += this_chunk_blocks; 870 sdio->next_block_for_io += this_chunk_blocks;
861} 871}
862 872
863/* 873/*
@@ -876,9 +886,9 @@ static void dio_zero_block(struct dio *dio, int end)
876 * it should set b_size to PAGE_SIZE or more inside get_block(). This gives 886 * it should set b_size to PAGE_SIZE or more inside get_block(). This gives
877 * fine alignment but still allows this function to work in PAGE_SIZE units. 887 * fine alignment but still allows this function to work in PAGE_SIZE units.
878 */ 888 */
879static int do_direct_IO(struct dio *dio) 889static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
880{ 890{
881 const unsigned blkbits = dio->blkbits; 891 const unsigned blkbits = sdio->blkbits;
882 const unsigned blocks_per_page = PAGE_SIZE >> blkbits; 892 const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
883 struct page *page; 893 struct page *page;
884 unsigned block_in_page; 894 unsigned block_in_page;
@@ -886,10 +896,10 @@ static int do_direct_IO(struct dio *dio)
886 int ret = 0; 896 int ret = 0;
887 897
888 /* The I/O can start at any block offset within the first page */ 898 /* The I/O can start at any block offset within the first page */
889 block_in_page = dio->first_block_in_page; 899 block_in_page = sdio->first_block_in_page;
890 900
891 while (dio->block_in_file < dio->final_block_in_request) { 901 while (sdio->block_in_file < sdio->final_block_in_request) {
892 page = dio_get_page(dio); 902 page = dio_get_page(dio, sdio);
893 if (IS_ERR(page)) { 903 if (IS_ERR(page)) {
894 ret = PTR_ERR(page); 904 ret = PTR_ERR(page);
895 goto out; 905 goto out;
@@ -901,14 +911,14 @@ static int do_direct_IO(struct dio *dio)
901 unsigned this_chunk_blocks; /* # of blocks */ 911 unsigned this_chunk_blocks; /* # of blocks */
902 unsigned u; 912 unsigned u;
903 913
904 if (dio->blocks_available == 0) { 914 if (sdio->blocks_available == 0) {
905 /* 915 /*
906 * Need to go and map some more disk 916 * Need to go and map some more disk
907 */ 917 */
908 unsigned long blkmask; 918 unsigned long blkmask;
909 unsigned long dio_remainder; 919 unsigned long dio_remainder;
910 920
911 ret = get_more_blocks(dio); 921 ret = get_more_blocks(dio, sdio);
912 if (ret) { 922 if (ret) {
913 page_cache_release(page); 923 page_cache_release(page);
914 goto out; 924 goto out;
@@ -916,18 +926,18 @@ static int do_direct_IO(struct dio *dio)
916 if (!buffer_mapped(map_bh)) 926 if (!buffer_mapped(map_bh))
917 goto do_holes; 927 goto do_holes;
918 928
919 dio->blocks_available = 929 sdio->blocks_available =
920 map_bh->b_size >> dio->blkbits; 930 map_bh->b_size >> sdio->blkbits;
921 dio->next_block_for_io = 931 sdio->next_block_for_io =
922 map_bh->b_blocknr << dio->blkfactor; 932 map_bh->b_blocknr << sdio->blkfactor;
923 if (buffer_new(map_bh)) 933 if (buffer_new(map_bh))
924 clean_blockdev_aliases(dio); 934 clean_blockdev_aliases(dio);
925 935
926 if (!dio->blkfactor) 936 if (!sdio->blkfactor)
927 goto do_holes; 937 goto do_holes;
928 938
929 blkmask = (1 << dio->blkfactor) - 1; 939 blkmask = (1 << sdio->blkfactor) - 1;
930 dio_remainder = (dio->block_in_file & blkmask); 940 dio_remainder = (sdio->block_in_file & blkmask);
931 941
932 /* 942 /*
933 * If we are at the start of IO and that IO 943 * If we are at the start of IO and that IO
@@ -941,8 +951,8 @@ static int do_direct_IO(struct dio *dio)
941 * on-disk 951 * on-disk
942 */ 952 */
943 if (!buffer_new(map_bh)) 953 if (!buffer_new(map_bh))
944 dio->next_block_for_io += dio_remainder; 954 sdio->next_block_for_io += dio_remainder;
945 dio->blocks_available -= dio_remainder; 955 sdio->blocks_available -= dio_remainder;
946 } 956 }
947do_holes: 957do_holes:
948 /* Handle holes */ 958 /* Handle holes */
@@ -961,7 +971,7 @@ do_holes:
961 */ 971 */
962 i_size_aligned = ALIGN(i_size_read(dio->inode), 972 i_size_aligned = ALIGN(i_size_read(dio->inode),
963 1 << blkbits); 973 1 << blkbits);
964 if (dio->block_in_file >= 974 if (sdio->block_in_file >=
965 i_size_aligned >> blkbits) { 975 i_size_aligned >> blkbits) {
966 /* We hit eof */ 976 /* We hit eof */
967 page_cache_release(page); 977 page_cache_release(page);
@@ -969,7 +979,7 @@ do_holes:
969 } 979 }
970 zero_user(page, block_in_page << blkbits, 980 zero_user(page, block_in_page << blkbits,
971 1 << blkbits); 981 1 << blkbits);
972 dio->block_in_file++; 982 sdio->block_in_file++;
973 block_in_page++; 983 block_in_page++;
974 goto next_block; 984 goto next_block;
975 } 985 }
@@ -979,38 +989,40 @@ do_holes:
979 * is finer than the underlying fs, go check to see if 989 * is finer than the underlying fs, go check to see if
980 * we must zero out the start of this block. 990 * we must zero out the start of this block.
981 */ 991 */
982 if (unlikely(dio->blkfactor && !dio->start_zero_done)) 992 if (unlikely(sdio->blkfactor && !sdio->start_zero_done))
983 dio_zero_block(dio, 0); 993 dio_zero_block(dio, sdio, 0);
984 994
985 /* 995 /*
986 * Work out, in this_chunk_blocks, how much disk we 996 * Work out, in this_chunk_blocks, how much disk we
987 * can add to this page 997 * can add to this page
988 */ 998 */
989 this_chunk_blocks = dio->blocks_available; 999 this_chunk_blocks = sdio->blocks_available;
990 u = (PAGE_SIZE - offset_in_page) >> blkbits; 1000 u = (PAGE_SIZE - offset_in_page) >> blkbits;
991 if (this_chunk_blocks > u) 1001 if (this_chunk_blocks > u)
992 this_chunk_blocks = u; 1002 this_chunk_blocks = u;
993 u = dio->final_block_in_request - dio->block_in_file; 1003 u = sdio->final_block_in_request - sdio->block_in_file;
994 if (this_chunk_blocks > u) 1004 if (this_chunk_blocks > u)
995 this_chunk_blocks = u; 1005 this_chunk_blocks = u;
996 this_chunk_bytes = this_chunk_blocks << blkbits; 1006 this_chunk_bytes = this_chunk_blocks << blkbits;
997 BUG_ON(this_chunk_bytes == 0); 1007 BUG_ON(this_chunk_bytes == 0);
998 1008
999 dio->boundary = buffer_boundary(map_bh); 1009 sdio->boundary = buffer_boundary(map_bh);
1000 ret = submit_page_section(dio, page, offset_in_page, 1010 ret = submit_page_section(dio, sdio, page,
1001 this_chunk_bytes, dio->next_block_for_io); 1011 offset_in_page,
1012 this_chunk_bytes,
1013 sdio->next_block_for_io);
1002 if (ret) { 1014 if (ret) {
1003 page_cache_release(page); 1015 page_cache_release(page);
1004 goto out; 1016 goto out;
1005 } 1017 }
1006 dio->next_block_for_io += this_chunk_blocks; 1018 sdio->next_block_for_io += this_chunk_blocks;
1007 1019
1008 dio->block_in_file += this_chunk_blocks; 1020 sdio->block_in_file += this_chunk_blocks;
1009 block_in_page += this_chunk_blocks; 1021 block_in_page += this_chunk_blocks;
1010 dio->blocks_available -= this_chunk_blocks; 1022 sdio->blocks_available -= this_chunk_blocks;
1011next_block: 1023next_block:
1012 BUG_ON(dio->block_in_file > dio->final_block_in_request); 1024 BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
1013 if (dio->block_in_file == dio->final_block_in_request) 1025 if (sdio->block_in_file == sdio->final_block_in_request)
1014 break; 1026 break;
1015 } 1027 }
1016 1028
@@ -1026,7 +1038,7 @@ static ssize_t
1026direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 1038direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1027 const struct iovec *iov, loff_t offset, unsigned long nr_segs, 1039 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
1028 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, 1040 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
1029 dio_submit_t submit_io, struct dio *dio) 1041 dio_submit_t submit_io, struct dio *dio, struct dio_submit *sdio)
1030{ 1042{
1031 unsigned long user_addr; 1043 unsigned long user_addr;
1032 unsigned long flags; 1044 unsigned long flags;
@@ -1037,15 +1049,15 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1037 1049
1038 dio->inode = inode; 1050 dio->inode = inode;
1039 dio->rw = rw; 1051 dio->rw = rw;
1040 dio->blkbits = blkbits; 1052 sdio->blkbits = blkbits;
1041 dio->blkfactor = inode->i_blkbits - blkbits; 1053 sdio->blkfactor = inode->i_blkbits - blkbits;
1042 dio->block_in_file = offset >> blkbits; 1054 sdio->block_in_file = offset >> blkbits;
1043 1055
1044 dio->get_block = get_block; 1056 sdio->get_block = get_block;
1045 dio->end_io = end_io; 1057 dio->end_io = end_io;
1046 dio->submit_io = submit_io; 1058 sdio->submit_io = submit_io;
1047 dio->final_block_in_bio = -1; 1059 sdio->final_block_in_bio = -1;
1048 dio->next_block_for_io = -1; 1060 sdio->next_block_for_io = -1;
1049 1061
1050 dio->iocb = iocb; 1062 dio->iocb = iocb;
1051 dio->i_size = i_size_read(inode); 1063 dio->i_size = i_size_read(inode);
@@ -1057,45 +1069,45 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1057 * In case of non-aligned buffers, we may need 2 more 1069 * In case of non-aligned buffers, we may need 2 more
1058 * pages since we need to zero out first and last block. 1070 * pages since we need to zero out first and last block.
1059 */ 1071 */
1060 if (unlikely(dio->blkfactor)) 1072 if (unlikely(sdio->blkfactor))
1061 dio->pages_in_io = 2; 1073 sdio->pages_in_io = 2;
1062 1074
1063 for (seg = 0; seg < nr_segs; seg++) { 1075 for (seg = 0; seg < nr_segs; seg++) {
1064 user_addr = (unsigned long)iov[seg].iov_base; 1076 user_addr = (unsigned long)iov[seg].iov_base;
1065 dio->pages_in_io += 1077 sdio->pages_in_io +=
1066 ((user_addr+iov[seg].iov_len +PAGE_SIZE-1)/PAGE_SIZE 1078 ((user_addr+iov[seg].iov_len +PAGE_SIZE-1)/PAGE_SIZE
1067 - user_addr/PAGE_SIZE); 1079 - user_addr/PAGE_SIZE);
1068 } 1080 }
1069 1081
1070 for (seg = 0; seg < nr_segs; seg++) { 1082 for (seg = 0; seg < nr_segs; seg++) {
1071 user_addr = (unsigned long)iov[seg].iov_base; 1083 user_addr = (unsigned long)iov[seg].iov_base;
1072 dio->size += bytes = iov[seg].iov_len; 1084 sdio->size += bytes = iov[seg].iov_len;
1073 1085
1074 /* Index into the first page of the first block */ 1086 /* Index into the first page of the first block */
1075 dio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; 1087 sdio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
1076 dio->final_block_in_request = dio->block_in_file + 1088 sdio->final_block_in_request = sdio->block_in_file +
1077 (bytes >> blkbits); 1089 (bytes >> blkbits);
1078 /* Page fetching state */ 1090 /* Page fetching state */
1079 dio->head = 0; 1091 sdio->head = 0;
1080 dio->tail = 0; 1092 sdio->tail = 0;
1081 dio->curr_page = 0; 1093 sdio->curr_page = 0;
1082 1094
1083 dio->total_pages = 0; 1095 sdio->total_pages = 0;
1084 if (user_addr & (PAGE_SIZE-1)) { 1096 if (user_addr & (PAGE_SIZE-1)) {
1085 dio->total_pages++; 1097 sdio->total_pages++;
1086 bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); 1098 bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
1087 } 1099 }
1088 dio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; 1100 sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
1089 dio->curr_user_address = user_addr; 1101 sdio->curr_user_address = user_addr;
1090 1102
1091 ret = do_direct_IO(dio); 1103 ret = do_direct_IO(dio, sdio);
1092 1104
1093 dio->result += iov[seg].iov_len - 1105 dio->result += iov[seg].iov_len -
1094 ((dio->final_block_in_request - dio->block_in_file) << 1106 ((sdio->final_block_in_request - sdio->block_in_file) <<
1095 blkbits); 1107 blkbits);
1096 1108
1097 if (ret) { 1109 if (ret) {
1098 dio_cleanup(dio); 1110 dio_cleanup(dio, sdio);
1099 break; 1111 break;
1100 } 1112 }
1101 } /* end iovec loop */ 1113 } /* end iovec loop */
@@ -1111,23 +1123,23 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1111 * There may be some unwritten disk at the end of a part-written 1123 * There may be some unwritten disk at the end of a part-written
1112 * fs-block-sized block. Go zero that now. 1124 * fs-block-sized block. Go zero that now.
1113 */ 1125 */
1114 dio_zero_block(dio, 1); 1126 dio_zero_block(dio, sdio, 1);
1115 1127
1116 if (dio->cur_page) { 1128 if (sdio->cur_page) {
1117 ret2 = dio_send_cur_page(dio); 1129 ret2 = dio_send_cur_page(dio, sdio);
1118 if (ret == 0) 1130 if (ret == 0)
1119 ret = ret2; 1131 ret = ret2;
1120 page_cache_release(dio->cur_page); 1132 page_cache_release(sdio->cur_page);
1121 dio->cur_page = NULL; 1133 sdio->cur_page = NULL;
1122 } 1134 }
1123 if (dio->bio) 1135 if (sdio->bio)
1124 dio_bio_submit(dio); 1136 dio_bio_submit(dio, sdio);
1125 1137
1126 /* 1138 /*
1127 * It is possible that, we return short IO due to end of file. 1139 * It is possible that, we return short IO due to end of file.
1128 * In that case, we need to release all the pages we got hold on. 1140 * In that case, we need to release all the pages we got hold on.
1129 */ 1141 */
1130 dio_cleanup(dio); 1142 dio_cleanup(dio, sdio);
1131 1143
1132 /* 1144 /*
1133 * All block lookups have been performed. For READ requests 1145 * All block lookups have been performed. For READ requests
@@ -1146,7 +1158,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1146 */ 1158 */
1147 BUG_ON(ret == -EIOCBQUEUED); 1159 BUG_ON(ret == -EIOCBQUEUED);
1148 if (dio->is_async && ret == 0 && dio->result && 1160 if (dio->is_async && ret == 0 && dio->result &&
1149 ((rw & READ) || (dio->result == dio->size))) 1161 ((rw & READ) || (dio->result == sdio->size)))
1150 ret = -EIOCBQUEUED; 1162 ret = -EIOCBQUEUED;
1151 1163
1152 if (ret != -EIOCBQUEUED) 1164 if (ret != -EIOCBQUEUED)
@@ -1211,6 +1223,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1211 ssize_t retval = -EINVAL; 1223 ssize_t retval = -EINVAL;
1212 loff_t end = offset; 1224 loff_t end = offset;
1213 struct dio *dio; 1225 struct dio *dio;
1226 struct dio_submit sdio = { 0, };
1214 1227
1215 if (rw & WRITE) 1228 if (rw & WRITE)
1216 rw = WRITE_ODIRECT; 1229 rw = WRITE_ODIRECT;
@@ -1290,7 +1303,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1290 1303
1291 retval = direct_io_worker(rw, iocb, inode, iov, offset, 1304 retval = direct_io_worker(rw, iocb, inode, iov, offset,
1292 nr_segs, blkbits, get_block, end_io, 1305 nr_segs, blkbits, get_block, end_io,
1293 submit_io, dio); 1306 submit_io, dio, &sdio);
1294 1307
1295out: 1308out:
1296 return retval; 1309 return retval;