aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andi Kleen <ak@linux.intel.com> 2011-08-02 00:38:07 -0400
committer: root <root@serles.lst.de> 2011-10-28 08:58:57 -0400
commit: 18772641dbe2c89c6122c603f81f6a9574aee556 (patch)
tree: 54a8b229f2385615df9f9921f743ac64184a3a6f
parent: 6e8267f532a17165ab551ac5fdafcba5333dcca5 (diff)
direct-io: separate map_bh from dio
Only a single b_private field in the map_bh buffer head is needed after the submission path. Move map_bh separately to avoid storing this information in the long-term slab. This avoids the weird 104-byte hole in struct dio_submit, which also needed to be memset early.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- fs/direct-io.c 66
1 files changed, 37 insertions, 29 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 6bb04409e725..edf3174afd6a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -119,6 +119,7 @@ struct dio {
119 loff_t i_size; /* i_size when submitted */ 119 loff_t i_size; /* i_size when submitted */
120 dio_iodone_t *end_io; /* IO completion function */ 120 dio_iodone_t *end_io; /* IO completion function */
121 121
122 void *private; /* copy from map_bh.b_private */
122 123
123 /* BIO completion state */ 124 /* BIO completion state */
124 spinlock_t bio_lock; /* protects BIO fields below */ 125 spinlock_t bio_lock; /* protects BIO fields below */
@@ -133,7 +134,6 @@ struct dio {
133 struct kiocb *iocb; /* kiocb */ 134 struct kiocb *iocb; /* kiocb */
134 ssize_t result; /* IO result */ 135 ssize_t result; /* IO result */
135 136
136 struct buffer_head map_bh; /* last get_block() result */
137 /* 137 /*
138 * pages[] (and any fields placed after it) are not zeroed out at 138 * pages[] (and any fields placed after it) are not zeroed out at
139 * allocation time. Don't add new fields after pages[] unless you 139 * allocation time. Don't add new fields after pages[] unless you
@@ -301,7 +301,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
301 301
302 if (dio->end_io && dio->result) { 302 if (dio->end_io && dio->result) {
303 dio->end_io(dio->iocb, offset, transferred, 303 dio->end_io(dio->iocb, offset, transferred,
304 dio->map_bh.b_private, ret, is_async); 304 dio->private, ret, is_async);
305 } else { 305 } else {
306 if (is_async) 306 if (is_async)
307 aio_complete(dio->iocb, ret, 0); 307 aio_complete(dio->iocb, ret, 0);
@@ -574,10 +574,10 @@ static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
574 * buffer_mapped(). However the direct-io code will only process holes one 574 * buffer_mapped(). However the direct-io code will only process holes one
575 * block at a time - it will repeatedly call get_block() as it walks the hole. 575 * block at a time - it will repeatedly call get_block() as it walks the hole.
576 */ 576 */
577static int get_more_blocks(struct dio *dio, struct dio_submit *sdio) 577static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
578 struct buffer_head *map_bh)
578{ 579{
579 int ret; 580 int ret;
580 struct buffer_head *map_bh = &dio->map_bh;
581 sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ 581 sector_t fs_startblk; /* Into file, in filesystem-sized blocks */
582 unsigned long fs_count; /* Number of filesystem-sized blocks */ 582 unsigned long fs_count; /* Number of filesystem-sized blocks */
583 unsigned long dio_count;/* Number of dio_block-sized blocks */ 583 unsigned long dio_count;/* Number of dio_block-sized blocks */
@@ -621,6 +621,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
621 621
622 ret = (*sdio->get_block)(dio->inode, fs_startblk, 622 ret = (*sdio->get_block)(dio->inode, fs_startblk,
623 map_bh, create); 623 map_bh, create);
624
625 /* Store for completion */
626 dio->private = map_bh->b_private;
624 } 627 }
625 return ret; 628 return ret;
626} 629}
@@ -629,7 +632,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
629 * There is no bio. Make one now. 632 * There is no bio. Make one now.
630 */ 633 */
631static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, 634static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
632 sector_t start_sector) 635 sector_t start_sector, struct buffer_head *map_bh)
633{ 636{
634 sector_t sector; 637 sector_t sector;
635 int ret, nr_pages; 638 int ret, nr_pages;
@@ -638,10 +641,10 @@ static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
638 if (ret) 641 if (ret)
639 goto out; 642 goto out;
640 sector = start_sector << (sdio->blkbits - 9); 643 sector = start_sector << (sdio->blkbits - 9);
641 nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev)); 644 nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev));
642 nr_pages = min(nr_pages, BIO_MAX_PAGES); 645 nr_pages = min(nr_pages, BIO_MAX_PAGES);
643 BUG_ON(nr_pages <= 0); 646 BUG_ON(nr_pages <= 0);
644 dio_bio_alloc(dio, sdio, dio->map_bh.b_bdev, sector, nr_pages); 647 dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
645 sdio->boundary = 0; 648 sdio->boundary = 0;
646out: 649out:
647 return ret; 650 return ret;
@@ -686,7 +689,8 @@ static int dio_bio_add_page(struct dio_submit *sdio)
686 * The caller of this function is responsible for removing cur_page from the 689 * The caller of this function is responsible for removing cur_page from the
687 * dio, and for dropping the refcount which came from that presence. 690 * dio, and for dropping the refcount which came from that presence.
688 */ 691 */
689static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio) 692static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
693 struct buffer_head *map_bh)
690{ 694{
691 int ret = 0; 695 int ret = 0;
692 696
@@ -721,14 +725,14 @@ static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio)
721 } 725 }
722 726
723 if (sdio->bio == NULL) { 727 if (sdio->bio == NULL) {
724 ret = dio_new_bio(dio, sdio, sdio->cur_page_block); 728 ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
725 if (ret) 729 if (ret)
726 goto out; 730 goto out;
727 } 731 }
728 732
729 if (dio_bio_add_page(sdio) != 0) { 733 if (dio_bio_add_page(sdio) != 0) {
730 dio_bio_submit(dio, sdio); 734 dio_bio_submit(dio, sdio);
731 ret = dio_new_bio(dio, sdio, sdio->cur_page_block); 735 ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
732 if (ret == 0) { 736 if (ret == 0) {
733 ret = dio_bio_add_page(sdio); 737 ret = dio_bio_add_page(sdio);
734 BUG_ON(ret != 0); 738 BUG_ON(ret != 0);
@@ -757,7 +761,8 @@ out:
757 */ 761 */
758static int 762static int
759submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, 763submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
760 unsigned offset, unsigned len, sector_t blocknr) 764 unsigned offset, unsigned len, sector_t blocknr,
765 struct buffer_head *map_bh)
761{ 766{
762 int ret = 0; 767 int ret = 0;
763 768
@@ -782,7 +787,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
782 * avoid metadata seeks. 787 * avoid metadata seeks.
783 */ 788 */
784 if (sdio->boundary) { 789 if (sdio->boundary) {
785 ret = dio_send_cur_page(dio, sdio); 790 ret = dio_send_cur_page(dio, sdio, map_bh);
786 page_cache_release(sdio->cur_page); 791 page_cache_release(sdio->cur_page);
787 sdio->cur_page = NULL; 792 sdio->cur_page = NULL;
788 } 793 }
@@ -793,7 +798,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
793 * If there's a deferred page already there then send it. 798 * If there's a deferred page already there then send it.
794 */ 799 */
795 if (sdio->cur_page) { 800 if (sdio->cur_page) {
796 ret = dio_send_cur_page(dio, sdio); 801 ret = dio_send_cur_page(dio, sdio, map_bh);
797 page_cache_release(sdio->cur_page); 802 page_cache_release(sdio->cur_page);
798 sdio->cur_page = NULL; 803 sdio->cur_page = NULL;
799 if (ret) 804 if (ret)
@@ -815,16 +820,16 @@ out:
815 * file blocks. Only called for S_ISREG files - blockdevs do not set 820 * file blocks. Only called for S_ISREG files - blockdevs do not set
816 * buffer_new 821 * buffer_new
817 */ 822 */
818static void clean_blockdev_aliases(struct dio *dio) 823static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
819{ 824{
820 unsigned i; 825 unsigned i;
821 unsigned nblocks; 826 unsigned nblocks;
822 827
823 nblocks = dio->map_bh.b_size >> dio->inode->i_blkbits; 828 nblocks = map_bh->b_size >> dio->inode->i_blkbits;
824 829
825 for (i = 0; i < nblocks; i++) { 830 for (i = 0; i < nblocks; i++) {
826 unmap_underlying_metadata(dio->map_bh.b_bdev, 831 unmap_underlying_metadata(map_bh->b_bdev,
827 dio->map_bh.b_blocknr + i); 832 map_bh->b_blocknr + i);
828 } 833 }
829} 834}
830 835
@@ -837,7 +842,8 @@ static void clean_blockdev_aliases(struct dio *dio)
837 * `end' is zero if we're doing the start of the IO, 1 at the end of the 842 * `end' is zero if we're doing the start of the IO, 1 at the end of the
838 * IO. 843 * IO.
839 */ 844 */
840static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end) 845static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end,
846 struct buffer_head *map_bh)
841{ 847{
842 unsigned dio_blocks_per_fs_block; 848 unsigned dio_blocks_per_fs_block;
843 unsigned this_chunk_blocks; /* In dio_blocks */ 849 unsigned this_chunk_blocks; /* In dio_blocks */
@@ -845,7 +851,7 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
845 struct page *page; 851 struct page *page;
846 852
847 sdio->start_zero_done = 1; 853 sdio->start_zero_done = 1;
848 if (!sdio->blkfactor || !buffer_new(&dio->map_bh)) 854 if (!sdio->blkfactor || !buffer_new(map_bh))
849 return; 855 return;
850 856
851 dio_blocks_per_fs_block = 1 << sdio->blkfactor; 857 dio_blocks_per_fs_block = 1 << sdio->blkfactor;
@@ -865,7 +871,7 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
865 871
866 page = ZERO_PAGE(0); 872 page = ZERO_PAGE(0);
867 if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes, 873 if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
868 sdio->next_block_for_io)) 874 sdio->next_block_for_io, map_bh))
869 return; 875 return;
870 876
871 sdio->next_block_for_io += this_chunk_blocks; 877 sdio->next_block_for_io += this_chunk_blocks;
@@ -887,13 +893,13 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
887 * it should set b_size to PAGE_SIZE or more inside get_block(). This gives 893 * it should set b_size to PAGE_SIZE or more inside get_block(). This gives
888 * fine alignment but still allows this function to work in PAGE_SIZE units. 894 * fine alignment but still allows this function to work in PAGE_SIZE units.
889 */ 895 */
890static int do_direct_IO(struct dio *dio, struct dio_submit *sdio) 896static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
897 struct buffer_head *map_bh)
891{ 898{
892 const unsigned blkbits = sdio->blkbits; 899 const unsigned blkbits = sdio->blkbits;
893 const unsigned blocks_per_page = PAGE_SIZE >> blkbits; 900 const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
894 struct page *page; 901 struct page *page;
895 unsigned block_in_page; 902 unsigned block_in_page;
896 struct buffer_head *map_bh = &dio->map_bh;
897 int ret = 0; 903 int ret = 0;
898 904
899 /* The I/O can start at any block offset within the first page */ 905 /* The I/O can start at any block offset within the first page */
@@ -919,7 +925,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
919 unsigned long blkmask; 925 unsigned long blkmask;
920 unsigned long dio_remainder; 926 unsigned long dio_remainder;
921 927
922 ret = get_more_blocks(dio, sdio); 928 ret = get_more_blocks(dio, sdio, map_bh);
923 if (ret) { 929 if (ret) {
924 page_cache_release(page); 930 page_cache_release(page);
925 goto out; 931 goto out;
@@ -932,7 +938,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
932 sdio->next_block_for_io = 938 sdio->next_block_for_io =
933 map_bh->b_blocknr << sdio->blkfactor; 939 map_bh->b_blocknr << sdio->blkfactor;
934 if (buffer_new(map_bh)) 940 if (buffer_new(map_bh))
935 clean_blockdev_aliases(dio); 941 clean_blockdev_aliases(dio, map_bh);
936 942
937 if (!sdio->blkfactor) 943 if (!sdio->blkfactor)
938 goto do_holes; 944 goto do_holes;
@@ -991,7 +997,7 @@ do_holes:
991 * we must zero out the start of this block. 997 * we must zero out the start of this block.
992 */ 998 */
993 if (unlikely(sdio->blkfactor && !sdio->start_zero_done)) 999 if (unlikely(sdio->blkfactor && !sdio->start_zero_done))
994 dio_zero_block(dio, sdio, 0); 1000 dio_zero_block(dio, sdio, 0, map_bh);
995 1001
996 /* 1002 /*
997 * Work out, in this_chunk_blocks, how much disk we 1003 * Work out, in this_chunk_blocks, how much disk we
@@ -1011,7 +1017,8 @@ do_holes:
1011 ret = submit_page_section(dio, sdio, page, 1017 ret = submit_page_section(dio, sdio, page,
1012 offset_in_page, 1018 offset_in_page,
1013 this_chunk_bytes, 1019 this_chunk_bytes,
1014 sdio->next_block_for_io); 1020 sdio->next_block_for_io,
1021 map_bh);
1015 if (ret) { 1022 if (ret) {
1016 page_cache_release(page); 1023 page_cache_release(page);
1017 goto out; 1024 goto out;
@@ -1047,6 +1054,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1047 ssize_t ret = 0; 1054 ssize_t ret = 0;
1048 ssize_t ret2; 1055 ssize_t ret2;
1049 size_t bytes; 1056 size_t bytes;
1057 struct buffer_head map_bh = { 0, };
1050 1058
1051 dio->inode = inode; 1059 dio->inode = inode;
1052 dio->rw = rw; 1060 dio->rw = rw;
@@ -1101,7 +1109,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1101 sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; 1109 sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
1102 sdio->curr_user_address = user_addr; 1110 sdio->curr_user_address = user_addr;
1103 1111
1104 ret = do_direct_IO(dio, sdio); 1112 ret = do_direct_IO(dio, sdio, &map_bh);
1105 1113
1106 dio->result += iov[seg].iov_len - 1114 dio->result += iov[seg].iov_len -
1107 ((sdio->final_block_in_request - sdio->block_in_file) << 1115 ((sdio->final_block_in_request - sdio->block_in_file) <<
@@ -1124,10 +1132,10 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1124 * There may be some unwritten disk at the end of a part-written 1132 * There may be some unwritten disk at the end of a part-written
1125 * fs-block-sized block. Go zero that now. 1133 * fs-block-sized block. Go zero that now.
1126 */ 1134 */
1127 dio_zero_block(dio, sdio, 1); 1135 dio_zero_block(dio, sdio, 1, &map_bh);
1128 1136
1129 if (sdio->cur_page) { 1137 if (sdio->cur_page) {
1130 ret2 = dio_send_cur_page(dio, sdio); 1138 ret2 = dio_send_cur_page(dio, sdio, &map_bh);
1131 if (ret == 0) 1139 if (ret == 0)
1132 ret = ret2; 1140 ret = ret2;
1133 page_cache_release(sdio->cur_page); 1141 page_cache_release(sdio->cur_page);