diff options
author | Andi Kleen <ak@linux.intel.com> | 2011-08-02 00:38:07 -0400 |
---|---|---|
committer | root <root@serles.lst.de> | 2011-10-28 08:58:57 -0400 |
commit | 18772641dbe2c89c6122c603f81f6a9574aee556 (patch) | |
tree | 54a8b229f2385615df9f9921f743ac64184a3a6f | |
parent | 6e8267f532a17165ab551ac5fdafcba5333dcca5 (diff) |
direct-io: separate map_bh from dio
Only a single field of the map_bh buffer head, b_private, is needed after
the submission path. Move map_bh out of the dio and onto the stack to avoid
storing this information in the long-term slab.
This avoids the weird 104-byte hole in struct dio_submit, which also needed
to be memset early.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- | fs/direct-io.c | 66 |
1 files changed, 37 insertions, 29 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index 6bb04409e725..edf3174afd6a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -119,6 +119,7 @@ struct dio { | |||
119 | loff_t i_size; /* i_size when submitted */ | 119 | loff_t i_size; /* i_size when submitted */ |
120 | dio_iodone_t *end_io; /* IO completion function */ | 120 | dio_iodone_t *end_io; /* IO completion function */ |
121 | 121 | ||
122 | void *private; /* copy from map_bh.b_private */ | ||
122 | 123 | ||
123 | /* BIO completion state */ | 124 | /* BIO completion state */ |
124 | spinlock_t bio_lock; /* protects BIO fields below */ | 125 | spinlock_t bio_lock; /* protects BIO fields below */ |
@@ -133,7 +134,6 @@ struct dio { | |||
133 | struct kiocb *iocb; /* kiocb */ | 134 | struct kiocb *iocb; /* kiocb */ |
134 | ssize_t result; /* IO result */ | 135 | ssize_t result; /* IO result */ |
135 | 136 | ||
136 | struct buffer_head map_bh; /* last get_block() result */ | ||
137 | /* | 137 | /* |
138 | * pages[] (and any fields placed after it) are not zeroed out at | 138 | * pages[] (and any fields placed after it) are not zeroed out at |
139 | * allocation time. Don't add new fields after pages[] unless you | 139 | * allocation time. Don't add new fields after pages[] unless you |
@@ -301,7 +301,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is | |||
301 | 301 | ||
302 | if (dio->end_io && dio->result) { | 302 | if (dio->end_io && dio->result) { |
303 | dio->end_io(dio->iocb, offset, transferred, | 303 | dio->end_io(dio->iocb, offset, transferred, |
304 | dio->map_bh.b_private, ret, is_async); | 304 | dio->private, ret, is_async); |
305 | } else { | 305 | } else { |
306 | if (is_async) | 306 | if (is_async) |
307 | aio_complete(dio->iocb, ret, 0); | 307 | aio_complete(dio->iocb, ret, 0); |
@@ -574,10 +574,10 @@ static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio) | |||
574 | * buffer_mapped(). However the direct-io code will only process holes one | 574 | * buffer_mapped(). However the direct-io code will only process holes one |
575 | * block at a time - it will repeatedly call get_block() as it walks the hole. | 575 | * block at a time - it will repeatedly call get_block() as it walks the hole. |
576 | */ | 576 | */ |
577 | static int get_more_blocks(struct dio *dio, struct dio_submit *sdio) | 577 | static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, |
578 | struct buffer_head *map_bh) | ||
578 | { | 579 | { |
579 | int ret; | 580 | int ret; |
580 | struct buffer_head *map_bh = &dio->map_bh; | ||
581 | sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ | 581 | sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ |
582 | unsigned long fs_count; /* Number of filesystem-sized blocks */ | 582 | unsigned long fs_count; /* Number of filesystem-sized blocks */ |
583 | unsigned long dio_count;/* Number of dio_block-sized blocks */ | 583 | unsigned long dio_count;/* Number of dio_block-sized blocks */ |
@@ -621,6 +621,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio) | |||
621 | 621 | ||
622 | ret = (*sdio->get_block)(dio->inode, fs_startblk, | 622 | ret = (*sdio->get_block)(dio->inode, fs_startblk, |
623 | map_bh, create); | 623 | map_bh, create); |
624 | |||
625 | /* Store for completion */ | ||
626 | dio->private = map_bh->b_private; | ||
624 | } | 627 | } |
625 | return ret; | 628 | return ret; |
626 | } | 629 | } |
@@ -629,7 +632,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio) | |||
629 | * There is no bio. Make one now. | 632 | * There is no bio. Make one now. |
630 | */ | 633 | */ |
631 | static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, | 634 | static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, |
632 | sector_t start_sector) | 635 | sector_t start_sector, struct buffer_head *map_bh) |
633 | { | 636 | { |
634 | sector_t sector; | 637 | sector_t sector; |
635 | int ret, nr_pages; | 638 | int ret, nr_pages; |
@@ -638,10 +641,10 @@ static int dio_new_bio(struct dio *dio, struct dio_submit *sdio, | |||
638 | if (ret) | 641 | if (ret) |
639 | goto out; | 642 | goto out; |
640 | sector = start_sector << (sdio->blkbits - 9); | 643 | sector = start_sector << (sdio->blkbits - 9); |
641 | nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev)); | 644 | nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev)); |
642 | nr_pages = min(nr_pages, BIO_MAX_PAGES); | 645 | nr_pages = min(nr_pages, BIO_MAX_PAGES); |
643 | BUG_ON(nr_pages <= 0); | 646 | BUG_ON(nr_pages <= 0); |
644 | dio_bio_alloc(dio, sdio, dio->map_bh.b_bdev, sector, nr_pages); | 647 | dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); |
645 | sdio->boundary = 0; | 648 | sdio->boundary = 0; |
646 | out: | 649 | out: |
647 | return ret; | 650 | return ret; |
@@ -686,7 +689,8 @@ static int dio_bio_add_page(struct dio_submit *sdio) | |||
686 | * The caller of this function is responsible for removing cur_page from the | 689 | * The caller of this function is responsible for removing cur_page from the |
687 | * dio, and for dropping the refcount which came from that presence. | 690 | * dio, and for dropping the refcount which came from that presence. |
688 | */ | 691 | */ |
689 | static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio) | 692 | static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, |
693 | struct buffer_head *map_bh) | ||
690 | { | 694 | { |
691 | int ret = 0; | 695 | int ret = 0; |
692 | 696 | ||
@@ -721,14 +725,14 @@ static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio) | |||
721 | } | 725 | } |
722 | 726 | ||
723 | if (sdio->bio == NULL) { | 727 | if (sdio->bio == NULL) { |
724 | ret = dio_new_bio(dio, sdio, sdio->cur_page_block); | 728 | ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh); |
725 | if (ret) | 729 | if (ret) |
726 | goto out; | 730 | goto out; |
727 | } | 731 | } |
728 | 732 | ||
729 | if (dio_bio_add_page(sdio) != 0) { | 733 | if (dio_bio_add_page(sdio) != 0) { |
730 | dio_bio_submit(dio, sdio); | 734 | dio_bio_submit(dio, sdio); |
731 | ret = dio_new_bio(dio, sdio, sdio->cur_page_block); | 735 | ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh); |
732 | if (ret == 0) { | 736 | if (ret == 0) { |
733 | ret = dio_bio_add_page(sdio); | 737 | ret = dio_bio_add_page(sdio); |
734 | BUG_ON(ret != 0); | 738 | BUG_ON(ret != 0); |
@@ -757,7 +761,8 @@ out: | |||
757 | */ | 761 | */ |
758 | static int | 762 | static int |
759 | submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, | 763 | submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, |
760 | unsigned offset, unsigned len, sector_t blocknr) | 764 | unsigned offset, unsigned len, sector_t blocknr, |
765 | struct buffer_head *map_bh) | ||
761 | { | 766 | { |
762 | int ret = 0; | 767 | int ret = 0; |
763 | 768 | ||
@@ -782,7 +787,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, | |||
782 | * avoid metadata seeks. | 787 | * avoid metadata seeks. |
783 | */ | 788 | */ |
784 | if (sdio->boundary) { | 789 | if (sdio->boundary) { |
785 | ret = dio_send_cur_page(dio, sdio); | 790 | ret = dio_send_cur_page(dio, sdio, map_bh); |
786 | page_cache_release(sdio->cur_page); | 791 | page_cache_release(sdio->cur_page); |
787 | sdio->cur_page = NULL; | 792 | sdio->cur_page = NULL; |
788 | } | 793 | } |
@@ -793,7 +798,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, | |||
793 | * If there's a deferred page already there then send it. | 798 | * If there's a deferred page already there then send it. |
794 | */ | 799 | */ |
795 | if (sdio->cur_page) { | 800 | if (sdio->cur_page) { |
796 | ret = dio_send_cur_page(dio, sdio); | 801 | ret = dio_send_cur_page(dio, sdio, map_bh); |
797 | page_cache_release(sdio->cur_page); | 802 | page_cache_release(sdio->cur_page); |
798 | sdio->cur_page = NULL; | 803 | sdio->cur_page = NULL; |
799 | if (ret) | 804 | if (ret) |
@@ -815,16 +820,16 @@ out: | |||
815 | * file blocks. Only called for S_ISREG files - blockdevs do not set | 820 | * file blocks. Only called for S_ISREG files - blockdevs do not set |
816 | * buffer_new | 821 | * buffer_new |
817 | */ | 822 | */ |
818 | static void clean_blockdev_aliases(struct dio *dio) | 823 | static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh) |
819 | { | 824 | { |
820 | unsigned i; | 825 | unsigned i; |
821 | unsigned nblocks; | 826 | unsigned nblocks; |
822 | 827 | ||
823 | nblocks = dio->map_bh.b_size >> dio->inode->i_blkbits; | 828 | nblocks = map_bh->b_size >> dio->inode->i_blkbits; |
824 | 829 | ||
825 | for (i = 0; i < nblocks; i++) { | 830 | for (i = 0; i < nblocks; i++) { |
826 | unmap_underlying_metadata(dio->map_bh.b_bdev, | 831 | unmap_underlying_metadata(map_bh->b_bdev, |
827 | dio->map_bh.b_blocknr + i); | 832 | map_bh->b_blocknr + i); |
828 | } | 833 | } |
829 | } | 834 | } |
830 | 835 | ||
@@ -837,7 +842,8 @@ static void clean_blockdev_aliases(struct dio *dio) | |||
837 | * `end' is zero if we're doing the start of the IO, 1 at the end of the | 842 | * `end' is zero if we're doing the start of the IO, 1 at the end of the |
838 | * IO. | 843 | * IO. |
839 | */ | 844 | */ |
840 | static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end) | 845 | static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end, |
846 | struct buffer_head *map_bh) | ||
841 | { | 847 | { |
842 | unsigned dio_blocks_per_fs_block; | 848 | unsigned dio_blocks_per_fs_block; |
843 | unsigned this_chunk_blocks; /* In dio_blocks */ | 849 | unsigned this_chunk_blocks; /* In dio_blocks */ |
@@ -845,7 +851,7 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end) | |||
845 | struct page *page; | 851 | struct page *page; |
846 | 852 | ||
847 | sdio->start_zero_done = 1; | 853 | sdio->start_zero_done = 1; |
848 | if (!sdio->blkfactor || !buffer_new(&dio->map_bh)) | 854 | if (!sdio->blkfactor || !buffer_new(map_bh)) |
849 | return; | 855 | return; |
850 | 856 | ||
851 | dio_blocks_per_fs_block = 1 << sdio->blkfactor; | 857 | dio_blocks_per_fs_block = 1 << sdio->blkfactor; |
@@ -865,7 +871,7 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end) | |||
865 | 871 | ||
866 | page = ZERO_PAGE(0); | 872 | page = ZERO_PAGE(0); |
867 | if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes, | 873 | if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes, |
868 | sdio->next_block_for_io)) | 874 | sdio->next_block_for_io, map_bh)) |
869 | return; | 875 | return; |
870 | 876 | ||
871 | sdio->next_block_for_io += this_chunk_blocks; | 877 | sdio->next_block_for_io += this_chunk_blocks; |
@@ -887,13 +893,13 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end) | |||
887 | * it should set b_size to PAGE_SIZE or more inside get_block(). This gives | 893 | * it should set b_size to PAGE_SIZE or more inside get_block(). This gives |
888 | * fine alignment but still allows this function to work in PAGE_SIZE units. | 894 | * fine alignment but still allows this function to work in PAGE_SIZE units. |
889 | */ | 895 | */ |
890 | static int do_direct_IO(struct dio *dio, struct dio_submit *sdio) | 896 | static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, |
897 | struct buffer_head *map_bh) | ||
891 | { | 898 | { |
892 | const unsigned blkbits = sdio->blkbits; | 899 | const unsigned blkbits = sdio->blkbits; |
893 | const unsigned blocks_per_page = PAGE_SIZE >> blkbits; | 900 | const unsigned blocks_per_page = PAGE_SIZE >> blkbits; |
894 | struct page *page; | 901 | struct page *page; |
895 | unsigned block_in_page; | 902 | unsigned block_in_page; |
896 | struct buffer_head *map_bh = &dio->map_bh; | ||
897 | int ret = 0; | 903 | int ret = 0; |
898 | 904 | ||
899 | /* The I/O can start at any block offset within the first page */ | 905 | /* The I/O can start at any block offset within the first page */ |
@@ -919,7 +925,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio) | |||
919 | unsigned long blkmask; | 925 | unsigned long blkmask; |
920 | unsigned long dio_remainder; | 926 | unsigned long dio_remainder; |
921 | 927 | ||
922 | ret = get_more_blocks(dio, sdio); | 928 | ret = get_more_blocks(dio, sdio, map_bh); |
923 | if (ret) { | 929 | if (ret) { |
924 | page_cache_release(page); | 930 | page_cache_release(page); |
925 | goto out; | 931 | goto out; |
@@ -932,7 +938,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio) | |||
932 | sdio->next_block_for_io = | 938 | sdio->next_block_for_io = |
933 | map_bh->b_blocknr << sdio->blkfactor; | 939 | map_bh->b_blocknr << sdio->blkfactor; |
934 | if (buffer_new(map_bh)) | 940 | if (buffer_new(map_bh)) |
935 | clean_blockdev_aliases(dio); | 941 | clean_blockdev_aliases(dio, map_bh); |
936 | 942 | ||
937 | if (!sdio->blkfactor) | 943 | if (!sdio->blkfactor) |
938 | goto do_holes; | 944 | goto do_holes; |
@@ -991,7 +997,7 @@ do_holes: | |||
991 | * we must zero out the start of this block. | 997 | * we must zero out the start of this block. |
992 | */ | 998 | */ |
993 | if (unlikely(sdio->blkfactor && !sdio->start_zero_done)) | 999 | if (unlikely(sdio->blkfactor && !sdio->start_zero_done)) |
994 | dio_zero_block(dio, sdio, 0); | 1000 | dio_zero_block(dio, sdio, 0, map_bh); |
995 | 1001 | ||
996 | /* | 1002 | /* |
997 | * Work out, in this_chunk_blocks, how much disk we | 1003 | * Work out, in this_chunk_blocks, how much disk we |
@@ -1011,7 +1017,8 @@ do_holes: | |||
1011 | ret = submit_page_section(dio, sdio, page, | 1017 | ret = submit_page_section(dio, sdio, page, |
1012 | offset_in_page, | 1018 | offset_in_page, |
1013 | this_chunk_bytes, | 1019 | this_chunk_bytes, |
1014 | sdio->next_block_for_io); | 1020 | sdio->next_block_for_io, |
1021 | map_bh); | ||
1015 | if (ret) { | 1022 | if (ret) { |
1016 | page_cache_release(page); | 1023 | page_cache_release(page); |
1017 | goto out; | 1024 | goto out; |
@@ -1047,6 +1054,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1047 | ssize_t ret = 0; | 1054 | ssize_t ret = 0; |
1048 | ssize_t ret2; | 1055 | ssize_t ret2; |
1049 | size_t bytes; | 1056 | size_t bytes; |
1057 | struct buffer_head map_bh = { 0, }; | ||
1050 | 1058 | ||
1051 | dio->inode = inode; | 1059 | dio->inode = inode; |
1052 | dio->rw = rw; | 1060 | dio->rw = rw; |
@@ -1101,7 +1109,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1101 | sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; | 1109 | sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; |
1102 | sdio->curr_user_address = user_addr; | 1110 | sdio->curr_user_address = user_addr; |
1103 | 1111 | ||
1104 | ret = do_direct_IO(dio, sdio); | 1112 | ret = do_direct_IO(dio, sdio, &map_bh); |
1105 | 1113 | ||
1106 | dio->result += iov[seg].iov_len - | 1114 | dio->result += iov[seg].iov_len - |
1107 | ((sdio->final_block_in_request - sdio->block_in_file) << | 1115 | ((sdio->final_block_in_request - sdio->block_in_file) << |
@@ -1124,10 +1132,10 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1124 | * There may be some unwritten disk at the end of a part-written | 1132 | * There may be some unwritten disk at the end of a part-written |
1125 | * fs-block-sized block. Go zero that now. | 1133 | * fs-block-sized block. Go zero that now. |
1126 | */ | 1134 | */ |
1127 | dio_zero_block(dio, sdio, 1); | 1135 | dio_zero_block(dio, sdio, 1, &map_bh); |
1128 | 1136 | ||
1129 | if (sdio->cur_page) { | 1137 | if (sdio->cur_page) { |
1130 | ret2 = dio_send_cur_page(dio, sdio); | 1138 | ret2 = dio_send_cur_page(dio, sdio, &map_bh); |
1131 | if (ret == 0) | 1139 | if (ret == 0) |
1132 | ret = ret2; | 1140 | ret = ret2; |
1133 | page_cache_release(sdio->cur_page); | 1141 | page_cache_release(sdio->cur_page); |