diff options
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 80 |
1 files changed, 66 insertions, 14 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f1018cfbfefa..3915c9473e94 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
2798 | bio_end_io_t end_io_func, | 2798 | bio_end_io_t end_io_func, |
2799 | int mirror_num, | 2799 | int mirror_num, |
2800 | unsigned long prev_bio_flags, | 2800 | unsigned long prev_bio_flags, |
2801 | unsigned long bio_flags) | 2801 | unsigned long bio_flags, |
2802 | bool force_bio_submit) | ||
2802 | { | 2803 | { |
2803 | int ret = 0; | 2804 | int ret = 0; |
2804 | struct bio *bio; | 2805 | struct bio *bio; |
@@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
2814 | contig = bio_end_sector(bio) == sector; | 2815 | contig = bio_end_sector(bio) == sector; |
2815 | 2816 | ||
2816 | if (prev_bio_flags != bio_flags || !contig || | 2817 | if (prev_bio_flags != bio_flags || !contig || |
2818 | force_bio_submit || | ||
2817 | merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || | 2819 | merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || |
2818 | bio_add_page(bio, page, page_size, offset) < page_size) { | 2820 | bio_add_page(bio, page, page_size, offset) < page_size) { |
2819 | ret = submit_one_bio(rw, bio, mirror_num, | 2821 | ret = submit_one_bio(rw, bio, mirror_num, |
@@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2910 | get_extent_t *get_extent, | 2912 | get_extent_t *get_extent, |
2911 | struct extent_map **em_cached, | 2913 | struct extent_map **em_cached, |
2912 | struct bio **bio, int mirror_num, | 2914 | struct bio **bio, int mirror_num, |
2913 | unsigned long *bio_flags, int rw) | 2915 | unsigned long *bio_flags, int rw, |
2916 | u64 *prev_em_start) | ||
2914 | { | 2917 | { |
2915 | struct inode *inode = page->mapping->host; | 2918 | struct inode *inode = page->mapping->host; |
2916 | u64 start = page_offset(page); | 2919 | u64 start = page_offset(page); |
@@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2958 | } | 2961 | } |
2959 | while (cur <= end) { | 2962 | while (cur <= end) { |
2960 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; | 2963 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; |
2964 | bool force_bio_submit = false; | ||
2961 | 2965 | ||
2962 | if (cur >= last_byte) { | 2966 | if (cur >= last_byte) { |
2963 | char *userpage; | 2967 | char *userpage; |
@@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3008 | block_start = em->block_start; | 3012 | block_start = em->block_start; |
3009 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | 3013 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
3010 | block_start = EXTENT_MAP_HOLE; | 3014 | block_start = EXTENT_MAP_HOLE; |
3015 | |||
3016 | /* | ||
3017 | * If we have a file range that points to a compressed extent | ||
3018 | * and it's followed by a consecutive file range that points to | ||
3019 | * the same compressed extent (possibly with a different | ||
3020 | * offset and/or length, so it either points to the whole extent | ||
3021 | * or only part of it), we must make sure we do not submit a | ||
3022 | * single bio to populate the pages for the 2 ranges because | ||
3023 | * this makes the compressed extent read zero out the pages | ||
3024 | * belonging to the 2nd range. Imagine the following scenario: | ||
3025 | * | ||
3026 | * File layout | ||
3027 | * [0 - 8K] [8K - 24K] | ||
3028 | * | | | ||
3029 | * | | | ||
3030 | * points to extent X, points to extent X, | ||
3031 | * offset 4K, length of 8K offset 0, length 16K | ||
3032 | * | ||
3033 | * [extent X, compressed length = 4K uncompressed length = 16K] | ||
3034 | * | ||
3035 | * If the bio to read the compressed extent covers both ranges, | ||
3036 | * it will decompress extent X into the pages belonging to the | ||
3037 | * first range and then it will stop, zeroing out the remaining | ||
3038 | * pages that belong to the other range that points to extent X. | ||
3039 | * So here we make sure we submit 2 bios, one for the first | ||
3040 | * range and another one for the second range. Both will target | ||
3041 | * the same physical extent from disk, but we can't currently | ||
3042 | * make the compressed bio endio callback populate the pages | ||
3043 | * for both ranges because each compressed bio is tightly | ||
3044 | * coupled with a single extent map, and each range can have | ||
3045 | * an extent map with a different offset value relative to the | ||
3046 | * uncompressed data of our extent and different lengths. This | ||
3047 | * is a corner case so we prioritize correctness over | ||
3048 | * optimal behavior (we submit 2 bios for the same extent). | ||
3049 | */ | ||
3050 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && | ||
3051 | prev_em_start && *prev_em_start != (u64)-1 && | ||
3052 | *prev_em_start != em->orig_start) | ||
3053 | force_bio_submit = true; | ||
3054 | |||
3055 | if (prev_em_start) | ||
3056 | *prev_em_start = em->orig_start; | ||
3057 | |||
3011 | free_extent_map(em); | 3058 | free_extent_map(em); |
3012 | em = NULL; | 3059 | em = NULL; |
3013 | 3060 | ||
@@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3057 | bdev, bio, pnr, | 3104 | bdev, bio, pnr, |
3058 | end_bio_extent_readpage, mirror_num, | 3105 | end_bio_extent_readpage, mirror_num, |
3059 | *bio_flags, | 3106 | *bio_flags, |
3060 | this_bio_flag); | 3107 | this_bio_flag, |
3108 | force_bio_submit); | ||
3061 | if (!ret) { | 3109 | if (!ret) { |
3062 | nr++; | 3110 | nr++; |
3063 | *bio_flags = this_bio_flag; | 3111 | *bio_flags = this_bio_flag; |
@@ -3084,7 +3132,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, | |||
3084 | get_extent_t *get_extent, | 3132 | get_extent_t *get_extent, |
3085 | struct extent_map **em_cached, | 3133 | struct extent_map **em_cached, |
3086 | struct bio **bio, int mirror_num, | 3134 | struct bio **bio, int mirror_num, |
3087 | unsigned long *bio_flags, int rw) | 3135 | unsigned long *bio_flags, int rw, |
3136 | u64 *prev_em_start) | ||
3088 | { | 3137 | { |
3089 | struct inode *inode; | 3138 | struct inode *inode; |
3090 | struct btrfs_ordered_extent *ordered; | 3139 | struct btrfs_ordered_extent *ordered; |
@@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, | |||
3104 | 3153 | ||
3105 | for (index = 0; index < nr_pages; index++) { | 3154 | for (index = 0; index < nr_pages; index++) { |
3106 | __do_readpage(tree, pages[index], get_extent, em_cached, bio, | 3155 | __do_readpage(tree, pages[index], get_extent, em_cached, bio, |
3107 | mirror_num, bio_flags, rw); | 3156 | mirror_num, bio_flags, rw, prev_em_start); |
3108 | page_cache_release(pages[index]); | 3157 | page_cache_release(pages[index]); |
3109 | } | 3158 | } |
3110 | } | 3159 | } |
@@ -3114,7 +3163,8 @@ static void __extent_readpages(struct extent_io_tree *tree, | |||
3114 | int nr_pages, get_extent_t *get_extent, | 3163 | int nr_pages, get_extent_t *get_extent, |
3115 | struct extent_map **em_cached, | 3164 | struct extent_map **em_cached, |
3116 | struct bio **bio, int mirror_num, | 3165 | struct bio **bio, int mirror_num, |
3117 | unsigned long *bio_flags, int rw) | 3166 | unsigned long *bio_flags, int rw, |
3167 | u64 *prev_em_start) | ||
3118 | { | 3168 | { |
3119 | u64 start = 0; | 3169 | u64 start = 0; |
3120 | u64 end = 0; | 3170 | u64 end = 0; |
@@ -3135,7 +3185,7 @@ static void __extent_readpages(struct extent_io_tree *tree, | |||
3135 | index - first_index, start, | 3185 | index - first_index, start, |
3136 | end, get_extent, em_cached, | 3186 | end, get_extent, em_cached, |
3137 | bio, mirror_num, bio_flags, | 3187 | bio, mirror_num, bio_flags, |
3138 | rw); | 3188 | rw, prev_em_start); |
3139 | start = page_start; | 3189 | start = page_start; |
3140 | end = start + PAGE_CACHE_SIZE - 1; | 3190 | end = start + PAGE_CACHE_SIZE - 1; |
3141 | first_index = index; | 3191 | first_index = index; |
@@ -3146,7 +3196,8 @@ static void __extent_readpages(struct extent_io_tree *tree, | |||
3146 | __do_contiguous_readpages(tree, &pages[first_index], | 3196 | __do_contiguous_readpages(tree, &pages[first_index], |
3147 | index - first_index, start, | 3197 | index - first_index, start, |
3148 | end, get_extent, em_cached, bio, | 3198 | end, get_extent, em_cached, bio, |
3149 | mirror_num, bio_flags, rw); | 3199 | mirror_num, bio_flags, rw, |
3200 | prev_em_start); | ||
3150 | } | 3201 | } |
3151 | 3202 | ||
3152 | static int __extent_read_full_page(struct extent_io_tree *tree, | 3203 | static int __extent_read_full_page(struct extent_io_tree *tree, |
@@ -3172,7 +3223,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
3172 | } | 3223 | } |
3173 | 3224 | ||
3174 | ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, | 3225 | ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, |
3175 | bio_flags, rw); | 3226 | bio_flags, rw, NULL); |
3176 | return ret; | 3227 | return ret; |
3177 | } | 3228 | } |
3178 | 3229 | ||
@@ -3198,7 +3249,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, | |||
3198 | int ret; | 3249 | int ret; |
3199 | 3250 | ||
3200 | ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, | 3251 | ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, |
3201 | &bio_flags, READ); | 3252 | &bio_flags, READ, NULL); |
3202 | if (bio) | 3253 | if (bio) |
3203 | ret = submit_one_bio(READ, bio, mirror_num, bio_flags); | 3254 | ret = submit_one_bio(READ, bio, mirror_num, bio_flags); |
3204 | return ret; | 3255 | return ret; |
@@ -3451,7 +3502,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, | |||
3451 | sector, iosize, pg_offset, | 3502 | sector, iosize, pg_offset, |
3452 | bdev, &epd->bio, max_nr, | 3503 | bdev, &epd->bio, max_nr, |
3453 | end_bio_extent_writepage, | 3504 | end_bio_extent_writepage, |
3454 | 0, 0, 0); | 3505 | 0, 0, 0, false); |
3455 | if (ret) | 3506 | if (ret) |
3456 | SetPageError(page); | 3507 | SetPageError(page); |
3457 | } | 3508 | } |
@@ -3754,7 +3805,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3754 | ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, | 3805 | ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, |
3755 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, | 3806 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, |
3756 | -1, end_bio_extent_buffer_writepage, | 3807 | -1, end_bio_extent_buffer_writepage, |
3757 | 0, epd->bio_flags, bio_flags); | 3808 | 0, epd->bio_flags, bio_flags, false); |
3758 | epd->bio_flags = bio_flags; | 3809 | epd->bio_flags = bio_flags; |
3759 | if (ret) { | 3810 | if (ret) { |
3760 | set_btree_ioerr(p); | 3811 | set_btree_ioerr(p); |
@@ -4158,6 +4209,7 @@ int extent_readpages(struct extent_io_tree *tree, | |||
4158 | struct page *page; | 4209 | struct page *page; |
4159 | struct extent_map *em_cached = NULL; | 4210 | struct extent_map *em_cached = NULL; |
4160 | int nr = 0; | 4211 | int nr = 0; |
4212 | u64 prev_em_start = (u64)-1; | ||
4161 | 4213 | ||
4162 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 4214 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
4163 | page = list_entry(pages->prev, struct page, lru); | 4215 | page = list_entry(pages->prev, struct page, lru); |
@@ -4174,12 +4226,12 @@ int extent_readpages(struct extent_io_tree *tree, | |||
4174 | if (nr < ARRAY_SIZE(pagepool)) | 4226 | if (nr < ARRAY_SIZE(pagepool)) |
4175 | continue; | 4227 | continue; |
4176 | __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, | 4228 | __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, |
4177 | &bio, 0, &bio_flags, READ); | 4229 | &bio, 0, &bio_flags, READ, &prev_em_start); |
4178 | nr = 0; | 4230 | nr = 0; |
4179 | } | 4231 | } |
4180 | if (nr) | 4232 | if (nr) |
4181 | __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, | 4233 | __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, |
4182 | &bio, 0, &bio_flags, READ); | 4234 | &bio, 0, &bio_flags, READ, &prev_em_start); |
4183 | 4235 | ||
4184 | if (em_cached) | 4236 | if (em_cached) |
4185 | free_extent_map(em_cached); | 4237 | free_extent_map(em_cached); |