diff options
-rw-r--r-- | fs/btrfs/extent_io.c | 65 |
1 files changed, 57 insertions, 8 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fa19f2f68c1b..11aa8f743b90 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2805,7 +2805,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
2805 | bio_end_io_t end_io_func, | 2805 | bio_end_io_t end_io_func, |
2806 | int mirror_num, | 2806 | int mirror_num, |
2807 | unsigned long prev_bio_flags, | 2807 | unsigned long prev_bio_flags, |
2808 | unsigned long bio_flags) | 2808 | unsigned long bio_flags, |
2809 | bool force_bio_submit) | ||
2809 | { | 2810 | { |
2810 | int ret = 0; | 2811 | int ret = 0; |
2811 | struct bio *bio; | 2812 | struct bio *bio; |
@@ -2823,6 +2824,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
2823 | contig = bio_end_sector(bio) == sector; | 2824 | contig = bio_end_sector(bio) == sector; |
2824 | 2825 | ||
2825 | if (prev_bio_flags != bio_flags || !contig || | 2826 | if (prev_bio_flags != bio_flags || !contig || |
2827 | force_bio_submit || | ||
2826 | merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || | 2828 | merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || |
2827 | bio_add_page(bio, page, page_size, offset) < page_size) { | 2829 | bio_add_page(bio, page, page_size, offset) < page_size) { |
2828 | ret = submit_one_bio(rw, bio, mirror_num, | 2830 | ret = submit_one_bio(rw, bio, mirror_num, |
@@ -2922,7 +2924,8 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2922 | get_extent_t *get_extent, | 2924 | get_extent_t *get_extent, |
2923 | struct extent_map **em_cached, | 2925 | struct extent_map **em_cached, |
2924 | struct bio **bio, int mirror_num, | 2926 | struct bio **bio, int mirror_num, |
2925 | unsigned long *bio_flags, int rw) | 2927 | unsigned long *bio_flags, int rw, |
2928 | u64 *prev_em_start) | ||
2926 | { | 2929 | { |
2927 | struct inode *inode = page->mapping->host; | 2930 | struct inode *inode = page->mapping->host; |
2928 | u64 start = page_offset(page); | 2931 | u64 start = page_offset(page); |
@@ -2970,6 +2973,7 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2970 | } | 2973 | } |
2971 | while (cur <= end) { | 2974 | while (cur <= end) { |
2972 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; | 2975 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; |
2976 | bool force_bio_submit = false; | ||
2973 | 2977 | ||
2974 | if (cur >= last_byte) { | 2978 | if (cur >= last_byte) { |
2975 | char *userpage; | 2979 | char *userpage; |
@@ -3020,6 +3024,49 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3020 | block_start = em->block_start; | 3024 | block_start = em->block_start; |
3021 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | 3025 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
3022 | block_start = EXTENT_MAP_HOLE; | 3026 | block_start = EXTENT_MAP_HOLE; |
3027 | |||
3028 | /* | ||
3029 | * If we have a file range that points to a compressed extent | ||
3030 | * and it's followed by a consecutive file range that points to | ||
3031 | * the same compressed extent (possibly with a different | ||
3032 | * offset and/or length, so it either points to the whole extent | ||
3033 | * or only part of it), we must make sure we do not submit a | ||
3034 | * single bio to populate the pages for the 2 ranges because | ||
3035 | * this makes the compressed extent read zero out the pages | ||
3036 | * belonging to the 2nd range. Imagine the following scenario: | ||
3037 | * | ||
3038 | * File layout | ||
3039 | * [0 - 8K] [8K - 24K] | ||
3040 | * | | | ||
3041 | * | | | ||
3042 | * points to extent X, points to extent X, | ||
3043 | * offset 4K, length of 8K offset 0, length 16K | ||
3044 | * | ||
3045 | * [extent X, compressed length = 4K, uncompressed length = 16K] | ||
3046 | * | ||
3047 | * If the bio to read the compressed extent covers both ranges, | ||
3048 | * it will decompress extent X into the pages belonging to the | ||
3049 | * first range and then it will stop, zeroing out the remaining | ||
3050 | * pages that belong to the other range that points to extent X. | ||
3051 | * So here we make sure we submit 2 bios, one for the first | ||
3052 | * range and another one for the second range. Both will target | ||
3053 | * the same physical extent from disk, but we can't currently | ||
3054 | * make the compressed bio endio callback populate the pages | ||
3055 | * for both ranges because each compressed bio is tightly | ||
3056 | * coupled with a single extent map, and each range can have | ||
3057 | * an extent map with a different offset value relative to the | ||
3058 | * uncompressed data of our extent and different lengths. This | ||
3059 | * is a corner case so we favor correctness and accept the | ||
3060 | * non-optimal behavior (submitting 2 bios for the same extent). | ||
3061 | */ | ||
3062 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && | ||
3063 | prev_em_start && *prev_em_start != (u64)-1 && | ||
3064 | *prev_em_start != em->orig_start) | ||
3065 | force_bio_submit = true; | ||
3066 | |||
3067 | if (prev_em_start) | ||
3068 | *prev_em_start = em->orig_start; | ||
3069 | |||
3023 | free_extent_map(em); | 3070 | free_extent_map(em); |
3024 | em = NULL; | 3071 | em = NULL; |
3025 | 3072 | ||
@@ -3069,7 +3116,8 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3069 | bdev, bio, pnr, | 3116 | bdev, bio, pnr, |
3070 | end_bio_extent_readpage, mirror_num, | 3117 | end_bio_extent_readpage, mirror_num, |
3071 | *bio_flags, | 3118 | *bio_flags, |
3072 | this_bio_flag); | 3119 | this_bio_flag, |
3120 | force_bio_submit); | ||
3073 | if (!ret) { | 3121 | if (!ret) { |
3074 | nr++; | 3122 | nr++; |
3075 | *bio_flags = this_bio_flag; | 3123 | *bio_flags = this_bio_flag; |
@@ -3101,6 +3149,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, | |||
3101 | struct inode *inode; | 3149 | struct inode *inode; |
3102 | struct btrfs_ordered_extent *ordered; | 3150 | struct btrfs_ordered_extent *ordered; |
3103 | int index; | 3151 | int index; |
3152 | u64 prev_em_start = (u64)-1; | ||
3104 | 3153 | ||
3105 | inode = pages[0]->mapping->host; | 3154 | inode = pages[0]->mapping->host; |
3106 | while (1) { | 3155 | while (1) { |
@@ -3116,7 +3165,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, | |||
3116 | 3165 | ||
3117 | for (index = 0; index < nr_pages; index++) { | 3166 | for (index = 0; index < nr_pages; index++) { |
3118 | __do_readpage(tree, pages[index], get_extent, em_cached, bio, | 3167 | __do_readpage(tree, pages[index], get_extent, em_cached, bio, |
3119 | mirror_num, bio_flags, rw); | 3168 | mirror_num, bio_flags, rw, &prev_em_start); |
3120 | page_cache_release(pages[index]); | 3169 | page_cache_release(pages[index]); |
3121 | } | 3170 | } |
3122 | } | 3171 | } |
@@ -3184,7 +3233,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
3184 | } | 3233 | } |
3185 | 3234 | ||
3186 | ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, | 3235 | ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, |
3187 | bio_flags, rw); | 3236 | bio_flags, rw, NULL); |
3188 | return ret; | 3237 | return ret; |
3189 | } | 3238 | } |
3190 | 3239 | ||
@@ -3210,7 +3259,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, | |||
3210 | int ret; | 3259 | int ret; |
3211 | 3260 | ||
3212 | ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, | 3261 | ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, |
3213 | &bio_flags, READ); | 3262 | &bio_flags, READ, NULL); |
3214 | if (bio) | 3263 | if (bio) |
3215 | ret = submit_one_bio(READ, bio, mirror_num, bio_flags); | 3264 | ret = submit_one_bio(READ, bio, mirror_num, bio_flags); |
3216 | return ret; | 3265 | return ret; |
@@ -3463,7 +3512,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, | |||
3463 | sector, iosize, pg_offset, | 3512 | sector, iosize, pg_offset, |
3464 | bdev, &epd->bio, max_nr, | 3513 | bdev, &epd->bio, max_nr, |
3465 | end_bio_extent_writepage, | 3514 | end_bio_extent_writepage, |
3466 | 0, 0, 0); | 3515 | 0, 0, 0, false); |
3467 | if (ret) | 3516 | if (ret) |
3468 | SetPageError(page); | 3517 | SetPageError(page); |
3469 | } | 3518 | } |
@@ -3765,7 +3814,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3765 | ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, | 3814 | ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, |
3766 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, | 3815 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, |
3767 | -1, end_bio_extent_buffer_writepage, | 3816 | -1, end_bio_extent_buffer_writepage, |
3768 | 0, epd->bio_flags, bio_flags); | 3817 | 0, epd->bio_flags, bio_flags, false); |
3769 | epd->bio_flags = bio_flags; | 3818 | epd->bio_flags = bio_flags; |
3770 | if (ret) { | 3819 | if (ret) { |
3771 | set_btree_ioerr(p); | 3820 | set_btree_ioerr(p); |