 fs/btrfs/extent_io.c | 65 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 57 insertions(+), 8 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fa19f2f68c1b..11aa8f743b90 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2805,7 +2805,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                               bio_end_io_t end_io_func,
                               int mirror_num,
                               unsigned long prev_bio_flags,
-                              unsigned long bio_flags)
+                              unsigned long bio_flags,
+                              bool force_bio_submit)
 {
         int ret = 0;
         struct bio *bio;
@@ -2823,6 +2824,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                 contig = bio_end_sector(bio) == sector;

                 if (prev_bio_flags != bio_flags || !contig ||
+                    force_bio_submit ||
                     merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
                     bio_add_page(bio, page, page_size, offset) < page_size) {
                         ret = submit_one_bio(rw, bio, mirror_num,
@@ -2922,7 +2924,8 @@ static int __do_readpage(struct extent_io_tree *tree,
                          get_extent_t *get_extent,
                          struct extent_map **em_cached,
                          struct bio **bio, int mirror_num,
-                         unsigned long *bio_flags, int rw)
+                         unsigned long *bio_flags, int rw,
+                         u64 *prev_em_start)
 {
         struct inode *inode = page->mapping->host;
         u64 start = page_offset(page);
@@ -2970,6 +2973,7 @@ static int __do_readpage(struct extent_io_tree *tree,
         }
         while (cur <= end) {
                 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+                bool force_bio_submit = false;

                 if (cur >= last_byte) {
                         char *userpage;
@@ -3020,6 +3024,49 @@ static int __do_readpage(struct extent_io_tree *tree,
                 block_start = em->block_start;
                 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                         block_start = EXTENT_MAP_HOLE;
+
+                /*
+                 * If we have a file range that points to a compressed extent
+                 * and it's followed by a consecutive file range that points
+                 * to the same compressed extent (possibly with a different
+                 * offset and/or length, so it either points to the whole extent
+                 * or only part of it), we must make sure we do not submit a
+                 * single bio to populate the pages for the 2 ranges because
+                 * this makes the compressed extent read zero out the pages
+                 * belonging to the 2nd range. Imagine the following scenario:
+                 *
+                 *  File layout
+                 *  [0 - 8K]                     [8K - 24K]
+                 *    |                               |
+                 *    |                               |
+                 * points to extent X,         points to extent X,
+                 * offset 4K, length of 8K     offset 0, length 16K
+                 *
+                 * [extent X, compressed length = 4K uncompressed length = 16K]
+                 *
+                 * If the bio to read the compressed extent covers both ranges,
+                 * it will decompress extent X into the pages belonging to the
+                 * first range and then it will stop, zeroing out the remaining
+                 * pages that belong to the other range that points to extent X.
+                 * So here we make sure we submit 2 bios, one for the first
+                 * range and another one for the second range. Both will target
+                 * the same physical extent from disk, but we can't currently
+                 * make the compressed bio endio callback populate the pages
+                 * for both ranges because each compressed bio is tightly
+                 * coupled with a single extent map, and each range can have
+                 * an extent map with a different offset value relative to the
+                 * uncompressed data of our extent and different lengths. This
+                 * is a corner case so we prioritize correctness over
+                 * non-optimal behavior (submitting 2 bios for the same extent).
+                 */
+                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
+                    prev_em_start && *prev_em_start != (u64)-1 &&
+                    *prev_em_start != em->orig_start)
+                        force_bio_submit = true;
+
+                if (prev_em_start)
+                        *prev_em_start = em->orig_start;
+
                 free_extent_map(em);
                 em = NULL;

@@ -3069,7 +3116,8 @@ static int __do_readpage(struct extent_io_tree *tree,
                                          bdev, bio, pnr,
                                          end_bio_extent_readpage, mirror_num,
                                          *bio_flags,
-                                         this_bio_flag);
+                                         this_bio_flag,
+                                         force_bio_submit);
                 if (!ret) {
                         nr++;
                         *bio_flags = this_bio_flag;
@@ -3101,6 +3149,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
         struct inode *inode;
         struct btrfs_ordered_extent *ordered;
         int index;
+        u64 prev_em_start = (u64)-1;

         inode = pages[0]->mapping->host;
         while (1) {
@@ -3116,7 +3165,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,

         for (index = 0; index < nr_pages; index++) {
                 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
-                              mirror_num, bio_flags, rw);
+                              mirror_num, bio_flags, rw, &prev_em_start);
                 page_cache_release(pages[index]);
         }
 }
@@ -3184,7 +3233,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
         }

         ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
-                            bio_flags, rw);
+                            bio_flags, rw, NULL);
         return ret;
 }

@@ -3210,7 +3259,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
         int ret;

         ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
-                            &bio_flags, READ);
+                            &bio_flags, READ, NULL);
         if (bio)
                 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
         return ret;
@@ -3463,7 +3512,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                                                  sector, iosize, pg_offset,
                                                  bdev, &epd->bio, max_nr,
                                                  end_bio_extent_writepage,
-                                                 0, 0, 0);
+                                                 0, 0, 0, false);
                         if (ret)
                                 SetPageError(page);
                 }
@@ -3765,7 +3814,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
                 ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
                                          PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
                                          -1, end_bio_extent_buffer_writepage,
-                                         0, epd->bio_flags, bio_flags);
+                                         0, epd->bio_flags, bio_flags, false);
                 epd->bio_flags = bio_flags;
                 if (ret) {
                         set_btree_ioerr(p);
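
The core of the change is the comment block and check added in __do_readpage(): a range is forced into its own bio when it is backed by a compressed extent whose extent map differs from the one used for the previous range. The following is a minimal, standalone user-space C sketch of that decision only, not kernel code; "struct range", the em_start field and the must_force_submit() helper are hypothetical stand-ins for the extent map bookkeeping (em->orig_start tracked through *prev_em_start) that the patch uses.

/*
 * Standalone model (not kernel code) of the force_bio_submit decision
 * added by this patch. struct range and must_force_submit() are
 * hypothetical; the real state is em->orig_start and *prev_em_start
 * inside __do_readpage().
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct range {
        uint64_t em_start;   /* stand-in for the extent map's orig_start */
        bool compressed;     /* stand-in for EXTENT_FLAG_COMPRESSED */
};

/*
 * A range must go into its own bio when it is compressed and the
 * previous range was backed by a different extent map: a single
 * compressed read would only populate the pages of the first range
 * and zero out the pages of the second one.
 */
static bool must_force_submit(const struct range *cur, uint64_t *prev_em_start)
{
        bool force = cur->compressed &&
                     *prev_em_start != (uint64_t)-1 &&
                     *prev_em_start != cur->em_start;

        *prev_em_start = cur->em_start;
        return force;
}

int main(void)
{
        /*
         * The scenario from the comment above: file range [0, 8K) points
         * to extent X at offset 4K, file range [8K, 24K) points to the
         * same extent X at offset 0, so their extent maps differ and the
         * second range must be submitted in a separate bio.
         */
        struct range ranges[] = {
                { .em_start = 4096, .compressed = true },
                { .em_start = 0,    .compressed = true },
        };
        uint64_t prev_em_start = (uint64_t)-1;

        for (int i = 0; i < 2; i++)
                printf("range %d: force_bio_submit = %s\n", i,
                       must_force_submit(&ranges[i], &prev_em_start) ?
                                "true" : "false");
        return 0;
}

Built with any C99 compiler, this prints force_bio_submit = false for the first range and true for the second, which is why the patch ends up submitting two bios that read the same physical compressed extent.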