aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c192
1 files changed, 147 insertions, 45 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf1766e..fd3f172e94e6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -1865,7 +1871,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1865 bio_get(bio); 1871 bio_get(bio);
1866 1872
1867 if (tree->ops && tree->ops->submit_bio_hook) 1873 if (tree->ops && tree->ops->submit_bio_hook)
1868 tree->ops->submit_bio_hook(page->mapping->host, rw, bio, 1874 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1869 mirror_num, bio_flags, start); 1875 mirror_num, bio_flags, start);
1870 else 1876 else
1871 submit_bio(rw, bio); 1877 submit_bio(rw, bio);
@@ -1920,6 +1926,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
1920 nr = bio_get_nr_vecs(bdev); 1926 nr = bio_get_nr_vecs(bdev);
1921 1927
1922 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); 1928 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1929 if (!bio)
1930 return -ENOMEM;
1923 1931
1924 bio_add_page(bio, page, page_size, offset); 1932 bio_add_page(bio, page, page_size, offset);
1925 bio->bi_end_io = end_io_func; 1933 bio->bi_end_io = end_io_func;
@@ -1944,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
1944 1952
1945static void set_page_extent_head(struct page *page, unsigned long len) 1953static void set_page_extent_head(struct page *page, unsigned long len)
1946{ 1954{
1955 WARN_ON(!PagePrivate(page));
1947 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1956 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1948} 1957}
1949 1958
@@ -2126,7 +2135,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2126 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, 2135 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
2127 &bio_flags); 2136 &bio_flags);
2128 if (bio) 2137 if (bio)
2129 submit_one_bio(READ, bio, 0, bio_flags); 2138 ret = submit_one_bio(READ, bio, 0, bio_flags);
2130 return ret; 2139 return ret;
2131} 2140}
2132 2141
@@ -2819,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
2819 * at this point we can safely clear everything except the 2828 * at this point we can safely clear everything except the
2820 * locked bit and the nodatasum bit 2829 * locked bit and the nodatasum bit
2821 */ 2830 */
2822 clear_extent_bit(tree, start, end, 2831 ret = clear_extent_bit(tree, start, end,
2823 ~(EXTENT_LOCKED | EXTENT_NODATASUM), 2832 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2824 0, 0, NULL, mask); 2833 0, 0, NULL, mask);
2834
2835 /* if clear_extent_bit failed for ENOMEM reasons,
2836 * we can't allow the release to continue.
2837 */
2838 if (ret < 0)
2839 ret = 0;
2840 else
2841 ret = 1;
2825 } 2842 }
2826 return ret; 2843 return ret;
2827} 2844}
@@ -2901,6 +2918,46 @@ out:
2901 return sector; 2918 return sector;
2902} 2919}
2903 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2904int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2905 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2906{ 2963{
@@ -2910,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2910 u32 flags = 0; 2967 u32 flags = 0;
2911 u32 found_type; 2968 u32 found_type;
2912 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2913 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2914 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2915 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2916 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2917 struct btrfs_path *path; 2976 struct btrfs_path *path;
2918 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2919 int end = 0; 2978 int end = 0;
2920 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2921 unsigned long emflags; 2982 unsigned long emflags;
2922 int hole = 0;
2923 2983
2924 if (len == 0) 2984 if (len == 0)
2925 return -EINVAL; 2985 return -EINVAL;
@@ -2929,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2929 return -ENOMEM; 2989 return -ENOMEM;
2930 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2931 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2932 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2933 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2934 if (ret < 0) { 2998 if (ret < 0) {
@@ -2942,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2942 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2943 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2944 3008
2945 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2946 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2947 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2948 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2949 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so it's much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2950 } 3023 }
2951 last = found_key.offset;
2952 btrfs_free_path(path); 3024 btrfs_free_path(path);
2953 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2954 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2955 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2956 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2957 if (!em) 3041 if (!em)
2958 goto out; 3042 goto out;
2959 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2962,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2962 } 3046 }
2963 3047
2964 while (!end) { 3048 while (!end) {
2965 hole = 0; 3049 off = extent_map_end(em);
2966 off = em->start + em->len;
2967 if (off >= max) 3050 if (off >= max)
2968 end = 1; 3051 end = 1;
2969 3052
2970 if (em->block_start == EXTENT_MAP_HOLE) {
2971 hole = 1;
2972 goto next;
2973 }
2974
2975 em_start = em->start; 3053 em_start = em->start;
2976 em_len = em->len; 3054 em_len = em->len;
2977 3055 em_end = extent_map_end(em);
3056 emflags = em->flags;
2978 disko = 0; 3057 disko = 0;
2979 flags = 0; 3058 flags = 0;
2980 3059
@@ -2993,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2993 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3072 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2994 flags |= FIEMAP_EXTENT_ENCODED; 3073 flags |= FIEMAP_EXTENT_ENCODED;
2995 3074
2996next:
2997 emflags = em->flags;
2998 free_extent_map(em); 3075 free_extent_map(em);
2999 em = NULL; 3076 em = NULL;
3000 if (!end) { 3077 if ((em_start >= last) || em_len == (u64)-1 ||
3001 em = get_extent(inode, NULL, 0, off, max - off, 0); 3078 (last == (u64)-1 && isize <= em_end)) {
3002 if (!em)
3003 goto out;
3004 if (IS_ERR(em)) {
3005 ret = PTR_ERR(em);
3006 goto out;
3007 }
3008 emflags = em->flags;
3009 }
3010
3011 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3012 flags |= FIEMAP_EXTENT_LAST; 3079 flags |= FIEMAP_EXTENT_LAST;
3013 end = 1; 3080 end = 1;
3014 } 3081 }
3015 3082
3016 if (em_start == last) { 3083 /* now scan forward to see if this is really the last extent. */
3084 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3085 get_extent);
3086 if (IS_ERR(em)) {
3087 ret = PTR_ERR(em);
3088 goto out;
3089 }
3090 if (!em) {
3017 flags |= FIEMAP_EXTENT_LAST; 3091 flags |= FIEMAP_EXTENT_LAST;
3018 end = 1; 3092 end = 1;
3019 } 3093 }
3020 3094 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3021 if (!hole) { 3095 em_len, flags);
3022 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3096 if (ret)
3023 em_len, flags); 3097 goto out_free;
3024 if (ret)
3025 goto out_free;
3026 }
3027 } 3098 }
3028out_free: 3099out_free:
3029 free_extent_map(em); 3100 free_extent_map(em);
@@ -3192,7 +3263,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3192 } 3263 }
3193 if (!PageUptodate(p)) 3264 if (!PageUptodate(p))
3194 uptodate = 0; 3265 uptodate = 0;
3195 unlock_page(p); 3266
3267 /*
3268 * see below about how we avoid a nasty race with release page
3269 * and why we unlock later
3270 */
3271 if (i != 0)
3272 unlock_page(p);
3196 } 3273 }
3197 if (uptodate) 3274 if (uptodate)
3198 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3275 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3293,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3216 atomic_inc(&eb->refs); 3293 atomic_inc(&eb->refs);
3217 spin_unlock(&tree->buffer_lock); 3294 spin_unlock(&tree->buffer_lock);
3218 radix_tree_preload_end(); 3295 radix_tree_preload_end();
3296
3297 /*
3298 * there is a race where release page may have
3299 * tried to find this extent buffer in the radix
3300 * but failed. It will tell the VM it is safe to
3301 * reclaim the page, and it will clear the page private bit.
3302 * We must make sure to set the page private bit properly
3303 * after the extent buffer is in the radix tree so
3304 * it doesn't get lost
3305 */
3306 set_page_extent_mapped(eb->first_page);
3307 set_page_extent_head(eb->first_page, eb->len);
3308 if (!page0)
3309 unlock_page(eb->first_page);
3219 return eb; 3310 return eb;
3220 3311
3221free_eb: 3312free_eb:
3313 if (eb->first_page && !page0)
3314 unlock_page(eb->first_page);
3315
3222 if (!atomic_dec_and_test(&eb->refs)) 3316 if (!atomic_dec_and_test(&eb->refs))
3223 return exists; 3317 return exists;
3224 btrfs_release_extent_buffer(eb); 3318 btrfs_release_extent_buffer(eb);
@@ -3269,10 +3363,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3269 continue; 3363 continue;
3270 3364
3271 lock_page(page); 3365 lock_page(page);
3366 WARN_ON(!PagePrivate(page));
3367
3368 set_page_extent_mapped(page);
3272 if (i == 0) 3369 if (i == 0)
3273 set_page_extent_head(page, eb->len); 3370 set_page_extent_head(page, eb->len);
3274 else
3275 set_page_private(page, EXTENT_PAGE_PRIVATE);
3276 3371
3277 clear_page_dirty_for_io(page); 3372 clear_page_dirty_for_io(page);
3278 spin_lock_irq(&page->mapping->tree_lock); 3373 spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3557,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3462 3557
3463 for (i = start_i; i < num_pages; i++) { 3558 for (i = start_i; i < num_pages; i++) {
3464 page = extent_buffer_page(eb, i); 3559 page = extent_buffer_page(eb, i);
3560
3561 WARN_ON(!PagePrivate(page));
3562
3563 set_page_extent_mapped(page);
3564 if (i == 0)
3565 set_page_extent_head(page, eb->len);
3566
3465 if (inc_all_pages) 3567 if (inc_all_pages)
3466 page_cache_get(page); 3568 page_cache_get(page);
3467 if (!PageUptodate(page)) { 3569 if (!PageUptodate(page)) {