aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c213
1 files changed, 167 insertions, 46 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5e76a474cb7e..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -1946,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
1946 1952
1947static void set_page_extent_head(struct page *page, unsigned long len) 1953static void set_page_extent_head(struct page *page, unsigned long len)
1948{ 1954{
1955 WARN_ON(!PagePrivate(page));
1949 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1956 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1950} 1957}
1951 1958
@@ -2821,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
2821 * at this point we can safely clear everything except the 2828 * at this point we can safely clear everything except the
2822 * locked bit and the nodatasum bit 2829 * locked bit and the nodatasum bit
2823 */ 2830 */
2824 clear_extent_bit(tree, start, end, 2831 ret = clear_extent_bit(tree, start, end,
2825 ~(EXTENT_LOCKED | EXTENT_NODATASUM), 2832 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2826 0, 0, NULL, mask); 2833 0, 0, NULL, mask);
2834
2835 /* if clear_extent_bit failed for enomem reasons,
2836 * we can't allow the release to continue.
2837 */
2838 if (ret < 0)
2839 ret = 0;
2840 else
2841 ret = 1;
2827 } 2842 }
2828 return ret; 2843 return ret;
2829} 2844}
@@ -2903,6 +2918,46 @@ out:
2903 return sector; 2918 return sector;
2904} 2919}
2905 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2906int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2907 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2908{ 2963{
@@ -2912,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2912 u32 flags = 0; 2967 u32 flags = 0;
2913 u32 found_type; 2968 u32 found_type;
2914 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2915 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2916 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2917 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2918 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2919 struct btrfs_path *path; 2976 struct btrfs_path *path;
2920 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2921 int end = 0; 2978 int end = 0;
2922 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2923 unsigned long emflags; 2982 unsigned long emflags;
2924 int hole = 0;
2925 2983
2926 if (len == 0) 2984 if (len == 0)
2927 return -EINVAL; 2985 return -EINVAL;
@@ -2931,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2931 return -ENOMEM; 2989 return -ENOMEM;
2932 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2933 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2934 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2935 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2936 if (ret < 0) { 2998 if (ret < 0) {
@@ -2944,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2944 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2945 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2946 3008
2947 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2948 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2949 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2950 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2951 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so it's much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2952 } 3023 }
2953 last = found_key.offset;
2954 btrfs_free_path(path); 3024 btrfs_free_path(path);
2955 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2956 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2957 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2958 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2959 if (!em) 3041 if (!em)
2960 goto out; 3042 goto out;
2961 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2964,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2964 } 3046 }
2965 3047
2966 while (!end) { 3048 while (!end) {
2967 hole = 0; 3049 u64 offset_in_extent;
2968 off = em->start + em->len;
2969 if (off >= max)
2970 end = 1;
2971 3050
2972 if (em->block_start == EXTENT_MAP_HOLE) { 3051 /* break if the extent we found is outside the range */
2973 hole = 1; 3052 if (em->start >= max || extent_map_end(em) < off)
2974 goto next; 3053 break;
2975 }
2976 3054
2977 em_start = em->start; 3055 /*
2978 em_len = em->len; 3056 * get_extent may return an extent that starts before our
3057 * requested range. We have to make sure the ranges
3058 * we return to fiemap always move forward and don't
3059 * overlap, so adjust the offsets here
3060 */
3061 em_start = max(em->start, off);
2979 3062
3063 /*
3064 * record the offset from the start of the extent
3065 * for adjusting the disk offset below
3066 */
3067 offset_in_extent = em_start - em->start;
3068 em_end = extent_map_end(em);
3069 em_len = em_end - em_start;
3070 emflags = em->flags;
2980 disko = 0; 3071 disko = 0;
2981 flags = 0; 3072 flags = 0;
2982 3073
3074 /*
3075 * bump off for our next call to get_extent
3076 */
3077 off = extent_map_end(em);
3078 if (off >= max)
3079 end = 1;
3080
2983 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 3081 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
2984 end = 1; 3082 end = 1;
2985 flags |= FIEMAP_EXTENT_LAST; 3083 flags |= FIEMAP_EXTENT_LAST;
@@ -2990,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2990 flags |= (FIEMAP_EXTENT_DELALLOC | 3088 flags |= (FIEMAP_EXTENT_DELALLOC |
2991 FIEMAP_EXTENT_UNKNOWN); 3089 FIEMAP_EXTENT_UNKNOWN);
2992 } else { 3090 } else {
2993 disko = em->block_start; 3091 disko = em->block_start + offset_in_extent;
2994 } 3092 }
2995 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3093 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2996 flags |= FIEMAP_EXTENT_ENCODED; 3094 flags |= FIEMAP_EXTENT_ENCODED;
2997 3095
2998next:
2999 emflags = em->flags;
3000 free_extent_map(em); 3096 free_extent_map(em);
3001 em = NULL; 3097 em = NULL;
3002 if (!end) { 3098 if ((em_start >= last) || em_len == (u64)-1 ||
3003 em = get_extent(inode, NULL, 0, off, max - off, 0); 3099 (last == (u64)-1 && isize <= em_end)) {
3004 if (!em)
3005 goto out;
3006 if (IS_ERR(em)) {
3007 ret = PTR_ERR(em);
3008 goto out;
3009 }
3010 emflags = em->flags;
3011 }
3012
3013 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3014 flags |= FIEMAP_EXTENT_LAST; 3100 flags |= FIEMAP_EXTENT_LAST;
3015 end = 1; 3101 end = 1;
3016 } 3102 }
3017 3103
3018 if (em_start == last) { 3104 /* now scan forward to see if this is really the last extent. */
3105 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3106 get_extent);
3107 if (IS_ERR(em)) {
3108 ret = PTR_ERR(em);
3109 goto out;
3110 }
3111 if (!em) {
3019 flags |= FIEMAP_EXTENT_LAST; 3112 flags |= FIEMAP_EXTENT_LAST;
3020 end = 1; 3113 end = 1;
3021 } 3114 }
3022 3115 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3023 if (!hole) { 3116 em_len, flags);
3024 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3117 if (ret)
3025 em_len, flags); 3118 goto out_free;
3026 if (ret)
3027 goto out_free;
3028 }
3029 } 3119 }
3030out_free: 3120out_free:
3031 free_extent_map(em); 3121 free_extent_map(em);
@@ -3194,7 +3284,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3194 } 3284 }
3195 if (!PageUptodate(p)) 3285 if (!PageUptodate(p))
3196 uptodate = 0; 3286 uptodate = 0;
3197 unlock_page(p); 3287
3288 /*
3289 * see below about how we avoid a nasty race with release page
3290 * and why we unlock later
3291 */
3292 if (i != 0)
3293 unlock_page(p);
3198 } 3294 }
3199 if (uptodate) 3295 if (uptodate)
3200 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3296 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3218,9 +3314,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3218 atomic_inc(&eb->refs); 3314 atomic_inc(&eb->refs);
3219 spin_unlock(&tree->buffer_lock); 3315 spin_unlock(&tree->buffer_lock);
3220 radix_tree_preload_end(); 3316 radix_tree_preload_end();
3317
3318 /*
3319 * there is a race where release page may have
3320 * tried to find this extent buffer in the radix
3321 * but failed. It will tell the VM it is safe to
3322 * reclaim the page, and it will clear the page private bit.
3323 * We must make sure to set the page private bit properly
3324 * after the extent buffer is in the radix tree so
3325 * it doesn't get lost
3326 */
3327 set_page_extent_mapped(eb->first_page);
3328 set_page_extent_head(eb->first_page, eb->len);
3329 if (!page0)
3330 unlock_page(eb->first_page);
3221 return eb; 3331 return eb;
3222 3332
3223free_eb: 3333free_eb:
3334 if (eb->first_page && !page0)
3335 unlock_page(eb->first_page);
3336
3224 if (!atomic_dec_and_test(&eb->refs)) 3337 if (!atomic_dec_and_test(&eb->refs))
3225 return exists; 3338 return exists;
3226 btrfs_release_extent_buffer(eb); 3339 btrfs_release_extent_buffer(eb);
@@ -3271,10 +3384,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3271 continue; 3384 continue;
3272 3385
3273 lock_page(page); 3386 lock_page(page);
3387 WARN_ON(!PagePrivate(page));
3388
3389 set_page_extent_mapped(page);
3274 if (i == 0) 3390 if (i == 0)
3275 set_page_extent_head(page, eb->len); 3391 set_page_extent_head(page, eb->len);
3276 else
3277 set_page_private(page, EXTENT_PAGE_PRIVATE);
3278 3392
3279 clear_page_dirty_for_io(page); 3393 clear_page_dirty_for_io(page);
3280 spin_lock_irq(&page->mapping->tree_lock); 3394 spin_lock_irq(&page->mapping->tree_lock);
@@ -3464,6 +3578,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3464 3578
3465 for (i = start_i; i < num_pages; i++) { 3579 for (i = start_i; i < num_pages; i++) {
3466 page = extent_buffer_page(eb, i); 3580 page = extent_buffer_page(eb, i);
3581
3582 WARN_ON(!PagePrivate(page));
3583
3584 set_page_extent_mapped(page);
3585 if (i == 0)
3586 set_page_extent_head(page, eb->len);
3587
3467 if (inc_all_pages) 3588 if (inc_all_pages)
3468 page_cache_get(page); 3589 page_cache_get(page);
3469 if (!PageUptodate(page)) { 3590 if (!PageUptodate(page)) {