diff options
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 213 |
1 files changed, 167 insertions, 46 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 5e76a474cb7e..714adc4ac4c2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1433 | */ | 1433 | */ |
1434 | u64 count_range_bits(struct extent_io_tree *tree, | 1434 | u64 count_range_bits(struct extent_io_tree *tree, |
1435 | u64 *start, u64 search_end, u64 max_bytes, | 1435 | u64 *start, u64 search_end, u64 max_bytes, |
1436 | unsigned long bits) | 1436 | unsigned long bits, int contig) |
1437 | { | 1437 | { |
1438 | struct rb_node *node; | 1438 | struct rb_node *node; |
1439 | struct extent_state *state; | 1439 | struct extent_state *state; |
1440 | u64 cur_start = *start; | 1440 | u64 cur_start = *start; |
1441 | u64 total_bytes = 0; | 1441 | u64 total_bytes = 0; |
1442 | u64 last = 0; | ||
1442 | int found = 0; | 1443 | int found = 0; |
1443 | 1444 | ||
1444 | if (search_end <= cur_start) { | 1445 | if (search_end <= cur_start) { |
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1463 | state = rb_entry(node, struct extent_state, rb_node); | 1464 | state = rb_entry(node, struct extent_state, rb_node); |
1464 | if (state->start > search_end) | 1465 | if (state->start > search_end) |
1465 | break; | 1466 | break; |
1466 | if (state->end >= cur_start && (state->state & bits)) { | 1467 | if (contig && found && state->start > last + 1) |
1468 | break; | ||
1469 | if (state->end >= cur_start && (state->state & bits) == bits) { | ||
1467 | total_bytes += min(search_end, state->end) + 1 - | 1470 | total_bytes += min(search_end, state->end) + 1 - |
1468 | max(cur_start, state->start); | 1471 | max(cur_start, state->start); |
1469 | if (total_bytes >= max_bytes) | 1472 | if (total_bytes >= max_bytes) |
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1472 | *start = state->start; | 1475 | *start = state->start; |
1473 | found = 1; | 1476 | found = 1; |
1474 | } | 1477 | } |
1478 | last = state->end; | ||
1479 | } else if (contig && found) { | ||
1480 | break; | ||
1475 | } | 1481 | } |
1476 | node = rb_next(node); | 1482 | node = rb_next(node); |
1477 | if (!node) | 1483 | if (!node) |
@@ -1946,6 +1952,7 @@ void set_page_extent_mapped(struct page *page) | |||
1946 | 1952 | ||
1947 | static void set_page_extent_head(struct page *page, unsigned long len) | 1953 | static void set_page_extent_head(struct page *page, unsigned long len) |
1948 | { | 1954 | { |
1955 | WARN_ON(!PagePrivate(page)); | ||
1949 | set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); | 1956 | set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); |
1950 | } | 1957 | } |
1951 | 1958 | ||
@@ -2821,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
2821 | * at this point we can safely clear everything except the | 2828 | * at this point we can safely clear everything except the |
2822 | * locked bit and the nodatasum bit | 2829 | * locked bit and the nodatasum bit |
2823 | */ | 2830 | */ |
2824 | clear_extent_bit(tree, start, end, | 2831 | ret = clear_extent_bit(tree, start, end, |
2825 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), | 2832 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), |
2826 | 0, 0, NULL, mask); | 2833 | 0, 0, NULL, mask); |
2834 | |||
2835 | /* if clear_extent_bit failed for enomem reasons, | ||
2836 | * we can't allow the release to continue. | ||
2837 | */ | ||
2838 | if (ret < 0) | ||
2839 | ret = 0; | ||
2840 | else | ||
2841 | ret = 1; | ||
2827 | } | 2842 | } |
2828 | return ret; | 2843 | return ret; |
2829 | } | 2844 | } |
@@ -2903,6 +2918,46 @@ out: | |||
2903 | return sector; | 2918 | return sector; |
2904 | } | 2919 | } |
2905 | 2920 | ||
2921 | /* | ||
2922 | * helper function for fiemap, which doesn't want to see any holes. | ||
2923 | * This maps until we find something past 'last' | ||
2924 | */ | ||
2925 | static struct extent_map *get_extent_skip_holes(struct inode *inode, | ||
2926 | u64 offset, | ||
2927 | u64 last, | ||
2928 | get_extent_t *get_extent) | ||
2929 | { | ||
2930 | u64 sectorsize = BTRFS_I(inode)->root->sectorsize; | ||
2931 | struct extent_map *em; | ||
2932 | u64 len; | ||
2933 | |||
2934 | if (offset >= last) | ||
2935 | return NULL; | ||
2936 | |||
2937 | while(1) { | ||
2938 | len = last - offset; | ||
2939 | if (len == 0) | ||
2940 | break; | ||
2941 | len = (len + sectorsize - 1) & ~(sectorsize - 1); | ||
2942 | em = get_extent(inode, NULL, 0, offset, len, 0); | ||
2943 | if (!em || IS_ERR(em)) | ||
2944 | return em; | ||
2945 | |||
2946 | /* if this isn't a hole return it */ | ||
2947 | if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) && | ||
2948 | em->block_start != EXTENT_MAP_HOLE) { | ||
2949 | return em; | ||
2950 | } | ||
2951 | |||
2952 | /* this is a hole, advance to the next extent */ | ||
2953 | offset = extent_map_end(em); | ||
2954 | free_extent_map(em); | ||
2955 | if (offset >= last) | ||
2956 | break; | ||
2957 | } | ||
2958 | return NULL; | ||
2959 | } | ||
2960 | |||
2906 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2961 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2907 | __u64 start, __u64 len, get_extent_t *get_extent) | 2962 | __u64 start, __u64 len, get_extent_t *get_extent) |
2908 | { | 2963 | { |
@@ -2912,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2912 | u32 flags = 0; | 2967 | u32 flags = 0; |
2913 | u32 found_type; | 2968 | u32 found_type; |
2914 | u64 last; | 2969 | u64 last; |
2970 | u64 last_for_get_extent = 0; | ||
2915 | u64 disko = 0; | 2971 | u64 disko = 0; |
2972 | u64 isize = i_size_read(inode); | ||
2916 | struct btrfs_key found_key; | 2973 | struct btrfs_key found_key; |
2917 | struct extent_map *em = NULL; | 2974 | struct extent_map *em = NULL; |
2918 | struct extent_state *cached_state = NULL; | 2975 | struct extent_state *cached_state = NULL; |
2919 | struct btrfs_path *path; | 2976 | struct btrfs_path *path; |
2920 | struct btrfs_file_extent_item *item; | 2977 | struct btrfs_file_extent_item *item; |
2921 | int end = 0; | 2978 | int end = 0; |
2922 | u64 em_start = 0, em_len = 0; | 2979 | u64 em_start = 0; |
2980 | u64 em_len = 0; | ||
2981 | u64 em_end = 0; | ||
2923 | unsigned long emflags; | 2982 | unsigned long emflags; |
2924 | int hole = 0; | ||
2925 | 2983 | ||
2926 | if (len == 0) | 2984 | if (len == 0) |
2927 | return -EINVAL; | 2985 | return -EINVAL; |
@@ -2931,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2931 | return -ENOMEM; | 2989 | return -ENOMEM; |
2932 | path->leave_spinning = 1; | 2990 | path->leave_spinning = 1; |
2933 | 2991 | ||
2992 | /* | ||
2993 | * lookup the last file extent. We're not using i_size here | ||
2994 | * because there might be preallocation past i_size | ||
2995 | */ | ||
2934 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | 2996 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, |
2935 | path, inode->i_ino, -1, 0); | 2997 | path, inode->i_ino, -1, 0); |
2936 | if (ret < 0) { | 2998 | if (ret < 0) { |
@@ -2944,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2944 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | 3006 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); |
2945 | found_type = btrfs_key_type(&found_key); | 3007 | found_type = btrfs_key_type(&found_key); |
2946 | 3008 | ||
2947 | /* No extents, just return */ | 3009 | /* No extents, but there might be delalloc bits */ |
2948 | if (found_key.objectid != inode->i_ino || | 3010 | if (found_key.objectid != inode->i_ino || |
2949 | found_type != BTRFS_EXTENT_DATA_KEY) { | 3011 | found_type != BTRFS_EXTENT_DATA_KEY) { |
2950 | btrfs_free_path(path); | 3012 | /* have to trust i_size as the end */ |
2951 | return 0; | 3013 | last = (u64)-1; |
3014 | last_for_get_extent = isize; | ||
3015 | } else { | ||
3016 | /* | ||
3017 | * remember the start of the last extent. There are a | ||
3018 | * bunch of different factors that go into the length of the | ||
3019 | * extent, so it's much less complex to remember where it started | ||
3020 | */ | ||
3021 | last = found_key.offset; | ||
3022 | last_for_get_extent = last + 1; | ||
2952 | } | 3023 | } |
2953 | last = found_key.offset; | ||
2954 | btrfs_free_path(path); | 3024 | btrfs_free_path(path); |
2955 | 3025 | ||
3026 | /* | ||
3027 | * we might have some extents allocated but more delalloc past those | ||
3028 | * extents. so, we trust isize unless the start of the last extent is | ||
3029 | * beyond isize | ||
3030 | */ | ||
3031 | if (last < isize) { | ||
3032 | last = (u64)-1; | ||
3033 | last_for_get_extent = isize; | ||
3034 | } | ||
3035 | |||
2956 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | 3036 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, |
2957 | &cached_state, GFP_NOFS); | 3037 | &cached_state, GFP_NOFS); |
2958 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 3038 | |
3039 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | ||
3040 | get_extent); | ||
2959 | if (!em) | 3041 | if (!em) |
2960 | goto out; | 3042 | goto out; |
2961 | if (IS_ERR(em)) { | 3043 | if (IS_ERR(em)) { |
@@ -2964,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2964 | } | 3046 | } |
2965 | 3047 | ||
2966 | while (!end) { | 3048 | while (!end) { |
2967 | hole = 0; | 3049 | u64 offset_in_extent; |
2968 | off = em->start + em->len; | ||
2969 | if (off >= max) | ||
2970 | end = 1; | ||
2971 | 3050 | ||
2972 | if (em->block_start == EXTENT_MAP_HOLE) { | 3051 | /* break if the extent we found is outside the range */ |
2973 | hole = 1; | 3052 | if (em->start >= max || extent_map_end(em) < off) |
2974 | goto next; | 3053 | break; |
2975 | } | ||
2976 | 3054 | ||
2977 | em_start = em->start; | 3055 | /* |
2978 | em_len = em->len; | 3056 | * get_extent may return an extent that starts before our |
3057 | * requested range. We have to make sure the ranges | ||
3058 | * we return to fiemap always move forward and don't | ||
3059 | * overlap, so adjust the offsets here | ||
3060 | */ | ||
3061 | em_start = max(em->start, off); | ||
2979 | 3062 | ||
3063 | /* | ||
3064 | * record the offset from the start of the extent | ||
3065 | * for adjusting the disk offset below | ||
3066 | */ | ||
3067 | offset_in_extent = em_start - em->start; | ||
3068 | em_end = extent_map_end(em); | ||
3069 | em_len = em_end - em_start; | ||
3070 | emflags = em->flags; | ||
2980 | disko = 0; | 3071 | disko = 0; |
2981 | flags = 0; | 3072 | flags = 0; |
2982 | 3073 | ||
3074 | /* | ||
3075 | * bump off for our next call to get_extent | ||
3076 | */ | ||
3077 | off = extent_map_end(em); | ||
3078 | if (off >= max) | ||
3079 | end = 1; | ||
3080 | |||
2983 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { | 3081 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2984 | end = 1; | 3082 | end = 1; |
2985 | flags |= FIEMAP_EXTENT_LAST; | 3083 | flags |= FIEMAP_EXTENT_LAST; |
@@ -2990,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2990 | flags |= (FIEMAP_EXTENT_DELALLOC | | 3088 | flags |= (FIEMAP_EXTENT_DELALLOC | |
2991 | FIEMAP_EXTENT_UNKNOWN); | 3089 | FIEMAP_EXTENT_UNKNOWN); |
2992 | } else { | 3090 | } else { |
2993 | disko = em->block_start; | 3091 | disko = em->block_start + offset_in_extent; |
2994 | } | 3092 | } |
2995 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 3093 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
2996 | flags |= FIEMAP_EXTENT_ENCODED; | 3094 | flags |= FIEMAP_EXTENT_ENCODED; |
2997 | 3095 | ||
2998 | next: | ||
2999 | emflags = em->flags; | ||
3000 | free_extent_map(em); | 3096 | free_extent_map(em); |
3001 | em = NULL; | 3097 | em = NULL; |
3002 | if (!end) { | 3098 | if ((em_start >= last) || em_len == (u64)-1 || |
3003 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 3099 | (last == (u64)-1 && isize <= em_end)) { |
3004 | if (!em) | ||
3005 | goto out; | ||
3006 | if (IS_ERR(em)) { | ||
3007 | ret = PTR_ERR(em); | ||
3008 | goto out; | ||
3009 | } | ||
3010 | emflags = em->flags; | ||
3011 | } | ||
3012 | |||
3013 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { | ||
3014 | flags |= FIEMAP_EXTENT_LAST; | 3100 | flags |= FIEMAP_EXTENT_LAST; |
3015 | end = 1; | 3101 | end = 1; |
3016 | } | 3102 | } |
3017 | 3103 | ||
3018 | if (em_start == last) { | 3104 | /* now scan forward to see if this is really the last extent. */ |
3105 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | ||
3106 | get_extent); | ||
3107 | if (IS_ERR(em)) { | ||
3108 | ret = PTR_ERR(em); | ||
3109 | goto out; | ||
3110 | } | ||
3111 | if (!em) { | ||
3019 | flags |= FIEMAP_EXTENT_LAST; | 3112 | flags |= FIEMAP_EXTENT_LAST; |
3020 | end = 1; | 3113 | end = 1; |
3021 | } | 3114 | } |
3022 | 3115 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | |
3023 | if (!hole) { | 3116 | em_len, flags); |
3024 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 3117 | if (ret) |
3025 | em_len, flags); | 3118 | goto out_free; |
3026 | if (ret) | ||
3027 | goto out_free; | ||
3028 | } | ||
3029 | } | 3119 | } |
3030 | out_free: | 3120 | out_free: |
3031 | free_extent_map(em); | 3121 | free_extent_map(em); |
@@ -3194,7 +3284,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3194 | } | 3284 | } |
3195 | if (!PageUptodate(p)) | 3285 | if (!PageUptodate(p)) |
3196 | uptodate = 0; | 3286 | uptodate = 0; |
3197 | unlock_page(p); | 3287 | |
3288 | /* | ||
3289 | * see below about how we avoid a nasty race with release page | ||
3290 | * and why we unlock later | ||
3291 | */ | ||
3292 | if (i != 0) | ||
3293 | unlock_page(p); | ||
3198 | } | 3294 | } |
3199 | if (uptodate) | 3295 | if (uptodate) |
3200 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3296 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
@@ -3218,9 +3314,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3218 | atomic_inc(&eb->refs); | 3314 | atomic_inc(&eb->refs); |
3219 | spin_unlock(&tree->buffer_lock); | 3315 | spin_unlock(&tree->buffer_lock); |
3220 | radix_tree_preload_end(); | 3316 | radix_tree_preload_end(); |
3317 | |||
3318 | /* | ||
3319 | * there is a race where release page may have | ||
3320 | * tried to find this extent buffer in the radix | ||
3321 | * but failed. It will tell the VM it is safe to | ||
3322 | * reclaim the page, and it will clear the page private bit. | ||
3323 | * We must make sure to set the page private bit properly | ||
3324 | * after the extent buffer is in the radix tree so | ||
3325 | * it doesn't get lost | ||
3326 | */ | ||
3327 | set_page_extent_mapped(eb->first_page); | ||
3328 | set_page_extent_head(eb->first_page, eb->len); | ||
3329 | if (!page0) | ||
3330 | unlock_page(eb->first_page); | ||
3221 | return eb; | 3331 | return eb; |
3222 | 3332 | ||
3223 | free_eb: | 3333 | free_eb: |
3334 | if (eb->first_page && !page0) | ||
3335 | unlock_page(eb->first_page); | ||
3336 | |||
3224 | if (!atomic_dec_and_test(&eb->refs)) | 3337 | if (!atomic_dec_and_test(&eb->refs)) |
3225 | return exists; | 3338 | return exists; |
3226 | btrfs_release_extent_buffer(eb); | 3339 | btrfs_release_extent_buffer(eb); |
@@ -3271,10 +3384,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3271 | continue; | 3384 | continue; |
3272 | 3385 | ||
3273 | lock_page(page); | 3386 | lock_page(page); |
3387 | WARN_ON(!PagePrivate(page)); | ||
3388 | |||
3389 | set_page_extent_mapped(page); | ||
3274 | if (i == 0) | 3390 | if (i == 0) |
3275 | set_page_extent_head(page, eb->len); | 3391 | set_page_extent_head(page, eb->len); |
3276 | else | ||
3277 | set_page_private(page, EXTENT_PAGE_PRIVATE); | ||
3278 | 3392 | ||
3279 | clear_page_dirty_for_io(page); | 3393 | clear_page_dirty_for_io(page); |
3280 | spin_lock_irq(&page->mapping->tree_lock); | 3394 | spin_lock_irq(&page->mapping->tree_lock); |
@@ -3464,6 +3578,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3464 | 3578 | ||
3465 | for (i = start_i; i < num_pages; i++) { | 3579 | for (i = start_i; i < num_pages; i++) { |
3466 | page = extent_buffer_page(eb, i); | 3580 | page = extent_buffer_page(eb, i); |
3581 | |||
3582 | WARN_ON(!PagePrivate(page)); | ||
3583 | |||
3584 | set_page_extent_mapped(page); | ||
3585 | if (i == 0) | ||
3586 | set_page_extent_head(page, eb->len); | ||
3587 | |||
3467 | if (inc_all_pages) | 3588 | if (inc_all_pages) |
3468 | page_cache_get(page); | 3589 | page_cache_get(page); |
3469 | if (!PageUptodate(page)) { | 3590 | if (!PageUptodate(page)) { |