diff options
author | Chris Mason <chris.mason@oracle.com> | 2011-02-23 16:23:20 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2011-02-23 16:23:20 -0500 |
commit | ec29ed5b407d618a8128f5942aade9e1758aa14b (patch) | |
tree | 19b3c13f15504cf9de116f0bd55045ebcf0d0cfc /fs/btrfs/extent_io.c | |
parent | fb01aa85b8b29c1a4e1f4a28ea54175de6bf7559 (diff) |
Btrfs: fix fiemap bugs with delalloc
The Btrfs fiemap code wasn't properly returning delalloc extents,
so applications that trust fiemap to decide if there are holes in the
file see holes instead of delalloc.
This reworks the btrfs fiemap code, adding a get_extent helper that
searches for delalloc ranges and also adding a helper for extent_fiemap
that skips past holes in the file.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 138 |
1 files changed, 99 insertions, 39 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e7aeba242701..ff45b80d90f0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1433 | */ | 1433 | */ |
1434 | u64 count_range_bits(struct extent_io_tree *tree, | 1434 | u64 count_range_bits(struct extent_io_tree *tree, |
1435 | u64 *start, u64 search_end, u64 max_bytes, | 1435 | u64 *start, u64 search_end, u64 max_bytes, |
1436 | unsigned long bits) | 1436 | unsigned long bits, int contig) |
1437 | { | 1437 | { |
1438 | struct rb_node *node; | 1438 | struct rb_node *node; |
1439 | struct extent_state *state; | 1439 | struct extent_state *state; |
1440 | u64 cur_start = *start; | 1440 | u64 cur_start = *start; |
1441 | u64 total_bytes = 0; | 1441 | u64 total_bytes = 0; |
1442 | u64 last = 0; | ||
1442 | int found = 0; | 1443 | int found = 0; |
1443 | 1444 | ||
1444 | if (search_end <= cur_start) { | 1445 | if (search_end <= cur_start) { |
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1463 | state = rb_entry(node, struct extent_state, rb_node); | 1464 | state = rb_entry(node, struct extent_state, rb_node); |
1464 | if (state->start > search_end) | 1465 | if (state->start > search_end) |
1465 | break; | 1466 | break; |
1466 | if (state->end >= cur_start && (state->state & bits)) { | 1467 | if (contig && found && state->start > last + 1) |
1468 | break; | ||
1469 | if (state->end >= cur_start && (state->state & bits) == bits) { | ||
1467 | total_bytes += min(search_end, state->end) + 1 - | 1470 | total_bytes += min(search_end, state->end) + 1 - |
1468 | max(cur_start, state->start); | 1471 | max(cur_start, state->start); |
1469 | if (total_bytes >= max_bytes) | 1472 | if (total_bytes >= max_bytes) |
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1472 | *start = state->start; | 1475 | *start = state->start; |
1473 | found = 1; | 1476 | found = 1; |
1474 | } | 1477 | } |
1478 | last = state->end; | ||
1479 | } else if (contig && found) { | ||
1480 | break; | ||
1475 | } | 1481 | } |
1476 | node = rb_next(node); | 1482 | node = rb_next(node); |
1477 | if (!node) | 1483 | if (!node) |
@@ -2912,6 +2918,46 @@ out: | |||
2912 | return sector; | 2918 | return sector; |
2913 | } | 2919 | } |
2914 | 2920 | ||
2921 | /* | ||
2922 | * helper function for fiemap, which doesn't want to see any holes. | ||
2923 | * This maps until we find something past 'last' | ||
2924 | */ | ||
2925 | static struct extent_map *get_extent_skip_holes(struct inode *inode, | ||
2926 | u64 offset, | ||
2927 | u64 last, | ||
2928 | get_extent_t *get_extent) | ||
2929 | { | ||
2930 | u64 sectorsize = BTRFS_I(inode)->root->sectorsize; | ||
2931 | struct extent_map *em; | ||
2932 | u64 len; | ||
2933 | |||
2934 | if (offset >= last) | ||
2935 | return NULL; | ||
2936 | |||
2937 | while(1) { | ||
2938 | len = last - offset; | ||
2939 | if (len == 0) | ||
2940 | break; | ||
2941 | len = (len + sectorsize - 1) & ~(sectorsize - 1); | ||
2942 | em = get_extent(inode, NULL, 0, offset, len, 0); | ||
2943 | if (!em || IS_ERR(em)) | ||
2944 | return em; | ||
2945 | |||
2946 | /* if this isn't a hole return it */ | ||
2947 | if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) && | ||
2948 | em->block_start != EXTENT_MAP_HOLE) { | ||
2949 | return em; | ||
2950 | } | ||
2951 | |||
2952 | /* this is a hole, advance to the next extent */ | ||
2953 | offset = extent_map_end(em); | ||
2954 | free_extent_map(em); | ||
2955 | if (offset >= last) | ||
2956 | break; | ||
2957 | } | ||
2958 | return NULL; | ||
2959 | } | ||
2960 | |||
2915 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2961 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2916 | __u64 start, __u64 len, get_extent_t *get_extent) | 2962 | __u64 start, __u64 len, get_extent_t *get_extent) |
2917 | { | 2963 | { |
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2921 | u32 flags = 0; | 2967 | u32 flags = 0; |
2922 | u32 found_type; | 2968 | u32 found_type; |
2923 | u64 last; | 2969 | u64 last; |
2970 | u64 last_for_get_extent = 0; | ||
2924 | u64 disko = 0; | 2971 | u64 disko = 0; |
2972 | u64 isize = i_size_read(inode); | ||
2925 | struct btrfs_key found_key; | 2973 | struct btrfs_key found_key; |
2926 | struct extent_map *em = NULL; | 2974 | struct extent_map *em = NULL; |
2927 | struct extent_state *cached_state = NULL; | 2975 | struct extent_state *cached_state = NULL; |
2928 | struct btrfs_path *path; | 2976 | struct btrfs_path *path; |
2929 | struct btrfs_file_extent_item *item; | 2977 | struct btrfs_file_extent_item *item; |
2930 | int end = 0; | 2978 | int end = 0; |
2931 | u64 em_start = 0, em_len = 0; | 2979 | u64 em_start = 0; |
2980 | u64 em_len = 0; | ||
2981 | u64 em_end = 0; | ||
2932 | unsigned long emflags; | 2982 | unsigned long emflags; |
2933 | int hole = 0; | ||
2934 | 2983 | ||
2935 | if (len == 0) | 2984 | if (len == 0) |
2936 | return -EINVAL; | 2985 | return -EINVAL; |
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2940 | return -ENOMEM; | 2989 | return -ENOMEM; |
2941 | path->leave_spinning = 1; | 2990 | path->leave_spinning = 1; |
2942 | 2991 | ||
2992 | /* | ||
2993 | * lookup the last file extent. We're not using i_size here | ||
2994 | * because there might be preallocation past i_size | ||
2995 | */ | ||
2943 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | 2996 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, |
2944 | path, inode->i_ino, -1, 0); | 2997 | path, inode->i_ino, -1, 0); |
2945 | if (ret < 0) { | 2998 | if (ret < 0) { |
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2953 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | 3006 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); |
2954 | found_type = btrfs_key_type(&found_key); | 3007 | found_type = btrfs_key_type(&found_key); |
2955 | 3008 | ||
2956 | /* No extents, just return */ | 3009 | /* No extents, but there might be delalloc bits */ |
2957 | if (found_key.objectid != inode->i_ino || | 3010 | if (found_key.objectid != inode->i_ino || |
2958 | found_type != BTRFS_EXTENT_DATA_KEY) { | 3011 | found_type != BTRFS_EXTENT_DATA_KEY) { |
2959 | btrfs_free_path(path); | 3012 | /* have to trust i_size as the end */ |
2960 | return 0; | 3013 | last = (u64)-1; |
3014 | last_for_get_extent = isize; | ||
3015 | } else { | ||
3016 | /* | ||
3017 | * remember the start of the last extent. There are a | ||
3018 | * bunch of different factors that go into the length of the | ||
3019 | * extent, so its much less complex to remember where it started | ||
3020 | */ | ||
3021 | last = found_key.offset; | ||
3022 | last_for_get_extent = last + 1; | ||
2961 | } | 3023 | } |
2962 | last = found_key.offset; | ||
2963 | btrfs_free_path(path); | 3024 | btrfs_free_path(path); |
2964 | 3025 | ||
3026 | /* | ||
3027 | * we might have some extents allocated but more delalloc past those | ||
3028 | * extents. so, we trust isize unless the start of the last extent is | ||
3029 | * beyond isize | ||
3030 | */ | ||
3031 | if (last < isize) { | ||
3032 | last = (u64)-1; | ||
3033 | last_for_get_extent = isize; | ||
3034 | } | ||
3035 | |||
2965 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | 3036 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, |
2966 | &cached_state, GFP_NOFS); | 3037 | &cached_state, GFP_NOFS); |
2967 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 3038 | |
3039 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | ||
3040 | get_extent); | ||
2968 | if (!em) | 3041 | if (!em) |
2969 | goto out; | 3042 | goto out; |
2970 | if (IS_ERR(em)) { | 3043 | if (IS_ERR(em)) { |
@@ -2973,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2973 | } | 3046 | } |
2974 | 3047 | ||
2975 | while (!end) { | 3048 | while (!end) { |
2976 | hole = 0; | 3049 | off = extent_map_end(em); |
2977 | off = em->start + em->len; | ||
2978 | if (off >= max) | 3050 | if (off >= max) |
2979 | end = 1; | 3051 | end = 1; |
2980 | 3052 | ||
2981 | if (em->block_start == EXTENT_MAP_HOLE) { | ||
2982 | hole = 1; | ||
2983 | goto next; | ||
2984 | } | ||
2985 | |||
2986 | em_start = em->start; | 3053 | em_start = em->start; |
2987 | em_len = em->len; | 3054 | em_len = em->len; |
2988 | 3055 | em_end = extent_map_end(em); | |
3056 | emflags = em->flags; | ||
2989 | disko = 0; | 3057 | disko = 0; |
2990 | flags = 0; | 3058 | flags = 0; |
2991 | 3059 | ||
@@ -3004,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3004 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 3072 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
3005 | flags |= FIEMAP_EXTENT_ENCODED; | 3073 | flags |= FIEMAP_EXTENT_ENCODED; |
3006 | 3074 | ||
3007 | next: | ||
3008 | emflags = em->flags; | ||
3009 | free_extent_map(em); | 3075 | free_extent_map(em); |
3010 | em = NULL; | 3076 | em = NULL; |
3011 | if (!end) { | 3077 | if ((em_start >= last) || em_len == (u64)-1 || |
3012 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 3078 | (last == (u64)-1 && isize <= em_end)) { |
3013 | if (!em) | ||
3014 | goto out; | ||
3015 | if (IS_ERR(em)) { | ||
3016 | ret = PTR_ERR(em); | ||
3017 | goto out; | ||
3018 | } | ||
3019 | emflags = em->flags; | ||
3020 | } | ||
3021 | |||
3022 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { | ||
3023 | flags |= FIEMAP_EXTENT_LAST; | 3079 | flags |= FIEMAP_EXTENT_LAST; |
3024 | end = 1; | 3080 | end = 1; |
3025 | } | 3081 | } |
3026 | 3082 | ||
3027 | if (em_start == last) { | 3083 | /* now scan forward to see if this is really the last extent. */ |
3084 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | ||
3085 | get_extent); | ||
3086 | if (IS_ERR(em)) { | ||
3087 | ret = PTR_ERR(em); | ||
3088 | goto out; | ||
3089 | } | ||
3090 | if (!em) { | ||
3028 | flags |= FIEMAP_EXTENT_LAST; | 3091 | flags |= FIEMAP_EXTENT_LAST; |
3029 | end = 1; | 3092 | end = 1; |
3030 | } | 3093 | } |
3031 | 3094 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | |
3032 | if (!hole) { | 3095 | em_len, flags); |
3033 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 3096 | if (ret) |
3034 | em_len, flags); | 3097 | goto out_free; |
3035 | if (ret) | ||
3036 | goto out_free; | ||
3037 | } | ||
3038 | } | 3098 | } |
3039 | out_free: | 3099 | out_free: |
3040 | free_extent_map(em); | 3100 | free_extent_map(em); |