aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2011-02-23 16:23:20 -0500
committer: Chris Mason <chris.mason@oracle.com> 2011-02-23 16:23:20 -0500
commit: ec29ed5b407d618a8128f5942aade9e1758aa14b (patch)
tree: 19b3c13f15504cf9de116f0bd55045ebcf0d0cfc /fs
parent: fb01aa85b8b29c1a4e1f4a28ea54175de6bf7559 (diff)
Btrfs: fix fiemap bugs with delalloc
The Btrfs fiemap code wasn't properly returning delalloc extents, so applications that trust fiemap to decide if there are holes in the file see holes instead of delalloc. This reworks the btrfs fiemap code, adding a get_extent helper that searches for delalloc ranges and also adding a helper for extent_fiemap that skips past holes in the file.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/extent_io.c 138
-rw-r--r-- fs/btrfs/extent_io.h 2
-rw-r--r-- fs/btrfs/inode.c 126
3 files changed, 224 insertions, 42 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e7aeba242701..ff45b80d90f0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -2912,6 +2918,46 @@ out:
2912 return sector; 2918 return sector;
2913} 2919}
2914 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2915int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2916 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2917{ 2963{
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2921 u32 flags = 0; 2967 u32 flags = 0;
2922 u32 found_type; 2968 u32 found_type;
2923 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2924 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2925 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2926 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2927 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2928 struct btrfs_path *path; 2976 struct btrfs_path *path;
2929 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2930 int end = 0; 2978 int end = 0;
2931 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2932 unsigned long emflags; 2982 unsigned long emflags;
2933 int hole = 0;
2934 2983
2935 if (len == 0) 2984 if (len == 0)
2936 return -EINVAL; 2985 return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2940 return -ENOMEM; 2989 return -ENOMEM;
2941 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2942 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2943 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2944 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2945 if (ret < 0) { 2998 if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2953 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2954 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2955 3008
2956 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2957 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2958 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2959 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2960 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so its much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2961 } 3023 }
2962 last = found_key.offset;
2963 btrfs_free_path(path); 3024 btrfs_free_path(path);
2964 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2965 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2966 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2967 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2968 if (!em) 3041 if (!em)
2969 goto out; 3042 goto out;
2970 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2973,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2973 } 3046 }
2974 3047
2975 while (!end) { 3048 while (!end) {
2976 hole = 0; 3049 off = extent_map_end(em);
2977 off = em->start + em->len;
2978 if (off >= max) 3050 if (off >= max)
2979 end = 1; 3051 end = 1;
2980 3052
2981 if (em->block_start == EXTENT_MAP_HOLE) {
2982 hole = 1;
2983 goto next;
2984 }
2985
2986 em_start = em->start; 3053 em_start = em->start;
2987 em_len = em->len; 3054 em_len = em->len;
2988 3055 em_end = extent_map_end(em);
3056 emflags = em->flags;
2989 disko = 0; 3057 disko = 0;
2990 flags = 0; 3058 flags = 0;
2991 3059
@@ -3004,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3004 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3072 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3005 flags |= FIEMAP_EXTENT_ENCODED; 3073 flags |= FIEMAP_EXTENT_ENCODED;
3006 3074
3007next:
3008 emflags = em->flags;
3009 free_extent_map(em); 3075 free_extent_map(em);
3010 em = NULL; 3076 em = NULL;
3011 if (!end) { 3077 if ((em_start >= last) || em_len == (u64)-1 ||
3012 em = get_extent(inode, NULL, 0, off, max - off, 0); 3078 (last == (u64)-1 && isize <= em_end)) {
3013 if (!em)
3014 goto out;
3015 if (IS_ERR(em)) {
3016 ret = PTR_ERR(em);
3017 goto out;
3018 }
3019 emflags = em->flags;
3020 }
3021
3022 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3023 flags |= FIEMAP_EXTENT_LAST; 3079 flags |= FIEMAP_EXTENT_LAST;
3024 end = 1; 3080 end = 1;
3025 } 3081 }
3026 3082
3027 if (em_start == last) { 3083 /* now scan forward to see if this is really the last extent. */
3084 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3085 get_extent);
3086 if (IS_ERR(em)) {
3087 ret = PTR_ERR(em);
3088 goto out;
3089 }
3090 if (!em) {
3028 flags |= FIEMAP_EXTENT_LAST; 3091 flags |= FIEMAP_EXTENT_LAST;
3029 end = 1; 3092 end = 1;
3030 } 3093 }
3031 3094 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3032 if (!hole) { 3095 em_len, flags);
3033 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3096 if (ret)
3034 em_len, flags); 3097 goto out_free;
3035 if (ret)
3036 goto out_free;
3037 }
3038 } 3098 }
3039out_free: 3099out_free:
3040 free_extent_map(em); 3100 free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8d392ed73d57..44b926646e33 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1913,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1913 1913
1914 private = 0; 1914 private = 0;
1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1916 (u64)-1, 1, EXTENT_DIRTY)) { 1916 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1918 start, &private_failure); 1918 start, &private_failure);
1919 if (ret == 0) { 1919 if (ret == 0) {
@@ -5282,6 +5282,128 @@ out:
5282 return em; 5282 return em;
5283} 5283}
5284 5284
5285struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
5286 size_t pg_offset, u64 start, u64 len,
5287 int create)
5288{
5289 struct extent_map *em;
5290 struct extent_map *hole_em = NULL;
5291 u64 range_start = start;
5292 u64 end;
5293 u64 found;
5294 u64 found_end;
5295 int err = 0;
5296
5297 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
5298 if (IS_ERR(em))
5299 return em;
5300 if (em) {
5301 /*
5302 * if our em maps to a hole, there might
5303 * actually be delalloc bytes behind it
5304 */
5305 if (em->block_start != EXTENT_MAP_HOLE)
5306 return em;
5307 else
5308 hole_em = em;
5309 }
5310
5311 /* check to see if we've wrapped (len == -1 or similar) */
5312 end = start + len;
5313 if (end < start)
5314 end = (u64)-1;
5315 else
5316 end -= 1;
5317
5318 em = NULL;
5319
5320 /* ok, we didn't find anything, lets look for delalloc */
5321 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
5322 end, len, EXTENT_DELALLOC, 1);
5323 found_end = range_start + found;
5324 if (found_end < range_start)
5325 found_end = (u64)-1;
5326
5327 /*
5328 * we didn't find anything useful, return
5329 * the original results from get_extent()
5330 */
5331 if (range_start > end || found_end <= start) {
5332 em = hole_em;
5333 hole_em = NULL;
5334 goto out;
5335 }
5336
5337 /* adjust the range_start to make sure it doesn't
5338 * go backwards from the start they passed in
5339 */
5340 range_start = max(start,range_start);
5341 found = found_end - range_start;
5342
5343 if (found > 0) {
5344 u64 hole_start = start;
5345 u64 hole_len = len;
5346
5347 em = alloc_extent_map(GFP_NOFS);
5348 if (!em) {
5349 err = -ENOMEM;
5350 goto out;
5351 }
5352 /*
5353 * when btrfs_get_extent can't find anything it
5354 * returns one huge hole
5355 *
5356 * make sure what it found really fits our range, and
5357 * adjust to make sure it is based on the start from
5358 * the caller
5359 */
5360 if (hole_em) {
5361 u64 calc_end = extent_map_end(hole_em);
5362
5363 if (calc_end <= start || (hole_em->start > end)) {
5364 free_extent_map(hole_em);
5365 hole_em = NULL;
5366 } else {
5367 hole_start = max(hole_em->start, start);
5368 hole_len = calc_end - hole_start;
5369 }
5370 }
5371 em->bdev = NULL;
5372 if (hole_em && range_start > hole_start) {
5373 /* our hole starts before our delalloc, so we
5374 * have to return just the parts of the hole
5375 * that go until the delalloc starts
5376 */
5377 em->len = min(hole_len,
5378 range_start - hole_start);
5379 em->start = hole_start;
5380 em->orig_start = hole_start;
5381 /*
5382 * don't adjust block start at all,
5383 * it is fixed at EXTENT_MAP_HOLE
5384 */
5385 em->block_start = hole_em->block_start;
5386 em->block_len = hole_len;
5387 } else {
5388 em->start = range_start;
5389 em->len = found;
5390 em->orig_start = range_start;
5391 em->block_start = EXTENT_MAP_DELALLOC;
5392 em->block_len = found;
5393 }
5394 } else if (hole_em) {
5395 return hole_em;
5396 }
5397out:
5398
5399 free_extent_map(hole_em);
5400 if (err) {
5401 free_extent_map(em);
5402 return ERR_PTR(err);
5403 }
5404 return em;
5405}
5406
5285static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5407static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5286 u64 start, u64 len) 5408 u64 start, u64 len)
5287{ 5409{
@@ -6104,7 +6226,7 @@ out:
6104static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6226static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
6105 __u64 start, __u64 len) 6227 __u64 start, __u64 len)
6106{ 6228{
6107 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 6229 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
6108} 6230}
6109 6231
6110int btrfs_readpage(struct file *file, struct page *page) 6232int btrfs_readpage(struct file *file, struct page *page)