diff options
author | Qu Wenruo <quwenruo@cn.fujitsu.com> | 2014-09-16 23:53:35 -0400 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2014-09-18 10:14:46 -0400 |
commit | e6c4efd87ab04e5ead363f24e6ac35ed3506d401 (patch) | |
tree | 09205e85abcb49d88e2f29dc998d1c4146649b7d | |
parent | 4d1a40c66bed0b3fa43b9da5fbd5cbe332e4eccf (diff) |
btrfs: Fix and enhance merge_extent_mapping() to insert best fitted extent map
The following commit enhanced merge_extent_mapping() to reduce
fragmentation in the extent map tree, but it can't handle the case where
the existing extent map lies before map_start:
51f39 btrfs: Use right extent length when inserting overlap extent map.
[BUG]
When the existing extent map's start is before map_start,
em->len will be negative, which corrupts the extent map and causes the
insertion of the new extent map to fail.
This happens when someone gets a large extent map, but by the time it is
about to be inserted into the extent map tree, someone else has already
committed some writes and split the huge extent into small parts.
[REPRODUCER]
It is very easy to trigger using filebench with the randomrw personality.
It reproduces almost 100% of the time when using an 8G preallocated file
in a 60s randomrw test.
[FIX]
This patch can now handle any existing extent position.
Since it does not directly use existing->start, now it will find the
previous and next extent around map_start.
So the old existing->start < map_start bug will never happen again.
[ENHANCE]
This patch will insert the best fitted extent map into the extent map tree,
rather than the oldest [map_start, map_start + sectorsize) or the
relatively newer but not perfect [map_start, existing->start).
The patch first searches for an existing extent that does not intersect with
the desired map range [map_start, map_start + len).
The existing extent will be either before or behind map_start, and based
on the existing extent, we can find out the previous and next extent
around map_start.
So the best fitted extent would be [prev->end, next->start).
If prev or next is not found, em->start is used in place of prev->end and
em->end is used in place of next->start.
With this patch, fragmentation in the extent map tree should be reduced much
more than with the 51f39 commit, and an unneeded extent map tree search is
avoided.
Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | fs/btrfs/inode.c | 79 |
1 files changed, 57 insertions, 22 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3c16a1493e22..b1106d0dcd5c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -6249,21 +6249,60 @@ out_fail_inode: | |||
6249 | goto out_fail; | 6249 | goto out_fail; |
6250 | } | 6250 | } |
6251 | 6251 | ||
6252 | /* Find next extent map of a given extent map, caller needs to ensure locks */ | ||
6253 | static struct extent_map *next_extent_map(struct extent_map *em) | ||
6254 | { | ||
6255 | struct rb_node *next; | ||
6256 | |||
6257 | next = rb_next(&em->rb_node); | ||
6258 | if (!next) | ||
6259 | return NULL; | ||
6260 | return container_of(next, struct extent_map, rb_node); | ||
6261 | } | ||
6262 | |||
6263 | static struct extent_map *prev_extent_map(struct extent_map *em) | ||
6264 | { | ||
6265 | struct rb_node *prev; | ||
6266 | |||
6267 | prev = rb_prev(&em->rb_node); | ||
6268 | if (!prev) | ||
6269 | return NULL; | ||
6270 | return container_of(prev, struct extent_map, rb_node); | ||
6271 | } | ||
6272 | |||
6252 | /* helper for btfs_get_extent. Given an existing extent in the tree, | 6273 | /* helper for btfs_get_extent. Given an existing extent in the tree, |
6274 | * the existing extent is the nearest extent to map_start, | ||
6253 | * and an extent that you want to insert, deal with overlap and insert | 6275 | * and an extent that you want to insert, deal with overlap and insert |
6254 | * the new extent into the tree. | 6276 | * the best fitted new extent into the tree. |
6255 | */ | 6277 | */ |
6256 | static int merge_extent_mapping(struct extent_map_tree *em_tree, | 6278 | static int merge_extent_mapping(struct extent_map_tree *em_tree, |
6257 | struct extent_map *existing, | 6279 | struct extent_map *existing, |
6258 | struct extent_map *em, | 6280 | struct extent_map *em, |
6259 | u64 map_start) | 6281 | u64 map_start) |
6260 | { | 6282 | { |
6283 | struct extent_map *prev; | ||
6284 | struct extent_map *next; | ||
6285 | u64 start; | ||
6286 | u64 end; | ||
6261 | u64 start_diff; | 6287 | u64 start_diff; |
6262 | 6288 | ||
6263 | BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); | 6289 | BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); |
6264 | start_diff = map_start - em->start; | 6290 | |
6265 | em->start = map_start; | 6291 | if (existing->start > map_start) { |
6266 | em->len = existing->start - em->start; | 6292 | next = existing; |
6293 | prev = prev_extent_map(next); | ||
6294 | } else { | ||
6295 | prev = existing; | ||
6296 | next = next_extent_map(prev); | ||
6297 | } | ||
6298 | |||
6299 | start = prev ? extent_map_end(prev) : em->start; | ||
6300 | start = max_t(u64, start, em->start); | ||
6301 | end = next ? next->start : extent_map_end(em); | ||
6302 | end = min_t(u64, end, extent_map_end(em)); | ||
6303 | start_diff = start - em->start; | ||
6304 | em->start = start; | ||
6305 | em->len = end - start; | ||
6267 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | 6306 | if (em->block_start < EXTENT_MAP_LAST_BYTE && |
6268 | !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 6307 | !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
6269 | em->block_start += start_diff; | 6308 | em->block_start += start_diff; |
@@ -6540,25 +6579,21 @@ insert: | |||
6540 | 6579 | ||
6541 | ret = 0; | 6580 | ret = 0; |
6542 | 6581 | ||
6543 | existing = lookup_extent_mapping(em_tree, start, len); | 6582 | existing = search_extent_mapping(em_tree, start, len); |
6544 | if (existing && (existing->start > start || | 6583 | /* |
6545 | existing->start + existing->len <= start)) { | 6584 | * existing will always be non-NULL, since there must be |
6585 | * extent causing the -EEXIST. | ||
6586 | */ | ||
6587 | if (start >= extent_map_end(existing) || | ||
6588 | start + len <= existing->start) { | ||
6589 | /* | ||
6590 | * The existing extent map is the one nearest to | ||
6591 | * the [start, start + len) range which overlaps | ||
6592 | */ | ||
6593 | err = merge_extent_mapping(em_tree, existing, | ||
6594 | em, start); | ||
6546 | free_extent_map(existing); | 6595 | free_extent_map(existing); |
6547 | existing = NULL; | 6596 | if (err) { |
6548 | } | ||
6549 | if (!existing) { | ||
6550 | existing = lookup_extent_mapping(em_tree, em->start, | ||
6551 | em->len); | ||
6552 | if (existing) { | ||
6553 | err = merge_extent_mapping(em_tree, existing, | ||
6554 | em, start); | ||
6555 | free_extent_map(existing); | ||
6556 | if (err) { | ||
6557 | free_extent_map(em); | ||
6558 | em = NULL; | ||
6559 | } | ||
6560 | } else { | ||
6561 | err = -EIO; | ||
6562 | free_extent_map(em); | 6597 | free_extent_map(em); |
6563 | em = NULL; | 6598 | em = NULL; |
6564 | } | 6599 | } |