aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQu Wenruo <quwenruo@cn.fujitsu.com>2014-09-16 23:53:35 -0400
committerChris Mason <clm@fb.com>2014-09-18 10:14:46 -0400
commite6c4efd87ab04e5ead363f24e6ac35ed3506d401 (patch)
tree09205e85abcb49d88e2f29dc998d1c4146649b7d
parent4d1a40c66bed0b3fa43b9da5fbd5cbe332e4eccf (diff)
btrfs: Fix and enhance merge_extent_mapping() to insert best fitted extent map
The following commit enhanced merge_extent_mapping() to reduce fragmentation in the extent map tree, but it can't handle the case where the existing extent lies before map_start: 51f39 btrfs: Use right extent length when inserting overlap extent map. [BUG] When the existing extent map's start is before map_start, em->len will become negative, which will corrupt the extent map and cause insertion of the new extent map to fail. This happens when someone gets a large extent map, but by the time it is about to be inserted into the extent map tree, someone else has already committed some writes and split the huge extent into small parts. [REPRODUCER] It is very easy to trigger using filebench with the randomrw personality. It reproduces nearly 100% of the time when using an 8G preallocated file in a 60s randomrw test. [FIX] This patch can now handle any existing extent position. Since it does not directly use existing->start, it now finds the previous and next extents around map_start. So the old existing->start < map_start bug will never happen again. [ENHANCE] This patch will insert the best fitted extent map into the extent map tree, rather than the oldest [map_start, map_start + sectorsize) or the relatively newer but not perfect [map_start, existing->start). The patch will first search for an existing extent that does not intersect with the desired map range [map_start, map_start + len). The existing extent will be either before or behind map_start, and based on the existing extent, we can find the previous and next extents around map_start. So the best fitted extent would be [prev->end, next->start). If prev or next is not found, em->start is used in place of prev->end and em->end is used in place of next->start, respectively. With this patch, the fragmentation in the extent map tree should be reduced much more than with the 51f39 commit, and an unneeded extent map tree search is avoided. Reported-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/inode.c79
1 files changed, 57 insertions, 22 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3c16a1493e22..b1106d0dcd5c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6249,21 +6249,60 @@ out_fail_inode:
6249 goto out_fail; 6249 goto out_fail;
6250} 6250}
6251 6251
6252/* Find next extent map of a given extent map, caller needs to ensure locks */
6253static struct extent_map *next_extent_map(struct extent_map *em)
6254{
6255 struct rb_node *next;
6256
6257 next = rb_next(&em->rb_node);
6258 if (!next)
6259 return NULL;
6260 return container_of(next, struct extent_map, rb_node);
6261}
6262
6263static struct extent_map *prev_extent_map(struct extent_map *em)
6264{
6265 struct rb_node *prev;
6266
6267 prev = rb_prev(&em->rb_node);
6268 if (!prev)
6269 return NULL;
6270 return container_of(prev, struct extent_map, rb_node);
6271}
6272
6252/* helper for btfs_get_extent. Given an existing extent in the tree, 6273/* helper for btfs_get_extent. Given an existing extent in the tree,
6274 * the existing extent is the nearest extent to map_start,
6253 * and an extent that you want to insert, deal with overlap and insert 6275 * and an extent that you want to insert, deal with overlap and insert
6254 * the new extent into the tree. 6276 * the best fitted new extent into the tree.
6255 */ 6277 */
6256static int merge_extent_mapping(struct extent_map_tree *em_tree, 6278static int merge_extent_mapping(struct extent_map_tree *em_tree,
6257 struct extent_map *existing, 6279 struct extent_map *existing,
6258 struct extent_map *em, 6280 struct extent_map *em,
6259 u64 map_start) 6281 u64 map_start)
6260{ 6282{
6283 struct extent_map *prev;
6284 struct extent_map *next;
6285 u64 start;
6286 u64 end;
6261 u64 start_diff; 6287 u64 start_diff;
6262 6288
6263 BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); 6289 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
6264 start_diff = map_start - em->start; 6290
6265 em->start = map_start; 6291 if (existing->start > map_start) {
6266 em->len = existing->start - em->start; 6292 next = existing;
6293 prev = prev_extent_map(next);
6294 } else {
6295 prev = existing;
6296 next = next_extent_map(prev);
6297 }
6298
6299 start = prev ? extent_map_end(prev) : em->start;
6300 start = max_t(u64, start, em->start);
6301 end = next ? next->start : extent_map_end(em);
6302 end = min_t(u64, end, extent_map_end(em));
6303 start_diff = start - em->start;
6304 em->start = start;
6305 em->len = end - start;
6267 if (em->block_start < EXTENT_MAP_LAST_BYTE && 6306 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
6268 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 6307 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
6269 em->block_start += start_diff; 6308 em->block_start += start_diff;
@@ -6540,25 +6579,21 @@ insert:
6540 6579
6541 ret = 0; 6580 ret = 0;
6542 6581
6543 existing = lookup_extent_mapping(em_tree, start, len); 6582 existing = search_extent_mapping(em_tree, start, len);
6544 if (existing && (existing->start > start || 6583 /*
6545 existing->start + existing->len <= start)) { 6584 * existing will always be non-NULL, since there must be
6585 * extent causing the -EEXIST.
6586 */
6587 if (start >= extent_map_end(existing) ||
6588 start + len <= existing->start) {
6589 /*
6590 * The existing extent map is the one nearest to
6591 * the [start, start + len) range which overlaps
6592 */
6593 err = merge_extent_mapping(em_tree, existing,
6594 em, start);
6546 free_extent_map(existing); 6595 free_extent_map(existing);
6547 existing = NULL; 6596 if (err) {
6548 }
6549 if (!existing) {
6550 existing = lookup_extent_mapping(em_tree, em->start,
6551 em->len);
6552 if (existing) {
6553 err = merge_extent_mapping(em_tree, existing,
6554 em, start);
6555 free_extent_map(existing);
6556 if (err) {
6557 free_extent_map(em);
6558 em = NULL;
6559 }
6560 } else {
6561 err = -EIO;
6562 free_extent_map(em); 6597 free_extent_map(em);
6563 em = NULL; 6598 em = NULL;
6564 } 6599 }