aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Filipe Manana <fdmanana@gmail.com> 2014-02-25 09:15:13 -0500
committer: Josef Bacik <jbacik@fb.com> 2014-03-10 15:16:57 -0400
commit: 176840b3aa3cb795ddec4fc665ffbd707abff906 (patch)
tree: c709c05160ab4a4d104d16460e59ecc4cc3cd7a4
parent: f2071b21553bf8f1eae583e32b9068393f61cbe9 (diff)
Btrfs: more efficient btrfs_drop_extent_cache
While dropping extent map structures from the extent cache that cover our
target range, we would remove each extent map structure from the red-black
tree and then add either 1 or 2 new extent map structures if the former
extent map covered sections outside our target range.

This change simply attempts to replace the existing extent map structure
with a new one that covers the subsection we're not interested in, instead
of doing a red-black tree remove operation followed by an insertion operation.

The number of elements in an inode's extent map tree can get very high for
large files under random writes. For example, while running the following test:

    sysbench --test=fileio --file-num=1 --file-total-size=10G \
        --file-test-mode=rndrw --num-threads=32 --file-block-size=32768 \
        --max-requests=500000 --file-rw-ratio=2 [prepare|run]

I captured the following histogram of the number of extent_map items in the
red-black tree while that test was running:

    Count: 122462
    Range: 1.000 - 172231.000; Mean: 96415.831; Median: 101855.000; Stddev: 49700.981
    Percentiles: 90th: 160120.000; 95th: 166335.000; 99th: 171070.000
        1.000 -      5.231:   452 |
        5.231 -    187.392:    87 |
      187.392 -    585.911:   206 |
      585.911 -   1827.438:   623 |
     1827.438 -   5695.245:  1962 #
     5695.245 -  17744.861:  6204 ####
    17744.861 -  55283.764: 21115 ############
    55283.764 - 172231.000: 91813 #####################################################

Benchmark:

    sysbench --test=fileio --file-num=1 --file-total-size=10G --file-test-mode=rndwr \
        --num-threads=64 --file-block-size=32768 --max-requests=0 --max-time=60 \
        --file-io-mode=sync --file-fsync-freq=0 [prepare|run]

Before this change: 122.1Mb/sec
After this change: 125.07Mb/sec
(averages of 5 test runs)

Test machine: quad core intel i5-3570K, 32Gb of ram, SSD

Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
-rw-r--r-- fs/btrfs/extent_map.c | 39
-rw-r--r-- fs/btrfs/extent_map.h | 4
-rw-r--r-- fs/btrfs/file.c | 16
3 files changed, 45 insertions, 14 deletions
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 64d08f94485d..1874aee69c86 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -318,6 +318,20 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
318 try_merge_map(tree, em); 318 try_merge_map(tree, em);
319} 319}
320 320
321static inline void setup_extent_mapping(struct extent_map_tree *tree,
322 struct extent_map *em,
323 int modified)
324{
325 atomic_inc(&em->refs);
326 em->mod_start = em->start;
327 em->mod_len = em->len;
328
329 if (modified)
330 list_move(&em->list, &tree->modified_extents);
331 else
332 try_merge_map(tree, em);
333}
334
321/** 335/**
322 * add_extent_mapping - add new extent map to the extent tree 336 * add_extent_mapping - add new extent map to the extent tree
323 * @tree: tree to insert new map in 337 * @tree: tree to insert new map in
@@ -337,15 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree,
337 if (ret) 351 if (ret)
338 goto out; 352 goto out;
339 353
340 atomic_inc(&em->refs); 354 setup_extent_mapping(tree, em, modified);
341
342 em->mod_start = em->start;
343 em->mod_len = em->len;
344
345 if (modified)
346 list_move(&em->list, &tree->modified_extents);
347 else
348 try_merge_map(tree, em);
349out: 355out:
350 return ret; 356 return ret;
351} 357}
@@ -432,3 +438,18 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
432 RB_CLEAR_NODE(&em->rb_node); 438 RB_CLEAR_NODE(&em->rb_node);
433 return ret; 439 return ret;
434} 440}
441
442void replace_extent_mapping(struct extent_map_tree *tree,
443 struct extent_map *cur,
444 struct extent_map *new,
445 int modified)
446{
447 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
448 ASSERT(extent_map_in_tree(cur));
449 if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
450 list_del_init(&cur->list);
451 rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map);
452 RB_CLEAR_NODE(&cur->rb_node);
453
454 setup_extent_mapping(tree, new, modified);
455}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index f0a645a14d6e..e7fd8a56a140 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -68,6 +68,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
68int add_extent_mapping(struct extent_map_tree *tree, 68int add_extent_mapping(struct extent_map_tree *tree,
69 struct extent_map *em, int modified); 69 struct extent_map *em, int modified);
70int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); 70int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
71void replace_extent_mapping(struct extent_map_tree *tree,
72 struct extent_map *cur,
73 struct extent_map *new,
74 int modified);
71 75
72struct extent_map *alloc_extent_map(void); 76struct extent_map *alloc_extent_map(void);
73void free_extent_map(struct extent_map *em); 77void free_extent_map(struct extent_map *em);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 762ca32bd988..31e48b947060 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -591,7 +591,6 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
591 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 591 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
592 clear_bit(EXTENT_FLAG_LOGGING, &flags); 592 clear_bit(EXTENT_FLAG_LOGGING, &flags);
593 modified = !list_empty(&em->list); 593 modified = !list_empty(&em->list);
594 remove_extent_mapping(em_tree, em);
595 if (no_splits) 594 if (no_splits)
596 goto next; 595 goto next;
597 596
@@ -622,8 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
622 split->bdev = em->bdev; 621 split->bdev = em->bdev;
623 split->flags = flags; 622 split->flags = flags;
624 split->compress_type = em->compress_type; 623 split->compress_type = em->compress_type;
625 ret = add_extent_mapping(em_tree, split, modified); 624 replace_extent_mapping(em_tree, em, split, modified);
626 BUG_ON(ret); /* Logic error */
627 free_extent_map(split); 625 free_extent_map(split);
628 split = split2; 626 split = split2;
629 split2 = NULL; 627 split2 = NULL;
@@ -661,12 +659,20 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
661 split->orig_block_len = 0; 659 split->orig_block_len = 0;
662 } 660 }
663 661
664 ret = add_extent_mapping(em_tree, split, modified); 662 if (extent_map_in_tree(em)) {
665 BUG_ON(ret); /* Logic error */ 663 replace_extent_mapping(em_tree, em, split,
664 modified);
665 } else {
666 ret = add_extent_mapping(em_tree, split,
667 modified);
668 ASSERT(ret == 0); /* Logic error */
669 }
666 free_extent_map(split); 670 free_extent_map(split);
667 split = NULL; 671 split = NULL;
668 } 672 }
669next: 673next:
674 if (extent_map_in_tree(em))
675 remove_extent_mapping(em_tree, em);
670 write_unlock(&em_tree->lock); 676 write_unlock(&em_tree->lock);
671 677
672 /* once for us */ 678 /* once for us */