author    Liu Bo <bo.li.liu@oracle.com>          2012-08-27 12:52:20 -0400
committer Chris Mason <chris.mason@fusionio.com> 2012-10-01 15:19:05 -0400
commit    4e2f84e63dc138eca91e89ccbc34f37732ce58f7 (patch)
tree      31691a22773cf249fc289d8414be62b52d071513 /fs/btrfs
parent    ca7e70f59078046db28501519308c2061b0e7a6f (diff)
Btrfs: improve fsync by filtering extents that we want
This is based on Josef's "Btrfs: turbo charge fsync".

Josef's patch performs very well in the random sync write test, because we
won't have too many extents to merge. However, it does not perform well on
this test:

  dd if=/dev/zero of=foobar bs=4k count=12500 oflag=sync

The reason is that when we do sequential sync writes, each new extent is
merged with the previous one, so we accumulate ever larger extents to log:

  A(4k) --> AA(8k) --> AAA(12k) --> AAAA(16k) ...

So we have to flush more and more checksums into the log tree, which is the
bottleneck according to my tests. But we can avoid this by telling fsync the
real extents that need to be logged.

With this, I ran the above dd sync write test (size=50m):

            w/o (orig)   w/ (josef's)   w/ (this)
  SATA      104KB/s      109KB/s        121KB/s
  ramdisk   1.5MB/s      1.5MB/s        10.7MB/s (613%)

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
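For illustration only (not part of the patch): a minimal user-space C sketch of
the idea behind mod_start/mod_len. When a freshly written extent is merged
backwards into an already-logged neighbour, the merged extent grows, but the
sub-range that fsync still needs to log stays pinned to the new write. The
struct and merge_backward() helper below are hypothetical stand-ins, not the
btrfs extent_map API.

/*
 * Illustrative sketch only -- 'struct em' and merge_backward() are
 * made-up stand-ins, not the btrfs extent_map API.
 */
#include <stdio.h>

struct em {
	unsigned long long start;     /* logical start of the extent */
	unsigned long long len;       /* total length after merging  */
	unsigned long long mod_start; /* start of the range to log   */
	unsigned long long mod_len;   /* length of the range to log  */
};

/* Merge the already-logged 'prev' extent into the just-written 'cur'. */
static void merge_backward(struct em *cur, const struct em *prev)
{
	/* the merged extent now covers both ranges ...               */
	cur->start = prev->start;
	cur->len += prev->len;
	/* ... but mod_start/mod_len still describe only the new      */
	/* write, so fsync can skip re-logging the old checksums.     */
}

int main(void)
{
	struct em prev = { 0, 8192, 0, 8192 };       /* 8k already logged */
	struct em cur  = { 8192, 4096, 8192, 4096 }; /* new 4k sync write */

	merge_backward(&cur, &prev);

	printf("merged extent: start=%llu len=%llu\n", cur.start, cur.len);
	printf("range to log:  start=%llu len=%llu\n",
	       cur.mod_start, cur.mod_len);
	return 0;
}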
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/extent_map.c  20
-rw-r--r--  fs/btrfs/extent_map.h   2
-rw-r--r--  fs/btrfs/inode.c        1
-rw-r--r--  fs/btrfs/tree-log.c     6
4 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 1fe82cfc1d93..ac606f076eb7 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -203,6 +203,8 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 			em->block_start = merge->block_start;
 			merge->in_tree = 0;
 			if (merge->generation > em->generation) {
+				em->mod_start = em->start;
+				em->mod_len = em->len;
 				em->generation = merge->generation;
 				list_move(&em->list, &tree->modified_extents);
 			}
@@ -222,6 +224,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 		rb_erase(&merge->rb_node, &tree->map);
 		merge->in_tree = 0;
 		if (merge->generation > em->generation) {
+			em->mod_len = em->len;
 			em->generation = merge->generation;
 			list_move(&em->list, &tree->modified_extents);
 		}
@@ -247,6 +250,7 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
 {
 	int ret = 0;
 	struct extent_map *em;
+	bool prealloc = false;
 
 	write_lock(&tree->lock);
 	em = lookup_extent_mapping(tree, start, len);
@@ -259,8 +263,21 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
 	list_move(&em->list, &tree->modified_extents);
 	em->generation = gen;
 	clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+	em->mod_start = em->start;
+	em->mod_len = em->len;
+
+	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+		prealloc = true;
+		clear_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+	}
 
 	try_merge_map(tree, em);
+
+	if (prealloc) {
+		em->mod_start = em->start;
+		em->mod_len = em->len;
+	}
+
 	free_extent_map(em);
 out:
 	write_unlock(&tree->lock);
@@ -298,6 +315,9 @@ int add_extent_mapping(struct extent_map_tree *tree,
 	}
 	atomic_inc(&em->refs);
 
+	em->mod_start = em->start;
+	em->mod_len = em->len;
+
 	try_merge_map(tree, em);
 out:
 	return ret;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 2388a60bd6e3..8e6294b51357 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -20,6 +20,8 @@ struct extent_map {
 	/* all of these are in bytes */
 	u64 start;
 	u64 len;
+	u64 mod_start;
+	u64 mod_len;
 	u64 orig_start;
 	u64 block_start;
 	u64 block_len;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ca4fa05171ab..878116d9625d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1308,6 +1308,7 @@ out_check:
 			em->block_start = disk_bytenr;
 			em->bdev = root->fs_info->fs_devices->latest_bdev;
 			set_bit(EXTENT_FLAG_PINNED, &em->flags);
+			set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
 			while (1) {
 				write_lock(&em_tree->lock);
 				ret = add_extent_mapping(em_tree, em);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 58075d711d24..71e71539ffb7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2833,8 +2833,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_root *log = root->log_root;
 	struct btrfs_file_extent_item *fi;
 	struct btrfs_key key;
-	u64 start = em->start;
-	u64 len = em->len;
+	u64 start = em->mod_start;
+	u64 len = em->mod_len;
 	u64 num_bytes;
 	int nritems;
 	int ret;
@@ -2970,7 +2970,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 		 * sequential then we need to copy the items we have and redo
 		 * our search
 		 */
-		if (args.nr && em->start != args.next_offset) {
+		if (args.nr && em->mod_start != args.next_offset) {
 			ret = copy_items(trans, log, dst_path, args.src,
 					 args.start_slot, args.nr,
 					 LOG_INODE_ALL);