diff options
author | Liu Bo <bo.li.liu@oracle.com> | 2012-08-27 12:52:20 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@fusionio.com> | 2012-10-01 15:19:05 -0400 |
commit | 4e2f84e63dc138eca91e89ccbc34f37732ce58f7 (patch) | |
tree | 31691a22773cf249fc289d8414be62b52d071513 /fs/btrfs | |
parent | ca7e70f59078046db28501519308c2061b0e7a6f (diff) |
Btrfs: improve fsync by filtering extents that we want
This is based on Josef's "Btrfs: turbo charge fsync".
Josef's patch above performs very well in the random sync write test,
because we won't have too many extents to merge.
However, it does not perform well on the following test:
dd if=/dev/zero of=foobar bs=4k count=12500 oflag=sync
The reason is that when we do sequential sync writes, we need to merge the
current extent with the previous one, so we end up with accumulated
extents to log:
A(4k) --> AA(8k) --> AAA(12k) --> AAAA(16k) ...
So we'll have to flush more and more checksums into the log tree, which is
the bottleneck according to my tests.
But we can avoid this by telling fsync the real extents that need
to be logged.
With this, I did the above dd sync write test (size=50m),
w/o (orig) w/ (josef's) w/ (this)
SATA 104KB/s 109KB/s 121KB/s
ramdisk 1.5MB/s 1.5MB/s 10.7MB/s (613%)
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/extent_map.c | 20 | ||||
-rw-r--r-- | fs/btrfs/extent_map.h | 2 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 1 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 6 |
4 files changed, 26 insertions, 3 deletions
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 1fe82cfc1d93..ac606f076eb7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -203,6 +203,8 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
203 | em->block_start = merge->block_start; | 203 | em->block_start = merge->block_start; |
204 | merge->in_tree = 0; | 204 | merge->in_tree = 0; |
205 | if (merge->generation > em->generation) { | 205 | if (merge->generation > em->generation) { |
206 | em->mod_start = em->start; | ||
207 | em->mod_len = em->len; | ||
206 | em->generation = merge->generation; | 208 | em->generation = merge->generation; |
207 | list_move(&em->list, &tree->modified_extents); | 209 | list_move(&em->list, &tree->modified_extents); |
208 | } | 210 | } |
@@ -222,6 +224,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
222 | rb_erase(&merge->rb_node, &tree->map); | 224 | rb_erase(&merge->rb_node, &tree->map); |
223 | merge->in_tree = 0; | 225 | merge->in_tree = 0; |
224 | if (merge->generation > em->generation) { | 226 | if (merge->generation > em->generation) { |
227 | em->mod_len = em->len; | ||
225 | em->generation = merge->generation; | 228 | em->generation = merge->generation; |
226 | list_move(&em->list, &tree->modified_extents); | 229 | list_move(&em->list, &tree->modified_extents); |
227 | } | 230 | } |
@@ -247,6 +250,7 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, | |||
247 | { | 250 | { |
248 | int ret = 0; | 251 | int ret = 0; |
249 | struct extent_map *em; | 252 | struct extent_map *em; |
253 | bool prealloc = false; | ||
250 | 254 | ||
251 | write_lock(&tree->lock); | 255 | write_lock(&tree->lock); |
252 | em = lookup_extent_mapping(tree, start, len); | 256 | em = lookup_extent_mapping(tree, start, len); |
@@ -259,8 +263,21 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, | |||
259 | list_move(&em->list, &tree->modified_extents); | 263 | list_move(&em->list, &tree->modified_extents); |
260 | em->generation = gen; | 264 | em->generation = gen; |
261 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 265 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
266 | em->mod_start = em->start; | ||
267 | em->mod_len = em->len; | ||
268 | |||
269 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | ||
270 | prealloc = true; | ||
271 | clear_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
272 | } | ||
262 | 273 | ||
263 | try_merge_map(tree, em); | 274 | try_merge_map(tree, em); |
275 | |||
276 | if (prealloc) { | ||
277 | em->mod_start = em->start; | ||
278 | em->mod_len = em->len; | ||
279 | } | ||
280 | |||
264 | free_extent_map(em); | 281 | free_extent_map(em); |
265 | out: | 282 | out: |
266 | write_unlock(&tree->lock); | 283 | write_unlock(&tree->lock); |
@@ -298,6 +315,9 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
298 | } | 315 | } |
299 | atomic_inc(&em->refs); | 316 | atomic_inc(&em->refs); |
300 | 317 | ||
318 | em->mod_start = em->start; | ||
319 | em->mod_len = em->len; | ||
320 | |||
301 | try_merge_map(tree, em); | 321 | try_merge_map(tree, em); |
302 | out: | 322 | out: |
303 | return ret; | 323 | return ret; |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 2388a60bd6e3..8e6294b51357 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -20,6 +20,8 @@ struct extent_map { | |||
20 | /* all of these are in bytes */ | 20 | /* all of these are in bytes */ |
21 | u64 start; | 21 | u64 start; |
22 | u64 len; | 22 | u64 len; |
23 | u64 mod_start; | ||
24 | u64 mod_len; | ||
23 | u64 orig_start; | 25 | u64 orig_start; |
24 | u64 block_start; | 26 | u64 block_start; |
25 | u64 block_len; | 27 | u64 block_len; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ca4fa05171ab..878116d9625d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -1308,6 +1308,7 @@ out_check: | |||
1308 | em->block_start = disk_bytenr; | 1308 | em->block_start = disk_bytenr; |
1309 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1309 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
1310 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1310 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
1311 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
1311 | while (1) { | 1312 | while (1) { |
1312 | write_lock(&em_tree->lock); | 1313 | write_lock(&em_tree->lock); |
1313 | ret = add_extent_mapping(em_tree, em); | 1314 | ret = add_extent_mapping(em_tree, em); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 58075d711d24..71e71539ffb7 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -2833,8 +2833,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
2833 | struct btrfs_root *log = root->log_root; | 2833 | struct btrfs_root *log = root->log_root; |
2834 | struct btrfs_file_extent_item *fi; | 2834 | struct btrfs_file_extent_item *fi; |
2835 | struct btrfs_key key; | 2835 | struct btrfs_key key; |
2836 | u64 start = em->start; | 2836 | u64 start = em->mod_start; |
2837 | u64 len = em->len; | 2837 | u64 len = em->mod_len; |
2838 | u64 num_bytes; | 2838 | u64 num_bytes; |
2839 | int nritems; | 2839 | int nritems; |
2840 | int ret; | 2840 | int ret; |
@@ -2970,7 +2970,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
2970 | * sequential then we need to copy the items we have and redo | 2970 | * sequential then we need to copy the items we have and redo |
2971 | * our search | 2971 | * our search |
2972 | */ | 2972 | */ |
2973 | if (args.nr && em->start != args.next_offset) { | 2973 | if (args.nr && em->mod_start != args.next_offset) { |
2974 | ret = copy_items(trans, log, dst_path, args.src, | 2974 | ret = copy_items(trans, log, dst_path, args.src, |
2975 | args.start_slot, args.nr, | 2975 | args.start_slot, args.nr, |
2976 | LOG_INODE_ALL); | 2976 | LOG_INODE_ALL); |