diff options
author | Yan, Zheng <zheng.yan@oracle.com> | 2009-11-12 04:33:26 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-12-15 21:24:25 -0500 |
commit | 8cef4e160d74920ad1725f58c89fd75ec4c4ac38 (patch) | |
tree | c1592369c7085e75d67a1709438a4f56351b2348 /fs/btrfs | |
parent | 22763c5cf3690a681551162c15d34d935308c8d7 (diff) |
Btrfs: Avoid superfluous tree-log writeout
We allow two log transactions at a time, but use the same flag
to mark dirty tree-log btree blocks. So we may flush dirty
blocks belonging to the newer log transaction when committing a
log transaction. This patch fixes the issue by using two
flags (EXTENT_DIRTY and EXTENT_NEW, chosen by log_transid % 2) to mark dirty tree-log btree blocks.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 12 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 21 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 6 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 33 |
5 files changed, 47 insertions, 31 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 02b6afbd7450..101940fab9b3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -980,12 +980,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | |||
980 | 980 | ||
981 | while (1) { | 981 | while (1) { |
982 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | 982 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, |
983 | 0, &start, &end, EXTENT_DIRTY); | 983 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); |
984 | if (ret) | 984 | if (ret) |
985 | break; | 985 | break; |
986 | 986 | ||
987 | clear_extent_dirty(&log_root_tree->dirty_log_pages, | 987 | clear_extent_bits(&log_root_tree->dirty_log_pages, start, end, |
988 | start, end, GFP_NOFS); | 988 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
989 | } | 989 | } |
990 | eb = fs_info->log_root_tree->node; | 990 | eb = fs_info->log_root_tree->node; |
991 | 991 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 94627c4cc193..4a86508ce473 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -4919,8 +4919,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
4919 | btrfs_set_buffer_uptodate(buf); | 4919 | btrfs_set_buffer_uptodate(buf); |
4920 | 4920 | ||
4921 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | 4921 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
4922 | set_extent_dirty(&root->dirty_log_pages, buf->start, | 4922 | /* |
4923 | buf->start + buf->len - 1, GFP_NOFS); | 4923 | * we allow two log transactions at a time, use different |
4924 | * EXTENT bit to differentiate dirty pages. | ||
4925 | */ | ||
4926 | if (root->log_transid % 2 == 0) | ||
4927 | set_extent_dirty(&root->dirty_log_pages, buf->start, | ||
4928 | buf->start + buf->len - 1, GFP_NOFS); | ||
4929 | else | ||
4930 | set_extent_new(&root->dirty_log_pages, buf->start, | ||
4931 | buf->start + buf->len - 1, GFP_NOFS); | ||
4924 | } else { | 4932 | } else { |
4925 | set_extent_dirty(&trans->transaction->dirty_pages, buf->start, | 4933 | set_extent_dirty(&trans->transaction->dirty_pages, buf->start, |
4926 | buf->start + buf->len - 1, GFP_NOFS); | 4934 | buf->start + buf->len - 1, GFP_NOFS); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c207e8c32c9b..b7b22c344b66 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -354,7 +354,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | |||
354 | * those extents are sent to disk but does not wait on them | 354 | * those extents are sent to disk but does not wait on them |
355 | */ | 355 | */ |
356 | int btrfs_write_marked_extents(struct btrfs_root *root, | 356 | int btrfs_write_marked_extents(struct btrfs_root *root, |
357 | struct extent_io_tree *dirty_pages) | 357 | struct extent_io_tree *dirty_pages, int mark) |
358 | { | 358 | { |
359 | int ret; | 359 | int ret; |
360 | int err = 0; | 360 | int err = 0; |
@@ -367,7 +367,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
367 | 367 | ||
368 | while (1) { | 368 | while (1) { |
369 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | 369 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, |
370 | EXTENT_DIRTY); | 370 | mark); |
371 | if (ret) | 371 | if (ret) |
372 | break; | 372 | break; |
373 | while (start <= end) { | 373 | while (start <= end) { |
@@ -413,7 +413,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
413 | * on all the pages and clear them from the dirty pages state tree | 413 | * on all the pages and clear them from the dirty pages state tree |
414 | */ | 414 | */ |
415 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 415 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
416 | struct extent_io_tree *dirty_pages) | 416 | struct extent_io_tree *dirty_pages, int mark) |
417 | { | 417 | { |
418 | int ret; | 418 | int ret; |
419 | int err = 0; | 419 | int err = 0; |
@@ -425,12 +425,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
425 | unsigned long index; | 425 | unsigned long index; |
426 | 426 | ||
427 | while (1) { | 427 | while (1) { |
428 | ret = find_first_extent_bit(dirty_pages, 0, &start, &end, | 428 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, |
429 | EXTENT_DIRTY); | 429 | mark); |
430 | if (ret) | 430 | if (ret) |
431 | break; | 431 | break; |
432 | 432 | ||
433 | clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); | 433 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); |
434 | while (start <= end) { | 434 | while (start <= end) { |
435 | index = start >> PAGE_CACHE_SHIFT; | 435 | index = start >> PAGE_CACHE_SHIFT; |
436 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | 436 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; |
@@ -460,13 +460,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
460 | * those extents are on disk for transaction or log commit | 460 | * those extents are on disk for transaction or log commit |
461 | */ | 461 | */ |
462 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 462 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, |
463 | struct extent_io_tree *dirty_pages) | 463 | struct extent_io_tree *dirty_pages, int mark) |
464 | { | 464 | { |
465 | int ret; | 465 | int ret; |
466 | int ret2; | 466 | int ret2; |
467 | 467 | ||
468 | ret = btrfs_write_marked_extents(root, dirty_pages); | 468 | ret = btrfs_write_marked_extents(root, dirty_pages, mark); |
469 | ret2 = btrfs_wait_marked_extents(root, dirty_pages); | 469 | ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); |
470 | return ret || ret2; | 470 | return ret || ret2; |
471 | } | 471 | } |
472 | 472 | ||
@@ -479,7 +479,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | |||
479 | return filemap_write_and_wait(btree_inode->i_mapping); | 479 | return filemap_write_and_wait(btree_inode->i_mapping); |
480 | } | 480 | } |
481 | return btrfs_write_and_wait_marked_extents(root, | 481 | return btrfs_write_and_wait_marked_extents(root, |
482 | &trans->transaction->dirty_pages); | 482 | &trans->transaction->dirty_pages, |
483 | EXTENT_DIRTY); | ||
483 | } | 484 | } |
484 | 485 | ||
485 | /* | 486 | /* |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index d4e3e7a6938c..93c7ccb33118 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root); | |||
107 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 107 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
108 | struct btrfs_root *root); | 108 | struct btrfs_root *root); |
109 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 109 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, |
110 | struct extent_io_tree *dirty_pages); | 110 | struct extent_io_tree *dirty_pages, int mark); |
111 | int btrfs_write_marked_extents(struct btrfs_root *root, | 111 | int btrfs_write_marked_extents(struct btrfs_root *root, |
112 | struct extent_io_tree *dirty_pages); | 112 | struct extent_io_tree *dirty_pages, int mark); |
113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 113 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
114 | struct extent_io_tree *dirty_pages); | 114 | struct extent_io_tree *dirty_pages, int mark); |
115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
116 | #endif | 116 | #endif |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 741666a7676a..31da0002e78b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -1977,10 +1977,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1977 | { | 1977 | { |
1978 | int index1; | 1978 | int index1; |
1979 | int index2; | 1979 | int index2; |
1980 | int mark; | ||
1980 | int ret; | 1981 | int ret; |
1981 | struct btrfs_root *log = root->log_root; | 1982 | struct btrfs_root *log = root->log_root; |
1982 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 1983 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
1983 | u64 log_transid = 0; | 1984 | unsigned long log_transid = 0; |
1984 | 1985 | ||
1985 | mutex_lock(&root->log_mutex); | 1986 | mutex_lock(&root->log_mutex); |
1986 | index1 = root->log_transid % 2; | 1987 | index1 = root->log_transid % 2; |
@@ -2014,24 +2015,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2014 | goto out; | 2015 | goto out; |
2015 | } | 2016 | } |
2016 | 2017 | ||
2018 | log_transid = root->log_transid; | ||
2019 | if (log_transid % 2 == 0) | ||
2020 | mark = EXTENT_DIRTY; | ||
2021 | else | ||
2022 | mark = EXTENT_NEW; | ||
2023 | |||
2017 | /* we start IO on all the marked extents here, but we don't actually | 2024 | /* we start IO on all the marked extents here, but we don't actually |
2018 | * wait for them until later. | 2025 | * wait for them until later. |
2019 | */ | 2026 | */ |
2020 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); | 2027 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); |
2021 | BUG_ON(ret); | 2028 | BUG_ON(ret); |
2022 | 2029 | ||
2023 | btrfs_set_root_node(&log->root_item, log->node); | 2030 | btrfs_set_root_node(&log->root_item, log->node); |
2024 | 2031 | ||
2025 | root->log_batch = 0; | 2032 | root->log_batch = 0; |
2026 | log_transid = root->log_transid; | ||
2027 | root->log_transid++; | 2033 | root->log_transid++; |
2028 | log->log_transid = root->log_transid; | 2034 | log->log_transid = root->log_transid; |
2029 | root->log_start_pid = 0; | 2035 | root->log_start_pid = 0; |
2030 | smp_mb(); | 2036 | smp_mb(); |
2031 | /* | 2037 | /* |
2032 | * log tree has been flushed to disk, new modifications of | 2038 | * IO has been started, blocks of the log tree have WRITTEN flag set |
2033 | * the log will be written to new positions. so it's safe to | 2039 | * in their headers. new modifications of the log will be written to |
2034 | * allow log writers to go in. | 2040 | * new positions. so it's safe to allow log writers to go in. |
2035 | */ | 2041 | */ |
2036 | mutex_unlock(&root->log_mutex); | 2042 | mutex_unlock(&root->log_mutex); |
2037 | 2043 | ||
@@ -2052,7 +2058,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2052 | 2058 | ||
2053 | index2 = log_root_tree->log_transid % 2; | 2059 | index2 = log_root_tree->log_transid % 2; |
2054 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2060 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2055 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | 2061 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2056 | wait_log_commit(trans, log_root_tree, | 2062 | wait_log_commit(trans, log_root_tree, |
2057 | log_root_tree->log_transid); | 2063 | log_root_tree->log_transid); |
2058 | mutex_unlock(&log_root_tree->log_mutex); | 2064 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2072,16 +2078,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2072 | * check the full commit flag again | 2078 | * check the full commit flag again |
2073 | */ | 2079 | */ |
2074 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2080 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
2075 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | 2081 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2076 | mutex_unlock(&log_root_tree->log_mutex); | 2082 | mutex_unlock(&log_root_tree->log_mutex); |
2077 | ret = -EAGAIN; | 2083 | ret = -EAGAIN; |
2078 | goto out_wake_log_root; | 2084 | goto out_wake_log_root; |
2079 | } | 2085 | } |
2080 | 2086 | ||
2081 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, | 2087 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, |
2082 | &log_root_tree->dirty_log_pages); | 2088 | &log_root_tree->dirty_log_pages, |
2089 | EXTENT_DIRTY | EXTENT_NEW); | ||
2083 | BUG_ON(ret); | 2090 | BUG_ON(ret); |
2084 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | 2091 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2085 | 2092 | ||
2086 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, | 2093 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, |
2087 | log_root_tree->node->start); | 2094 | log_root_tree->node->start); |
@@ -2147,12 +2154,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2147 | 2154 | ||
2148 | while (1) { | 2155 | while (1) { |
2149 | ret = find_first_extent_bit(&log->dirty_log_pages, | 2156 | ret = find_first_extent_bit(&log->dirty_log_pages, |
2150 | 0, &start, &end, EXTENT_DIRTY); | 2157 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); |
2151 | if (ret) | 2158 | if (ret) |
2152 | break; | 2159 | break; |
2153 | 2160 | ||
2154 | clear_extent_dirty(&log->dirty_log_pages, | 2161 | clear_extent_bits(&log->dirty_log_pages, start, end, |
2155 | start, end, GFP_NOFS); | 2162 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
2156 | } | 2163 | } |
2157 | 2164 | ||
2158 | if (log->log_transid > 0) { | 2165 | if (log->log_transid > 0) { |