aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
author Josef Bacik <jbacik@fusionio.com> 2012-10-12 15:27:49 -0400
committer Josef Bacik <jbacik@fusionio.com> 2013-02-20 09:37:04 -0500
commit 2ab28f322f9896782da904f5942f3873432addc8 (patch)
tree d8d136d90b96f96d63262f8d2eb11680bed80aab /fs/btrfs/tree-log.c
parent dfd79829b709af3c2ac55951353a874ae89f41c3 (diff)
Btrfs: wait on ordered extents at the last possible moment
Since we don't actually copy the extent information from the source tree in the fast case, we don't need to wait for ordered IO to be completed in order to fsync; we just need to wait for the IO to be completed. So when we're logging our file, just attach all of the ordered extents to the log, and then when the log syncs, just wait for IO_DONE on the ordered extents and then write the super. Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 132
1 files changed, 128 insertions, 4 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9027bb1e7466..7de720d22b74 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2281,6 +2281,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2281 unsigned long log_transid = 0; 2281 unsigned long log_transid = 0;
2282 2282
2283 mutex_lock(&root->log_mutex); 2283 mutex_lock(&root->log_mutex);
2284 log_transid = root->log_transid;
2284 index1 = root->log_transid % 2; 2285 index1 = root->log_transid % 2;
2285 if (atomic_read(&root->log_commit[index1])) { 2286 if (atomic_read(&root->log_commit[index1])) {
2286 wait_log_commit(trans, root, root->log_transid); 2287 wait_log_commit(trans, root, root->log_transid);
@@ -2308,11 +2309,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2308 /* bail out if we need to do a full commit */ 2309 /* bail out if we need to do a full commit */
2309 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2310 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2310 ret = -EAGAIN; 2311 ret = -EAGAIN;
2312 btrfs_free_logged_extents(log, log_transid);
2311 mutex_unlock(&root->log_mutex); 2313 mutex_unlock(&root->log_mutex);
2312 goto out; 2314 goto out;
2313 } 2315 }
2314 2316
2315 log_transid = root->log_transid;
2316 if (log_transid % 2 == 0) 2317 if (log_transid % 2 == 0)
2317 mark = EXTENT_DIRTY; 2318 mark = EXTENT_DIRTY;
2318 else 2319 else
@@ -2324,6 +2325,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2324 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 2325 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2325 if (ret) { 2326 if (ret) {
2326 btrfs_abort_transaction(trans, root, ret); 2327 btrfs_abort_transaction(trans, root, ret);
2328 btrfs_free_logged_extents(log, log_transid);
2327 mutex_unlock(&root->log_mutex); 2329 mutex_unlock(&root->log_mutex);
2328 goto out; 2330 goto out;
2329 } 2331 }
@@ -2363,6 +2365,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2363 } 2365 }
2364 root->fs_info->last_trans_log_full_commit = trans->transid; 2366 root->fs_info->last_trans_log_full_commit = trans->transid;
2365 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2367 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2368 btrfs_free_logged_extents(log, log_transid);
2366 mutex_unlock(&log_root_tree->log_mutex); 2369 mutex_unlock(&log_root_tree->log_mutex);
2367 ret = -EAGAIN; 2370 ret = -EAGAIN;
2368 goto out; 2371 goto out;
@@ -2373,6 +2376,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2373 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2376 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2374 wait_log_commit(trans, log_root_tree, 2377 wait_log_commit(trans, log_root_tree,
2375 log_root_tree->log_transid); 2378 log_root_tree->log_transid);
2379 btrfs_free_logged_extents(log, log_transid);
2376 mutex_unlock(&log_root_tree->log_mutex); 2380 mutex_unlock(&log_root_tree->log_mutex);
2377 ret = 0; 2381 ret = 0;
2378 goto out; 2382 goto out;
@@ -2392,6 +2396,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2392 */ 2396 */
2393 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2397 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2394 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2398 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2399 btrfs_free_logged_extents(log, log_transid);
2395 mutex_unlock(&log_root_tree->log_mutex); 2400 mutex_unlock(&log_root_tree->log_mutex);
2396 ret = -EAGAIN; 2401 ret = -EAGAIN;
2397 goto out_wake_log_root; 2402 goto out_wake_log_root;
@@ -2402,10 +2407,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2402 EXTENT_DIRTY | EXTENT_NEW); 2407 EXTENT_DIRTY | EXTENT_NEW);
2403 if (ret) { 2408 if (ret) {
2404 btrfs_abort_transaction(trans, root, ret); 2409 btrfs_abort_transaction(trans, root, ret);
2410 btrfs_free_logged_extents(log, log_transid);
2405 mutex_unlock(&log_root_tree->log_mutex); 2411 mutex_unlock(&log_root_tree->log_mutex);
2406 goto out_wake_log_root; 2412 goto out_wake_log_root;
2407 } 2413 }
2408 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2414 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2415 btrfs_wait_logged_extents(log, log_transid);
2409 2416
2410 btrfs_set_super_log_root(root->fs_info->super_for_commit, 2417 btrfs_set_super_log_root(root->fs_info->super_for_commit,
2411 log_root_tree->node->start); 2418 log_root_tree->node->start);
@@ -2475,6 +2482,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2475 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); 2482 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2476 } 2483 }
2477 2484
2485 /*
2486 * We may have short-circuited the log tree with the full commit logic
2487 * and left ordered extents on our list, so clear these out to keep us
2488 * from leaking inodes and memory.
2489 */
2490 btrfs_free_logged_extents(log, 0);
2491 btrfs_free_logged_extents(log, 1);
2492
2478 free_extent_buffer(log->node); 2493 free_extent_buffer(log->node);
2479 kfree(log); 2494 kfree(log);
2480} 2495}
@@ -3271,14 +3286,18 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3271 struct btrfs_root *log = root->log_root; 3286 struct btrfs_root *log = root->log_root;
3272 struct btrfs_file_extent_item *fi; 3287 struct btrfs_file_extent_item *fi;
3273 struct extent_buffer *leaf; 3288 struct extent_buffer *leaf;
3289 struct btrfs_ordered_extent *ordered;
3274 struct list_head ordered_sums; 3290 struct list_head ordered_sums;
3275 struct btrfs_map_token token; 3291 struct btrfs_map_token token;
3276 struct btrfs_key key; 3292 struct btrfs_key key;
3277 u64 csum_offset = em->mod_start - em->start; 3293 u64 mod_start = em->mod_start;
3278 u64 csum_len = em->mod_len; 3294 u64 mod_len = em->mod_len;
3295 u64 csum_offset;
3296 u64 csum_len;
3279 u64 extent_offset = em->start - em->orig_start; 3297 u64 extent_offset = em->start - em->orig_start;
3280 u64 block_len; 3298 u64 block_len;
3281 int ret; 3299 int ret;
3300 int index = log->log_transid % 2;
3282 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3301 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3283 3302
3284 INIT_LIST_HEAD(&ordered_sums); 3303 INIT_LIST_HEAD(&ordered_sums);
@@ -3362,6 +3381,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3362 csum_len = block_len; 3381 csum_len = block_len;
3363 } 3382 }
3364 3383
3384 /*
3385 * First check and see if our csums are on our outstanding ordered
3386 * extents.
3387 */
3388again:
3389 spin_lock_irq(&log->log_extents_lock[index]);
3390 list_for_each_entry(ordered, &log->logged_list[index], log_list) {
3391 struct btrfs_ordered_sum *sum;
3392
3393 if (!mod_len)
3394 break;
3395
3396 if (ordered->inode != inode)
3397 continue;
3398
3399 if (ordered->file_offset + ordered->len <= mod_start ||
3400 mod_start + mod_len <= ordered->file_offset)
3401 continue;
3402
3403 /*
3404 * We are going to copy all the csums on this ordered extent, so
3405 * go ahead and adjust mod_start and mod_len in case this
3406 * ordered extent has already been logged.
3407 */
3408 if (ordered->file_offset > mod_start) {
3409 if (ordered->file_offset + ordered->len >=
3410 mod_start + mod_len)
3411 mod_len = ordered->file_offset - mod_start;
3412 /*
3413 * If we have this case
3414 *
3415 * |--------- logged extent ---------|
3416 * |----- ordered extent ----|
3417 *
3418 * Just don't mess with mod_start and mod_len, we'll
3419 * just end up logging more csums than we need and it
3420 * will be ok.
3421 */
3422 } else {
3423 if (ordered->file_offset + ordered->len <
3424 mod_start + mod_len) {
3425 mod_len = (mod_start + mod_len) -
3426 (ordered->file_offset + ordered->len);
3427 mod_start = ordered->file_offset +
3428 ordered->len;
3429 } else {
3430 mod_len = 0;
3431 }
3432 }
3433
3434 /*
3435 * To keep us from looping for the above case of an ordered
3436 * extent that falls inside of the logged extent.
3437 */
3438 if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
3439 &ordered->flags))
3440 continue;
3441 atomic_inc(&ordered->refs);
3442 spin_unlock_irq(&log->log_extents_lock[index]);
3443 /*
3444 * we've dropped the lock, we must either break or
3445 * start over after this.
3446 */
3447
3448 wait_event(ordered->wait, ordered->csum_bytes_left == 0);
3449
3450 list_for_each_entry(sum, &ordered->list, list) {
3451 ret = btrfs_csum_file_blocks(trans, log, sum);
3452 if (ret) {
3453 btrfs_put_ordered_extent(ordered);
3454 goto unlocked;
3455 }
3456 }
3457 btrfs_put_ordered_extent(ordered);
3458 goto again;
3459
3460 }
3461 spin_unlock_irq(&log->log_extents_lock[index]);
3462unlocked:
3463
3464 if (!mod_len || ret)
3465 return ret;
3466
3467 csum_offset = mod_start - em->start;
3468 csum_len = mod_len;
3469
3365 /* block start is already adjusted for the file extent offset. */ 3470 /* block start is already adjusted for the file extent offset. */
3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3471 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3367 em->block_start + csum_offset, 3472 em->block_start + csum_offset,
@@ -3393,6 +3498,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3393 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 3498 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3394 u64 test_gen; 3499 u64 test_gen;
3395 int ret = 0; 3500 int ret = 0;
3501 int num = 0;
3396 3502
3397 INIT_LIST_HEAD(&extents); 3503 INIT_LIST_HEAD(&extents);
3398 3504
@@ -3401,16 +3507,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3401 3507
3402 list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 3508 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
3403 list_del_init(&em->list); 3509 list_del_init(&em->list);
3510
3511 /*
3512 * Just an arbitrary number, this can be really CPU intensive
3513 * once we start getting a lot of extents, and really once we
3514 * have a bunch of extents we just want to commit since it will
3515 * be faster.
3516 */
3517 if (++num > 32768) {
3518 list_del_init(&tree->modified_extents);
3519 ret = -EFBIG;
3520 goto process;
3521 }
3522
3404 if (em->generation <= test_gen) 3523 if (em->generation <= test_gen)
3405 continue; 3524 continue;
3406 /* Need a ref to keep it from getting evicted from cache */ 3525 /* Need a ref to keep it from getting evicted from cache */
3407 atomic_inc(&em->refs); 3526 atomic_inc(&em->refs);
3408 set_bit(EXTENT_FLAG_LOGGING, &em->flags); 3527 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
3409 list_add_tail(&em->list, &extents); 3528 list_add_tail(&em->list, &extents);
3529 num++;
3410 } 3530 }
3411 3531
3412 list_sort(NULL, &extents, extent_cmp); 3532 list_sort(NULL, &extents, extent_cmp);
3413 3533
3534process:
3414 while (!list_empty(&extents)) { 3535 while (!list_empty(&extents)) {
3415 em = list_entry(extents.next, struct extent_map, list); 3536 em = list_entry(extents.next, struct extent_map, list);
3416 3537
@@ -3513,6 +3634,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3513 3634
3514 mutex_lock(&BTRFS_I(inode)->log_mutex); 3635 mutex_lock(&BTRFS_I(inode)->log_mutex);
3515 3636
3637 btrfs_get_logged_extents(log, inode);
3638
3516 /* 3639 /*
3517 * a brute force approach to making sure we get the most uptodate 3640 * a brute force approach to making sure we get the most uptodate
3518 * copies of everything. 3641 * copies of everything.
@@ -3656,6 +3779,8 @@ log_extents:
3656 BTRFS_I(inode)->logged_trans = trans->transid; 3779 BTRFS_I(inode)->logged_trans = trans->transid;
3657 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 3780 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
3658out_unlock: 3781out_unlock:
3782 if (err)
3783 btrfs_free_logged_extents(log, log->log_transid);
3659 mutex_unlock(&BTRFS_I(inode)->log_mutex); 3784 mutex_unlock(&BTRFS_I(inode)->log_mutex);
3660 3785
3661 btrfs_free_path(path); 3786 btrfs_free_path(path);
@@ -3822,7 +3947,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3822end_trans: 3947end_trans:
3823 dput(old_parent); 3948 dput(old_parent);
3824 if (ret < 0) { 3949 if (ret < 0) {
3825 WARN_ON(ret != -ENOSPC);
3826 root->fs_info->last_trans_log_full_commit = trans->transid; 3950 root->fs_info->last_trans_log_full_commit = trans->transid;
3827 ret = 1; 3951 ret = 1;
3828 } 3952 }