aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:41:54 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:41:54 -0500
commitb695188dd39162a1a6bff11fdbcc4c0b65b933ab (patch)
treea3df7c052d38b5bfaf335fbf3130abcc5c6ca577 /fs/btrfs/tree-log.c
parent48476df99894492a0f7239f2f3c9a2dde4ff38e2 (diff)
parent180e001cd5fc2950dc6a7997dde5b65c954d0e79 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason: "The biggest feature in the pull is the new (and still experimental) raid56 code that David Woodhouse started long ago. I'm still working on the parity logging setup that will avoid inconsistent parity after a crash, so this is only for testing right now. But, I'd really like to get it out to a broader audience to hammer out any performance issues or other problems. scrub does not yet correct errors on raid5/6 either. Josef has another pass at fsync performance. The big change here is to combine waiting for metadata with waiting for data, which is a big latency win. It is also step one toward using atomics from the hardware during a commit. Mark Fasheh has a new way to use btrfs send/receive to send only the metadata changes. SUSE is using this to make snapper more efficient at finding changes between snapshots. Snapshot-aware defrag is also included. Otherwise we have a large number of fixes and cleanups. Eric Sandeen wins the award for removing the most lines, and I'm hoping we steal this idea from XFS over and over again." 
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits) btrfs: fixup/remove module.h usage as required Btrfs: delete inline extents when we find them during logging btrfs: try harder to allocate raid56 stripe cache Btrfs: cleanup to make the function btrfs_delalloc_reserve_metadata more logic Btrfs: don't call btrfs_qgroup_free if just btrfs_qgroup_reserve fails Btrfs: remove reduplicate check about root in the function btrfs_clean_quota_tree Btrfs: return ENOMEM rather than use BUG_ON when btrfs_alloc_path fails Btrfs: fix missing deleted items in btrfs_clean_quota_tree btrfs: use only inline_pages from extent buffer Btrfs: fix wrong reserved space when deleting a snapshot/subvolume Btrfs: fix wrong reserved space in qgroup during snap/subv creation Btrfs: remove unnecessary dget_parent/dput when creating the pending snapshot btrfs: remove a printk from scan_one_device Btrfs: fix NULL pointer after aborting a transaction Btrfs: fix memory leak of log roots Btrfs: copy everything if we've created an inline extent btrfs: cleanup for open-coded alignment Btrfs: do not change inode flags in rename Btrfs: use reserved space for creating a snapshot clear chunk_alloc flag on retryable failure ...
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c166
1 files changed, 154 insertions, 12 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9027bb1e7466..c7ef569eb22a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -278,8 +278,7 @@ static int process_one_buffer(struct btrfs_root *log,
278 struct walk_control *wc, u64 gen) 278 struct walk_control *wc, u64 gen)
279{ 279{
280 if (wc->pin) 280 if (wc->pin)
281 btrfs_pin_extent_for_log_replay(wc->trans, 281 btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
282 log->fs_info->extent_root,
283 eb->start, eb->len); 282 eb->start, eb->len);
284 283
285 if (btrfs_buffer_uptodate(eb, gen, 0)) { 284 if (btrfs_buffer_uptodate(eb, gen, 0)) {
@@ -485,7 +484,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
485 struct btrfs_key *key) 484 struct btrfs_key *key)
486{ 485{
487 int found_type; 486 int found_type;
488 u64 mask = root->sectorsize - 1;
489 u64 extent_end; 487 u64 extent_end;
490 u64 start = key->offset; 488 u64 start = key->offset;
491 u64 saved_nbytes; 489 u64 saved_nbytes;
@@ -502,7 +500,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
502 extent_end = start + btrfs_file_extent_num_bytes(eb, item); 500 extent_end = start + btrfs_file_extent_num_bytes(eb, item);
503 else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 501 else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
504 size = btrfs_file_extent_inline_len(eb, item); 502 size = btrfs_file_extent_inline_len(eb, item);
505 extent_end = (start + size + mask) & ~mask; 503 extent_end = ALIGN(start + size, root->sectorsize);
506 } else { 504 } else {
507 ret = 0; 505 ret = 0;
508 goto out; 506 goto out;
@@ -2281,6 +2279,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2281 unsigned long log_transid = 0; 2279 unsigned long log_transid = 0;
2282 2280
2283 mutex_lock(&root->log_mutex); 2281 mutex_lock(&root->log_mutex);
2282 log_transid = root->log_transid;
2284 index1 = root->log_transid % 2; 2283 index1 = root->log_transid % 2;
2285 if (atomic_read(&root->log_commit[index1])) { 2284 if (atomic_read(&root->log_commit[index1])) {
2286 wait_log_commit(trans, root, root->log_transid); 2285 wait_log_commit(trans, root, root->log_transid);
@@ -2308,11 +2307,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2308 /* bail out if we need to do a full commit */ 2307 /* bail out if we need to do a full commit */
2309 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2308 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2310 ret = -EAGAIN; 2309 ret = -EAGAIN;
2310 btrfs_free_logged_extents(log, log_transid);
2311 mutex_unlock(&root->log_mutex); 2311 mutex_unlock(&root->log_mutex);
2312 goto out; 2312 goto out;
2313 } 2313 }
2314 2314
2315 log_transid = root->log_transid;
2316 if (log_transid % 2 == 0) 2315 if (log_transid % 2 == 0)
2317 mark = EXTENT_DIRTY; 2316 mark = EXTENT_DIRTY;
2318 else 2317 else
@@ -2324,6 +2323,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2324 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 2323 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2325 if (ret) { 2324 if (ret) {
2326 btrfs_abort_transaction(trans, root, ret); 2325 btrfs_abort_transaction(trans, root, ret);
2326 btrfs_free_logged_extents(log, log_transid);
2327 mutex_unlock(&root->log_mutex); 2327 mutex_unlock(&root->log_mutex);
2328 goto out; 2328 goto out;
2329 } 2329 }
@@ -2363,6 +2363,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2363 } 2363 }
2364 root->fs_info->last_trans_log_full_commit = trans->transid; 2364 root->fs_info->last_trans_log_full_commit = trans->transid;
2365 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2365 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2366 btrfs_free_logged_extents(log, log_transid);
2366 mutex_unlock(&log_root_tree->log_mutex); 2367 mutex_unlock(&log_root_tree->log_mutex);
2367 ret = -EAGAIN; 2368 ret = -EAGAIN;
2368 goto out; 2369 goto out;
@@ -2373,6 +2374,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2373 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2374 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2374 wait_log_commit(trans, log_root_tree, 2375 wait_log_commit(trans, log_root_tree,
2375 log_root_tree->log_transid); 2376 log_root_tree->log_transid);
2377 btrfs_free_logged_extents(log, log_transid);
2376 mutex_unlock(&log_root_tree->log_mutex); 2378 mutex_unlock(&log_root_tree->log_mutex);
2377 ret = 0; 2379 ret = 0;
2378 goto out; 2380 goto out;
@@ -2392,6 +2394,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2392 */ 2394 */
2393 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2395 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2394 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2396 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2397 btrfs_free_logged_extents(log, log_transid);
2395 mutex_unlock(&log_root_tree->log_mutex); 2398 mutex_unlock(&log_root_tree->log_mutex);
2396 ret = -EAGAIN; 2399 ret = -EAGAIN;
2397 goto out_wake_log_root; 2400 goto out_wake_log_root;
@@ -2402,10 +2405,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2402 EXTENT_DIRTY | EXTENT_NEW); 2405 EXTENT_DIRTY | EXTENT_NEW);
2403 if (ret) { 2406 if (ret) {
2404 btrfs_abort_transaction(trans, root, ret); 2407 btrfs_abort_transaction(trans, root, ret);
2408 btrfs_free_logged_extents(log, log_transid);
2405 mutex_unlock(&log_root_tree->log_mutex); 2409 mutex_unlock(&log_root_tree->log_mutex);
2406 goto out_wake_log_root; 2410 goto out_wake_log_root;
2407 } 2411 }
2408 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2412 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2413 btrfs_wait_logged_extents(log, log_transid);
2409 2414
2410 btrfs_set_super_log_root(root->fs_info->super_for_commit, 2415 btrfs_set_super_log_root(root->fs_info->super_for_commit,
2411 log_root_tree->node->start); 2416 log_root_tree->node->start);
@@ -2461,8 +2466,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2461 .process_func = process_one_buffer 2466 .process_func = process_one_buffer
2462 }; 2467 };
2463 2468
2464 ret = walk_log_tree(trans, log, &wc); 2469 if (trans) {
2465 BUG_ON(ret); 2470 ret = walk_log_tree(trans, log, &wc);
2471 BUG_ON(ret);
2472 }
2466 2473
2467 while (1) { 2474 while (1) {
2468 ret = find_first_extent_bit(&log->dirty_log_pages, 2475 ret = find_first_extent_bit(&log->dirty_log_pages,
@@ -2475,6 +2482,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2475 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); 2482 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2476 } 2483 }
2477 2484
2485 /*
2486 * We may have short-circuited the log tree with the full commit logic
2487 * and left ordered extents on our list, so clear these out to keep us
2488 * from leaking inodes and memory.
2489 */
2490 btrfs_free_logged_extents(log, 0);
2491 btrfs_free_logged_extents(log, 1);
2492
2478 free_extent_buffer(log->node); 2493 free_extent_buffer(log->node);
2479 kfree(log); 2494 kfree(log);
2480} 2495}
@@ -2724,7 +2739,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2724 path->keep_locks = 1; 2739 path->keep_locks = 1;
2725 2740
2726 ret = btrfs_search_forward(root, &min_key, &max_key, 2741 ret = btrfs_search_forward(root, &min_key, &max_key,
2727 path, 0, trans->transid); 2742 path, trans->transid);
2728 2743
2729 /* 2744 /*
2730 * we didn't find anything from this transaction, see if there 2745 * we didn't find anything from this transaction, see if there
@@ -3271,16 +3286,21 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3271 struct btrfs_root *log = root->log_root; 3286 struct btrfs_root *log = root->log_root;
3272 struct btrfs_file_extent_item *fi; 3287 struct btrfs_file_extent_item *fi;
3273 struct extent_buffer *leaf; 3288 struct extent_buffer *leaf;
3289 struct btrfs_ordered_extent *ordered;
3274 struct list_head ordered_sums; 3290 struct list_head ordered_sums;
3275 struct btrfs_map_token token; 3291 struct btrfs_map_token token;
3276 struct btrfs_key key; 3292 struct btrfs_key key;
3277 u64 csum_offset = em->mod_start - em->start; 3293 u64 mod_start = em->mod_start;
3278 u64 csum_len = em->mod_len; 3294 u64 mod_len = em->mod_len;
3295 u64 csum_offset;
3296 u64 csum_len;
3279 u64 extent_offset = em->start - em->orig_start; 3297 u64 extent_offset = em->start - em->orig_start;
3280 u64 block_len; 3298 u64 block_len;
3281 int ret; 3299 int ret;
3300 int index = log->log_transid % 2;
3282 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3301 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3283 3302
3303insert:
3284 INIT_LIST_HEAD(&ordered_sums); 3304 INIT_LIST_HEAD(&ordered_sums);
3285 btrfs_init_map_token(&token); 3305 btrfs_init_map_token(&token);
3286 key.objectid = btrfs_ino(inode); 3306 key.objectid = btrfs_ino(inode);
@@ -3296,6 +3316,23 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3296 leaf = path->nodes[0]; 3316 leaf = path->nodes[0];
3297 fi = btrfs_item_ptr(leaf, path->slots[0], 3317 fi = btrfs_item_ptr(leaf, path->slots[0],
3298 struct btrfs_file_extent_item); 3318 struct btrfs_file_extent_item);
3319
3320 /*
3321 * If we are overwriting an inline extent with a real one then we need
3322 * to just delete the inline extent as it may not be large enough to
3323 * have the entire file_extent_item.
3324 */
3325 if (ret && btrfs_token_file_extent_type(leaf, fi, &token) ==
3326 BTRFS_FILE_EXTENT_INLINE) {
3327 ret = btrfs_del_item(trans, log, path);
3328 btrfs_release_path(path);
3329 if (ret) {
3330 path->really_keep_locks = 0;
3331 return ret;
3332 }
3333 goto insert;
3334 }
3335
3299 btrfs_set_token_file_extent_generation(leaf, fi, em->generation, 3336 btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
3300 &token); 3337 &token);
3301 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3338 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3362,6 +3399,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3362 csum_len = block_len; 3399 csum_len = block_len;
3363 } 3400 }
3364 3401
3402 /*
3403 * First check and see if our csums are on our outstanding ordered
3404 * extents.
3405 */
3406again:
3407 spin_lock_irq(&log->log_extents_lock[index]);
3408 list_for_each_entry(ordered, &log->logged_list[index], log_list) {
3409 struct btrfs_ordered_sum *sum;
3410
3411 if (!mod_len)
3412 break;
3413
3414 if (ordered->inode != inode)
3415 continue;
3416
3417 if (ordered->file_offset + ordered->len <= mod_start ||
3418 mod_start + mod_len <= ordered->file_offset)
3419 continue;
3420
3421 /*
3422 * We are going to copy all the csums on this ordered extent, so
3423 * go ahead and adjust mod_start and mod_len in case this
3424 * ordered extent has already been logged.
3425 */
3426 if (ordered->file_offset > mod_start) {
3427 if (ordered->file_offset + ordered->len >=
3428 mod_start + mod_len)
3429 mod_len = ordered->file_offset - mod_start;
3430 /*
3431 * If we have this case
3432 *
3433 * |--------- logged extent ---------|
3434 * |----- ordered extent ----|
3435 *
3436 * Just don't mess with mod_start and mod_len, we'll
3437 * just end up logging more csums than we need and it
3438 * will be ok.
3439 */
3440 } else {
3441 if (ordered->file_offset + ordered->len <
3442 mod_start + mod_len) {
3443 mod_len = (mod_start + mod_len) -
3444 (ordered->file_offset + ordered->len);
3445 mod_start = ordered->file_offset +
3446 ordered->len;
3447 } else {
3448 mod_len = 0;
3449 }
3450 }
3451
3452 /*
3453 * To keep us from looping for the above case of an ordered
3454 * extent that falls inside of the logged extent.
3455 */
3456 if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
3457 &ordered->flags))
3458 continue;
3459 atomic_inc(&ordered->refs);
3460 spin_unlock_irq(&log->log_extents_lock[index]);
3461 /*
3462 * we've dropped the lock, we must either break or
3463 * start over after this.
3464 */
3465
3466 wait_event(ordered->wait, ordered->csum_bytes_left == 0);
3467
3468 list_for_each_entry(sum, &ordered->list, list) {
3469 ret = btrfs_csum_file_blocks(trans, log, sum);
3470 if (ret) {
3471 btrfs_put_ordered_extent(ordered);
3472 goto unlocked;
3473 }
3474 }
3475 btrfs_put_ordered_extent(ordered);
3476 goto again;
3477
3478 }
3479 spin_unlock_irq(&log->log_extents_lock[index]);
3480unlocked:
3481
3482 if (!mod_len || ret)
3483 return ret;
3484
3485 csum_offset = mod_start - em->start;
3486 csum_len = mod_len;
3487
3365 /* block start is already adjusted for the file extent offset. */ 3488 /* block start is already adjusted for the file extent offset. */
3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3489 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3367 em->block_start + csum_offset, 3490 em->block_start + csum_offset,
@@ -3393,6 +3516,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3393 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 3516 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3394 u64 test_gen; 3517 u64 test_gen;
3395 int ret = 0; 3518 int ret = 0;
3519 int num = 0;
3396 3520
3397 INIT_LIST_HEAD(&extents); 3521 INIT_LIST_HEAD(&extents);
3398 3522
@@ -3401,16 +3525,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3401 3525
3402 list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 3526 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
3403 list_del_init(&em->list); 3527 list_del_init(&em->list);
3528
3529 /*
3530 * Just an arbitrary number, this can be really CPU intensive
3531 * once we start getting a lot of extents, and really once we
3532 * have a bunch of extents we just want to commit since it will
3533 * be faster.
3534 */
3535 if (++num > 32768) {
3536 list_del_init(&tree->modified_extents);
3537 ret = -EFBIG;
3538 goto process;
3539 }
3540
3404 if (em->generation <= test_gen) 3541 if (em->generation <= test_gen)
3405 continue; 3542 continue;
3406 /* Need a ref to keep it from getting evicted from cache */ 3543 /* Need a ref to keep it from getting evicted from cache */
3407 atomic_inc(&em->refs); 3544 atomic_inc(&em->refs);
3408 set_bit(EXTENT_FLAG_LOGGING, &em->flags); 3545 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
3409 list_add_tail(&em->list, &extents); 3546 list_add_tail(&em->list, &extents);
3547 num++;
3410 } 3548 }
3411 3549
3412 list_sort(NULL, &extents, extent_cmp); 3550 list_sort(NULL, &extents, extent_cmp);
3413 3551
3552process:
3414 while (!list_empty(&extents)) { 3553 while (!list_empty(&extents)) {
3415 em = list_entry(extents.next, struct extent_map, list); 3554 em = list_entry(extents.next, struct extent_map, list);
3416 3555
@@ -3513,6 +3652,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3513 3652
3514 mutex_lock(&BTRFS_I(inode)->log_mutex); 3653 mutex_lock(&BTRFS_I(inode)->log_mutex);
3515 3654
3655 btrfs_get_logged_extents(log, inode);
3656
3516 /* 3657 /*
3517 * a brute force approach to making sure we get the most uptodate 3658 * a brute force approach to making sure we get the most uptodate
3518 * copies of everything. 3659 * copies of everything.
@@ -3558,7 +3699,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3558 while (1) { 3699 while (1) {
3559 ins_nr = 0; 3700 ins_nr = 0;
3560 ret = btrfs_search_forward(root, &min_key, &max_key, 3701 ret = btrfs_search_forward(root, &min_key, &max_key,
3561 path, 0, trans->transid); 3702 path, trans->transid);
3562 if (ret != 0) 3703 if (ret != 0)
3563 break; 3704 break;
3564again: 3705again:
@@ -3656,6 +3797,8 @@ log_extents:
3656 BTRFS_I(inode)->logged_trans = trans->transid; 3797 BTRFS_I(inode)->logged_trans = trans->transid;
3657 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 3798 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
3658out_unlock: 3799out_unlock:
3800 if (err)
3801 btrfs_free_logged_extents(log, log->log_transid);
3659 mutex_unlock(&BTRFS_I(inode)->log_mutex); 3802 mutex_unlock(&BTRFS_I(inode)->log_mutex);
3660 3803
3661 btrfs_free_path(path); 3804 btrfs_free_path(path);
@@ -3822,7 +3965,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3822end_trans: 3965end_trans:
3823 dput(old_parent); 3966 dput(old_parent);
3824 if (ret < 0) { 3967 if (ret < 0) {
3825 WARN_ON(ret != -ENOSPC);
3826 root->fs_info->last_trans_log_full_commit = trans->transid; 3968 root->fs_info->last_trans_log_full_commit = trans->transid;
3827 ret = 1; 3969 ret = 1;
3828 } 3970 }