diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 166 |
1 file changed, 154 insertions, 12 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9027bb1e7466..c7ef569eb22a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -278,8 +278,7 @@ static int process_one_buffer(struct btrfs_root *log, | |||
278 | struct walk_control *wc, u64 gen) | 278 | struct walk_control *wc, u64 gen) |
279 | { | 279 | { |
280 | if (wc->pin) | 280 | if (wc->pin) |
281 | btrfs_pin_extent_for_log_replay(wc->trans, | 281 | btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, |
282 | log->fs_info->extent_root, | ||
283 | eb->start, eb->len); | 282 | eb->start, eb->len); |
284 | 283 | ||
285 | if (btrfs_buffer_uptodate(eb, gen, 0)) { | 284 | if (btrfs_buffer_uptodate(eb, gen, 0)) { |
@@ -485,7 +484,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
485 | struct btrfs_key *key) | 484 | struct btrfs_key *key) |
486 | { | 485 | { |
487 | int found_type; | 486 | int found_type; |
488 | u64 mask = root->sectorsize - 1; | ||
489 | u64 extent_end; | 487 | u64 extent_end; |
490 | u64 start = key->offset; | 488 | u64 start = key->offset; |
491 | u64 saved_nbytes; | 489 | u64 saved_nbytes; |
@@ -502,7 +500,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
502 | extent_end = start + btrfs_file_extent_num_bytes(eb, item); | 500 | extent_end = start + btrfs_file_extent_num_bytes(eb, item); |
503 | else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | 501 | else if (found_type == BTRFS_FILE_EXTENT_INLINE) { |
504 | size = btrfs_file_extent_inline_len(eb, item); | 502 | size = btrfs_file_extent_inline_len(eb, item); |
505 | extent_end = (start + size + mask) & ~mask; | 503 | extent_end = ALIGN(start + size, root->sectorsize); |
506 | } else { | 504 | } else { |
507 | ret = 0; | 505 | ret = 0; |
508 | goto out; | 506 | goto out; |
@@ -2281,6 +2279,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2281 | unsigned long log_transid = 0; | 2279 | unsigned long log_transid = 0; |
2282 | 2280 | ||
2283 | mutex_lock(&root->log_mutex); | 2281 | mutex_lock(&root->log_mutex); |
2282 | log_transid = root->log_transid; | ||
2284 | index1 = root->log_transid % 2; | 2283 | index1 = root->log_transid % 2; |
2285 | if (atomic_read(&root->log_commit[index1])) { | 2284 | if (atomic_read(&root->log_commit[index1])) { |
2286 | wait_log_commit(trans, root, root->log_transid); | 2285 | wait_log_commit(trans, root, root->log_transid); |
@@ -2308,11 +2307,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2308 | /* bail out if we need to do a full commit */ | 2307 | /* bail out if we need to do a full commit */ |
2309 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2308 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
2310 | ret = -EAGAIN; | 2309 | ret = -EAGAIN; |
2310 | btrfs_free_logged_extents(log, log_transid); | ||
2311 | mutex_unlock(&root->log_mutex); | 2311 | mutex_unlock(&root->log_mutex); |
2312 | goto out; | 2312 | goto out; |
2313 | } | 2313 | } |
2314 | 2314 | ||
2315 | log_transid = root->log_transid; | ||
2316 | if (log_transid % 2 == 0) | 2315 | if (log_transid % 2 == 0) |
2317 | mark = EXTENT_DIRTY; | 2316 | mark = EXTENT_DIRTY; |
2318 | else | 2317 | else |
@@ -2324,6 +2323,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2324 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); | 2323 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); |
2325 | if (ret) { | 2324 | if (ret) { |
2326 | btrfs_abort_transaction(trans, root, ret); | 2325 | btrfs_abort_transaction(trans, root, ret); |
2326 | btrfs_free_logged_extents(log, log_transid); | ||
2327 | mutex_unlock(&root->log_mutex); | 2327 | mutex_unlock(&root->log_mutex); |
2328 | goto out; | 2328 | goto out; |
2329 | } | 2329 | } |
@@ -2363,6 +2363,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2363 | } | 2363 | } |
2364 | root->fs_info->last_trans_log_full_commit = trans->transid; | 2364 | root->fs_info->last_trans_log_full_commit = trans->transid; |
2365 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2365 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2366 | btrfs_free_logged_extents(log, log_transid); | ||
2366 | mutex_unlock(&log_root_tree->log_mutex); | 2367 | mutex_unlock(&log_root_tree->log_mutex); |
2367 | ret = -EAGAIN; | 2368 | ret = -EAGAIN; |
2368 | goto out; | 2369 | goto out; |
@@ -2373,6 +2374,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2373 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2374 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2374 | wait_log_commit(trans, log_root_tree, | 2375 | wait_log_commit(trans, log_root_tree, |
2375 | log_root_tree->log_transid); | 2376 | log_root_tree->log_transid); |
2377 | btrfs_free_logged_extents(log, log_transid); | ||
2376 | mutex_unlock(&log_root_tree->log_mutex); | 2378 | mutex_unlock(&log_root_tree->log_mutex); |
2377 | ret = 0; | 2379 | ret = 0; |
2378 | goto out; | 2380 | goto out; |
@@ -2392,6 +2394,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2392 | */ | 2394 | */ |
2393 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2395 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
2394 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2396 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2397 | btrfs_free_logged_extents(log, log_transid); | ||
2395 | mutex_unlock(&log_root_tree->log_mutex); | 2398 | mutex_unlock(&log_root_tree->log_mutex); |
2396 | ret = -EAGAIN; | 2399 | ret = -EAGAIN; |
2397 | goto out_wake_log_root; | 2400 | goto out_wake_log_root; |
@@ -2402,10 +2405,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2402 | EXTENT_DIRTY | EXTENT_NEW); | 2405 | EXTENT_DIRTY | EXTENT_NEW); |
2403 | if (ret) { | 2406 | if (ret) { |
2404 | btrfs_abort_transaction(trans, root, ret); | 2407 | btrfs_abort_transaction(trans, root, ret); |
2408 | btrfs_free_logged_extents(log, log_transid); | ||
2405 | mutex_unlock(&log_root_tree->log_mutex); | 2409 | mutex_unlock(&log_root_tree->log_mutex); |
2406 | goto out_wake_log_root; | 2410 | goto out_wake_log_root; |
2407 | } | 2411 | } |
2408 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2412 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2413 | btrfs_wait_logged_extents(log, log_transid); | ||
2409 | 2414 | ||
2410 | btrfs_set_super_log_root(root->fs_info->super_for_commit, | 2415 | btrfs_set_super_log_root(root->fs_info->super_for_commit, |
2411 | log_root_tree->node->start); | 2416 | log_root_tree->node->start); |
@@ -2461,8 +2466,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans, | |||
2461 | .process_func = process_one_buffer | 2466 | .process_func = process_one_buffer |
2462 | }; | 2467 | }; |
2463 | 2468 | ||
2464 | ret = walk_log_tree(trans, log, &wc); | 2469 | if (trans) { |
2465 | BUG_ON(ret); | 2470 | ret = walk_log_tree(trans, log, &wc); |
2471 | BUG_ON(ret); | ||
2472 | } | ||
2466 | 2473 | ||
2467 | while (1) { | 2474 | while (1) { |
2468 | ret = find_first_extent_bit(&log->dirty_log_pages, | 2475 | ret = find_first_extent_bit(&log->dirty_log_pages, |
@@ -2475,6 +2482,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans, | |||
2475 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | 2482 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
2476 | } | 2483 | } |
2477 | 2484 | ||
2485 | /* | ||
2486 | * We may have short-circuited the log tree with the full commit logic | ||
2487 | * and left ordered extents on our list, so clear these out to keep us | ||
2488 | * from leaking inodes and memory. | ||
2489 | */ | ||
2490 | btrfs_free_logged_extents(log, 0); | ||
2491 | btrfs_free_logged_extents(log, 1); | ||
2492 | |||
2478 | free_extent_buffer(log->node); | 2493 | free_extent_buffer(log->node); |
2479 | kfree(log); | 2494 | kfree(log); |
2480 | } | 2495 | } |
@@ -2724,7 +2739,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2724 | path->keep_locks = 1; | 2739 | path->keep_locks = 1; |
2725 | 2740 | ||
2726 | ret = btrfs_search_forward(root, &min_key, &max_key, | 2741 | ret = btrfs_search_forward(root, &min_key, &max_key, |
2727 | path, 0, trans->transid); | 2742 | path, trans->transid); |
2728 | 2743 | ||
2729 | /* | 2744 | /* |
2730 | * we didn't find anything from this transaction, see if there | 2745 | * we didn't find anything from this transaction, see if there |
@@ -3271,16 +3286,21 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3271 | struct btrfs_root *log = root->log_root; | 3286 | struct btrfs_root *log = root->log_root; |
3272 | struct btrfs_file_extent_item *fi; | 3287 | struct btrfs_file_extent_item *fi; |
3273 | struct extent_buffer *leaf; | 3288 | struct extent_buffer *leaf; |
3289 | struct btrfs_ordered_extent *ordered; | ||
3274 | struct list_head ordered_sums; | 3290 | struct list_head ordered_sums; |
3275 | struct btrfs_map_token token; | 3291 | struct btrfs_map_token token; |
3276 | struct btrfs_key key; | 3292 | struct btrfs_key key; |
3277 | u64 csum_offset = em->mod_start - em->start; | 3293 | u64 mod_start = em->mod_start; |
3278 | u64 csum_len = em->mod_len; | 3294 | u64 mod_len = em->mod_len; |
3295 | u64 csum_offset; | ||
3296 | u64 csum_len; | ||
3279 | u64 extent_offset = em->start - em->orig_start; | 3297 | u64 extent_offset = em->start - em->orig_start; |
3280 | u64 block_len; | 3298 | u64 block_len; |
3281 | int ret; | 3299 | int ret; |
3300 | int index = log->log_transid % 2; | ||
3282 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 3301 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
3283 | 3302 | ||
3303 | insert: | ||
3284 | INIT_LIST_HEAD(&ordered_sums); | 3304 | INIT_LIST_HEAD(&ordered_sums); |
3285 | btrfs_init_map_token(&token); | 3305 | btrfs_init_map_token(&token); |
3286 | key.objectid = btrfs_ino(inode); | 3306 | key.objectid = btrfs_ino(inode); |
@@ -3296,6 +3316,23 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3296 | leaf = path->nodes[0]; | 3316 | leaf = path->nodes[0]; |
3297 | fi = btrfs_item_ptr(leaf, path->slots[0], | 3317 | fi = btrfs_item_ptr(leaf, path->slots[0], |
3298 | struct btrfs_file_extent_item); | 3318 | struct btrfs_file_extent_item); |
3319 | |||
3320 | /* | ||
3321 | * If we are overwriting an inline extent with a real one then we need | ||
3322 | * to just delete the inline extent as it may not be large enough to | ||
3323 | * have the entire file_extent_item. | ||
3324 | */ | ||
3325 | if (ret && btrfs_token_file_extent_type(leaf, fi, &token) == | ||
3326 | BTRFS_FILE_EXTENT_INLINE) { | ||
3327 | ret = btrfs_del_item(trans, log, path); | ||
3328 | btrfs_release_path(path); | ||
3329 | if (ret) { | ||
3330 | path->really_keep_locks = 0; | ||
3331 | return ret; | ||
3332 | } | ||
3333 | goto insert; | ||
3334 | } | ||
3335 | |||
3299 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | 3336 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, |
3300 | &token); | 3337 | &token); |
3301 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | 3338 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { |
@@ -3362,6 +3399,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3362 | csum_len = block_len; | 3399 | csum_len = block_len; |
3363 | } | 3400 | } |
3364 | 3401 | ||
3402 | /* | ||
3403 | * First check and see if our csums are on our outstanding ordered | ||
3404 | * extents. | ||
3405 | */ | ||
3406 | again: | ||
3407 | spin_lock_irq(&log->log_extents_lock[index]); | ||
3408 | list_for_each_entry(ordered, &log->logged_list[index], log_list) { | ||
3409 | struct btrfs_ordered_sum *sum; | ||
3410 | |||
3411 | if (!mod_len) | ||
3412 | break; | ||
3413 | |||
3414 | if (ordered->inode != inode) | ||
3415 | continue; | ||
3416 | |||
3417 | if (ordered->file_offset + ordered->len <= mod_start || | ||
3418 | mod_start + mod_len <= ordered->file_offset) | ||
3419 | continue; | ||
3420 | |||
3421 | /* | ||
3422 | * We are going to copy all the csums on this ordered extent, so | ||
3423 | * go ahead and adjust mod_start and mod_len in case this | ||
3424 | * ordered extent has already been logged. | ||
3425 | */ | ||
3426 | if (ordered->file_offset > mod_start) { | ||
3427 | if (ordered->file_offset + ordered->len >= | ||
3428 | mod_start + mod_len) | ||
3429 | mod_len = ordered->file_offset - mod_start; | ||
3430 | /* | ||
3431 | * If we have this case | ||
3432 | * | ||
3433 | * |--------- logged extent ---------| | ||
3434 | * |----- ordered extent ----| | ||
3435 | * | ||
3436 | * Just don't mess with mod_start and mod_len, we'll | ||
3437 | * just end up logging more csums than we need and it | ||
3438 | * will be ok. | ||
3439 | */ | ||
3440 | } else { | ||
3441 | if (ordered->file_offset + ordered->len < | ||
3442 | mod_start + mod_len) { | ||
3443 | mod_len = (mod_start + mod_len) - | ||
3444 | (ordered->file_offset + ordered->len); | ||
3445 | mod_start = ordered->file_offset + | ||
3446 | ordered->len; | ||
3447 | } else { | ||
3448 | mod_len = 0; | ||
3449 | } | ||
3450 | } | ||
3451 | |||
3452 | /* | ||
3453 | * To keep us from looping for the above case of an ordered | ||
3454 | * extent that falls inside of the logged extent. | ||
3455 | */ | ||
3456 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, | ||
3457 | &ordered->flags)) | ||
3458 | continue; | ||
3459 | atomic_inc(&ordered->refs); | ||
3460 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
3461 | /* | ||
3462 | * we've dropped the lock, we must either break or | ||
3463 | * start over after this. | ||
3464 | */ | ||
3465 | |||
3466 | wait_event(ordered->wait, ordered->csum_bytes_left == 0); | ||
3467 | |||
3468 | list_for_each_entry(sum, &ordered->list, list) { | ||
3469 | ret = btrfs_csum_file_blocks(trans, log, sum); | ||
3470 | if (ret) { | ||
3471 | btrfs_put_ordered_extent(ordered); | ||
3472 | goto unlocked; | ||
3473 | } | ||
3474 | } | ||
3475 | btrfs_put_ordered_extent(ordered); | ||
3476 | goto again; | ||
3477 | |||
3478 | } | ||
3479 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
3480 | unlocked: | ||
3481 | |||
3482 | if (!mod_len || ret) | ||
3483 | return ret; | ||
3484 | |||
3485 | csum_offset = mod_start - em->start; | ||
3486 | csum_len = mod_len; | ||
3487 | |||
3365 | /* block start is already adjusted for the file extent offset. */ | 3488 | /* block start is already adjusted for the file extent offset. */ |
3366 | ret = btrfs_lookup_csums_range(log->fs_info->csum_root, | 3489 | ret = btrfs_lookup_csums_range(log->fs_info->csum_root, |
3367 | em->block_start + csum_offset, | 3490 | em->block_start + csum_offset, |
@@ -3393,6 +3516,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
3393 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | 3516 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; |
3394 | u64 test_gen; | 3517 | u64 test_gen; |
3395 | int ret = 0; | 3518 | int ret = 0; |
3519 | int num = 0; | ||
3396 | 3520 | ||
3397 | INIT_LIST_HEAD(&extents); | 3521 | INIT_LIST_HEAD(&extents); |
3398 | 3522 | ||
@@ -3401,16 +3525,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
3401 | 3525 | ||
3402 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { | 3526 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { |
3403 | list_del_init(&em->list); | 3527 | list_del_init(&em->list); |
3528 | |||
3529 | /* | ||
3530 | * Just an arbitrary number, this can be really CPU intensive | ||
3531 | * once we start getting a lot of extents, and really once we | ||
3532 | * have a bunch of extents we just want to commit since it will | ||
3533 | * be faster. | ||
3534 | */ | ||
3535 | if (++num > 32768) { | ||
3536 | list_del_init(&tree->modified_extents); | ||
3537 | ret = -EFBIG; | ||
3538 | goto process; | ||
3539 | } | ||
3540 | |||
3404 | if (em->generation <= test_gen) | 3541 | if (em->generation <= test_gen) |
3405 | continue; | 3542 | continue; |
3406 | /* Need a ref to keep it from getting evicted from cache */ | 3543 | /* Need a ref to keep it from getting evicted from cache */ |
3407 | atomic_inc(&em->refs); | 3544 | atomic_inc(&em->refs); |
3408 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); | 3545 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); |
3409 | list_add_tail(&em->list, &extents); | 3546 | list_add_tail(&em->list, &extents); |
3547 | num++; | ||
3410 | } | 3548 | } |
3411 | 3549 | ||
3412 | list_sort(NULL, &extents, extent_cmp); | 3550 | list_sort(NULL, &extents, extent_cmp); |
3413 | 3551 | ||
3552 | process: | ||
3414 | while (!list_empty(&extents)) { | 3553 | while (!list_empty(&extents)) { |
3415 | em = list_entry(extents.next, struct extent_map, list); | 3554 | em = list_entry(extents.next, struct extent_map, list); |
3416 | 3555 | ||
@@ -3513,6 +3652,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3513 | 3652 | ||
3514 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3653 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
3515 | 3654 | ||
3655 | btrfs_get_logged_extents(log, inode); | ||
3656 | |||
3516 | /* | 3657 | /* |
3517 | * a brute force approach to making sure we get the most uptodate | 3658 | * a brute force approach to making sure we get the most uptodate |
3518 | * copies of everything. | 3659 | * copies of everything. |
@@ -3558,7 +3699,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3558 | while (1) { | 3699 | while (1) { |
3559 | ins_nr = 0; | 3700 | ins_nr = 0; |
3560 | ret = btrfs_search_forward(root, &min_key, &max_key, | 3701 | ret = btrfs_search_forward(root, &min_key, &max_key, |
3561 | path, 0, trans->transid); | 3702 | path, trans->transid); |
3562 | if (ret != 0) | 3703 | if (ret != 0) |
3563 | break; | 3704 | break; |
3564 | again: | 3705 | again: |
@@ -3656,6 +3797,8 @@ log_extents: | |||
3656 | BTRFS_I(inode)->logged_trans = trans->transid; | 3797 | BTRFS_I(inode)->logged_trans = trans->transid; |
3657 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 3798 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
3658 | out_unlock: | 3799 | out_unlock: |
3800 | if (err) | ||
3801 | btrfs_free_logged_extents(log, log->log_transid); | ||
3659 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 3802 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
3660 | 3803 | ||
3661 | btrfs_free_path(path); | 3804 | btrfs_free_path(path); |
@@ -3822,7 +3965,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3822 | end_trans: | 3965 | end_trans: |
3823 | dput(old_parent); | 3966 | dput(old_parent); |
3824 | if (ret < 0) { | 3967 | if (ret < 0) { |
3825 | WARN_ON(ret != -ENOSPC); | ||
3826 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3968 | root->fs_info->last_trans_log_full_commit = trans->transid; |
3827 | ret = 1; | 3969 | ret = 1; |
3828 | } | 3970 | } |