diff options
author | Yan Zheng <zheng.yan@oracle.com> | 2009-01-06 11:42:00 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-01-06 11:42:00 -0500 |
commit | 07d400a6df4767a90d49a153fdb7f4cfa1e3f23e (patch) | |
tree | 8ca61bb87ffb72343b8d392a26fb7a6265f6fa3d /fs/btrfs/tree-log.c | |
parent | 1ba12553f3600ffebad226c5204ab0e46df98161 (diff) |
Btrfs: tree logging checksum fixes
This patch contains the following changes:
1) Limit the max size of the btrfs_ordered_sum structure to PAGE_SIZE. This
struct is kmalloced, so we want to keep its size reasonable.
2) Replace copy_extent_csums with btrfs_lookup_csums_range. copy_extent_csums
was duplicated code in tree-log.c.
3) Remove replay_one_csum. csum items are replayed at the same time as
replaying file extents. This guarantees we only replay useful csums.
4) nbytes accounting fix.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 293 |
1 files changed, 91 insertions, 202 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3a72a1b6c247..332ec35d2c08 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -433,49 +433,6 @@ insert: | |||
433 | trans->transid); | 433 | trans->transid); |
434 | } | 434 | } |
435 | } | 435 | } |
436 | |||
437 | if (overwrite_root && | ||
438 | key->type == BTRFS_EXTENT_DATA_KEY) { | ||
439 | int extent_type; | ||
440 | struct btrfs_file_extent_item *fi; | ||
441 | |||
442 | fi = (struct btrfs_file_extent_item *)dst_ptr; | ||
443 | extent_type = btrfs_file_extent_type(path->nodes[0], fi); | ||
444 | if (extent_type == BTRFS_FILE_EXTENT_REG || | ||
445 | extent_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
446 | struct btrfs_key ins; | ||
447 | ins.objectid = btrfs_file_extent_disk_bytenr( | ||
448 | path->nodes[0], fi); | ||
449 | ins.offset = btrfs_file_extent_disk_num_bytes( | ||
450 | path->nodes[0], fi); | ||
451 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
452 | |||
453 | /* | ||
454 | * is this extent already allocated in the extent | ||
455 | * allocation tree? If so, just add a reference | ||
456 | */ | ||
457 | ret = btrfs_lookup_extent(root, ins.objectid, | ||
458 | ins.offset); | ||
459 | if (ret == 0) { | ||
460 | ret = btrfs_inc_extent_ref(trans, root, | ||
461 | ins.objectid, ins.offset, | ||
462 | path->nodes[0]->start, | ||
463 | root->root_key.objectid, | ||
464 | trans->transid, key->objectid); | ||
465 | } else { | ||
466 | /* | ||
467 | * insert the extent pointer in the extent | ||
468 | * allocation tree | ||
469 | */ | ||
470 | ret = btrfs_alloc_logged_extent(trans, root, | ||
471 | path->nodes[0]->start, | ||
472 | root->root_key.objectid, | ||
473 | trans->transid, key->objectid, | ||
474 | &ins); | ||
475 | BUG_ON(ret); | ||
476 | } | ||
477 | } | ||
478 | } | ||
479 | no_copy: | 436 | no_copy: |
480 | btrfs_mark_buffer_dirty(path->nodes[0]); | 437 | btrfs_mark_buffer_dirty(path->nodes[0]); |
481 | btrfs_release_path(root, path); | 438 | btrfs_release_path(root, path); |
@@ -530,6 +487,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
530 | u64 extent_end; | 487 | u64 extent_end; |
531 | u64 alloc_hint; | 488 | u64 alloc_hint; |
532 | u64 start = key->offset; | 489 | u64 start = key->offset; |
490 | u64 saved_nbytes; | ||
533 | struct btrfs_file_extent_item *item; | 491 | struct btrfs_file_extent_item *item; |
534 | struct inode *inode = NULL; | 492 | struct inode *inode = NULL; |
535 | unsigned long size; | 493 | unsigned long size; |
@@ -591,17 +549,95 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
591 | } | 549 | } |
592 | btrfs_release_path(root, path); | 550 | btrfs_release_path(root, path); |
593 | 551 | ||
552 | saved_nbytes = inode_get_bytes(inode); | ||
594 | /* drop any overlapping extents */ | 553 | /* drop any overlapping extents */ |
595 | ret = btrfs_drop_extents(trans, root, inode, | 554 | ret = btrfs_drop_extents(trans, root, inode, |
596 | start, extent_end, start, &alloc_hint); | 555 | start, extent_end, start, &alloc_hint); |
597 | BUG_ON(ret); | 556 | BUG_ON(ret); |
598 | 557 | ||
599 | /* insert the extent */ | 558 | if (found_type == BTRFS_FILE_EXTENT_REG || |
600 | ret = overwrite_item(trans, root, path, eb, slot, key); | 559 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
601 | BUG_ON(ret); | 560 | unsigned long dest_offset; |
561 | struct btrfs_key ins; | ||
562 | |||
563 | ret = btrfs_insert_empty_item(trans, root, path, key, | ||
564 | sizeof(*item)); | ||
565 | BUG_ON(ret); | ||
566 | dest_offset = btrfs_item_ptr_offset(path->nodes[0], | ||
567 | path->slots[0]); | ||
568 | copy_extent_buffer(path->nodes[0], eb, dest_offset, | ||
569 | (unsigned long)item, sizeof(*item)); | ||
570 | |||
571 | ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); | ||
572 | ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); | ||
573 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
574 | |||
575 | if (ins.objectid > 0) { | ||
576 | u64 csum_start; | ||
577 | u64 csum_end; | ||
578 | LIST_HEAD(ordered_sums); | ||
579 | /* | ||
580 | * is this extent already allocated in the extent | ||
581 | * allocation tree? If so, just add a reference | ||
582 | */ | ||
583 | ret = btrfs_lookup_extent(root, ins.objectid, | ||
584 | ins.offset); | ||
585 | if (ret == 0) { | ||
586 | ret = btrfs_inc_extent_ref(trans, root, | ||
587 | ins.objectid, ins.offset, | ||
588 | path->nodes[0]->start, | ||
589 | root->root_key.objectid, | ||
590 | trans->transid, key->objectid); | ||
591 | } else { | ||
592 | /* | ||
593 | * insert the extent pointer in the extent | ||
594 | * allocation tree | ||
595 | */ | ||
596 | ret = btrfs_alloc_logged_extent(trans, root, | ||
597 | path->nodes[0]->start, | ||
598 | root->root_key.objectid, | ||
599 | trans->transid, key->objectid, | ||
600 | &ins); | ||
601 | BUG_ON(ret); | ||
602 | } | ||
603 | btrfs_release_path(root, path); | ||
604 | |||
605 | if (btrfs_file_extent_compression(eb, item)) { | ||
606 | csum_start = ins.objectid; | ||
607 | csum_end = csum_start + ins.offset; | ||
608 | } else { | ||
609 | csum_start = ins.objectid + | ||
610 | btrfs_file_extent_offset(eb, item); | ||
611 | csum_end = csum_start + | ||
612 | btrfs_file_extent_num_bytes(eb, item); | ||
613 | } | ||
614 | |||
615 | ret = btrfs_lookup_csums_range(root->log_root, | ||
616 | csum_start, csum_end - 1, | ||
617 | &ordered_sums); | ||
618 | BUG_ON(ret); | ||
619 | while (!list_empty(&ordered_sums)) { | ||
620 | struct btrfs_ordered_sum *sums; | ||
621 | sums = list_entry(ordered_sums.next, | ||
622 | struct btrfs_ordered_sum, | ||
623 | list); | ||
624 | ret = btrfs_csum_file_blocks(trans, | ||
625 | root->fs_info->csum_root, | ||
626 | sums); | ||
627 | BUG_ON(ret); | ||
628 | list_del(&sums->list); | ||
629 | kfree(sums); | ||
630 | } | ||
631 | } else { | ||
632 | btrfs_release_path(root, path); | ||
633 | } | ||
634 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | ||
635 | /* inline extents are easy, we just overwrite them */ | ||
636 | ret = overwrite_item(trans, root, path, eb, slot, key); | ||
637 | BUG_ON(ret); | ||
638 | } | ||
602 | 639 | ||
603 | /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */ | 640 | inode_set_bytes(inode, saved_nbytes); |
604 | inode_add_bytes(inode, extent_end - start); | ||
605 | btrfs_update_inode(trans, root, inode); | 641 | btrfs_update_inode(trans, root, inode); |
606 | out: | 642 | out: |
607 | if (inode) | 643 | if (inode) |
@@ -903,70 +939,6 @@ out_nowrite: | |||
903 | } | 939 | } |
904 | 940 | ||
905 | /* | 941 | /* |
906 | * replay one csum item from the log tree into the subvolume 'root' | ||
907 | * eb, slot and key all refer to the log tree | ||
908 | * path is for temp use by this function and should be released on return | ||
909 | * | ||
910 | * This copies the checksums out of the log tree and inserts them into | ||
911 | * the subvolume. Any existing checksums for this range in the file | ||
912 | * are overwritten, and new items are added where required. | ||
913 | * | ||
914 | * We keep this simple by reusing the btrfs_ordered_sum code from | ||
915 | * the data=ordered mode. This basically means making a copy | ||
916 | * of all the checksums in ram, which we have to do anyway for kmap | ||
917 | * rules. | ||
918 | * | ||
919 | * The copy is then sent down to btrfs_csum_file_blocks, which | ||
920 | * does all the hard work of finding existing items in the file | ||
921 | * or adding new ones. | ||
922 | */ | ||
923 | static noinline int replay_one_csum(struct btrfs_trans_handle *trans, | ||
924 | struct btrfs_root *root, | ||
925 | struct btrfs_path *path, | ||
926 | struct extent_buffer *eb, int slot, | ||
927 | struct btrfs_key *key) | ||
928 | { | ||
929 | int ret; | ||
930 | u32 item_size = btrfs_item_size_nr(eb, slot); | ||
931 | u64 cur_offset; | ||
932 | u16 csum_size = | ||
933 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
934 | unsigned long file_bytes; | ||
935 | struct btrfs_ordered_sum *sums; | ||
936 | struct btrfs_sector_sum *sector_sum; | ||
937 | unsigned long ptr; | ||
938 | |||
939 | file_bytes = (item_size / csum_size) * root->sectorsize; | ||
940 | sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); | ||
941 | if (!sums) | ||
942 | return -ENOMEM; | ||
943 | |||
944 | INIT_LIST_HEAD(&sums->list); | ||
945 | sums->len = file_bytes; | ||
946 | sums->bytenr = key->offset; | ||
947 | |||
948 | /* | ||
949 | * copy all the sums into the ordered sum struct | ||
950 | */ | ||
951 | sector_sum = sums->sums; | ||
952 | cur_offset = key->offset; | ||
953 | ptr = btrfs_item_ptr_offset(eb, slot); | ||
954 | while (item_size > 0) { | ||
955 | sector_sum->bytenr = cur_offset; | ||
956 | read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size); | ||
957 | sector_sum++; | ||
958 | item_size -= csum_size; | ||
959 | ptr += csum_size; | ||
960 | cur_offset += root->sectorsize; | ||
961 | } | ||
962 | |||
963 | /* let btrfs_csum_file_blocks add them into the file */ | ||
964 | ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums); | ||
965 | BUG_ON(ret); | ||
966 | kfree(sums); | ||
967 | return 0; | ||
968 | } | ||
969 | /* | ||
970 | * There are a few corners where the link count of the file can't | 942 | * There are a few corners where the link count of the file can't |
971 | * be properly maintained during replay. So, instead of adding | 943 | * be properly maintained during replay. So, instead of adding |
972 | * lots of complexity to the log code, we just scan the backrefs | 944 | * lots of complexity to the log code, we just scan the backrefs |
@@ -1659,10 +1631,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1659 | ret = replay_one_extent(wc->trans, root, path, | 1631 | ret = replay_one_extent(wc->trans, root, path, |
1660 | eb, i, &key); | 1632 | eb, i, &key); |
1661 | BUG_ON(ret); | 1633 | BUG_ON(ret); |
1662 | } else if (key.type == BTRFS_EXTENT_CSUM_KEY) { | ||
1663 | ret = replay_one_csum(wc->trans, root, path, | ||
1664 | eb, i, &key); | ||
1665 | BUG_ON(ret); | ||
1666 | } else if (key.type == BTRFS_DIR_ITEM_KEY || | 1634 | } else if (key.type == BTRFS_DIR_ITEM_KEY || |
1667 | key.type == BTRFS_DIR_INDEX_KEY) { | 1635 | key.type == BTRFS_DIR_INDEX_KEY) { |
1668 | ret = replay_one_dir_item(wc->trans, root, path, | 1636 | ret = replay_one_dir_item(wc->trans, root, path, |
@@ -2021,7 +1989,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2021 | .process_func = process_one_buffer | 1989 | .process_func = process_one_buffer |
2022 | }; | 1990 | }; |
2023 | 1991 | ||
2024 | if (!root->log_root) | 1992 | if (!root->log_root || root->fs_info->log_root_recovering) |
2025 | return 0; | 1993 | return 0; |
2026 | 1994 | ||
2027 | log = root->log_root; | 1995 | log = root->log_root; |
@@ -2453,86 +2421,6 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2453 | return 0; | 2421 | return 0; |
2454 | } | 2422 | } |
2455 | 2423 | ||
2456 | static noinline int copy_extent_csums(struct btrfs_trans_handle *trans, | ||
2457 | struct list_head *list, | ||
2458 | struct btrfs_root *root, | ||
2459 | u64 disk_bytenr, u64 len) | ||
2460 | { | ||
2461 | struct btrfs_ordered_sum *sums; | ||
2462 | struct btrfs_sector_sum *sector_sum; | ||
2463 | int ret; | ||
2464 | struct btrfs_path *path; | ||
2465 | struct btrfs_csum_item *item = NULL; | ||
2466 | u64 end = disk_bytenr + len; | ||
2467 | u64 item_start_offset = 0; | ||
2468 | u64 item_last_offset = 0; | ||
2469 | u32 diff; | ||
2470 | u32 sum; | ||
2471 | u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); | ||
2472 | |||
2473 | sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS); | ||
2474 | |||
2475 | sector_sum = sums->sums; | ||
2476 | sums->bytenr = disk_bytenr; | ||
2477 | sums->len = len; | ||
2478 | list_add_tail(&sums->list, list); | ||
2479 | |||
2480 | path = btrfs_alloc_path(); | ||
2481 | while (disk_bytenr < end) { | ||
2482 | if (!item || disk_bytenr < item_start_offset || | ||
2483 | disk_bytenr >= item_last_offset) { | ||
2484 | struct btrfs_key found_key; | ||
2485 | u32 item_size; | ||
2486 | |||
2487 | if (item) | ||
2488 | btrfs_release_path(root, path); | ||
2489 | item = btrfs_lookup_csum(NULL, root, path, | ||
2490 | disk_bytenr, 0); | ||
2491 | if (IS_ERR(item)) { | ||
2492 | ret = PTR_ERR(item); | ||
2493 | if (ret == -ENOENT || ret == -EFBIG) | ||
2494 | ret = 0; | ||
2495 | sum = 0; | ||
2496 | printk(KERN_INFO "log no csum found for " | ||
2497 | "byte %llu\n", | ||
2498 | (unsigned long long)disk_bytenr); | ||
2499 | item = NULL; | ||
2500 | btrfs_release_path(root, path); | ||
2501 | goto found; | ||
2502 | } | ||
2503 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
2504 | path->slots[0]); | ||
2505 | |||
2506 | item_start_offset = found_key.offset; | ||
2507 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
2508 | path->slots[0]); | ||
2509 | item_last_offset = item_start_offset + | ||
2510 | (item_size / csum_size) * | ||
2511 | root->sectorsize; | ||
2512 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2513 | struct btrfs_csum_item); | ||
2514 | } | ||
2515 | /* | ||
2516 | * this byte range must be able to fit inside | ||
2517 | * a single leaf so it will also fit inside a u32 | ||
2518 | */ | ||
2519 | diff = disk_bytenr - item_start_offset; | ||
2520 | diff = diff / root->sectorsize; | ||
2521 | diff = diff * csum_size; | ||
2522 | |||
2523 | read_extent_buffer(path->nodes[0], &sum, | ||
2524 | ((unsigned long)item) + diff, | ||
2525 | csum_size); | ||
2526 | found: | ||
2527 | sector_sum->bytenr = disk_bytenr; | ||
2528 | sector_sum->sum = sum; | ||
2529 | disk_bytenr += root->sectorsize; | ||
2530 | sector_sum++; | ||
2531 | } | ||
2532 | btrfs_free_path(path); | ||
2533 | return 0; | ||
2534 | } | ||
2535 | |||
2536 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2424 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
2537 | struct btrfs_root *log, | 2425 | struct btrfs_root *log, |
2538 | struct btrfs_path *dst_path, | 2426 | struct btrfs_path *dst_path, |
@@ -2622,10 +2510,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2622 | trans->transid, | 2510 | trans->transid, |
2623 | ins_keys[i].objectid); | 2511 | ins_keys[i].objectid); |
2624 | BUG_ON(ret); | 2512 | BUG_ON(ret); |
2625 | ret = copy_extent_csums(trans, | 2513 | ret = btrfs_lookup_csums_range( |
2626 | &ordered_sums, | 2514 | log->fs_info->csum_root, |
2627 | log->fs_info->csum_root, | 2515 | ds + cs, ds + cs + cl - 1, |
2628 | ds + cs, cl); | 2516 | &ordered_sums); |
2629 | BUG_ON(ret); | 2517 | BUG_ON(ret); |
2630 | } | 2518 | } |
2631 | } | 2519 | } |
@@ -2942,9 +2830,9 @@ again: | |||
2942 | tmp_key.offset = (u64)-1; | 2830 | tmp_key.offset = (u64)-1; |
2943 | 2831 | ||
2944 | wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); | 2832 | wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); |
2945 | |||
2946 | BUG_ON(!wc.replay_dest); | 2833 | BUG_ON(!wc.replay_dest); |
2947 | 2834 | ||
2835 | wc.replay_dest->log_root = log; | ||
2948 | btrfs_record_root_in_trans(wc.replay_dest); | 2836 | btrfs_record_root_in_trans(wc.replay_dest); |
2949 | ret = walk_log_tree(trans, log, &wc); | 2837 | ret = walk_log_tree(trans, log, &wc); |
2950 | BUG_ON(ret); | 2838 | BUG_ON(ret); |
@@ -2961,6 +2849,7 @@ again: | |||
2961 | } | 2849 | } |
2962 | 2850 | ||
2963 | key.offset = found_key.offset - 1; | 2851 | key.offset = found_key.offset - 1; |
2852 | wc.replay_dest->log_root = NULL; | ||
2964 | free_extent_buffer(log->node); | 2853 | free_extent_buffer(log->node); |
2965 | kfree(log); | 2854 | kfree(log); |
2966 | 2855 | ||