aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
author: Yan Zheng <zheng.yan@oracle.com> 2009-01-06 11:42:00 -0500
committer: Chris Mason <chris.mason@oracle.com> 2009-01-06 11:42:00 -0500
commit: 07d400a6df4767a90d49a153fdb7f4cfa1e3f23e (patch)
tree: 8ca61bb87ffb72343b8d392a26fb7a6265f6fa3d /fs/btrfs/tree-log.c
parent: 1ba12553f3600ffebad226c5204ab0e46df98161 (diff)
Btrfs: tree logging checksum fixes
This patch contains the following changes. 1) Limit the maximum size of the btrfs_ordered_sum structure to PAGE_SIZE. This struct is kmalloced, so we want to keep its size reasonable. 2) Replace copy_extent_csums with btrfs_lookup_csums_range; copy_extent_csums was duplicated code in tree-log.c. 3) Remove replay_one_csum. Csum items are now replayed at the same time as the file extents they belong to, which guarantees that we only replay useful csums. 4) Fix the nbytes accounting. Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 293
1 files changed, 91 insertions, 202 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 3a72a1b6c247..332ec35d2c08 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -433,49 +433,6 @@ insert:
433 trans->transid); 433 trans->transid);
434 } 434 }
435 } 435 }
436
437 if (overwrite_root &&
438 key->type == BTRFS_EXTENT_DATA_KEY) {
439 int extent_type;
440 struct btrfs_file_extent_item *fi;
441
442 fi = (struct btrfs_file_extent_item *)dst_ptr;
443 extent_type = btrfs_file_extent_type(path->nodes[0], fi);
444 if (extent_type == BTRFS_FILE_EXTENT_REG ||
445 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
446 struct btrfs_key ins;
447 ins.objectid = btrfs_file_extent_disk_bytenr(
448 path->nodes[0], fi);
449 ins.offset = btrfs_file_extent_disk_num_bytes(
450 path->nodes[0], fi);
451 ins.type = BTRFS_EXTENT_ITEM_KEY;
452
453 /*
454 * is this extent already allocated in the extent
455 * allocation tree? If so, just add a reference
456 */
457 ret = btrfs_lookup_extent(root, ins.objectid,
458 ins.offset);
459 if (ret == 0) {
460 ret = btrfs_inc_extent_ref(trans, root,
461 ins.objectid, ins.offset,
462 path->nodes[0]->start,
463 root->root_key.objectid,
464 trans->transid, key->objectid);
465 } else {
466 /*
467 * insert the extent pointer in the extent
468 * allocation tree
469 */
470 ret = btrfs_alloc_logged_extent(trans, root,
471 path->nodes[0]->start,
472 root->root_key.objectid,
473 trans->transid, key->objectid,
474 &ins);
475 BUG_ON(ret);
476 }
477 }
478 }
479no_copy: 436no_copy:
480 btrfs_mark_buffer_dirty(path->nodes[0]); 437 btrfs_mark_buffer_dirty(path->nodes[0]);
481 btrfs_release_path(root, path); 438 btrfs_release_path(root, path);
@@ -530,6 +487,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
530 u64 extent_end; 487 u64 extent_end;
531 u64 alloc_hint; 488 u64 alloc_hint;
532 u64 start = key->offset; 489 u64 start = key->offset;
490 u64 saved_nbytes;
533 struct btrfs_file_extent_item *item; 491 struct btrfs_file_extent_item *item;
534 struct inode *inode = NULL; 492 struct inode *inode = NULL;
535 unsigned long size; 493 unsigned long size;
@@ -591,17 +549,95 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
591 } 549 }
592 btrfs_release_path(root, path); 550 btrfs_release_path(root, path);
593 551
552 saved_nbytes = inode_get_bytes(inode);
594 /* drop any overlapping extents */ 553 /* drop any overlapping extents */
595 ret = btrfs_drop_extents(trans, root, inode, 554 ret = btrfs_drop_extents(trans, root, inode,
596 start, extent_end, start, &alloc_hint); 555 start, extent_end, start, &alloc_hint);
597 BUG_ON(ret); 556 BUG_ON(ret);
598 557
599 /* insert the extent */ 558 if (found_type == BTRFS_FILE_EXTENT_REG ||
600 ret = overwrite_item(trans, root, path, eb, slot, key); 559 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
601 BUG_ON(ret); 560 unsigned long dest_offset;
561 struct btrfs_key ins;
562
563 ret = btrfs_insert_empty_item(trans, root, path, key,
564 sizeof(*item));
565 BUG_ON(ret);
566 dest_offset = btrfs_item_ptr_offset(path->nodes[0],
567 path->slots[0]);
568 copy_extent_buffer(path->nodes[0], eb, dest_offset,
569 (unsigned long)item, sizeof(*item));
570
571 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
572 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
573 ins.type = BTRFS_EXTENT_ITEM_KEY;
574
575 if (ins.objectid > 0) {
576 u64 csum_start;
577 u64 csum_end;
578 LIST_HEAD(ordered_sums);
579 /*
580 * is this extent already allocated in the extent
581 * allocation tree? If so, just add a reference
582 */
583 ret = btrfs_lookup_extent(root, ins.objectid,
584 ins.offset);
585 if (ret == 0) {
586 ret = btrfs_inc_extent_ref(trans, root,
587 ins.objectid, ins.offset,
588 path->nodes[0]->start,
589 root->root_key.objectid,
590 trans->transid, key->objectid);
591 } else {
592 /*
593 * insert the extent pointer in the extent
594 * allocation tree
595 */
596 ret = btrfs_alloc_logged_extent(trans, root,
597 path->nodes[0]->start,
598 root->root_key.objectid,
599 trans->transid, key->objectid,
600 &ins);
601 BUG_ON(ret);
602 }
603 btrfs_release_path(root, path);
604
605 if (btrfs_file_extent_compression(eb, item)) {
606 csum_start = ins.objectid;
607 csum_end = csum_start + ins.offset;
608 } else {
609 csum_start = ins.objectid +
610 btrfs_file_extent_offset(eb, item);
611 csum_end = csum_start +
612 btrfs_file_extent_num_bytes(eb, item);
613 }
614
615 ret = btrfs_lookup_csums_range(root->log_root,
616 csum_start, csum_end - 1,
617 &ordered_sums);
618 BUG_ON(ret);
619 while (!list_empty(&ordered_sums)) {
620 struct btrfs_ordered_sum *sums;
621 sums = list_entry(ordered_sums.next,
622 struct btrfs_ordered_sum,
623 list);
624 ret = btrfs_csum_file_blocks(trans,
625 root->fs_info->csum_root,
626 sums);
627 BUG_ON(ret);
628 list_del(&sums->list);
629 kfree(sums);
630 }
631 } else {
632 btrfs_release_path(root, path);
633 }
634 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
635 /* inline extents are easy, we just overwrite them */
636 ret = overwrite_item(trans, root, path, eb, slot, key);
637 BUG_ON(ret);
638 }
602 639
603 /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */ 640 inode_set_bytes(inode, saved_nbytes);
604 inode_add_bytes(inode, extent_end - start);
605 btrfs_update_inode(trans, root, inode); 641 btrfs_update_inode(trans, root, inode);
606out: 642out:
607 if (inode) 643 if (inode)
@@ -903,70 +939,6 @@ out_nowrite:
903} 939}
904 940
905/* 941/*
906 * replay one csum item from the log tree into the subvolume 'root'
907 * eb, slot and key all refer to the log tree
908 * path is for temp use by this function and should be released on return
909 *
910 * This copies the checksums out of the log tree and inserts them into
911 * the subvolume. Any existing checksums for this range in the file
912 * are overwritten, and new items are added where required.
913 *
914 * We keep this simple by reusing the btrfs_ordered_sum code from
915 * the data=ordered mode. This basically means making a copy
916 * of all the checksums in ram, which we have to do anyway for kmap
917 * rules.
918 *
919 * The copy is then sent down to btrfs_csum_file_blocks, which
920 * does all the hard work of finding existing items in the file
921 * or adding new ones.
922 */
923static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
924 struct btrfs_root *root,
925 struct btrfs_path *path,
926 struct extent_buffer *eb, int slot,
927 struct btrfs_key *key)
928{
929 int ret;
930 u32 item_size = btrfs_item_size_nr(eb, slot);
931 u64 cur_offset;
932 u16 csum_size =
933 btrfs_super_csum_size(&root->fs_info->super_copy);
934 unsigned long file_bytes;
935 struct btrfs_ordered_sum *sums;
936 struct btrfs_sector_sum *sector_sum;
937 unsigned long ptr;
938
939 file_bytes = (item_size / csum_size) * root->sectorsize;
940 sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
941 if (!sums)
942 return -ENOMEM;
943
944 INIT_LIST_HEAD(&sums->list);
945 sums->len = file_bytes;
946 sums->bytenr = key->offset;
947
948 /*
949 * copy all the sums into the ordered sum struct
950 */
951 sector_sum = sums->sums;
952 cur_offset = key->offset;
953 ptr = btrfs_item_ptr_offset(eb, slot);
954 while (item_size > 0) {
955 sector_sum->bytenr = cur_offset;
956 read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size);
957 sector_sum++;
958 item_size -= csum_size;
959 ptr += csum_size;
960 cur_offset += root->sectorsize;
961 }
962
963 /* let btrfs_csum_file_blocks add them into the file */
964 ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums);
965 BUG_ON(ret);
966 kfree(sums);
967 return 0;
968}
969/*
970 * There are a few corners where the link count of the file can't 942 * There are a few corners where the link count of the file can't
971 * be properly maintained during replay. So, instead of adding 943 * be properly maintained during replay. So, instead of adding
972 * lots of complexity to the log code, we just scan the backrefs 944 * lots of complexity to the log code, we just scan the backrefs
@@ -1659,10 +1631,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1659 ret = replay_one_extent(wc->trans, root, path, 1631 ret = replay_one_extent(wc->trans, root, path,
1660 eb, i, &key); 1632 eb, i, &key);
1661 BUG_ON(ret); 1633 BUG_ON(ret);
1662 } else if (key.type == BTRFS_EXTENT_CSUM_KEY) {
1663 ret = replay_one_csum(wc->trans, root, path,
1664 eb, i, &key);
1665 BUG_ON(ret);
1666 } else if (key.type == BTRFS_DIR_ITEM_KEY || 1634 } else if (key.type == BTRFS_DIR_ITEM_KEY ||
1667 key.type == BTRFS_DIR_INDEX_KEY) { 1635 key.type == BTRFS_DIR_INDEX_KEY) {
1668 ret = replay_one_dir_item(wc->trans, root, path, 1636 ret = replay_one_dir_item(wc->trans, root, path,
@@ -2021,7 +1989,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2021 .process_func = process_one_buffer 1989 .process_func = process_one_buffer
2022 }; 1990 };
2023 1991
2024 if (!root->log_root) 1992 if (!root->log_root || root->fs_info->log_root_recovering)
2025 return 0; 1993 return 0;
2026 1994
2027 log = root->log_root; 1995 log = root->log_root;
@@ -2453,86 +2421,6 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2453 return 0; 2421 return 0;
2454} 2422}
2455 2423
2456static noinline int copy_extent_csums(struct btrfs_trans_handle *trans,
2457 struct list_head *list,
2458 struct btrfs_root *root,
2459 u64 disk_bytenr, u64 len)
2460{
2461 struct btrfs_ordered_sum *sums;
2462 struct btrfs_sector_sum *sector_sum;
2463 int ret;
2464 struct btrfs_path *path;
2465 struct btrfs_csum_item *item = NULL;
2466 u64 end = disk_bytenr + len;
2467 u64 item_start_offset = 0;
2468 u64 item_last_offset = 0;
2469 u32 diff;
2470 u32 sum;
2471 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
2472
2473 sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
2474
2475 sector_sum = sums->sums;
2476 sums->bytenr = disk_bytenr;
2477 sums->len = len;
2478 list_add_tail(&sums->list, list);
2479
2480 path = btrfs_alloc_path();
2481 while (disk_bytenr < end) {
2482 if (!item || disk_bytenr < item_start_offset ||
2483 disk_bytenr >= item_last_offset) {
2484 struct btrfs_key found_key;
2485 u32 item_size;
2486
2487 if (item)
2488 btrfs_release_path(root, path);
2489 item = btrfs_lookup_csum(NULL, root, path,
2490 disk_bytenr, 0);
2491 if (IS_ERR(item)) {
2492 ret = PTR_ERR(item);
2493 if (ret == -ENOENT || ret == -EFBIG)
2494 ret = 0;
2495 sum = 0;
2496 printk(KERN_INFO "log no csum found for "
2497 "byte %llu\n",
2498 (unsigned long long)disk_bytenr);
2499 item = NULL;
2500 btrfs_release_path(root, path);
2501 goto found;
2502 }
2503 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2504 path->slots[0]);
2505
2506 item_start_offset = found_key.offset;
2507 item_size = btrfs_item_size_nr(path->nodes[0],
2508 path->slots[0]);
2509 item_last_offset = item_start_offset +
2510 (item_size / csum_size) *
2511 root->sectorsize;
2512 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2513 struct btrfs_csum_item);
2514 }
2515 /*
2516 * this byte range must be able to fit inside
2517 * a single leaf so it will also fit inside a u32
2518 */
2519 diff = disk_bytenr - item_start_offset;
2520 diff = diff / root->sectorsize;
2521 diff = diff * csum_size;
2522
2523 read_extent_buffer(path->nodes[0], &sum,
2524 ((unsigned long)item) + diff,
2525 csum_size);
2526found:
2527 sector_sum->bytenr = disk_bytenr;
2528 sector_sum->sum = sum;
2529 disk_bytenr += root->sectorsize;
2530 sector_sum++;
2531 }
2532 btrfs_free_path(path);
2533 return 0;
2534}
2535
2536static noinline int copy_items(struct btrfs_trans_handle *trans, 2424static noinline int copy_items(struct btrfs_trans_handle *trans,
2537 struct btrfs_root *log, 2425 struct btrfs_root *log,
2538 struct btrfs_path *dst_path, 2426 struct btrfs_path *dst_path,
@@ -2622,10 +2510,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2622 trans->transid, 2510 trans->transid,
2623 ins_keys[i].objectid); 2511 ins_keys[i].objectid);
2624 BUG_ON(ret); 2512 BUG_ON(ret);
2625 ret = copy_extent_csums(trans, 2513 ret = btrfs_lookup_csums_range(
2626 &ordered_sums, 2514 log->fs_info->csum_root,
2627 log->fs_info->csum_root, 2515 ds + cs, ds + cs + cl - 1,
2628 ds + cs, cl); 2516 &ordered_sums);
2629 BUG_ON(ret); 2517 BUG_ON(ret);
2630 } 2518 }
2631 } 2519 }
@@ -2942,9 +2830,9 @@ again:
2942 tmp_key.offset = (u64)-1; 2830 tmp_key.offset = (u64)-1;
2943 2831
2944 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 2832 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
2945
2946 BUG_ON(!wc.replay_dest); 2833 BUG_ON(!wc.replay_dest);
2947 2834
2835 wc.replay_dest->log_root = log;
2948 btrfs_record_root_in_trans(wc.replay_dest); 2836 btrfs_record_root_in_trans(wc.replay_dest);
2949 ret = walk_log_tree(trans, log, &wc); 2837 ret = walk_log_tree(trans, log, &wc);
2950 BUG_ON(ret); 2838 BUG_ON(ret);
@@ -2961,6 +2849,7 @@ again:
2961 } 2849 }
2962 2850
2963 key.offset = found_key.offset - 1; 2851 key.offset = found_key.offset - 1;
2852 wc.replay_dest->log_root = NULL;
2964 free_extent_buffer(log->node); 2853 free_extent_buffer(log->node);
2965 kfree(log); 2854 kfree(log);
2966 2855