diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
| -rw-r--r-- | fs/btrfs/tree-log.c | 889 |
1 files changed, 732 insertions, 157 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c86670f4f285..81e407d9677a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -18,13 +18,16 @@ | |||
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
| 21 | #include <linux/list_sort.h> | ||
| 21 | #include "ctree.h" | 22 | #include "ctree.h" |
| 22 | #include "transaction.h" | 23 | #include "transaction.h" |
| 23 | #include "disk-io.h" | 24 | #include "disk-io.h" |
| 24 | #include "locking.h" | 25 | #include "locking.h" |
| 25 | #include "print-tree.h" | 26 | #include "print-tree.h" |
| 27 | #include "backref.h" | ||
| 26 | #include "compat.h" | 28 | #include "compat.h" |
| 27 | #include "tree-log.h" | 29 | #include "tree-log.h" |
| 30 | #include "hash.h" | ||
| 28 | 31 | ||
| 29 | /* magic values for the inode_only field in btrfs_log_inode: | 32 | /* magic values for the inode_only field in btrfs_log_inode: |
| 30 | * | 33 | * |
| @@ -146,7 +149,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 146 | root->log_multiple_pids = true; | 149 | root->log_multiple_pids = true; |
| 147 | } | 150 | } |
| 148 | 151 | ||
| 149 | root->log_batch++; | 152 | atomic_inc(&root->log_batch); |
| 150 | atomic_inc(&root->log_writers); | 153 | atomic_inc(&root->log_writers); |
| 151 | mutex_unlock(&root->log_mutex); | 154 | mutex_unlock(&root->log_mutex); |
| 152 | return 0; | 155 | return 0; |
| @@ -165,7 +168,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 165 | err = ret; | 168 | err = ret; |
| 166 | } | 169 | } |
| 167 | mutex_unlock(&root->fs_info->tree_log_mutex); | 170 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 168 | root->log_batch++; | 171 | atomic_inc(&root->log_batch); |
| 169 | atomic_inc(&root->log_writers); | 172 | atomic_inc(&root->log_writers); |
| 170 | mutex_unlock(&root->log_mutex); | 173 | mutex_unlock(&root->log_mutex); |
| 171 | return err; | 174 | return err; |
| @@ -484,7 +487,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 484 | int found_type; | 487 | int found_type; |
| 485 | u64 mask = root->sectorsize - 1; | 488 | u64 mask = root->sectorsize - 1; |
| 486 | u64 extent_end; | 489 | u64 extent_end; |
| 487 | u64 alloc_hint; | ||
| 488 | u64 start = key->offset; | 490 | u64 start = key->offset; |
| 489 | u64 saved_nbytes; | 491 | u64 saved_nbytes; |
| 490 | struct btrfs_file_extent_item *item; | 492 | struct btrfs_file_extent_item *item; |
| @@ -550,8 +552,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 550 | 552 | ||
| 551 | saved_nbytes = inode_get_bytes(inode); | 553 | saved_nbytes = inode_get_bytes(inode); |
| 552 | /* drop any overlapping extents */ | 554 | /* drop any overlapping extents */ |
| 553 | ret = btrfs_drop_extents(trans, inode, start, extent_end, | 555 | ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); |
| 554 | &alloc_hint, 1); | ||
| 555 | BUG_ON(ret); | 556 | BUG_ON(ret); |
| 556 | 557 | ||
| 557 | if (found_type == BTRFS_FILE_EXTENT_REG || | 558 | if (found_type == BTRFS_FILE_EXTENT_REG || |
| @@ -744,6 +745,7 @@ out: | |||
| 744 | */ | 745 | */ |
| 745 | static noinline int backref_in_log(struct btrfs_root *log, | 746 | static noinline int backref_in_log(struct btrfs_root *log, |
| 746 | struct btrfs_key *key, | 747 | struct btrfs_key *key, |
| 748 | u64 ref_objectid, | ||
| 747 | char *name, int namelen) | 749 | char *name, int namelen) |
| 748 | { | 750 | { |
| 749 | struct btrfs_path *path; | 751 | struct btrfs_path *path; |
| @@ -764,8 +766,17 @@ static noinline int backref_in_log(struct btrfs_root *log, | |||
| 764 | if (ret != 0) | 766 | if (ret != 0) |
| 765 | goto out; | 767 | goto out; |
| 766 | 768 | ||
| 767 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
| 768 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | 769 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); |
| 770 | |||
| 771 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
| 772 | if (btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
| 773 | name, namelen, NULL)) | ||
| 774 | match = 1; | ||
| 775 | |||
| 776 | goto out; | ||
| 777 | } | ||
| 778 | |||
| 779 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
| 769 | ptr_end = ptr + item_size; | 780 | ptr_end = ptr + item_size; |
| 770 | while (ptr < ptr_end) { | 781 | while (ptr < ptr_end) { |
| 771 | ref = (struct btrfs_inode_ref *)ptr; | 782 | ref = (struct btrfs_inode_ref *)ptr; |
| @@ -786,91 +797,42 @@ out: | |||
| 786 | return match; | 797 | return match; |
| 787 | } | 798 | } |
| 788 | 799 | ||
| 789 | 800 | static inline int __add_inode_ref(struct btrfs_trans_handle *trans, | |
| 790 | /* | ||
| 791 | * replay one inode back reference item found in the log tree. | ||
| 792 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
| 793 | * root is the destination we are replaying into, and path is for temp | ||
| 794 | * use by this function. (it should be released on return). | ||
| 795 | */ | ||
| 796 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
| 797 | struct btrfs_root *root, | 801 | struct btrfs_root *root, |
| 798 | struct btrfs_root *log, | ||
| 799 | struct btrfs_path *path, | 802 | struct btrfs_path *path, |
| 800 | struct extent_buffer *eb, int slot, | 803 | struct btrfs_root *log_root, |
| 801 | struct btrfs_key *key) | 804 | struct inode *dir, struct inode *inode, |
| 805 | struct extent_buffer *eb, | ||
| 806 | u64 inode_objectid, u64 parent_objectid, | ||
| 807 | u64 ref_index, char *name, int namelen, | ||
| 808 | int *search_done) | ||
| 802 | { | 809 | { |
| 803 | struct btrfs_inode_ref *ref; | ||
| 804 | struct btrfs_dir_item *di; | ||
| 805 | struct inode *dir; | ||
| 806 | struct inode *inode; | ||
| 807 | unsigned long ref_ptr; | ||
| 808 | unsigned long ref_end; | ||
| 809 | char *name; | ||
| 810 | int namelen; | ||
| 811 | int ret; | 810 | int ret; |
| 812 | int search_done = 0; | 811 | char *victim_name; |
| 813 | 812 | int victim_name_len; | |
| 814 | /* | 813 | struct extent_buffer *leaf; |
| 815 | * it is possible that we didn't log all the parent directories | 814 | struct btrfs_dir_item *di; |
| 816 | * for a given inode. If we don't find the dir, just don't | 815 | struct btrfs_key search_key; |
| 817 | * copy the back ref in. The link count fixup code will take | 816 | struct btrfs_inode_extref *extref; |
| 818 | * care of the rest | ||
| 819 | */ | ||
| 820 | dir = read_one_inode(root, key->offset); | ||
| 821 | if (!dir) | ||
| 822 | return -ENOENT; | ||
| 823 | |||
| 824 | inode = read_one_inode(root, key->objectid); | ||
| 825 | if (!inode) { | ||
| 826 | iput(dir); | ||
| 827 | return -EIO; | ||
| 828 | } | ||
| 829 | |||
| 830 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 831 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
| 832 | 817 | ||
| 833 | again: | 818 | again: |
| 834 | ref = (struct btrfs_inode_ref *)ref_ptr; | 819 | /* Search old style refs */ |
| 835 | 820 | search_key.objectid = inode_objectid; | |
| 836 | namelen = btrfs_inode_ref_name_len(eb, ref); | 821 | search_key.type = BTRFS_INODE_REF_KEY; |
| 837 | name = kmalloc(namelen, GFP_NOFS); | 822 | search_key.offset = parent_objectid; |
| 838 | BUG_ON(!name); | 823 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
| 839 | |||
| 840 | read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); | ||
| 841 | |||
| 842 | /* if we already have a perfect match, we're done */ | ||
| 843 | if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
| 844 | btrfs_inode_ref_index(eb, ref), | ||
| 845 | name, namelen)) { | ||
| 846 | goto out; | ||
| 847 | } | ||
| 848 | |||
| 849 | /* | ||
| 850 | * look for a conflicting back reference in the metadata. | ||
| 851 | * if we find one we have to unlink that name of the file | ||
| 852 | * before we add our new link. Later on, we overwrite any | ||
| 853 | * existing back reference, and we don't want to create | ||
| 854 | * dangling pointers in the directory. | ||
| 855 | */ | ||
| 856 | |||
| 857 | if (search_done) | ||
| 858 | goto insert; | ||
| 859 | |||
| 860 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | ||
| 861 | if (ret == 0) { | 824 | if (ret == 0) { |
| 862 | char *victim_name; | ||
| 863 | int victim_name_len; | ||
| 864 | struct btrfs_inode_ref *victim_ref; | 825 | struct btrfs_inode_ref *victim_ref; |
| 865 | unsigned long ptr; | 826 | unsigned long ptr; |
| 866 | unsigned long ptr_end; | 827 | unsigned long ptr_end; |
| 867 | struct extent_buffer *leaf = path->nodes[0]; | 828 | |
| 829 | leaf = path->nodes[0]; | ||
| 868 | 830 | ||
| 869 | /* are we trying to overwrite a back ref for the root directory | 831 | /* are we trying to overwrite a back ref for the root directory |
| 870 | * if so, just jump out, we're done | 832 | * if so, just jump out, we're done |
| 871 | */ | 833 | */ |
| 872 | if (key->objectid == key->offset) | 834 | if (search_key.objectid == search_key.offset) |
| 873 | goto out_nowrite; | 835 | return 1; |
| 874 | 836 | ||
| 875 | /* check all the names in this back reference to see | 837 | /* check all the names in this back reference to see |
| 876 | * if they are in the log. if so, we allow them to stay | 838 | * if they are in the log. if so, we allow them to stay |
| @@ -889,7 +851,9 @@ again: | |||
| 889 | (unsigned long)(victim_ref + 1), | 851 | (unsigned long)(victim_ref + 1), |
| 890 | victim_name_len); | 852 | victim_name_len); |
| 891 | 853 | ||
| 892 | if (!backref_in_log(log, key, victim_name, | 854 | if (!backref_in_log(log_root, &search_key, |
| 855 | parent_objectid, | ||
| 856 | victim_name, | ||
| 893 | victim_name_len)) { | 857 | victim_name_len)) { |
| 894 | btrfs_inc_nlink(inode); | 858 | btrfs_inc_nlink(inode); |
| 895 | btrfs_release_path(path); | 859 | btrfs_release_path(path); |
| @@ -897,9 +861,14 @@ again: | |||
| 897 | ret = btrfs_unlink_inode(trans, root, dir, | 861 | ret = btrfs_unlink_inode(trans, root, dir, |
| 898 | inode, victim_name, | 862 | inode, victim_name, |
| 899 | victim_name_len); | 863 | victim_name_len); |
| 864 | BUG_ON(ret); | ||
| 900 | btrfs_run_delayed_items(trans, root); | 865 | btrfs_run_delayed_items(trans, root); |
| 866 | kfree(victim_name); | ||
| 867 | *search_done = 1; | ||
| 868 | goto again; | ||
| 901 | } | 869 | } |
| 902 | kfree(victim_name); | 870 | kfree(victim_name); |
| 871 | |||
| 903 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 872 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
| 904 | } | 873 | } |
| 905 | BUG_ON(ret); | 874 | BUG_ON(ret); |
| @@ -908,14 +877,78 @@ again: | |||
| 908 | * NOTE: we have searched root tree and checked the | 877 | * NOTE: we have searched root tree and checked the |
| 909 | * corresponding ref, it does not need to check again. | 878 | * corresponding ref, it does not need to check again. |
| 910 | */ | 879 | */ |
| 911 | search_done = 1; | 880 | *search_done = 1; |
| 881 | } | ||
| 882 | btrfs_release_path(path); | ||
| 883 | |||
| 884 | /* Same search but for extended refs */ | ||
| 885 | extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, | ||
| 886 | inode_objectid, parent_objectid, 0, | ||
| 887 | 0); | ||
| 888 | if (!IS_ERR_OR_NULL(extref)) { | ||
| 889 | u32 item_size; | ||
| 890 | u32 cur_offset = 0; | ||
| 891 | unsigned long base; | ||
| 892 | struct inode *victim_parent; | ||
| 893 | |||
| 894 | leaf = path->nodes[0]; | ||
| 895 | |||
| 896 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 897 | base = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 898 | |||
| 899 | while (cur_offset < item_size) { | ||
| 900 | extref = (struct btrfs_inode_extref *)base + cur_offset; | ||
| 901 | |||
| 902 | victim_name_len = btrfs_inode_extref_name_len(leaf, extref); | ||
| 903 | |||
| 904 | if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) | ||
| 905 | goto next; | ||
| 906 | |||
| 907 | victim_name = kmalloc(victim_name_len, GFP_NOFS); | ||
| 908 | read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, | ||
| 909 | victim_name_len); | ||
| 910 | |||
| 911 | search_key.objectid = inode_objectid; | ||
| 912 | search_key.type = BTRFS_INODE_EXTREF_KEY; | ||
| 913 | search_key.offset = btrfs_extref_hash(parent_objectid, | ||
| 914 | victim_name, | ||
| 915 | victim_name_len); | ||
| 916 | ret = 0; | ||
| 917 | if (!backref_in_log(log_root, &search_key, | ||
| 918 | parent_objectid, victim_name, | ||
| 919 | victim_name_len)) { | ||
| 920 | ret = -ENOENT; | ||
| 921 | victim_parent = read_one_inode(root, | ||
| 922 | parent_objectid); | ||
| 923 | if (victim_parent) { | ||
| 924 | btrfs_inc_nlink(inode); | ||
| 925 | btrfs_release_path(path); | ||
| 926 | |||
| 927 | ret = btrfs_unlink_inode(trans, root, | ||
| 928 | victim_parent, | ||
| 929 | inode, | ||
| 930 | victim_name, | ||
| 931 | victim_name_len); | ||
| 932 | btrfs_run_delayed_items(trans, root); | ||
| 933 | } | ||
| 934 | BUG_ON(ret); | ||
| 935 | iput(victim_parent); | ||
| 936 | kfree(victim_name); | ||
| 937 | *search_done = 1; | ||
| 938 | goto again; | ||
| 939 | } | ||
| 940 | kfree(victim_name); | ||
| 941 | BUG_ON(ret); | ||
| 942 | next: | ||
| 943 | cur_offset += victim_name_len + sizeof(*extref); | ||
| 944 | } | ||
| 945 | *search_done = 1; | ||
| 912 | } | 946 | } |
| 913 | btrfs_release_path(path); | 947 | btrfs_release_path(path); |
| 914 | 948 | ||
| 915 | /* look for a conflicting sequence number */ | 949 | /* look for a conflicting sequence number */ |
| 916 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | 950 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), |
| 917 | btrfs_inode_ref_index(eb, ref), | 951 | ref_index, name, namelen, 0); |
| 918 | name, namelen, 0); | ||
| 919 | if (di && !IS_ERR(di)) { | 952 | if (di && !IS_ERR(di)) { |
| 920 | ret = drop_one_dir_item(trans, root, path, dir, di); | 953 | ret = drop_one_dir_item(trans, root, path, dir, di); |
| 921 | BUG_ON(ret); | 954 | BUG_ON(ret); |
| @@ -931,25 +964,173 @@ again: | |||
| 931 | } | 964 | } |
| 932 | btrfs_release_path(path); | 965 | btrfs_release_path(path); |
| 933 | 966 | ||
| 934 | insert: | 967 | return 0; |
| 935 | /* insert our name */ | 968 | } |
| 936 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | ||
| 937 | btrfs_inode_ref_index(eb, ref)); | ||
| 938 | BUG_ON(ret); | ||
| 939 | 969 | ||
| 940 | btrfs_update_inode(trans, root, inode); | 970 | static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, |
| 971 | u32 *namelen, char **name, u64 *index, | ||
| 972 | u64 *parent_objectid) | ||
| 973 | { | ||
| 974 | struct btrfs_inode_extref *extref; | ||
| 941 | 975 | ||
| 942 | out: | 976 | extref = (struct btrfs_inode_extref *)ref_ptr; |
| 943 | ref_ptr = (unsigned long)(ref + 1) + namelen; | 977 | |
| 944 | kfree(name); | 978 | *namelen = btrfs_inode_extref_name_len(eb, extref); |
| 945 | if (ref_ptr < ref_end) | 979 | *name = kmalloc(*namelen, GFP_NOFS); |
| 946 | goto again; | 980 | if (*name == NULL) |
| 981 | return -ENOMEM; | ||
| 982 | |||
| 983 | read_extent_buffer(eb, *name, (unsigned long)&extref->name, | ||
| 984 | *namelen); | ||
| 985 | |||
| 986 | *index = btrfs_inode_extref_index(eb, extref); | ||
| 987 | if (parent_objectid) | ||
| 988 | *parent_objectid = btrfs_inode_extref_parent(eb, extref); | ||
| 989 | |||
| 990 | return 0; | ||
| 991 | } | ||
| 992 | |||
| 993 | static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, | ||
| 994 | u32 *namelen, char **name, u64 *index) | ||
| 995 | { | ||
| 996 | struct btrfs_inode_ref *ref; | ||
| 997 | |||
| 998 | ref = (struct btrfs_inode_ref *)ref_ptr; | ||
| 999 | |||
| 1000 | *namelen = btrfs_inode_ref_name_len(eb, ref); | ||
| 1001 | *name = kmalloc(*namelen, GFP_NOFS); | ||
| 1002 | if (*name == NULL) | ||
| 1003 | return -ENOMEM; | ||
| 1004 | |||
| 1005 | read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); | ||
| 1006 | |||
| 1007 | *index = btrfs_inode_ref_index(eb, ref); | ||
| 1008 | |||
| 1009 | return 0; | ||
| 1010 | } | ||
| 1011 | |||
| 1012 | /* | ||
| 1013 | * replay one inode back reference item found in the log tree. | ||
| 1014 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
| 1015 | * root is the destination we are replaying into, and path is for temp | ||
| 1016 | * use by this function. (it should be released on return). | ||
| 1017 | */ | ||
| 1018 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
| 1019 | struct btrfs_root *root, | ||
| 1020 | struct btrfs_root *log, | ||
| 1021 | struct btrfs_path *path, | ||
| 1022 | struct extent_buffer *eb, int slot, | ||
| 1023 | struct btrfs_key *key) | ||
| 1024 | { | ||
| 1025 | struct inode *dir; | ||
| 1026 | struct inode *inode; | ||
| 1027 | unsigned long ref_ptr; | ||
| 1028 | unsigned long ref_end; | ||
| 1029 | char *name; | ||
| 1030 | int namelen; | ||
| 1031 | int ret; | ||
| 1032 | int search_done = 0; | ||
| 1033 | int log_ref_ver = 0; | ||
| 1034 | u64 parent_objectid; | ||
| 1035 | u64 inode_objectid; | ||
| 1036 | u64 ref_index = 0; | ||
| 1037 | int ref_struct_size; | ||
| 1038 | |||
| 1039 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 1040 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
| 1041 | |||
| 1042 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
| 1043 | struct btrfs_inode_extref *r; | ||
| 1044 | |||
| 1045 | ref_struct_size = sizeof(struct btrfs_inode_extref); | ||
| 1046 | log_ref_ver = 1; | ||
| 1047 | r = (struct btrfs_inode_extref *)ref_ptr; | ||
| 1048 | parent_objectid = btrfs_inode_extref_parent(eb, r); | ||
| 1049 | } else { | ||
| 1050 | ref_struct_size = sizeof(struct btrfs_inode_ref); | ||
| 1051 | parent_objectid = key->offset; | ||
| 1052 | } | ||
| 1053 | inode_objectid = key->objectid; | ||
| 1054 | |||
| 1055 | /* | ||
| 1056 | * it is possible that we didn't log all the parent directories | ||
| 1057 | * for a given inode. If we don't find the dir, just don't | ||
| 1058 | * copy the back ref in. The link count fixup code will take | ||
| 1059 | * care of the rest | ||
| 1060 | */ | ||
| 1061 | dir = read_one_inode(root, parent_objectid); | ||
| 1062 | if (!dir) | ||
| 1063 | return -ENOENT; | ||
| 1064 | |||
| 1065 | inode = read_one_inode(root, inode_objectid); | ||
| 1066 | if (!inode) { | ||
| 1067 | iput(dir); | ||
| 1068 | return -EIO; | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | while (ref_ptr < ref_end) { | ||
| 1072 | if (log_ref_ver) { | ||
| 1073 | ret = extref_get_fields(eb, ref_ptr, &namelen, &name, | ||
| 1074 | &ref_index, &parent_objectid); | ||
| 1075 | /* | ||
| 1076 | * parent object can change from one array | ||
| 1077 | * item to another. | ||
| 1078 | */ | ||
| 1079 | if (!dir) | ||
| 1080 | dir = read_one_inode(root, parent_objectid); | ||
| 1081 | if (!dir) | ||
| 1082 | return -ENOENT; | ||
| 1083 | } else { | ||
| 1084 | ret = ref_get_fields(eb, ref_ptr, &namelen, &name, | ||
| 1085 | &ref_index); | ||
| 1086 | } | ||
| 1087 | if (ret) | ||
| 1088 | return ret; | ||
| 1089 | |||
| 1090 | /* if we already have a perfect match, we're done */ | ||
| 1091 | if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
| 1092 | ref_index, name, namelen)) { | ||
| 1093 | /* | ||
| 1094 | * look for a conflicting back reference in the | ||
| 1095 | * metadata. if we find one we have to unlink that name | ||
| 1096 | * of the file before we add our new link. Later on, we | ||
| 1097 | * overwrite any existing back reference, and we don't | ||
| 1098 | * want to create dangling pointers in the directory. | ||
| 1099 | */ | ||
| 1100 | |||
| 1101 | if (!search_done) { | ||
| 1102 | ret = __add_inode_ref(trans, root, path, log, | ||
| 1103 | dir, inode, eb, | ||
| 1104 | inode_objectid, | ||
| 1105 | parent_objectid, | ||
| 1106 | ref_index, name, namelen, | ||
| 1107 | &search_done); | ||
| 1108 | if (ret == 1) | ||
| 1109 | goto out; | ||
| 1110 | BUG_ON(ret); | ||
| 1111 | } | ||
| 1112 | |||
| 1113 | /* insert our name */ | ||
| 1114 | ret = btrfs_add_link(trans, dir, inode, name, namelen, | ||
| 1115 | 0, ref_index); | ||
| 1116 | BUG_ON(ret); | ||
| 1117 | |||
| 1118 | btrfs_update_inode(trans, root, inode); | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; | ||
| 1122 | kfree(name); | ||
| 1123 | if (log_ref_ver) { | ||
| 1124 | iput(dir); | ||
| 1125 | dir = NULL; | ||
| 1126 | } | ||
| 1127 | } | ||
| 947 | 1128 | ||
| 948 | /* finally write the back reference in the inode */ | 1129 | /* finally write the back reference in the inode */ |
| 949 | ret = overwrite_item(trans, root, path, eb, slot, key); | 1130 | ret = overwrite_item(trans, root, path, eb, slot, key); |
| 950 | BUG_ON(ret); | 1131 | BUG_ON(ret); |
| 951 | 1132 | ||
| 952 | out_nowrite: | 1133 | out: |
| 953 | btrfs_release_path(path); | 1134 | btrfs_release_path(path); |
| 954 | iput(dir); | 1135 | iput(dir); |
| 955 | iput(inode); | 1136 | iput(inode); |
| @@ -966,25 +1147,55 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans, | |||
| 966 | return ret; | 1147 | return ret; |
| 967 | } | 1148 | } |
| 968 | 1149 | ||
| 1150 | static int count_inode_extrefs(struct btrfs_root *root, | ||
| 1151 | struct inode *inode, struct btrfs_path *path) | ||
| 1152 | { | ||
| 1153 | int ret = 0; | ||
| 1154 | int name_len; | ||
| 1155 | unsigned int nlink = 0; | ||
| 1156 | u32 item_size; | ||
| 1157 | u32 cur_offset = 0; | ||
| 1158 | u64 inode_objectid = btrfs_ino(inode); | ||
| 1159 | u64 offset = 0; | ||
| 1160 | unsigned long ptr; | ||
| 1161 | struct btrfs_inode_extref *extref; | ||
| 1162 | struct extent_buffer *leaf; | ||
| 969 | 1163 | ||
| 970 | /* | 1164 | while (1) { |
| 971 | * There are a few corners where the link count of the file can't | 1165 | ret = btrfs_find_one_extref(root, inode_objectid, offset, path, |
| 972 | * be properly maintained during replay. So, instead of adding | 1166 | &extref, &offset); |
| 973 | * lots of complexity to the log code, we just scan the backrefs | 1167 | if (ret) |
| 974 | * for any file that has been through replay. | 1168 | break; |
| 975 | * | 1169 | |
| 976 | * The scan will update the link count on the inode to reflect the | 1170 | leaf = path->nodes[0]; |
| 977 | * number of back refs found. If it goes down to zero, the iput | 1171 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
| 978 | * will free the inode. | 1172 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); |
| 979 | */ | 1173 | |
| 980 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | 1174 | while (cur_offset < item_size) { |
| 981 | struct btrfs_root *root, | 1175 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); |
| 982 | struct inode *inode) | 1176 | name_len = btrfs_inode_extref_name_len(leaf, extref); |
| 1177 | |||
| 1178 | nlink++; | ||
| 1179 | |||
| 1180 | cur_offset += name_len + sizeof(*extref); | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | offset++; | ||
| 1184 | btrfs_release_path(path); | ||
| 1185 | } | ||
| 1186 | btrfs_release_path(path); | ||
| 1187 | |||
| 1188 | if (ret < 0) | ||
| 1189 | return ret; | ||
| 1190 | return nlink; | ||
| 1191 | } | ||
| 1192 | |||
| 1193 | static int count_inode_refs(struct btrfs_root *root, | ||
| 1194 | struct inode *inode, struct btrfs_path *path) | ||
| 983 | { | 1195 | { |
| 984 | struct btrfs_path *path; | ||
| 985 | int ret; | 1196 | int ret; |
| 986 | struct btrfs_key key; | 1197 | struct btrfs_key key; |
| 987 | u64 nlink = 0; | 1198 | unsigned int nlink = 0; |
| 988 | unsigned long ptr; | 1199 | unsigned long ptr; |
| 989 | unsigned long ptr_end; | 1200 | unsigned long ptr_end; |
| 990 | int name_len; | 1201 | int name_len; |
| @@ -994,10 +1205,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 994 | key.type = BTRFS_INODE_REF_KEY; | 1205 | key.type = BTRFS_INODE_REF_KEY; |
| 995 | key.offset = (u64)-1; | 1206 | key.offset = (u64)-1; |
| 996 | 1207 | ||
| 997 | path = btrfs_alloc_path(); | ||
| 998 | if (!path) | ||
| 999 | return -ENOMEM; | ||
| 1000 | |||
| 1001 | while (1) { | 1208 | while (1) { |
| 1002 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 1209 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 1003 | if (ret < 0) | 1210 | if (ret < 0) |
| @@ -1031,6 +1238,50 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 1031 | btrfs_release_path(path); | 1238 | btrfs_release_path(path); |
| 1032 | } | 1239 | } |
| 1033 | btrfs_release_path(path); | 1240 | btrfs_release_path(path); |
| 1241 | |||
| 1242 | return nlink; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | /* | ||
| 1246 | * There are a few corners where the link count of the file can't | ||
| 1247 | * be properly maintained during replay. So, instead of adding | ||
| 1248 | * lots of complexity to the log code, we just scan the backrefs | ||
| 1249 | * for any file that has been through replay. | ||
| 1250 | * | ||
| 1251 | * The scan will update the link count on the inode to reflect the | ||
| 1252 | * number of back refs found. If it goes down to zero, the iput | ||
| 1253 | * will free the inode. | ||
| 1254 | */ | ||
| 1255 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | ||
| 1256 | struct btrfs_root *root, | ||
| 1257 | struct inode *inode) | ||
| 1258 | { | ||
| 1259 | struct btrfs_path *path; | ||
| 1260 | int ret; | ||
| 1261 | u64 nlink = 0; | ||
| 1262 | u64 ino = btrfs_ino(inode); | ||
| 1263 | |||
| 1264 | path = btrfs_alloc_path(); | ||
| 1265 | if (!path) | ||
| 1266 | return -ENOMEM; | ||
| 1267 | |||
| 1268 | ret = count_inode_refs(root, inode, path); | ||
| 1269 | if (ret < 0) | ||
| 1270 | goto out; | ||
| 1271 | |||
| 1272 | nlink = ret; | ||
| 1273 | |||
| 1274 | ret = count_inode_extrefs(root, inode, path); | ||
| 1275 | if (ret == -ENOENT) | ||
| 1276 | ret = 0; | ||
| 1277 | |||
| 1278 | if (ret < 0) | ||
| 1279 | goto out; | ||
| 1280 | |||
| 1281 | nlink += ret; | ||
| 1282 | |||
| 1283 | ret = 0; | ||
| 1284 | |||
| 1034 | if (nlink != inode->i_nlink) { | 1285 | if (nlink != inode->i_nlink) { |
| 1035 | set_nlink(inode, nlink); | 1286 | set_nlink(inode, nlink); |
| 1036 | btrfs_update_inode(trans, root, inode); | 1287 | btrfs_update_inode(trans, root, inode); |
| @@ -1046,9 +1297,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 1046 | ret = insert_orphan_item(trans, root, ino); | 1297 | ret = insert_orphan_item(trans, root, ino); |
| 1047 | BUG_ON(ret); | 1298 | BUG_ON(ret); |
| 1048 | } | 1299 | } |
| 1049 | btrfs_free_path(path); | ||
| 1050 | 1300 | ||
| 1051 | return 0; | 1301 | out: |
| 1302 | btrfs_free_path(path); | ||
| 1303 | return ret; | ||
| 1052 | } | 1304 | } |
| 1053 | 1305 | ||
| 1054 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | 1306 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, |
| @@ -1695,6 +1947,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
| 1695 | ret = add_inode_ref(wc->trans, root, log, path, | 1947 | ret = add_inode_ref(wc->trans, root, log, path, |
| 1696 | eb, i, &key); | 1948 | eb, i, &key); |
| 1697 | BUG_ON(ret && ret != -ENOENT); | 1949 | BUG_ON(ret && ret != -ENOENT); |
| 1950 | } else if (key.type == BTRFS_INODE_EXTREF_KEY) { | ||
| 1951 | ret = add_inode_ref(wc->trans, root, log, path, | ||
| 1952 | eb, i, &key); | ||
| 1953 | BUG_ON(ret && ret != -ENOENT); | ||
| 1698 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { | 1954 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { |
| 1699 | ret = replay_one_extent(wc->trans, root, path, | 1955 | ret = replay_one_extent(wc->trans, root, path, |
| 1700 | eb, i, &key); | 1956 | eb, i, &key); |
| @@ -2037,7 +2293,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2037 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2293 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
| 2038 | wait_log_commit(trans, root, root->log_transid - 1); | 2294 | wait_log_commit(trans, root, root->log_transid - 1); |
| 2039 | while (1) { | 2295 | while (1) { |
| 2040 | unsigned long batch = root->log_batch; | 2296 | int batch = atomic_read(&root->log_batch); |
| 2041 | /* when we're on an ssd, just kick the log commit out */ | 2297 | /* when we're on an ssd, just kick the log commit out */ |
| 2042 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | 2298 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { |
| 2043 | mutex_unlock(&root->log_mutex); | 2299 | mutex_unlock(&root->log_mutex); |
| @@ -2045,7 +2301,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2045 | mutex_lock(&root->log_mutex); | 2301 | mutex_lock(&root->log_mutex); |
| 2046 | } | 2302 | } |
| 2047 | wait_for_writer(trans, root); | 2303 | wait_for_writer(trans, root); |
| 2048 | if (batch == root->log_batch) | 2304 | if (batch == atomic_read(&root->log_batch)) |
| 2049 | break; | 2305 | break; |
| 2050 | } | 2306 | } |
| 2051 | 2307 | ||
| @@ -2074,7 +2330,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2074 | 2330 | ||
| 2075 | btrfs_set_root_node(&log->root_item, log->node); | 2331 | btrfs_set_root_node(&log->root_item, log->node); |
| 2076 | 2332 | ||
| 2077 | root->log_batch = 0; | ||
| 2078 | root->log_transid++; | 2333 | root->log_transid++; |
| 2079 | log->log_transid = root->log_transid; | 2334 | log->log_transid = root->log_transid; |
| 2080 | root->log_start_pid = 0; | 2335 | root->log_start_pid = 0; |
| @@ -2087,7 +2342,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2087 | mutex_unlock(&root->log_mutex); | 2342 | mutex_unlock(&root->log_mutex); |
| 2088 | 2343 | ||
| 2089 | mutex_lock(&log_root_tree->log_mutex); | 2344 | mutex_lock(&log_root_tree->log_mutex); |
| 2090 | log_root_tree->log_batch++; | 2345 | atomic_inc(&log_root_tree->log_batch); |
| 2091 | atomic_inc(&log_root_tree->log_writers); | 2346 | atomic_inc(&log_root_tree->log_writers); |
| 2092 | mutex_unlock(&log_root_tree->log_mutex); | 2347 | mutex_unlock(&log_root_tree->log_mutex); |
| 2093 | 2348 | ||
| @@ -2157,7 +2412,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2157 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, | 2412 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, |
| 2158 | btrfs_header_level(log_root_tree->node)); | 2413 | btrfs_header_level(log_root_tree->node)); |
| 2159 | 2414 | ||
| 2160 | log_root_tree->log_batch = 0; | ||
| 2161 | log_root_tree->log_transid++; | 2415 | log_root_tree->log_transid++; |
| 2162 | smp_mb(); | 2416 | smp_mb(); |
| 2163 | 2417 | ||
| @@ -2171,9 +2425,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2171 | * in and cause problems either. | 2425 | * in and cause problems either. |
| 2172 | */ | 2426 | */ |
| 2173 | btrfs_scrub_pause_super(root); | 2427 | btrfs_scrub_pause_super(root); |
| 2174 | write_ctree_super(trans, root->fs_info->tree_root, 1); | 2428 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
| 2175 | btrfs_scrub_continue_super(root); | 2429 | btrfs_scrub_continue_super(root); |
| 2176 | ret = 0; | 2430 | if (ret) { |
| 2431 | btrfs_abort_transaction(trans, root, ret); | ||
| 2432 | goto out_wake_log_root; | ||
| 2433 | } | ||
| 2177 | 2434 | ||
| 2178 | mutex_lock(&root->log_mutex); | 2435 | mutex_lock(&root->log_mutex); |
| 2179 | if (root->last_log_commit < log_transid) | 2436 | if (root->last_log_commit < log_transid) |
| @@ -2209,7 +2466,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans, | |||
| 2209 | 2466 | ||
| 2210 | while (1) { | 2467 | while (1) { |
| 2211 | ret = find_first_extent_bit(&log->dirty_log_pages, | 2468 | ret = find_first_extent_bit(&log->dirty_log_pages, |
| 2212 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | 2469 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, |
| 2470 | NULL); | ||
| 2213 | if (ret) | 2471 | if (ret) |
| 2214 | break; | 2472 | break; |
| 2215 | 2473 | ||
| @@ -2646,6 +2904,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2646 | int ret; | 2904 | int ret; |
| 2647 | struct btrfs_key key; | 2905 | struct btrfs_key key; |
| 2648 | struct btrfs_key found_key; | 2906 | struct btrfs_key found_key; |
| 2907 | int start_slot; | ||
| 2649 | 2908 | ||
| 2650 | key.objectid = objectid; | 2909 | key.objectid = objectid; |
| 2651 | key.type = max_key_type; | 2910 | key.type = max_key_type; |
| @@ -2667,8 +2926,18 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2667 | if (found_key.objectid != objectid) | 2926 | if (found_key.objectid != objectid) |
| 2668 | break; | 2927 | break; |
| 2669 | 2928 | ||
| 2670 | ret = btrfs_del_item(trans, log, path); | 2929 | found_key.offset = 0; |
| 2671 | if (ret) | 2930 | found_key.type = 0; |
| 2931 | ret = btrfs_bin_search(path->nodes[0], &found_key, 0, | ||
| 2932 | &start_slot); | ||
| 2933 | |||
| 2934 | ret = btrfs_del_items(trans, log, path, start_slot, | ||
| 2935 | path->slots[0] - start_slot + 1); | ||
| 2936 | /* | ||
| 2937 | * If start slot isn't 0 then we don't need to re-search, we've | ||
| 2938 | * found the last guy with the objectid in this tree. | ||
| 2939 | */ | ||
| 2940 | if (ret || start_slot != 0) | ||
| 2672 | break; | 2941 | break; |
| 2673 | btrfs_release_path(path); | 2942 | btrfs_release_path(path); |
| 2674 | } | 2943 | } |
| @@ -2678,14 +2947,64 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2678 | return ret; | 2947 | return ret; |
| 2679 | } | 2948 | } |
| 2680 | 2949 | ||
| 2950 | static void fill_inode_item(struct btrfs_trans_handle *trans, | ||
| 2951 | struct extent_buffer *leaf, | ||
| 2952 | struct btrfs_inode_item *item, | ||
| 2953 | struct inode *inode, int log_inode_only) | ||
| 2954 | { | ||
| 2955 | btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); | ||
| 2956 | btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); | ||
| 2957 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | ||
| 2958 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | ||
| 2959 | |||
| 2960 | btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), | ||
| 2961 | inode->i_atime.tv_sec); | ||
| 2962 | btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), | ||
| 2963 | inode->i_atime.tv_nsec); | ||
| 2964 | |||
| 2965 | btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), | ||
| 2966 | inode->i_mtime.tv_sec); | ||
| 2967 | btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), | ||
| 2968 | inode->i_mtime.tv_nsec); | ||
| 2969 | |||
| 2970 | btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), | ||
| 2971 | inode->i_ctime.tv_sec); | ||
| 2972 | btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), | ||
| 2973 | inode->i_ctime.tv_nsec); | ||
| 2974 | |||
| 2975 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | ||
| 2976 | |||
| 2977 | btrfs_set_inode_sequence(leaf, item, inode->i_version); | ||
| 2978 | btrfs_set_inode_transid(leaf, item, trans->transid); | ||
| 2979 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | ||
| 2980 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | ||
| 2981 | btrfs_set_inode_block_group(leaf, item, 0); | ||
| 2982 | |||
| 2983 | if (log_inode_only) { | ||
| 2984 | /* set the generation to zero so the recover code | ||
| 2985 | * can tell the difference between an logging | ||
| 2986 | * just to say 'this inode exists' and a logging | ||
| 2987 | * to say 'update this inode with these values' | ||
| 2988 | */ | ||
| 2989 | btrfs_set_inode_generation(leaf, item, 0); | ||
| 2990 | btrfs_set_inode_size(leaf, item, 0); | ||
| 2991 | } else { | ||
| 2992 | btrfs_set_inode_generation(leaf, item, | ||
| 2993 | BTRFS_I(inode)->generation); | ||
| 2994 | btrfs_set_inode_size(leaf, item, inode->i_size); | ||
| 2995 | } | ||
| 2996 | |||
| 2997 | } | ||
| 2998 | |||
| 2681 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2999 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
| 2682 | struct btrfs_root *log, | 3000 | struct inode *inode, |
| 2683 | struct btrfs_path *dst_path, | 3001 | struct btrfs_path *dst_path, |
| 2684 | struct extent_buffer *src, | 3002 | struct extent_buffer *src, |
| 2685 | int start_slot, int nr, int inode_only) | 3003 | int start_slot, int nr, int inode_only) |
| 2686 | { | 3004 | { |
| 2687 | unsigned long src_offset; | 3005 | unsigned long src_offset; |
| 2688 | unsigned long dst_offset; | 3006 | unsigned long dst_offset; |
| 3007 | struct btrfs_root *log = BTRFS_I(inode)->root->log_root; | ||
| 2689 | struct btrfs_file_extent_item *extent; | 3008 | struct btrfs_file_extent_item *extent; |
| 2690 | struct btrfs_inode_item *inode_item; | 3009 | struct btrfs_inode_item *inode_item; |
| 2691 | int ret; | 3010 | int ret; |
| @@ -2694,6 +3013,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2694 | char *ins_data; | 3013 | char *ins_data; |
| 2695 | int i; | 3014 | int i; |
| 2696 | struct list_head ordered_sums; | 3015 | struct list_head ordered_sums; |
| 3016 | int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
| 2697 | 3017 | ||
| 2698 | INIT_LIST_HEAD(&ordered_sums); | 3018 | INIT_LIST_HEAD(&ordered_sums); |
| 2699 | 3019 | ||
| @@ -2722,29 +3042,23 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2722 | 3042 | ||
| 2723 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); | 3043 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); |
| 2724 | 3044 | ||
| 2725 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, | 3045 | if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { |
| 2726 | src_offset, ins_sizes[i]); | ||
| 2727 | |||
| 2728 | if (inode_only == LOG_INODE_EXISTS && | ||
| 2729 | ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { | ||
| 2730 | inode_item = btrfs_item_ptr(dst_path->nodes[0], | 3046 | inode_item = btrfs_item_ptr(dst_path->nodes[0], |
| 2731 | dst_path->slots[0], | 3047 | dst_path->slots[0], |
| 2732 | struct btrfs_inode_item); | 3048 | struct btrfs_inode_item); |
| 2733 | btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); | 3049 | fill_inode_item(trans, dst_path->nodes[0], inode_item, |
| 2734 | 3050 | inode, inode_only == LOG_INODE_EXISTS); | |
| 2735 | /* set the generation to zero so the recover code | 3051 | } else { |
| 2736 | * can tell the difference between an logging | 3052 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, |
| 2737 | * just to say 'this inode exists' and a logging | 3053 | src_offset, ins_sizes[i]); |
| 2738 | * to say 'update this inode with these values' | ||
| 2739 | */ | ||
| 2740 | btrfs_set_inode_generation(dst_path->nodes[0], | ||
| 2741 | inode_item, 0); | ||
| 2742 | } | 3054 | } |
| 3055 | |||
| 2743 | /* take a reference on file data extents so that truncates | 3056 | /* take a reference on file data extents so that truncates |
| 2744 | * or deletes of this inode don't have to relog the inode | 3057 | * or deletes of this inode don't have to relog the inode |
| 2745 | * again | 3058 | * again |
| 2746 | */ | 3059 | */ |
| 2747 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) { | 3060 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && |
| 3061 | !skip_csum) { | ||
| 2748 | int found_type; | 3062 | int found_type; |
| 2749 | extent = btrfs_item_ptr(src, start_slot + i, | 3063 | extent = btrfs_item_ptr(src, start_slot + i, |
| 2750 | struct btrfs_file_extent_item); | 3064 | struct btrfs_file_extent_item); |
| @@ -2753,8 +3067,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2753 | continue; | 3067 | continue; |
| 2754 | 3068 | ||
| 2755 | found_type = btrfs_file_extent_type(src, extent); | 3069 | found_type = btrfs_file_extent_type(src, extent); |
| 2756 | if (found_type == BTRFS_FILE_EXTENT_REG || | 3070 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
| 2757 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 2758 | u64 ds, dl, cs, cl; | 3071 | u64 ds, dl, cs, cl; |
| 2759 | ds = btrfs_file_extent_disk_bytenr(src, | 3072 | ds = btrfs_file_extent_disk_bytenr(src, |
| 2760 | extent); | 3073 | extent); |
| @@ -2803,6 +3116,239 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2803 | return ret; | 3116 | return ret; |
| 2804 | } | 3117 | } |
| 2805 | 3118 | ||
| 3119 | static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
| 3120 | { | ||
| 3121 | struct extent_map *em1, *em2; | ||
| 3122 | |||
| 3123 | em1 = list_entry(a, struct extent_map, list); | ||
| 3124 | em2 = list_entry(b, struct extent_map, list); | ||
| 3125 | |||
| 3126 | if (em1->start < em2->start) | ||
| 3127 | return -1; | ||
| 3128 | else if (em1->start > em2->start) | ||
| 3129 | return 1; | ||
| 3130 | return 0; | ||
| 3131 | } | ||
| 3132 | |||
| 3133 | struct log_args { | ||
| 3134 | struct extent_buffer *src; | ||
| 3135 | u64 next_offset; | ||
| 3136 | int start_slot; | ||
| 3137 | int nr; | ||
| 3138 | }; | ||
| 3139 | |||
| 3140 | static int log_one_extent(struct btrfs_trans_handle *trans, | ||
| 3141 | struct inode *inode, struct btrfs_root *root, | ||
| 3142 | struct extent_map *em, struct btrfs_path *path, | ||
| 3143 | struct btrfs_path *dst_path, struct log_args *args) | ||
| 3144 | { | ||
| 3145 | struct btrfs_root *log = root->log_root; | ||
| 3146 | struct btrfs_file_extent_item *fi; | ||
| 3147 | struct btrfs_key key; | ||
| 3148 | u64 start = em->mod_start; | ||
| 3149 | u64 search_start = start; | ||
| 3150 | u64 len = em->mod_len; | ||
| 3151 | u64 num_bytes; | ||
| 3152 | int nritems; | ||
| 3153 | int ret; | ||
| 3154 | |||
| 3155 | if (BTRFS_I(inode)->logged_trans == trans->transid) { | ||
| 3156 | ret = __btrfs_drop_extents(trans, log, inode, dst_path, start, | ||
| 3157 | start + len, NULL, 0); | ||
| 3158 | if (ret) | ||
| 3159 | return ret; | ||
| 3160 | } | ||
| 3161 | |||
| 3162 | while (len) { | ||
| 3163 | if (args->nr) | ||
| 3164 | goto next_slot; | ||
| 3165 | again: | ||
| 3166 | key.objectid = btrfs_ino(inode); | ||
| 3167 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
| 3168 | key.offset = search_start; | ||
| 3169 | |||
| 3170 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 3171 | if (ret < 0) | ||
| 3172 | return ret; | ||
| 3173 | |||
| 3174 | if (ret) { | ||
| 3175 | /* | ||
| 3176 | * A rare case were we can have an em for a section of a | ||
| 3177 | * larger extent so we need to make sure that this em | ||
| 3178 | * falls within the extent we've found. If not we just | ||
| 3179 | * bail and go back to ye-olde way of doing things but | ||
| 3180 | * it happens often enough in testing that we need to do | ||
| 3181 | * this dance to make sure. | ||
| 3182 | */ | ||
| 3183 | do { | ||
| 3184 | if (path->slots[0] == 0) { | ||
| 3185 | btrfs_release_path(path); | ||
| 3186 | if (search_start == 0) | ||
| 3187 | return -ENOENT; | ||
| 3188 | search_start--; | ||
| 3189 | goto again; | ||
| 3190 | } | ||
| 3191 | |||
| 3192 | path->slots[0]--; | ||
| 3193 | btrfs_item_key_to_cpu(path->nodes[0], &key, | ||
| 3194 | path->slots[0]); | ||
| 3195 | if (key.objectid != btrfs_ino(inode) || | ||
| 3196 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
| 3197 | btrfs_release_path(path); | ||
| 3198 | return -ENOENT; | ||
| 3199 | } | ||
| 3200 | } while (key.offset > start); | ||
| 3201 | |||
| 3202 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 3203 | struct btrfs_file_extent_item); | ||
| 3204 | num_bytes = btrfs_file_extent_num_bytes(path->nodes[0], | ||
| 3205 | fi); | ||
| 3206 | if (key.offset + num_bytes <= start) { | ||
| 3207 | btrfs_release_path(path); | ||
| 3208 | return -ENOENT; | ||
| 3209 | } | ||
| 3210 | } | ||
| 3211 | args->src = path->nodes[0]; | ||
| 3212 | next_slot: | ||
| 3213 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 3214 | fi = btrfs_item_ptr(args->src, path->slots[0], | ||
| 3215 | struct btrfs_file_extent_item); | ||
| 3216 | if (args->nr && | ||
| 3217 | args->start_slot + args->nr == path->slots[0]) { | ||
| 3218 | args->nr++; | ||
| 3219 | } else if (args->nr) { | ||
| 3220 | ret = copy_items(trans, inode, dst_path, args->src, | ||
| 3221 | args->start_slot, args->nr, | ||
| 3222 | LOG_INODE_ALL); | ||
| 3223 | if (ret) | ||
| 3224 | return ret; | ||
| 3225 | args->nr = 1; | ||
| 3226 | args->start_slot = path->slots[0]; | ||
| 3227 | } else if (!args->nr) { | ||
| 3228 | args->nr = 1; | ||
| 3229 | args->start_slot = path->slots[0]; | ||
| 3230 | } | ||
| 3231 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 3232 | path->slots[0]++; | ||
| 3233 | num_bytes = btrfs_file_extent_num_bytes(args->src, fi); | ||
| 3234 | if (len < num_bytes) { | ||
| 3235 | /* I _think_ this is ok, envision we write to a | ||
| 3236 | * preallocated space that is adjacent to a previously | ||
| 3237 | * written preallocated space that gets merged when we | ||
| 3238 | * mark this preallocated space written. If we do not | ||
| 3239 | * have the adjacent extent in cache then when we copy | ||
| 3240 | * this extent it could end up being larger than our EM | ||
| 3241 | * thinks it is, which is a-ok, so just set len to 0. | ||
| 3242 | */ | ||
| 3243 | len = 0; | ||
| 3244 | } else { | ||
| 3245 | len -= num_bytes; | ||
| 3246 | } | ||
| 3247 | start = key.offset + num_bytes; | ||
| 3248 | args->next_offset = start; | ||
| 3249 | search_start = start; | ||
| 3250 | |||
| 3251 | if (path->slots[0] < nritems) { | ||
| 3252 | if (len) | ||
| 3253 | goto next_slot; | ||
| 3254 | break; | ||
| 3255 | } | ||
| 3256 | |||
| 3257 | if (args->nr) { | ||
| 3258 | ret = copy_items(trans, inode, dst_path, args->src, | ||
| 3259 | args->start_slot, args->nr, | ||
| 3260 | LOG_INODE_ALL); | ||
| 3261 | if (ret) | ||
| 3262 | return ret; | ||
| 3263 | args->nr = 0; | ||
| 3264 | btrfs_release_path(path); | ||
| 3265 | } | ||
| 3266 | } | ||
| 3267 | |||
| 3268 | return 0; | ||
| 3269 | } | ||
| 3270 | |||
| 3271 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | ||
| 3272 | struct btrfs_root *root, | ||
| 3273 | struct inode *inode, | ||
| 3274 | struct btrfs_path *path, | ||
| 3275 | struct btrfs_path *dst_path) | ||
| 3276 | { | ||
| 3277 | struct log_args args; | ||
| 3278 | struct extent_map *em, *n; | ||
| 3279 | struct list_head extents; | ||
| 3280 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
| 3281 | u64 test_gen; | ||
| 3282 | int ret = 0; | ||
| 3283 | |||
| 3284 | INIT_LIST_HEAD(&extents); | ||
| 3285 | |||
| 3286 | memset(&args, 0, sizeof(args)); | ||
| 3287 | |||
| 3288 | write_lock(&tree->lock); | ||
| 3289 | test_gen = root->fs_info->last_trans_committed; | ||
| 3290 | |||
| 3291 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { | ||
| 3292 | list_del_init(&em->list); | ||
| 3293 | if (em->generation <= test_gen) | ||
| 3294 | continue; | ||
| 3295 | /* Need a ref to keep it from getting evicted from cache */ | ||
| 3296 | atomic_inc(&em->refs); | ||
| 3297 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
| 3298 | list_add_tail(&em->list, &extents); | ||
| 3299 | } | ||
| 3300 | |||
| 3301 | list_sort(NULL, &extents, extent_cmp); | ||
| 3302 | |||
| 3303 | while (!list_empty(&extents)) { | ||
| 3304 | em = list_entry(extents.next, struct extent_map, list); | ||
| 3305 | |||
| 3306 | list_del_init(&em->list); | ||
| 3307 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
| 3308 | |||
| 3309 | /* | ||
| 3310 | * If we had an error we just need to delete everybody from our | ||
| 3311 | * private list. | ||
| 3312 | */ | ||
| 3313 | if (ret) { | ||
| 3314 | free_extent_map(em); | ||
| 3315 | continue; | ||
| 3316 | } | ||
| 3317 | |||
| 3318 | write_unlock(&tree->lock); | ||
| 3319 | |||
| 3320 | /* | ||
| 3321 | * If the previous EM and the last extent we left off on aren't | ||
| 3322 | * sequential then we need to copy the items we have and redo | ||
| 3323 | * our search | ||
| 3324 | */ | ||
| 3325 | if (args.nr && em->mod_start != args.next_offset) { | ||
| 3326 | ret = copy_items(trans, inode, dst_path, args.src, | ||
| 3327 | args.start_slot, args.nr, | ||
| 3328 | LOG_INODE_ALL); | ||
| 3329 | if (ret) { | ||
| 3330 | free_extent_map(em); | ||
| 3331 | write_lock(&tree->lock); | ||
| 3332 | continue; | ||
| 3333 | } | ||
| 3334 | btrfs_release_path(path); | ||
| 3335 | args.nr = 0; | ||
| 3336 | } | ||
| 3337 | |||
| 3338 | ret = log_one_extent(trans, inode, root, em, path, dst_path, &args); | ||
| 3339 | free_extent_map(em); | ||
| 3340 | write_lock(&tree->lock); | ||
| 3341 | } | ||
| 3342 | WARN_ON(!list_empty(&extents)); | ||
| 3343 | write_unlock(&tree->lock); | ||
| 3344 | |||
| 3345 | if (!ret && args.nr) | ||
| 3346 | ret = copy_items(trans, inode, dst_path, args.src, | ||
| 3347 | args.start_slot, args.nr, LOG_INODE_ALL); | ||
| 3348 | btrfs_release_path(path); | ||
| 3349 | return ret; | ||
| 3350 | } | ||
| 3351 | |||
| 2806 | /* log a single inode in the tree log. | 3352 | /* log a single inode in the tree log. |
| 2807 | * At least one parent directory for this inode must exist in the tree | 3353 | * At least one parent directory for this inode must exist in the tree |
| 2808 | * or be logged already. | 3354 | * or be logged already. |
| @@ -2832,6 +3378,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2832 | int nritems; | 3378 | int nritems; |
| 2833 | int ins_start_slot = 0; | 3379 | int ins_start_slot = 0; |
| 2834 | int ins_nr; | 3380 | int ins_nr; |
| 3381 | bool fast_search = false; | ||
| 2835 | u64 ino = btrfs_ino(inode); | 3382 | u64 ino = btrfs_ino(inode); |
| 2836 | 3383 | ||
| 2837 | log = root->log_root; | 3384 | log = root->log_root; |
| @@ -2851,21 +3398,23 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2851 | 3398 | ||
| 2852 | max_key.objectid = ino; | 3399 | max_key.objectid = ino; |
| 2853 | 3400 | ||
| 2854 | /* today the code can only do partial logging of directories */ | ||
| 2855 | if (!S_ISDIR(inode->i_mode)) | ||
| 2856 | inode_only = LOG_INODE_ALL; | ||
| 2857 | 3401 | ||
| 3402 | /* today the code can only do partial logging of directories */ | ||
| 2858 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 3403 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) |
| 2859 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3404 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
| 2860 | else | 3405 | else |
| 2861 | max_key.type = (u8)-1; | 3406 | max_key.type = (u8)-1; |
| 2862 | max_key.offset = (u64)-1; | 3407 | max_key.offset = (u64)-1; |
| 2863 | 3408 | ||
| 2864 | ret = btrfs_commit_inode_delayed_items(trans, inode); | 3409 | /* Only run delayed items if we are a dir or a new file */ |
| 2865 | if (ret) { | 3410 | if (S_ISDIR(inode->i_mode) || |
| 2866 | btrfs_free_path(path); | 3411 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { |
| 2867 | btrfs_free_path(dst_path); | 3412 | ret = btrfs_commit_inode_delayed_items(trans, inode); |
| 2868 | return ret; | 3413 | if (ret) { |
| 3414 | btrfs_free_path(path); | ||
| 3415 | btrfs_free_path(dst_path); | ||
| 3416 | return ret; | ||
| 3417 | } | ||
| 2869 | } | 3418 | } |
| 2870 | 3419 | ||
| 2871 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3420 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
| @@ -2881,7 +3430,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2881 | max_key_type = BTRFS_XATTR_ITEM_KEY; | 3430 | max_key_type = BTRFS_XATTR_ITEM_KEY; |
| 2882 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 3431 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
| 2883 | } else { | 3432 | } else { |
| 2884 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 3433 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
| 3434 | &BTRFS_I(inode)->runtime_flags)) { | ||
| 3435 | ret = btrfs_truncate_inode_items(trans, log, | ||
| 3436 | inode, 0, 0); | ||
| 3437 | } else { | ||
| 3438 | fast_search = true; | ||
| 3439 | max_key.type = BTRFS_XATTR_ITEM_KEY; | ||
| 3440 | ret = drop_objectid_items(trans, log, path, ino, | ||
| 3441 | BTRFS_XATTR_ITEM_KEY); | ||
| 3442 | } | ||
| 2885 | } | 3443 | } |
| 2886 | if (ret) { | 3444 | if (ret) { |
| 2887 | err = ret; | 3445 | err = ret; |
| @@ -2912,7 +3470,7 @@ again: | |||
| 2912 | goto next_slot; | 3470 | goto next_slot; |
| 2913 | } | 3471 | } |
| 2914 | 3472 | ||
| 2915 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 3473 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
| 2916 | ins_nr, inode_only); | 3474 | ins_nr, inode_only); |
| 2917 | if (ret) { | 3475 | if (ret) { |
| 2918 | err = ret; | 3476 | err = ret; |
| @@ -2930,7 +3488,7 @@ next_slot: | |||
| 2930 | goto again; | 3488 | goto again; |
| 2931 | } | 3489 | } |
| 2932 | if (ins_nr) { | 3490 | if (ins_nr) { |
| 2933 | ret = copy_items(trans, log, dst_path, src, | 3491 | ret = copy_items(trans, inode, dst_path, src, |
| 2934 | ins_start_slot, | 3492 | ins_start_slot, |
| 2935 | ins_nr, inode_only); | 3493 | ins_nr, inode_only); |
| 2936 | if (ret) { | 3494 | if (ret) { |
| @@ -2951,8 +3509,7 @@ next_slot: | |||
| 2951 | break; | 3509 | break; |
| 2952 | } | 3510 | } |
| 2953 | if (ins_nr) { | 3511 | if (ins_nr) { |
| 2954 | ret = copy_items(trans, log, dst_path, src, | 3512 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
| 2955 | ins_start_slot, | ||
| 2956 | ins_nr, inode_only); | 3513 | ins_nr, inode_only); |
| 2957 | if (ret) { | 3514 | if (ret) { |
| 2958 | err = ret; | 3515 | err = ret; |
| @@ -2960,7 +3517,24 @@ next_slot: | |||
| 2960 | } | 3517 | } |
| 2961 | ins_nr = 0; | 3518 | ins_nr = 0; |
| 2962 | } | 3519 | } |
| 2963 | WARN_ON(ins_nr); | 3520 | |
| 3521 | if (fast_search) { | ||
| 3522 | btrfs_release_path(path); | ||
| 3523 | btrfs_release_path(dst_path); | ||
| 3524 | ret = btrfs_log_changed_extents(trans, root, inode, path, | ||
| 3525 | dst_path); | ||
| 3526 | if (ret) { | ||
| 3527 | err = ret; | ||
| 3528 | goto out_unlock; | ||
| 3529 | } | ||
| 3530 | } else { | ||
| 3531 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
| 3532 | struct extent_map *em, *n; | ||
| 3533 | |||
| 3534 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) | ||
| 3535 | list_del_init(&em->list); | ||
| 3536 | } | ||
| 3537 | |||
| 2964 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 3538 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
| 2965 | btrfs_release_path(path); | 3539 | btrfs_release_path(path); |
| 2966 | btrfs_release_path(dst_path); | 3540 | btrfs_release_path(dst_path); |
| @@ -2971,6 +3545,7 @@ next_slot: | |||
| 2971 | } | 3545 | } |
| 2972 | } | 3546 | } |
| 2973 | BTRFS_I(inode)->logged_trans = trans->transid; | 3547 | BTRFS_I(inode)->logged_trans = trans->transid; |
| 3548 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | ||
| 2974 | out_unlock: | 3549 | out_unlock: |
| 2975 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 3550 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 2976 | 3551 | ||
| @@ -3138,7 +3713,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 3138 | end_trans: | 3713 | end_trans: |
| 3139 | dput(old_parent); | 3714 | dput(old_parent); |
| 3140 | if (ret < 0) { | 3715 | if (ret < 0) { |
| 3141 | BUG_ON(ret != -ENOSPC); | 3716 | WARN_ON(ret != -ENOSPC); |
| 3142 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3717 | root->fs_info->last_trans_log_full_commit = trans->transid; |
| 3143 | ret = 1; | 3718 | ret = 1; |
| 3144 | } | 3719 | } |
