diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 889 |
1 files changed, 732 insertions, 157 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c86670f4f285..e9ebb472b28b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -18,13 +18,16 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/list_sort.h> | ||
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "transaction.h" | 23 | #include "transaction.h" |
23 | #include "disk-io.h" | 24 | #include "disk-io.h" |
24 | #include "locking.h" | 25 | #include "locking.h" |
25 | #include "print-tree.h" | 26 | #include "print-tree.h" |
27 | #include "backref.h" | ||
26 | #include "compat.h" | 28 | #include "compat.h" |
27 | #include "tree-log.h" | 29 | #include "tree-log.h" |
30 | #include "hash.h" | ||
28 | 31 | ||
29 | /* magic values for the inode_only field in btrfs_log_inode: | 32 | /* magic values for the inode_only field in btrfs_log_inode: |
30 | * | 33 | * |
@@ -146,7 +149,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
146 | root->log_multiple_pids = true; | 149 | root->log_multiple_pids = true; |
147 | } | 150 | } |
148 | 151 | ||
149 | root->log_batch++; | 152 | atomic_inc(&root->log_batch); |
150 | atomic_inc(&root->log_writers); | 153 | atomic_inc(&root->log_writers); |
151 | mutex_unlock(&root->log_mutex); | 154 | mutex_unlock(&root->log_mutex); |
152 | return 0; | 155 | return 0; |
@@ -165,7 +168,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
165 | err = ret; | 168 | err = ret; |
166 | } | 169 | } |
167 | mutex_unlock(&root->fs_info->tree_log_mutex); | 170 | mutex_unlock(&root->fs_info->tree_log_mutex); |
168 | root->log_batch++; | 171 | atomic_inc(&root->log_batch); |
169 | atomic_inc(&root->log_writers); | 172 | atomic_inc(&root->log_writers); |
170 | mutex_unlock(&root->log_mutex); | 173 | mutex_unlock(&root->log_mutex); |
171 | return err; | 174 | return err; |
@@ -484,7 +487,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
484 | int found_type; | 487 | int found_type; |
485 | u64 mask = root->sectorsize - 1; | 488 | u64 mask = root->sectorsize - 1; |
486 | u64 extent_end; | 489 | u64 extent_end; |
487 | u64 alloc_hint; | ||
488 | u64 start = key->offset; | 490 | u64 start = key->offset; |
489 | u64 saved_nbytes; | 491 | u64 saved_nbytes; |
490 | struct btrfs_file_extent_item *item; | 492 | struct btrfs_file_extent_item *item; |
@@ -550,8 +552,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
550 | 552 | ||
551 | saved_nbytes = inode_get_bytes(inode); | 553 | saved_nbytes = inode_get_bytes(inode); |
552 | /* drop any overlapping extents */ | 554 | /* drop any overlapping extents */ |
553 | ret = btrfs_drop_extents(trans, inode, start, extent_end, | 555 | ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); |
554 | &alloc_hint, 1); | ||
555 | BUG_ON(ret); | 556 | BUG_ON(ret); |
556 | 557 | ||
557 | if (found_type == BTRFS_FILE_EXTENT_REG || | 558 | if (found_type == BTRFS_FILE_EXTENT_REG || |
@@ -744,6 +745,7 @@ out: | |||
744 | */ | 745 | */ |
745 | static noinline int backref_in_log(struct btrfs_root *log, | 746 | static noinline int backref_in_log(struct btrfs_root *log, |
746 | struct btrfs_key *key, | 747 | struct btrfs_key *key, |
748 | u64 ref_objectid, | ||
747 | char *name, int namelen) | 749 | char *name, int namelen) |
748 | { | 750 | { |
749 | struct btrfs_path *path; | 751 | struct btrfs_path *path; |
@@ -764,8 +766,17 @@ static noinline int backref_in_log(struct btrfs_root *log, | |||
764 | if (ret != 0) | 766 | if (ret != 0) |
765 | goto out; | 767 | goto out; |
766 | 768 | ||
767 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
768 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | 769 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); |
770 | |||
771 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
772 | if (btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
773 | name, namelen, NULL)) | ||
774 | match = 1; | ||
775 | |||
776 | goto out; | ||
777 | } | ||
778 | |||
779 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
769 | ptr_end = ptr + item_size; | 780 | ptr_end = ptr + item_size; |
770 | while (ptr < ptr_end) { | 781 | while (ptr < ptr_end) { |
771 | ref = (struct btrfs_inode_ref *)ptr; | 782 | ref = (struct btrfs_inode_ref *)ptr; |
@@ -786,91 +797,42 @@ out: | |||
786 | return match; | 797 | return match; |
787 | } | 798 | } |
788 | 799 | ||
789 | 800 | static inline int __add_inode_ref(struct btrfs_trans_handle *trans, | |
790 | /* | ||
791 | * replay one inode back reference item found in the log tree. | ||
792 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
793 | * root is the destination we are replaying into, and path is for temp | ||
794 | * use by this function. (it should be released on return). | ||
795 | */ | ||
796 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
797 | struct btrfs_root *root, | 801 | struct btrfs_root *root, |
798 | struct btrfs_root *log, | ||
799 | struct btrfs_path *path, | 802 | struct btrfs_path *path, |
800 | struct extent_buffer *eb, int slot, | 803 | struct btrfs_root *log_root, |
801 | struct btrfs_key *key) | 804 | struct inode *dir, struct inode *inode, |
805 | struct extent_buffer *eb, | ||
806 | u64 inode_objectid, u64 parent_objectid, | ||
807 | u64 ref_index, char *name, int namelen, | ||
808 | int *search_done) | ||
802 | { | 809 | { |
803 | struct btrfs_inode_ref *ref; | ||
804 | struct btrfs_dir_item *di; | ||
805 | struct inode *dir; | ||
806 | struct inode *inode; | ||
807 | unsigned long ref_ptr; | ||
808 | unsigned long ref_end; | ||
809 | char *name; | ||
810 | int namelen; | ||
811 | int ret; | 810 | int ret; |
812 | int search_done = 0; | 811 | char *victim_name; |
813 | 812 | int victim_name_len; | |
814 | /* | 813 | struct extent_buffer *leaf; |
815 | * it is possible that we didn't log all the parent directories | 814 | struct btrfs_dir_item *di; |
816 | * for a given inode. If we don't find the dir, just don't | 815 | struct btrfs_key search_key; |
817 | * copy the back ref in. The link count fixup code will take | 816 | struct btrfs_inode_extref *extref; |
818 | * care of the rest | ||
819 | */ | ||
820 | dir = read_one_inode(root, key->offset); | ||
821 | if (!dir) | ||
822 | return -ENOENT; | ||
823 | |||
824 | inode = read_one_inode(root, key->objectid); | ||
825 | if (!inode) { | ||
826 | iput(dir); | ||
827 | return -EIO; | ||
828 | } | ||
829 | |||
830 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
831 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
832 | 817 | ||
833 | again: | 818 | again: |
834 | ref = (struct btrfs_inode_ref *)ref_ptr; | 819 | /* Search old style refs */ |
835 | 820 | search_key.objectid = inode_objectid; | |
836 | namelen = btrfs_inode_ref_name_len(eb, ref); | 821 | search_key.type = BTRFS_INODE_REF_KEY; |
837 | name = kmalloc(namelen, GFP_NOFS); | 822 | search_key.offset = parent_objectid; |
838 | BUG_ON(!name); | 823 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
839 | |||
840 | read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); | ||
841 | |||
842 | /* if we already have a perfect match, we're done */ | ||
843 | if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
844 | btrfs_inode_ref_index(eb, ref), | ||
845 | name, namelen)) { | ||
846 | goto out; | ||
847 | } | ||
848 | |||
849 | /* | ||
850 | * look for a conflicting back reference in the metadata. | ||
851 | * if we find one we have to unlink that name of the file | ||
852 | * before we add our new link. Later on, we overwrite any | ||
853 | * existing back reference, and we don't want to create | ||
854 | * dangling pointers in the directory. | ||
855 | */ | ||
856 | |||
857 | if (search_done) | ||
858 | goto insert; | ||
859 | |||
860 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | ||
861 | if (ret == 0) { | 824 | if (ret == 0) { |
862 | char *victim_name; | ||
863 | int victim_name_len; | ||
864 | struct btrfs_inode_ref *victim_ref; | 825 | struct btrfs_inode_ref *victim_ref; |
865 | unsigned long ptr; | 826 | unsigned long ptr; |
866 | unsigned long ptr_end; | 827 | unsigned long ptr_end; |
867 | struct extent_buffer *leaf = path->nodes[0]; | 828 | |
829 | leaf = path->nodes[0]; | ||
868 | 830 | ||
869 | /* are we trying to overwrite a back ref for the root directory | 831 | /* are we trying to overwrite a back ref for the root directory |
870 | * if so, just jump out, we're done | 832 | * if so, just jump out, we're done |
871 | */ | 833 | */ |
872 | if (key->objectid == key->offset) | 834 | if (search_key.objectid == search_key.offset) |
873 | goto out_nowrite; | 835 | return 1; |
874 | 836 | ||
875 | /* check all the names in this back reference to see | 837 | /* check all the names in this back reference to see |
876 | * if they are in the log. if so, we allow them to stay | 838 | * if they are in the log. if so, we allow them to stay |
@@ -889,7 +851,9 @@ again: | |||
889 | (unsigned long)(victim_ref + 1), | 851 | (unsigned long)(victim_ref + 1), |
890 | victim_name_len); | 852 | victim_name_len); |
891 | 853 | ||
892 | if (!backref_in_log(log, key, victim_name, | 854 | if (!backref_in_log(log_root, &search_key, |
855 | parent_objectid, | ||
856 | victim_name, | ||
893 | victim_name_len)) { | 857 | victim_name_len)) { |
894 | btrfs_inc_nlink(inode); | 858 | btrfs_inc_nlink(inode); |
895 | btrfs_release_path(path); | 859 | btrfs_release_path(path); |
@@ -897,9 +861,14 @@ again: | |||
897 | ret = btrfs_unlink_inode(trans, root, dir, | 861 | ret = btrfs_unlink_inode(trans, root, dir, |
898 | inode, victim_name, | 862 | inode, victim_name, |
899 | victim_name_len); | 863 | victim_name_len); |
864 | BUG_ON(ret); | ||
900 | btrfs_run_delayed_items(trans, root); | 865 | btrfs_run_delayed_items(trans, root); |
866 | kfree(victim_name); | ||
867 | *search_done = 1; | ||
868 | goto again; | ||
901 | } | 869 | } |
902 | kfree(victim_name); | 870 | kfree(victim_name); |
871 | |||
903 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 872 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
904 | } | 873 | } |
905 | BUG_ON(ret); | 874 | BUG_ON(ret); |
@@ -908,14 +877,78 @@ again: | |||
908 | * NOTE: we have searched root tree and checked the | 877 | * NOTE: we have searched root tree and checked the |
908 | * corresponding ref, it does not need to check again. | 878 | * corresponding ref, it does not need to check again. |
910 | */ | 879 | */ |
911 | search_done = 1; | 880 | *search_done = 1; |
881 | } | ||
882 | btrfs_release_path(path); | ||
883 | |||
884 | /* Same search but for extended refs */ | ||
885 | extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, | ||
886 | inode_objectid, parent_objectid, 0, | ||
887 | 0); | ||
888 | if (!IS_ERR_OR_NULL(extref)) { | ||
889 | u32 item_size; | ||
890 | u32 cur_offset = 0; | ||
891 | unsigned long base; | ||
892 | struct inode *victim_parent; | ||
893 | |||
894 | leaf = path->nodes[0]; | ||
895 | |||
896 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
897 | base = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
898 | |||
899 | while (cur_offset < item_size) { | ||
900 | extref = (struct btrfs_inode_extref *)base + cur_offset; | ||
901 | |||
902 | victim_name_len = btrfs_inode_extref_name_len(leaf, extref); | ||
903 | |||
904 | if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) | ||
905 | goto next; | ||
906 | |||
907 | victim_name = kmalloc(victim_name_len, GFP_NOFS); | ||
908 | read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, | ||
909 | victim_name_len); | ||
910 | |||
911 | search_key.objectid = inode_objectid; | ||
912 | search_key.type = BTRFS_INODE_EXTREF_KEY; | ||
913 | search_key.offset = btrfs_extref_hash(parent_objectid, | ||
914 | victim_name, | ||
915 | victim_name_len); | ||
916 | ret = 0; | ||
917 | if (!backref_in_log(log_root, &search_key, | ||
918 | parent_objectid, victim_name, | ||
919 | victim_name_len)) { | ||
920 | ret = -ENOENT; | ||
921 | victim_parent = read_one_inode(root, | ||
922 | parent_objectid); | ||
923 | if (victim_parent) { | ||
924 | btrfs_inc_nlink(inode); | ||
925 | btrfs_release_path(path); | ||
926 | |||
927 | ret = btrfs_unlink_inode(trans, root, | ||
928 | victim_parent, | ||
929 | inode, | ||
930 | victim_name, | ||
931 | victim_name_len); | ||
932 | btrfs_run_delayed_items(trans, root); | ||
933 | } | ||
934 | BUG_ON(ret); | ||
935 | iput(victim_parent); | ||
936 | kfree(victim_name); | ||
937 | *search_done = 1; | ||
938 | goto again; | ||
939 | } | ||
940 | kfree(victim_name); | ||
941 | BUG_ON(ret); | ||
942 | next: | ||
943 | cur_offset += victim_name_len + sizeof(*extref); | ||
944 | } | ||
945 | *search_done = 1; | ||
912 | } | 946 | } |
913 | btrfs_release_path(path); | 947 | btrfs_release_path(path); |
914 | 948 | ||
915 | /* look for a conflicting sequence number */ | 949 | /* look for a conflicting sequence number */ |
916 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | 950 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), |
917 | btrfs_inode_ref_index(eb, ref), | 951 | ref_index, name, namelen, 0); |
918 | name, namelen, 0); | ||
919 | if (di && !IS_ERR(di)) { | 952 | if (di && !IS_ERR(di)) { |
920 | ret = drop_one_dir_item(trans, root, path, dir, di); | 953 | ret = drop_one_dir_item(trans, root, path, dir, di); |
921 | BUG_ON(ret); | 954 | BUG_ON(ret); |
@@ -931,25 +964,173 @@ again: | |||
931 | } | 964 | } |
932 | btrfs_release_path(path); | 965 | btrfs_release_path(path); |
933 | 966 | ||
934 | insert: | 967 | return 0; |
935 | /* insert our name */ | 968 | } |
936 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | ||
937 | btrfs_inode_ref_index(eb, ref)); | ||
938 | BUG_ON(ret); | ||
939 | 969 | ||
940 | btrfs_update_inode(trans, root, inode); | 970 | static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, |
971 | u32 *namelen, char **name, u64 *index, | ||
972 | u64 *parent_objectid) | ||
973 | { | ||
974 | struct btrfs_inode_extref *extref; | ||
941 | 975 | ||
942 | out: | 976 | extref = (struct btrfs_inode_extref *)ref_ptr; |
943 | ref_ptr = (unsigned long)(ref + 1) + namelen; | 977 | |
944 | kfree(name); | 978 | *namelen = btrfs_inode_extref_name_len(eb, extref); |
945 | if (ref_ptr < ref_end) | 979 | *name = kmalloc(*namelen, GFP_NOFS); |
946 | goto again; | 980 | if (*name == NULL) |
981 | return -ENOMEM; | ||
982 | |||
983 | read_extent_buffer(eb, *name, (unsigned long)&extref->name, | ||
984 | *namelen); | ||
985 | |||
986 | *index = btrfs_inode_extref_index(eb, extref); | ||
987 | if (parent_objectid) | ||
988 | *parent_objectid = btrfs_inode_extref_parent(eb, extref); | ||
989 | |||
990 | return 0; | ||
991 | } | ||
992 | |||
993 | static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, | ||
994 | u32 *namelen, char **name, u64 *index) | ||
995 | { | ||
996 | struct btrfs_inode_ref *ref; | ||
997 | |||
998 | ref = (struct btrfs_inode_ref *)ref_ptr; | ||
999 | |||
1000 | *namelen = btrfs_inode_ref_name_len(eb, ref); | ||
1001 | *name = kmalloc(*namelen, GFP_NOFS); | ||
1002 | if (*name == NULL) | ||
1003 | return -ENOMEM; | ||
1004 | |||
1005 | read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); | ||
1006 | |||
1007 | *index = btrfs_inode_ref_index(eb, ref); | ||
1008 | |||
1009 | return 0; | ||
1010 | } | ||
1011 | |||
1012 | /* | ||
1013 | * replay one inode back reference item found in the log tree. | ||
1014 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
1015 | * root is the destination we are replaying into, and path is for temp | ||
1016 | * use by this function. (it should be released on return). | ||
1017 | */ | ||
1018 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
1019 | struct btrfs_root *root, | ||
1020 | struct btrfs_root *log, | ||
1021 | struct btrfs_path *path, | ||
1022 | struct extent_buffer *eb, int slot, | ||
1023 | struct btrfs_key *key) | ||
1024 | { | ||
1025 | struct inode *dir; | ||
1026 | struct inode *inode; | ||
1027 | unsigned long ref_ptr; | ||
1028 | unsigned long ref_end; | ||
1029 | char *name; | ||
1030 | int namelen; | ||
1031 | int ret; | ||
1032 | int search_done = 0; | ||
1033 | int log_ref_ver = 0; | ||
1034 | u64 parent_objectid; | ||
1035 | u64 inode_objectid; | ||
1036 | u64 ref_index = 0; | ||
1037 | int ref_struct_size; | ||
1038 | |||
1039 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
1040 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
1041 | |||
1042 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
1043 | struct btrfs_inode_extref *r; | ||
1044 | |||
1045 | ref_struct_size = sizeof(struct btrfs_inode_extref); | ||
1046 | log_ref_ver = 1; | ||
1047 | r = (struct btrfs_inode_extref *)ref_ptr; | ||
1048 | parent_objectid = btrfs_inode_extref_parent(eb, r); | ||
1049 | } else { | ||
1050 | ref_struct_size = sizeof(struct btrfs_inode_ref); | ||
1051 | parent_objectid = key->offset; | ||
1052 | } | ||
1053 | inode_objectid = key->objectid; | ||
1054 | |||
1055 | /* | ||
1056 | * it is possible that we didn't log all the parent directories | ||
1057 | * for a given inode. If we don't find the dir, just don't | ||
1058 | * copy the back ref in. The link count fixup code will take | ||
1059 | * care of the rest | ||
1060 | */ | ||
1061 | dir = read_one_inode(root, parent_objectid); | ||
1062 | if (!dir) | ||
1063 | return -ENOENT; | ||
1064 | |||
1065 | inode = read_one_inode(root, inode_objectid); | ||
1066 | if (!inode) { | ||
1067 | iput(dir); | ||
1068 | return -EIO; | ||
1069 | } | ||
1070 | |||
1071 | while (ref_ptr < ref_end) { | ||
1072 | if (log_ref_ver) { | ||
1073 | ret = extref_get_fields(eb, ref_ptr, &namelen, &name, | ||
1074 | &ref_index, &parent_objectid); | ||
1075 | /* | ||
1076 | * parent object can change from one array | ||
1077 | * item to another. | ||
1078 | */ | ||
1079 | if (!dir) | ||
1080 | dir = read_one_inode(root, parent_objectid); | ||
1081 | if (!dir) | ||
1082 | return -ENOENT; | ||
1083 | } else { | ||
1084 | ret = ref_get_fields(eb, ref_ptr, &namelen, &name, | ||
1085 | &ref_index); | ||
1086 | } | ||
1087 | if (ret) | ||
1088 | return ret; | ||
1089 | |||
1090 | /* if we already have a perfect match, we're done */ | ||
1091 | if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
1092 | ref_index, name, namelen)) { | ||
1093 | /* | ||
1094 | * look for a conflicting back reference in the | ||
1095 | * metadata. if we find one we have to unlink that name | ||
1096 | * of the file before we add our new link. Later on, we | ||
1097 | * overwrite any existing back reference, and we don't | ||
1098 | * want to create dangling pointers in the directory. | ||
1099 | */ | ||
1100 | |||
1101 | if (!search_done) { | ||
1102 | ret = __add_inode_ref(trans, root, path, log, | ||
1103 | dir, inode, eb, | ||
1104 | inode_objectid, | ||
1105 | parent_objectid, | ||
1106 | ref_index, name, namelen, | ||
1107 | &search_done); | ||
1108 | if (ret == 1) | ||
1109 | goto out; | ||
1110 | BUG_ON(ret); | ||
1111 | } | ||
1112 | |||
1113 | /* insert our name */ | ||
1114 | ret = btrfs_add_link(trans, dir, inode, name, namelen, | ||
1115 | 0, ref_index); | ||
1116 | BUG_ON(ret); | ||
1117 | |||
1118 | btrfs_update_inode(trans, root, inode); | ||
1119 | } | ||
1120 | |||
1121 | ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; | ||
1122 | kfree(name); | ||
1123 | if (log_ref_ver) { | ||
1124 | iput(dir); | ||
1125 | dir = NULL; | ||
1126 | } | ||
1127 | } | ||
947 | 1128 | ||
948 | /* finally write the back reference in the inode */ | 1129 | /* finally write the back reference in the inode */ |
949 | ret = overwrite_item(trans, root, path, eb, slot, key); | 1130 | ret = overwrite_item(trans, root, path, eb, slot, key); |
950 | BUG_ON(ret); | 1131 | BUG_ON(ret); |
951 | 1132 | ||
952 | out_nowrite: | 1133 | out: |
953 | btrfs_release_path(path); | 1134 | btrfs_release_path(path); |
954 | iput(dir); | 1135 | iput(dir); |
955 | iput(inode); | 1136 | iput(inode); |
@@ -966,25 +1147,55 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans, | |||
966 | return ret; | 1147 | return ret; |
967 | } | 1148 | } |
968 | 1149 | ||
1150 | static int count_inode_extrefs(struct btrfs_root *root, | ||
1151 | struct inode *inode, struct btrfs_path *path) | ||
1152 | { | ||
1153 | int ret = 0; | ||
1154 | int name_len; | ||
1155 | unsigned int nlink = 0; | ||
1156 | u32 item_size; | ||
1157 | u32 cur_offset = 0; | ||
1158 | u64 inode_objectid = btrfs_ino(inode); | ||
1159 | u64 offset = 0; | ||
1160 | unsigned long ptr; | ||
1161 | struct btrfs_inode_extref *extref; | ||
1162 | struct extent_buffer *leaf; | ||
969 | 1163 | ||
970 | /* | 1164 | while (1) { |
971 | * There are a few corners where the link count of the file can't | 1165 | ret = btrfs_find_one_extref(root, inode_objectid, offset, path, |
972 | * be properly maintained during replay. So, instead of adding | 1166 | &extref, &offset); |
973 | * lots of complexity to the log code, we just scan the backrefs | 1167 | if (ret) |
974 | * for any file that has been through replay. | 1168 | break; |
975 | * | 1169 | |
976 | * The scan will update the link count on the inode to reflect the | 1170 | leaf = path->nodes[0]; |
977 | * number of back refs found. If it goes down to zero, the iput | 1171 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
978 | * will free the inode. | 1172 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); |
979 | */ | 1173 | |
980 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | 1174 | while (cur_offset < item_size) { |
981 | struct btrfs_root *root, | 1175 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); |
982 | struct inode *inode) | 1176 | name_len = btrfs_inode_extref_name_len(leaf, extref); |
1177 | |||
1178 | nlink++; | ||
1179 | |||
1180 | cur_offset += name_len + sizeof(*extref); | ||
1181 | } | ||
1182 | |||
1183 | offset++; | ||
1184 | btrfs_release_path(path); | ||
1185 | } | ||
1186 | btrfs_release_path(path); | ||
1187 | |||
1188 | if (ret < 0) | ||
1189 | return ret; | ||
1190 | return nlink; | ||
1191 | } | ||
1192 | |||
1193 | static int count_inode_refs(struct btrfs_root *root, | ||
1194 | struct inode *inode, struct btrfs_path *path) | ||
983 | { | 1195 | { |
984 | struct btrfs_path *path; | ||
985 | int ret; | 1196 | int ret; |
986 | struct btrfs_key key; | 1197 | struct btrfs_key key; |
987 | u64 nlink = 0; | 1198 | unsigned int nlink = 0; |
988 | unsigned long ptr; | 1199 | unsigned long ptr; |
989 | unsigned long ptr_end; | 1200 | unsigned long ptr_end; |
990 | int name_len; | 1201 | int name_len; |
@@ -994,10 +1205,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
994 | key.type = BTRFS_INODE_REF_KEY; | 1205 | key.type = BTRFS_INODE_REF_KEY; |
995 | key.offset = (u64)-1; | 1206 | key.offset = (u64)-1; |
996 | 1207 | ||
997 | path = btrfs_alloc_path(); | ||
998 | if (!path) | ||
999 | return -ENOMEM; | ||
1000 | |||
1001 | while (1) { | 1208 | while (1) { |
1002 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 1209 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
1003 | if (ret < 0) | 1210 | if (ret < 0) |
@@ -1031,6 +1238,50 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
1031 | btrfs_release_path(path); | 1238 | btrfs_release_path(path); |
1032 | } | 1239 | } |
1033 | btrfs_release_path(path); | 1240 | btrfs_release_path(path); |
1241 | |||
1242 | return nlink; | ||
1243 | } | ||
1244 | |||
1245 | /* | ||
1246 | * There are a few corners where the link count of the file can't | ||
1247 | * be properly maintained during replay. So, instead of adding | ||
1248 | * lots of complexity to the log code, we just scan the backrefs | ||
1249 | * for any file that has been through replay. | ||
1250 | * | ||
1251 | * The scan will update the link count on the inode to reflect the | ||
1252 | * number of back refs found. If it goes down to zero, the iput | ||
1253 | * will free the inode. | ||
1254 | */ | ||
1255 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | ||
1256 | struct btrfs_root *root, | ||
1257 | struct inode *inode) | ||
1258 | { | ||
1259 | struct btrfs_path *path; | ||
1260 | int ret; | ||
1261 | u64 nlink = 0; | ||
1262 | u64 ino = btrfs_ino(inode); | ||
1263 | |||
1264 | path = btrfs_alloc_path(); | ||
1265 | if (!path) | ||
1266 | return -ENOMEM; | ||
1267 | |||
1268 | ret = count_inode_refs(root, inode, path); | ||
1269 | if (ret < 0) | ||
1270 | goto out; | ||
1271 | |||
1272 | nlink = ret; | ||
1273 | |||
1274 | ret = count_inode_extrefs(root, inode, path); | ||
1275 | if (ret == -ENOENT) | ||
1276 | ret = 0; | ||
1277 | |||
1278 | if (ret < 0) | ||
1279 | goto out; | ||
1280 | |||
1281 | nlink += ret; | ||
1282 | |||
1283 | ret = 0; | ||
1284 | |||
1034 | if (nlink != inode->i_nlink) { | 1285 | if (nlink != inode->i_nlink) { |
1035 | set_nlink(inode, nlink); | 1286 | set_nlink(inode, nlink); |
1036 | btrfs_update_inode(trans, root, inode); | 1287 | btrfs_update_inode(trans, root, inode); |
@@ -1046,9 +1297,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
1046 | ret = insert_orphan_item(trans, root, ino); | 1297 | ret = insert_orphan_item(trans, root, ino); |
1047 | BUG_ON(ret); | 1298 | BUG_ON(ret); |
1048 | } | 1299 | } |
1049 | btrfs_free_path(path); | ||
1050 | 1300 | ||
1051 | return 0; | 1301 | out: |
1302 | btrfs_free_path(path); | ||
1303 | return ret; | ||
1052 | } | 1304 | } |
1053 | 1305 | ||
1054 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | 1306 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, |
@@ -1695,6 +1947,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1695 | ret = add_inode_ref(wc->trans, root, log, path, | 1947 | ret = add_inode_ref(wc->trans, root, log, path, |
1696 | eb, i, &key); | 1948 | eb, i, &key); |
1697 | BUG_ON(ret && ret != -ENOENT); | 1949 | BUG_ON(ret && ret != -ENOENT); |
1950 | } else if (key.type == BTRFS_INODE_EXTREF_KEY) { | ||
1951 | ret = add_inode_ref(wc->trans, root, log, path, | ||
1952 | eb, i, &key); | ||
1953 | BUG_ON(ret && ret != -ENOENT); | ||
1698 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { | 1954 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { |
1699 | ret = replay_one_extent(wc->trans, root, path, | 1955 | ret = replay_one_extent(wc->trans, root, path, |
1700 | eb, i, &key); | 1956 | eb, i, &key); |
@@ -2037,7 +2293,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2037 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2293 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
2038 | wait_log_commit(trans, root, root->log_transid - 1); | 2294 | wait_log_commit(trans, root, root->log_transid - 1); |
2039 | while (1) { | 2295 | while (1) { |
2040 | unsigned long batch = root->log_batch; | 2296 | int batch = atomic_read(&root->log_batch); |
2041 | /* when we're on an ssd, just kick the log commit out */ | 2297 | /* when we're on an ssd, just kick the log commit out */ |
2042 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | 2298 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { |
2043 | mutex_unlock(&root->log_mutex); | 2299 | mutex_unlock(&root->log_mutex); |
@@ -2045,7 +2301,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2045 | mutex_lock(&root->log_mutex); | 2301 | mutex_lock(&root->log_mutex); |
2046 | } | 2302 | } |
2047 | wait_for_writer(trans, root); | 2303 | wait_for_writer(trans, root); |
2048 | if (batch == root->log_batch) | 2304 | if (batch == atomic_read(&root->log_batch)) |
2049 | break; | 2305 | break; |
2050 | } | 2306 | } |
2051 | 2307 | ||
@@ -2074,7 +2330,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2074 | 2330 | ||
2075 | btrfs_set_root_node(&log->root_item, log->node); | 2331 | btrfs_set_root_node(&log->root_item, log->node); |
2076 | 2332 | ||
2077 | root->log_batch = 0; | ||
2078 | root->log_transid++; | 2333 | root->log_transid++; |
2079 | log->log_transid = root->log_transid; | 2334 | log->log_transid = root->log_transid; |
2080 | root->log_start_pid = 0; | 2335 | root->log_start_pid = 0; |
@@ -2087,7 +2342,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2087 | mutex_unlock(&root->log_mutex); | 2342 | mutex_unlock(&root->log_mutex); |
2088 | 2343 | ||
2089 | mutex_lock(&log_root_tree->log_mutex); | 2344 | mutex_lock(&log_root_tree->log_mutex); |
2090 | log_root_tree->log_batch++; | 2345 | atomic_inc(&log_root_tree->log_batch); |
2091 | atomic_inc(&log_root_tree->log_writers); | 2346 | atomic_inc(&log_root_tree->log_writers); |
2092 | mutex_unlock(&log_root_tree->log_mutex); | 2347 | mutex_unlock(&log_root_tree->log_mutex); |
2093 | 2348 | ||
@@ -2157,7 +2412,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2157 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, | 2412 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, |
2158 | btrfs_header_level(log_root_tree->node)); | 2413 | btrfs_header_level(log_root_tree->node)); |
2159 | 2414 | ||
2160 | log_root_tree->log_batch = 0; | ||
2161 | log_root_tree->log_transid++; | 2415 | log_root_tree->log_transid++; |
2162 | smp_mb(); | 2416 | smp_mb(); |
2163 | 2417 | ||
@@ -2171,9 +2425,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2171 | * in and cause problems either. | 2425 | * in and cause problems either. |
2172 | */ | 2426 | */ |
2173 | btrfs_scrub_pause_super(root); | 2427 | btrfs_scrub_pause_super(root); |
2174 | write_ctree_super(trans, root->fs_info->tree_root, 1); | 2428 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
2175 | btrfs_scrub_continue_super(root); | 2429 | btrfs_scrub_continue_super(root); |
2176 | ret = 0; | 2430 | if (ret) { |
2431 | btrfs_abort_transaction(trans, root, ret); | ||
2432 | goto out_wake_log_root; | ||
2433 | } | ||
2177 | 2434 | ||
2178 | mutex_lock(&root->log_mutex); | 2435 | mutex_lock(&root->log_mutex); |
2179 | if (root->last_log_commit < log_transid) | 2436 | if (root->last_log_commit < log_transid) |
@@ -2209,7 +2466,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans, | |||
2209 | 2466 | ||
2210 | while (1) { | 2467 | while (1) { |
2211 | ret = find_first_extent_bit(&log->dirty_log_pages, | 2468 | ret = find_first_extent_bit(&log->dirty_log_pages, |
2212 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | 2469 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, |
2470 | NULL); | ||
2213 | if (ret) | 2471 | if (ret) |
2214 | break; | 2472 | break; |
2215 | 2473 | ||
@@ -2646,6 +2904,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2646 | int ret; | 2904 | int ret; |
2647 | struct btrfs_key key; | 2905 | struct btrfs_key key; |
2648 | struct btrfs_key found_key; | 2906 | struct btrfs_key found_key; |
2907 | int start_slot; | ||
2649 | 2908 | ||
2650 | key.objectid = objectid; | 2909 | key.objectid = objectid; |
2651 | key.type = max_key_type; | 2910 | key.type = max_key_type; |
@@ -2667,8 +2926,18 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2667 | if (found_key.objectid != objectid) | 2926 | if (found_key.objectid != objectid) |
2668 | break; | 2927 | break; |
2669 | 2928 | ||
2670 | ret = btrfs_del_item(trans, log, path); | 2929 | found_key.offset = 0; |
2671 | if (ret) | 2930 | found_key.type = 0; |
2931 | ret = btrfs_bin_search(path->nodes[0], &found_key, 0, | ||
2932 | &start_slot); | ||
2933 | |||
2934 | ret = btrfs_del_items(trans, log, path, start_slot, | ||
2935 | path->slots[0] - start_slot + 1); | ||
2936 | /* | ||
2937 | * If start slot isn't 0 then we don't need to re-search, we've | ||
2938 | * found the last guy with the objectid in this tree. | ||
2939 | */ | ||
2940 | if (ret || start_slot != 0) | ||
2672 | break; | 2941 | break; |
2673 | btrfs_release_path(path); | 2942 | btrfs_release_path(path); |
2674 | } | 2943 | } |
@@ -2678,14 +2947,64 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2678 | return ret; | 2947 | return ret; |
2679 | } | 2948 | } |
2680 | 2949 | ||
/*
 * Populate the inode item @item inside @leaf from the in-memory @inode.
 *
 * Ownership, mode, link count, the three timestamps, byte count, sequence
 * (taken from i_version), rdev and flags are copied from @inode; the item's
 * transid is set to @trans->transid and its block group to 0.
 *
 * @log_inode_only: when non-zero the item's generation and size are both
 * written as 0; otherwise they are taken from BTRFS_I(inode)->generation
 * and inode->i_size.
 */
2950 | static void fill_inode_item(struct btrfs_trans_handle *trans, | ||
2951 | struct extent_buffer *leaf, | ||
2952 | struct btrfs_inode_item *item, | ||
2953 | struct inode *inode, int log_inode_only) | ||
2954 | { | ||
2955 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | ||
2956 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | ||
2957 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | ||
2958 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | ||
2959 | |||
2960 | btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), | ||
2961 | inode->i_atime.tv_sec); | ||
2962 | btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), | ||
2963 | inode->i_atime.tv_nsec); | ||
2964 | |||
2965 | btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), | ||
2966 | inode->i_mtime.tv_sec); | ||
2967 | btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), | ||
2968 | inode->i_mtime.tv_nsec); | ||
2969 | |||
2970 | btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), | ||
2971 | inode->i_ctime.tv_sec); | ||
2972 | btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), | ||
2973 | inode->i_ctime.tv_nsec); | ||
2974 | |||
2975 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | ||
2976 | |||
2977 | btrfs_set_inode_sequence(leaf, item, inode->i_version); | ||
2978 | btrfs_set_inode_transid(leaf, item, trans->transid); | ||
2979 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | ||
2980 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | ||
2981 | btrfs_set_inode_block_group(leaf, item, 0); | ||
2982 | |||
2983 | if (log_inode_only) { | ||
2984 | /* set the generation to zero so the recover code | ||
2985 | * can tell the difference between a logging | ||
2986 | * just to say 'this inode exists' and a logging | ||
2987 | * to say 'update this inode with these values' | ||
2988 | */ | ||
2989 | btrfs_set_inode_generation(leaf, item, 0); | ||
2990 | btrfs_set_inode_size(leaf, item, 0); | ||
2991 | } else { | ||
2992 | btrfs_set_inode_generation(leaf, item, | ||
2993 | BTRFS_I(inode)->generation); | ||
2994 | btrfs_set_inode_size(leaf, item, inode->i_size); | ||
2995 | } | ||
2996 | |||
2997 | } | ||
2998 | |||
2681 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2999 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
2682 | struct btrfs_root *log, | 3000 | struct inode *inode, |
2683 | struct btrfs_path *dst_path, | 3001 | struct btrfs_path *dst_path, |
2684 | struct extent_buffer *src, | 3002 | struct extent_buffer *src, |
2685 | int start_slot, int nr, int inode_only) | 3003 | int start_slot, int nr, int inode_only) |
2686 | { | 3004 | { |
2687 | unsigned long src_offset; | 3005 | unsigned long src_offset; |
2688 | unsigned long dst_offset; | 3006 | unsigned long dst_offset; |
3007 | struct btrfs_root *log = BTRFS_I(inode)->root->log_root; | ||
2689 | struct btrfs_file_extent_item *extent; | 3008 | struct btrfs_file_extent_item *extent; |
2690 | struct btrfs_inode_item *inode_item; | 3009 | struct btrfs_inode_item *inode_item; |
2691 | int ret; | 3010 | int ret; |
@@ -2694,6 +3013,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2694 | char *ins_data; | 3013 | char *ins_data; |
2695 | int i; | 3014 | int i; |
2696 | struct list_head ordered_sums; | 3015 | struct list_head ordered_sums; |
3016 | int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
2697 | 3017 | ||
2698 | INIT_LIST_HEAD(&ordered_sums); | 3018 | INIT_LIST_HEAD(&ordered_sums); |
2699 | 3019 | ||
@@ -2722,29 +3042,23 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2722 | 3042 | ||
2723 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); | 3043 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); |
2724 | 3044 | ||
2725 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, | 3045 | if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { |
2726 | src_offset, ins_sizes[i]); | ||
2727 | |||
2728 | if (inode_only == LOG_INODE_EXISTS && | ||
2729 | ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { | ||
2730 | inode_item = btrfs_item_ptr(dst_path->nodes[0], | 3046 | inode_item = btrfs_item_ptr(dst_path->nodes[0], |
2731 | dst_path->slots[0], | 3047 | dst_path->slots[0], |
2732 | struct btrfs_inode_item); | 3048 | struct btrfs_inode_item); |
2733 | btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); | 3049 | fill_inode_item(trans, dst_path->nodes[0], inode_item, |
2734 | 3050 | inode, inode_only == LOG_INODE_EXISTS); | |
2735 | /* set the generation to zero so the recover code | 3051 | } else { |
2736 | * can tell the difference between an logging | 3052 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, |
2737 | * just to say 'this inode exists' and a logging | 3053 | src_offset, ins_sizes[i]); |
2738 | * to say 'update this inode with these values' | ||
2739 | */ | ||
2740 | btrfs_set_inode_generation(dst_path->nodes[0], | ||
2741 | inode_item, 0); | ||
2742 | } | 3054 | } |
3055 | |||
2743 | /* take a reference on file data extents so that truncates | 3056 | /* take a reference on file data extents so that truncates |
2744 | * or deletes of this inode don't have to relog the inode | 3057 | * or deletes of this inode don't have to relog the inode |
2745 | * again | 3058 | * again |
2746 | */ | 3059 | */ |
2747 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) { | 3060 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && |
3061 | !skip_csum) { | ||
2748 | int found_type; | 3062 | int found_type; |
2749 | extent = btrfs_item_ptr(src, start_slot + i, | 3063 | extent = btrfs_item_ptr(src, start_slot + i, |
2750 | struct btrfs_file_extent_item); | 3064 | struct btrfs_file_extent_item); |
@@ -2753,8 +3067,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2753 | continue; | 3067 | continue; |
2754 | 3068 | ||
2755 | found_type = btrfs_file_extent_type(src, extent); | 3069 | found_type = btrfs_file_extent_type(src, extent); |
2756 | if (found_type == BTRFS_FILE_EXTENT_REG || | 3070 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
2757 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
2758 | u64 ds, dl, cs, cl; | 3071 | u64 ds, dl, cs, cl; |
2759 | ds = btrfs_file_extent_disk_bytenr(src, | 3072 | ds = btrfs_file_extent_disk_bytenr(src, |
2760 | extent); | 3073 | extent); |
@@ -2803,6 +3116,239 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2803 | return ret; | 3116 | return ret; |
2804 | } | 3117 | } |
2805 | 3118 | ||
/*
 * list_sort() comparator for extent maps linked through their ->list member.
 *
 * Orders entries by ascending em->start: returns -1 when @a starts before @b,
 * 1 when it starts after, and 0 when both start at the same offset.
 * @priv is not used.
 */
3119 | static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
3120 | { | ||
3121 | struct extent_map *em1, *em2; | ||
3122 | |||
3123 | em1 = list_entry(a, struct extent_map, list); | ||
3124 | em2 = list_entry(b, struct extent_map, list); | ||
3125 | |||
3126 | if (em1->start < em2->start) | ||
3127 | return -1; | ||
3128 | else if (em1->start > em2->start) | ||
3129 | return 1; | ||
3130 | return 0; | ||
3131 | } | ||
3132 | |||
/*
 * Batching state carried between btrfs_log_changed_extents() and
 * log_one_extent() so that runs of adjacent leaf slots can be handed to
 * copy_items() in a single call.
 *
 * src:         leaf the pending items will be copied from
 * next_offset: file offset just past the last extent item accounted for
 *              (key.offset + num_bytes of that item)
 * start_slot:  first slot in @src of the pending run
 * nr:          number of consecutive slots pending; 0 means nothing pending
 */
3133 | struct log_args { | ||
3134 | struct extent_buffer *src; | ||
3135 | u64 next_offset; | ||
3136 | int start_slot; | ||
3137 | int nr; | ||
3138 | }; | ||
3139 | |||
/*
 * Queue the file extent items that back the modified range of @em
 * (em->mod_start, em->mod_len) for copying into the log tree.
 *
 * The items are looked up in @root with a read-only search (no transaction
 * handle, no insert/COW) and accumulated in @args as a run of consecutive
 * slots. A run is flushed through copy_items() when a non-adjacent slot is
 * encountered or when the end of the current leaf is reached.
 *
 * On a successful return @args->nr may still be non-zero with @path left
 * positioned in @args->src; the caller is expected to flush that remainder.
 *
 * Returns 0 on success, -ENOENT when no extent item covering the range can
 * be found for this inode, or the error returned by __btrfs_drop_extents(),
 * btrfs_search_slot() or copy_items().
 */
3140 | static int log_one_extent(struct btrfs_trans_handle *trans, | ||
3141 | struct inode *inode, struct btrfs_root *root, | ||
3142 | struct extent_map *em, struct btrfs_path *path, | ||
3143 | struct btrfs_path *dst_path, struct log_args *args) | ||
3144 | { | ||
3145 | struct btrfs_root *log = root->log_root; | ||
3146 | struct btrfs_file_extent_item *fi; | ||
3147 | struct btrfs_key key; | ||
3148 | u64 start = em->mod_start; | ||
3149 | u64 search_start = start; | ||
3150 | u64 len = em->mod_len; | ||
3151 | u64 num_bytes; | ||
3152 | int nritems; | ||
3153 | int ret; | ||
3154 | |||
/*
 * The inode was already logged in this transaction, so the log tree may
 * hold extent items overlapping this range; drop them before re-logging.
 */
3155 | if (BTRFS_I(inode)->logged_trans == trans->transid) { | ||
3156 | ret = __btrfs_drop_extents(trans, log, inode, dst_path, start, | ||
3157 | start + len, NULL, 0); | ||
3158 | if (ret) | ||
3159 | return ret; | ||
3160 | } | ||
3161 | |||
3162 | while (len) { | ||
/*
 * A run is still pending from an earlier call: skip the search and carry
 * on from wherever @path was left (relies on the caller not having
 * released @path in between).
 */
3163 | if (args->nr) | ||
3164 | goto next_slot; | ||
3165 | again: | ||
3166 | key.objectid = btrfs_ino(inode); | ||
3167 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3168 | key.offset = search_start; | ||
3169 | |||
3170 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
3171 | if (ret < 0) | ||
3172 | return ret; | ||
3173 | |||
3174 | if (ret) { | ||
3175 | /* | ||
3176 | * A rare case where we can have an em for a section of a | ||
3177 | * larger extent so we need to make sure that this em | ||
3178 | * falls within the extent we've found. If not we just | ||
3179 | * bail and go back to ye-olde way of doing things but | ||
3180 | * it happens often enough in testing that we need to do | ||
3181 | * this dance to make sure. | ||
3182 | */ | ||
3183 | do { | ||
3184 | if (path->slots[0] == 0) { | ||
3185 | btrfs_release_path(path); | ||
3186 | if (search_start == 0) | ||
3187 | return -ENOENT; | ||
3188 | search_start--; | ||
3189 | goto again; | ||
3190 | } | ||
3191 | |||
3192 | path->slots[0]--; | ||
3193 | btrfs_item_key_to_cpu(path->nodes[0], &key, | ||
3194 | path->slots[0]); | ||
3195 | if (key.objectid != btrfs_ino(inode) || | ||
3196 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
3197 | btrfs_release_path(path); | ||
3198 | return -ENOENT; | ||
3199 | } | ||
3200 | } while (key.offset > start); | ||
3201 | |||
3202 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
3203 | struct btrfs_file_extent_item); | ||
3204 | num_bytes = btrfs_file_extent_num_bytes(path->nodes[0], | ||
3205 | fi); | ||
/* The item found ends at or before our range: nothing covers @start. */
3206 | if (key.offset + num_bytes <= start) { | ||
3207 | btrfs_release_path(path); | ||
3208 | return -ENOENT; | ||
3209 | } | ||
3210 | } | ||
3211 | args->src = path->nodes[0]; | ||
3212 | next_slot: | ||
3213 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
3214 | fi = btrfs_item_ptr(args->src, path->slots[0], | ||
3215 | struct btrfs_file_extent_item); | ||
/*
 * Extend the pending run if this slot directly follows it; otherwise flush
 * the run and start a new one at this slot.
 */
3216 | if (args->nr && | ||
3217 | args->start_slot + args->nr == path->slots[0]) { | ||
3218 | args->nr++; | ||
3219 | } else if (args->nr) { | ||
3220 | ret = copy_items(trans, inode, dst_path, args->src, | ||
3221 | args->start_slot, args->nr, | ||
3222 | LOG_INODE_ALL); | ||
3223 | if (ret) | ||
3224 | return ret; | ||
3225 | args->nr = 1; | ||
3226 | args->start_slot = path->slots[0]; | ||
3227 | } else if (!args->nr) { | ||
3228 | args->nr = 1; | ||
3229 | args->start_slot = path->slots[0]; | ||
3230 | } | ||
3231 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
3232 | path->slots[0]++; | ||
3233 | num_bytes = btrfs_file_extent_num_bytes(args->src, fi); | ||
3234 | if (len < num_bytes) { | ||
3235 | /* I _think_ this is ok, envision we write to a | ||
3236 | * preallocated space that is adjacent to a previously | ||
3237 | * written preallocated space that gets merged when we | ||
3238 | * mark this preallocated space written. If we do not | ||
3239 | * have the adjacent extent in cache then when we copy | ||
3240 | * this extent it could end up being larger than our EM | ||
3241 | * thinks it is, which is a-ok, so just set len to 0. | ||
3242 | */ | ||
3243 | len = 0; | ||
3244 | } else { | ||
3245 | len -= num_bytes; | ||
3246 | } | ||
3247 | start = key.offset + num_bytes; | ||
3248 | args->next_offset = start; | ||
3249 | search_start = start; | ||
3250 | |||
/*
 * More slots remain in this leaf: keep walking while the range is not yet
 * covered, otherwise stop and leave the run pending for the caller.
 */
3251 | if (path->slots[0] < nritems) { | ||
3252 | if (len) | ||
3253 | goto next_slot; | ||
3254 | break; | ||
3255 | } | ||
3256 | |||
/* End of the leaf reached: flush the run before @path is released. */
3257 | if (args->nr) { | ||
3258 | ret = copy_items(trans, inode, dst_path, args->src, | ||
3259 | args->start_slot, args->nr, | ||
3260 | LOG_INODE_ALL); | ||
3261 | if (ret) | ||
3262 | return ret; | ||
3263 | args->nr = 0; | ||
3264 | btrfs_release_path(path); | ||
3265 | } | ||
3266 | } | ||
3267 | |||
3268 | return 0; | ||
3269 | } | ||
3270 | |||
/*
 * Log the extents of @inode that sit on its extent tree's modified_extents
 * list.
 *
 * Under the extent tree write lock every entry is unlinked from
 * modified_extents. Entries whose generation is newer than
 * fs_info->last_trans_committed are referenced, flagged with
 * EXTENT_FLAG_LOGGING and moved to a private list; older entries are simply
 * dropped from the list. The private list is sorted by start offset and each
 * entry is passed to log_one_extent() with the tree lock released around the
 * call.
 *
 * After the first error the remaining entries are only unlinked and
 * unreferenced. Any run of items still pending in the batching state is
 * flushed with copy_items() before returning, and @path is released.
 *
 * Returns 0 on success or the first error from copy_items() or
 * log_one_extent().
 */
3271 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | ||
3272 | struct btrfs_root *root, | ||
3273 | struct inode *inode, | ||
3274 | struct btrfs_path *path, | ||
3275 | struct btrfs_path *dst_path) | ||
3276 | { | ||
3277 | struct log_args args; | ||
3278 | struct extent_map *em, *n; | ||
3279 | struct list_head extents; | ||
3280 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
3281 | u64 test_gen; | ||
3282 | int ret = 0; | ||
3283 | |||
3284 | INIT_LIST_HEAD(&extents); | ||
3285 | |||
3286 | memset(&args, 0, sizeof(args)); | ||
3287 | |||
3288 | write_lock(&tree->lock); | ||
3289 | test_gen = root->fs_info->last_trans_committed; | ||
3290 | |||
3291 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { | ||
3292 | list_del_init(&em->list); | ||
/* Not newer than the last committed transaction: skip logging it. */
3293 | if (em->generation <= test_gen) | ||
3294 | continue; | ||
3295 | /* Need a ref to keep it from getting evicted from cache */ | ||
3296 | atomic_inc(&em->refs); | ||
3297 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
3298 | list_add_tail(&em->list, &extents); | ||
3299 | } | ||
3300 | |||
/* Process in ascending file offset so adjacent items can be batched. */
3301 | list_sort(NULL, &extents, extent_cmp); | ||
3302 | |||
3303 | while (!list_empty(&extents)) { | ||
3304 | em = list_entry(extents.next, struct extent_map, list); | ||
3305 | |||
3306 | list_del_init(&em->list); | ||
3307 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
3308 | |||
3309 | /* | ||
3310 | * If we had an error we just need to delete everybody from our | ||
3311 | * private list. | ||
3312 | */ | ||
3313 | if (ret) { | ||
3314 | free_extent_map(em); | ||
3315 | continue; | ||
3316 | } | ||
3317 | |||
/* The tree lock is not held across the tree search and item copy below. */
3318 | write_unlock(&tree->lock); | ||
3319 | |||
3320 | /* | ||
3321 | * If the previous EM and the last extent we left off on aren't | ||
3322 | * sequential then we need to copy the items we have and redo | ||
3323 | * our search | ||
3324 | */ | ||
3325 | if (args.nr && em->mod_start != args.next_offset) { | ||
3326 | ret = copy_items(trans, inode, dst_path, args.src, | ||
3327 | args.start_slot, args.nr, | ||
3328 | LOG_INODE_ALL); | ||
3329 | if (ret) { | ||
3330 | free_extent_map(em); | ||
3331 | write_lock(&tree->lock); | ||
3332 | continue; | ||
3333 | } | ||
3334 | btrfs_release_path(path); | ||
3335 | args.nr = 0; | ||
3336 | } | ||
3337 | |||
3338 | ret = log_one_extent(trans, inode, root, em, path, dst_path, &args); | ||
3339 | free_extent_map(em); | ||
3340 | write_lock(&tree->lock); | ||
3341 | } | ||
3342 | WARN_ON(!list_empty(&extents)); | ||
3343 | write_unlock(&tree->lock); | ||
3344 | |||
/* Flush whatever run log_one_extent() left pending. */
3345 | if (!ret && args.nr) | ||
3346 | ret = copy_items(trans, inode, dst_path, args.src, | ||
3347 | args.start_slot, args.nr, LOG_INODE_ALL); | ||
3348 | btrfs_release_path(path); | ||
3349 | return ret; | ||
3350 | } | ||
3351 | |||
2806 | /* log a single inode in the tree log. | 3352 | /* log a single inode in the tree log. |
2807 | * At least one parent directory for this inode must exist in the tree | 3353 | * At least one parent directory for this inode must exist in the tree |
2808 | * or be logged already. | 3354 | * or be logged already. |
@@ -2832,6 +3378,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2832 | int nritems; | 3378 | int nritems; |
2833 | int ins_start_slot = 0; | 3379 | int ins_start_slot = 0; |
2834 | int ins_nr; | 3380 | int ins_nr; |
3381 | bool fast_search = false; | ||
2835 | u64 ino = btrfs_ino(inode); | 3382 | u64 ino = btrfs_ino(inode); |
2836 | 3383 | ||
2837 | log = root->log_root; | 3384 | log = root->log_root; |
@@ -2851,21 +3398,23 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2851 | 3398 | ||
2852 | max_key.objectid = ino; | 3399 | max_key.objectid = ino; |
2853 | 3400 | ||
2854 | /* today the code can only do partial logging of directories */ | ||
2855 | if (!S_ISDIR(inode->i_mode)) | ||
2856 | inode_only = LOG_INODE_ALL; | ||
2857 | 3401 | ||
3402 | /* today the code can only do partial logging of directories */ | ||
2858 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 3403 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) |
2859 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3404 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
2860 | else | 3405 | else |
2861 | max_key.type = (u8)-1; | 3406 | max_key.type = (u8)-1; |
2862 | max_key.offset = (u64)-1; | 3407 | max_key.offset = (u64)-1; |
2863 | 3408 | ||
2864 | ret = btrfs_commit_inode_delayed_items(trans, inode); | 3409 | /* Only run delayed items if we are a dir or a new file */ |
2865 | if (ret) { | 3410 | if (S_ISDIR(inode->i_mode) || |
2866 | btrfs_free_path(path); | 3411 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { |
2867 | btrfs_free_path(dst_path); | 3412 | ret = btrfs_commit_inode_delayed_items(trans, inode); |
2868 | return ret; | 3413 | if (ret) { |
3414 | btrfs_free_path(path); | ||
3415 | btrfs_free_path(dst_path); | ||
3416 | return ret; | ||
3417 | } | ||
2869 | } | 3418 | } |
2870 | 3419 | ||
2871 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3420 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
@@ -2881,7 +3430,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2881 | max_key_type = BTRFS_XATTR_ITEM_KEY; | 3430 | max_key_type = BTRFS_XATTR_ITEM_KEY; |
2882 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 3431 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
2883 | } else { | 3432 | } else { |
2884 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 3433 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
3434 | &BTRFS_I(inode)->runtime_flags)) { | ||
3435 | ret = btrfs_truncate_inode_items(trans, log, | ||
3436 | inode, 0, 0); | ||
3437 | } else { | ||
3438 | fast_search = true; | ||
3439 | max_key.type = BTRFS_XATTR_ITEM_KEY; | ||
3440 | ret = drop_objectid_items(trans, log, path, ino, | ||
3441 | BTRFS_XATTR_ITEM_KEY); | ||
3442 | } | ||
2885 | } | 3443 | } |
2886 | if (ret) { | 3444 | if (ret) { |
2887 | err = ret; | 3445 | err = ret; |
@@ -2912,7 +3470,7 @@ again: | |||
2912 | goto next_slot; | 3470 | goto next_slot; |
2913 | } | 3471 | } |
2914 | 3472 | ||
2915 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 3473 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
2916 | ins_nr, inode_only); | 3474 | ins_nr, inode_only); |
2917 | if (ret) { | 3475 | if (ret) { |
2918 | err = ret; | 3476 | err = ret; |
@@ -2930,7 +3488,7 @@ next_slot: | |||
2930 | goto again; | 3488 | goto again; |
2931 | } | 3489 | } |
2932 | if (ins_nr) { | 3490 | if (ins_nr) { |
2933 | ret = copy_items(trans, log, dst_path, src, | 3491 | ret = copy_items(trans, inode, dst_path, src, |
2934 | ins_start_slot, | 3492 | ins_start_slot, |
2935 | ins_nr, inode_only); | 3493 | ins_nr, inode_only); |
2936 | if (ret) { | 3494 | if (ret) { |
@@ -2951,8 +3509,7 @@ next_slot: | |||
2951 | break; | 3509 | break; |
2952 | } | 3510 | } |
2953 | if (ins_nr) { | 3511 | if (ins_nr) { |
2954 | ret = copy_items(trans, log, dst_path, src, | 3512 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
2955 | ins_start_slot, | ||
2956 | ins_nr, inode_only); | 3513 | ins_nr, inode_only); |
2957 | if (ret) { | 3514 | if (ret) { |
2958 | err = ret; | 3515 | err = ret; |
@@ -2960,7 +3517,24 @@ next_slot: | |||
2960 | } | 3517 | } |
2961 | ins_nr = 0; | 3518 | ins_nr = 0; |
2962 | } | 3519 | } |
2963 | WARN_ON(ins_nr); | 3520 | |
3521 | if (fast_search) { | ||
3522 | btrfs_release_path(path); | ||
3523 | btrfs_release_path(dst_path); | ||
3524 | ret = btrfs_log_changed_extents(trans, root, inode, path, | ||
3525 | dst_path); | ||
3526 | if (ret) { | ||
3527 | err = ret; | ||
3528 | goto out_unlock; | ||
3529 | } | ||
3530 | } else { | ||
3531 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
3532 | struct extent_map *em, *n; | ||
3533 | |||
3534 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) | ||
3535 | list_del_init(&em->list); | ||
3536 | } | ||
3537 | |||
2964 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 3538 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
2965 | btrfs_release_path(path); | 3539 | btrfs_release_path(path); |
2966 | btrfs_release_path(dst_path); | 3540 | btrfs_release_path(dst_path); |
@@ -2971,6 +3545,7 @@ next_slot: | |||
2971 | } | 3545 | } |
2972 | } | 3546 | } |
2973 | BTRFS_I(inode)->logged_trans = trans->transid; | 3547 | BTRFS_I(inode)->logged_trans = trans->transid; |
3548 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | ||
2974 | out_unlock: | 3549 | out_unlock: |
2975 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 3550 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2976 | 3551 | ||
@@ -3138,7 +3713,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3138 | end_trans: | 3713 | end_trans: |
3139 | dput(old_parent); | 3714 | dput(old_parent); |
3140 | if (ret < 0) { | 3715 | if (ret < 0) { |
3141 | BUG_ON(ret != -ENOSPC); | 3716 | WARN_ON(ret != -ENOSPC); |
3142 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3717 | root->fs_info->last_trans_log_full_commit = trans->transid; |
3143 | ret = 1; | 3718 | ret = 1; |
3144 | } | 3719 | } |