about summary refs log tree commit diff stats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 889
1 files changed, 732 insertions, 157 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c86670f4f285..e9ebb472b28b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -18,13 +18,16 @@
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/list_sort.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "transaction.h" 23#include "transaction.h"
23#include "disk-io.h" 24#include "disk-io.h"
24#include "locking.h" 25#include "locking.h"
25#include "print-tree.h" 26#include "print-tree.h"
27#include "backref.h"
26#include "compat.h" 28#include "compat.h"
27#include "tree-log.h" 29#include "tree-log.h"
30#include "hash.h"
28 31
29/* magic values for the inode_only field in btrfs_log_inode: 32/* magic values for the inode_only field in btrfs_log_inode:
30 * 33 *
@@ -146,7 +149,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
146 root->log_multiple_pids = true; 149 root->log_multiple_pids = true;
147 } 150 }
148 151
149 root->log_batch++; 152 atomic_inc(&root->log_batch);
150 atomic_inc(&root->log_writers); 153 atomic_inc(&root->log_writers);
151 mutex_unlock(&root->log_mutex); 154 mutex_unlock(&root->log_mutex);
152 return 0; 155 return 0;
@@ -165,7 +168,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
165 err = ret; 168 err = ret;
166 } 169 }
167 mutex_unlock(&root->fs_info->tree_log_mutex); 170 mutex_unlock(&root->fs_info->tree_log_mutex);
168 root->log_batch++; 171 atomic_inc(&root->log_batch);
169 atomic_inc(&root->log_writers); 172 atomic_inc(&root->log_writers);
170 mutex_unlock(&root->log_mutex); 173 mutex_unlock(&root->log_mutex);
171 return err; 174 return err;
@@ -484,7 +487,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
484 int found_type; 487 int found_type;
485 u64 mask = root->sectorsize - 1; 488 u64 mask = root->sectorsize - 1;
486 u64 extent_end; 489 u64 extent_end;
487 u64 alloc_hint;
488 u64 start = key->offset; 490 u64 start = key->offset;
489 u64 saved_nbytes; 491 u64 saved_nbytes;
490 struct btrfs_file_extent_item *item; 492 struct btrfs_file_extent_item *item;
@@ -550,8 +552,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
550 552
551 saved_nbytes = inode_get_bytes(inode); 553 saved_nbytes = inode_get_bytes(inode);
552 /* drop any overlapping extents */ 554 /* drop any overlapping extents */
553 ret = btrfs_drop_extents(trans, inode, start, extent_end, 555 ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
554 &alloc_hint, 1);
555 BUG_ON(ret); 556 BUG_ON(ret);
556 557
557 if (found_type == BTRFS_FILE_EXTENT_REG || 558 if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -744,6 +745,7 @@ out:
744 */ 745 */
745static noinline int backref_in_log(struct btrfs_root *log, 746static noinline int backref_in_log(struct btrfs_root *log,
746 struct btrfs_key *key, 747 struct btrfs_key *key,
748 u64 ref_objectid,
747 char *name, int namelen) 749 char *name, int namelen)
748{ 750{
749 struct btrfs_path *path; 751 struct btrfs_path *path;
@@ -764,8 +766,17 @@ static noinline int backref_in_log(struct btrfs_root *log,
764 if (ret != 0) 766 if (ret != 0)
765 goto out; 767 goto out;
766 768
767 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
768 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 769 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
770
771 if (key->type == BTRFS_INODE_EXTREF_KEY) {
772 if (btrfs_find_name_in_ext_backref(path, ref_objectid,
773 name, namelen, NULL))
774 match = 1;
775
776 goto out;
777 }
778
779 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
769 ptr_end = ptr + item_size; 780 ptr_end = ptr + item_size;
770 while (ptr < ptr_end) { 781 while (ptr < ptr_end) {
771 ref = (struct btrfs_inode_ref *)ptr; 782 ref = (struct btrfs_inode_ref *)ptr;
@@ -786,91 +797,42 @@ out:
786 return match; 797 return match;
787} 798}
788 799
789 800static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
790/*
791 * replay one inode back reference item found in the log tree.
792 * eb, slot and key refer to the buffer and key found in the log tree.
793 * root is the destination we are replaying into, and path is for temp
794 * use by this function. (it should be released on return).
795 */
796static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
797 struct btrfs_root *root, 801 struct btrfs_root *root,
798 struct btrfs_root *log,
799 struct btrfs_path *path, 802 struct btrfs_path *path,
800 struct extent_buffer *eb, int slot, 803 struct btrfs_root *log_root,
801 struct btrfs_key *key) 804 struct inode *dir, struct inode *inode,
805 struct extent_buffer *eb,
806 u64 inode_objectid, u64 parent_objectid,
807 u64 ref_index, char *name, int namelen,
808 int *search_done)
802{ 809{
803 struct btrfs_inode_ref *ref;
804 struct btrfs_dir_item *di;
805 struct inode *dir;
806 struct inode *inode;
807 unsigned long ref_ptr;
808 unsigned long ref_end;
809 char *name;
810 int namelen;
811 int ret; 810 int ret;
812 int search_done = 0; 811 char *victim_name;
813 812 int victim_name_len;
814 /* 813 struct extent_buffer *leaf;
815 * it is possible that we didn't log all the parent directories 814 struct btrfs_dir_item *di;
816 * for a given inode. If we don't find the dir, just don't 815 struct btrfs_key search_key;
817 * copy the back ref in. The link count fixup code will take 816 struct btrfs_inode_extref *extref;
818 * care of the rest
819 */
820 dir = read_one_inode(root, key->offset);
821 if (!dir)
822 return -ENOENT;
823
824 inode = read_one_inode(root, key->objectid);
825 if (!inode) {
826 iput(dir);
827 return -EIO;
828 }
829
830 ref_ptr = btrfs_item_ptr_offset(eb, slot);
831 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
832 817
833again: 818again:
834 ref = (struct btrfs_inode_ref *)ref_ptr; 819 /* Search old style refs */
835 820 search_key.objectid = inode_objectid;
836 namelen = btrfs_inode_ref_name_len(eb, ref); 821 search_key.type = BTRFS_INODE_REF_KEY;
837 name = kmalloc(namelen, GFP_NOFS); 822 search_key.offset = parent_objectid;
838 BUG_ON(!name); 823 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
839
840 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
841
842 /* if we already have a perfect match, we're done */
843 if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
844 btrfs_inode_ref_index(eb, ref),
845 name, namelen)) {
846 goto out;
847 }
848
849 /*
850 * look for a conflicting back reference in the metadata.
851 * if we find one we have to unlink that name of the file
852 * before we add our new link. Later on, we overwrite any
853 * existing back reference, and we don't want to create
854 * dangling pointers in the directory.
855 */
856
857 if (search_done)
858 goto insert;
859
860 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
861 if (ret == 0) { 824 if (ret == 0) {
862 char *victim_name;
863 int victim_name_len;
864 struct btrfs_inode_ref *victim_ref; 825 struct btrfs_inode_ref *victim_ref;
865 unsigned long ptr; 826 unsigned long ptr;
866 unsigned long ptr_end; 827 unsigned long ptr_end;
867 struct extent_buffer *leaf = path->nodes[0]; 828
829 leaf = path->nodes[0];
868 830
869 /* are we trying to overwrite a back ref for the root directory 831 /* are we trying to overwrite a back ref for the root directory
870 * if so, just jump out, we're done 832 * if so, just jump out, we're done
871 */ 833 */
872 if (key->objectid == key->offset) 834 if (search_key.objectid == search_key.offset)
873 goto out_nowrite; 835 return 1;
874 836
875 /* check all the names in this back reference to see 837 /* check all the names in this back reference to see
876 * if they are in the log. if so, we allow them to stay 838 * if they are in the log. if so, we allow them to stay
@@ -889,7 +851,9 @@ again:
889 (unsigned long)(victim_ref + 1), 851 (unsigned long)(victim_ref + 1),
890 victim_name_len); 852 victim_name_len);
891 853
892 if (!backref_in_log(log, key, victim_name, 854 if (!backref_in_log(log_root, &search_key,
855 parent_objectid,
856 victim_name,
893 victim_name_len)) { 857 victim_name_len)) {
894 btrfs_inc_nlink(inode); 858 btrfs_inc_nlink(inode);
895 btrfs_release_path(path); 859 btrfs_release_path(path);
@@ -897,9 +861,14 @@ again:
897 ret = btrfs_unlink_inode(trans, root, dir, 861 ret = btrfs_unlink_inode(trans, root, dir,
898 inode, victim_name, 862 inode, victim_name,
899 victim_name_len); 863 victim_name_len);
864 BUG_ON(ret);
900 btrfs_run_delayed_items(trans, root); 865 btrfs_run_delayed_items(trans, root);
866 kfree(victim_name);
867 *search_done = 1;
868 goto again;
901 } 869 }
902 kfree(victim_name); 870 kfree(victim_name);
871
903 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 872 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
904 } 873 }
905 BUG_ON(ret); 874 BUG_ON(ret);
@@ -908,14 +877,78 @@ again:
908 * NOTE: we have searched root tree and checked the 877 * NOTE: we have searched root tree and checked the
909 * coresponding ref, it does not need to check again. 878 * coresponding ref, it does not need to check again.
910 */ 879 */
911 search_done = 1; 880 *search_done = 1;
881 }
882 btrfs_release_path(path);
883
884 /* Same search but for extended refs */
885 extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
886 inode_objectid, parent_objectid, 0,
887 0);
888 if (!IS_ERR_OR_NULL(extref)) {
889 u32 item_size;
890 u32 cur_offset = 0;
891 unsigned long base;
892 struct inode *victim_parent;
893
894 leaf = path->nodes[0];
895
896 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
897 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
898
899 while (cur_offset < item_size) {
900 extref = (struct btrfs_inode_extref *)base + cur_offset;
901
902 victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
903
904 if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
905 goto next;
906
907 victim_name = kmalloc(victim_name_len, GFP_NOFS);
908 read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
909 victim_name_len);
910
911 search_key.objectid = inode_objectid;
912 search_key.type = BTRFS_INODE_EXTREF_KEY;
913 search_key.offset = btrfs_extref_hash(parent_objectid,
914 victim_name,
915 victim_name_len);
916 ret = 0;
917 if (!backref_in_log(log_root, &search_key,
918 parent_objectid, victim_name,
919 victim_name_len)) {
920 ret = -ENOENT;
921 victim_parent = read_one_inode(root,
922 parent_objectid);
923 if (victim_parent) {
924 btrfs_inc_nlink(inode);
925 btrfs_release_path(path);
926
927 ret = btrfs_unlink_inode(trans, root,
928 victim_parent,
929 inode,
930 victim_name,
931 victim_name_len);
932 btrfs_run_delayed_items(trans, root);
933 }
934 BUG_ON(ret);
935 iput(victim_parent);
936 kfree(victim_name);
937 *search_done = 1;
938 goto again;
939 }
940 kfree(victim_name);
941 BUG_ON(ret);
942next:
943 cur_offset += victim_name_len + sizeof(*extref);
944 }
945 *search_done = 1;
912 } 946 }
913 btrfs_release_path(path); 947 btrfs_release_path(path);
914 948
915 /* look for a conflicting sequence number */ 949 /* look for a conflicting sequence number */
916 di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), 950 di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
917 btrfs_inode_ref_index(eb, ref), 951 ref_index, name, namelen, 0);
918 name, namelen, 0);
919 if (di && !IS_ERR(di)) { 952 if (di && !IS_ERR(di)) {
920 ret = drop_one_dir_item(trans, root, path, dir, di); 953 ret = drop_one_dir_item(trans, root, path, dir, di);
921 BUG_ON(ret); 954 BUG_ON(ret);
@@ -931,25 +964,173 @@ again:
931 } 964 }
932 btrfs_release_path(path); 965 btrfs_release_path(path);
933 966
934insert: 967 return 0;
935 /* insert our name */ 968}
936 ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
937 btrfs_inode_ref_index(eb, ref));
938 BUG_ON(ret);
939 969
940 btrfs_update_inode(trans, root, inode); 970static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
971 u32 *namelen, char **name, u64 *index,
972 u64 *parent_objectid)
973{
974 struct btrfs_inode_extref *extref;
941 975
942out: 976 extref = (struct btrfs_inode_extref *)ref_ptr;
943 ref_ptr = (unsigned long)(ref + 1) + namelen; 977
944 kfree(name); 978 *namelen = btrfs_inode_extref_name_len(eb, extref);
945 if (ref_ptr < ref_end) 979 *name = kmalloc(*namelen, GFP_NOFS);
946 goto again; 980 if (*name == NULL)
981 return -ENOMEM;
982
983 read_extent_buffer(eb, *name, (unsigned long)&extref->name,
984 *namelen);
985
986 *index = btrfs_inode_extref_index(eb, extref);
987 if (parent_objectid)
988 *parent_objectid = btrfs_inode_extref_parent(eb, extref);
989
990 return 0;
991}
992
993static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
994 u32 *namelen, char **name, u64 *index)
995{
996 struct btrfs_inode_ref *ref;
997
998 ref = (struct btrfs_inode_ref *)ref_ptr;
999
1000 *namelen = btrfs_inode_ref_name_len(eb, ref);
1001 *name = kmalloc(*namelen, GFP_NOFS);
1002 if (*name == NULL)
1003 return -ENOMEM;
1004
1005 read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
1006
1007 *index = btrfs_inode_ref_index(eb, ref);
1008
1009 return 0;
1010}
1011
1012/*
1013 * replay one inode back reference item found in the log tree.
1014 * eb, slot and key refer to the buffer and key found in the log tree.
1015 * root is the destination we are replaying into, and path is for temp
1016 * use by this function. (it should be released on return).
1017 */
1018static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1019 struct btrfs_root *root,
1020 struct btrfs_root *log,
1021 struct btrfs_path *path,
1022 struct extent_buffer *eb, int slot,
1023 struct btrfs_key *key)
1024{
1025 struct inode *dir;
1026 struct inode *inode;
1027 unsigned long ref_ptr;
1028 unsigned long ref_end;
1029 char *name;
1030 int namelen;
1031 int ret;
1032 int search_done = 0;
1033 int log_ref_ver = 0;
1034 u64 parent_objectid;
1035 u64 inode_objectid;
1036 u64 ref_index = 0;
1037 int ref_struct_size;
1038
1039 ref_ptr = btrfs_item_ptr_offset(eb, slot);
1040 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
1041
1042 if (key->type == BTRFS_INODE_EXTREF_KEY) {
1043 struct btrfs_inode_extref *r;
1044
1045 ref_struct_size = sizeof(struct btrfs_inode_extref);
1046 log_ref_ver = 1;
1047 r = (struct btrfs_inode_extref *)ref_ptr;
1048 parent_objectid = btrfs_inode_extref_parent(eb, r);
1049 } else {
1050 ref_struct_size = sizeof(struct btrfs_inode_ref);
1051 parent_objectid = key->offset;
1052 }
1053 inode_objectid = key->objectid;
1054
1055 /*
1056 * it is possible that we didn't log all the parent directories
1057 * for a given inode. If we don't find the dir, just don't
1058 * copy the back ref in. The link count fixup code will take
1059 * care of the rest
1060 */
1061 dir = read_one_inode(root, parent_objectid);
1062 if (!dir)
1063 return -ENOENT;
1064
1065 inode = read_one_inode(root, inode_objectid);
1066 if (!inode) {
1067 iput(dir);
1068 return -EIO;
1069 }
1070
1071 while (ref_ptr < ref_end) {
1072 if (log_ref_ver) {
1073 ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
1074 &ref_index, &parent_objectid);
1075 /*
1076 * parent object can change from one array
1077 * item to another.
1078 */
1079 if (!dir)
1080 dir = read_one_inode(root, parent_objectid);
1081 if (!dir)
1082 return -ENOENT;
1083 } else {
1084 ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
1085 &ref_index);
1086 }
1087 if (ret)
1088 return ret;
1089
1090 /* if we already have a perfect match, we're done */
1091 if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
1092 ref_index, name, namelen)) {
1093 /*
1094 * look for a conflicting back reference in the
1095 * metadata. if we find one we have to unlink that name
1096 * of the file before we add our new link. Later on, we
1097 * overwrite any existing back reference, and we don't
1098 * want to create dangling pointers in the directory.
1099 */
1100
1101 if (!search_done) {
1102 ret = __add_inode_ref(trans, root, path, log,
1103 dir, inode, eb,
1104 inode_objectid,
1105 parent_objectid,
1106 ref_index, name, namelen,
1107 &search_done);
1108 if (ret == 1)
1109 goto out;
1110 BUG_ON(ret);
1111 }
1112
1113 /* insert our name */
1114 ret = btrfs_add_link(trans, dir, inode, name, namelen,
1115 0, ref_index);
1116 BUG_ON(ret);
1117
1118 btrfs_update_inode(trans, root, inode);
1119 }
1120
1121 ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
1122 kfree(name);
1123 if (log_ref_ver) {
1124 iput(dir);
1125 dir = NULL;
1126 }
1127 }
947 1128
948 /* finally write the back reference in the inode */ 1129 /* finally write the back reference in the inode */
949 ret = overwrite_item(trans, root, path, eb, slot, key); 1130 ret = overwrite_item(trans, root, path, eb, slot, key);
950 BUG_ON(ret); 1131 BUG_ON(ret);
951 1132
952out_nowrite: 1133out:
953 btrfs_release_path(path); 1134 btrfs_release_path(path);
954 iput(dir); 1135 iput(dir);
955 iput(inode); 1136 iput(inode);
@@ -966,25 +1147,55 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
966 return ret; 1147 return ret;
967} 1148}
968 1149
1150static int count_inode_extrefs(struct btrfs_root *root,
1151 struct inode *inode, struct btrfs_path *path)
1152{
1153 int ret = 0;
1154 int name_len;
1155 unsigned int nlink = 0;
1156 u32 item_size;
1157 u32 cur_offset = 0;
1158 u64 inode_objectid = btrfs_ino(inode);
1159 u64 offset = 0;
1160 unsigned long ptr;
1161 struct btrfs_inode_extref *extref;
1162 struct extent_buffer *leaf;
969 1163
970/* 1164 while (1) {
971 * There are a few corners where the link count of the file can't 1165 ret = btrfs_find_one_extref(root, inode_objectid, offset, path,
972 * be properly maintained during replay. So, instead of adding 1166 &extref, &offset);
973 * lots of complexity to the log code, we just scan the backrefs 1167 if (ret)
974 * for any file that has been through replay. 1168 break;
975 * 1169
976 * The scan will update the link count on the inode to reflect the 1170 leaf = path->nodes[0];
977 * number of back refs found. If it goes down to zero, the iput 1171 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
978 * will free the inode. 1172 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
979 */ 1173
980static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, 1174 while (cur_offset < item_size) {
981 struct btrfs_root *root, 1175 extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
982 struct inode *inode) 1176 name_len = btrfs_inode_extref_name_len(leaf, extref);
1177
1178 nlink++;
1179
1180 cur_offset += name_len + sizeof(*extref);
1181 }
1182
1183 offset++;
1184 btrfs_release_path(path);
1185 }
1186 btrfs_release_path(path);
1187
1188 if (ret < 0)
1189 return ret;
1190 return nlink;
1191}
1192
1193static int count_inode_refs(struct btrfs_root *root,
1194 struct inode *inode, struct btrfs_path *path)
983{ 1195{
984 struct btrfs_path *path;
985 int ret; 1196 int ret;
986 struct btrfs_key key; 1197 struct btrfs_key key;
987 u64 nlink = 0; 1198 unsigned int nlink = 0;
988 unsigned long ptr; 1199 unsigned long ptr;
989 unsigned long ptr_end; 1200 unsigned long ptr_end;
990 int name_len; 1201 int name_len;
@@ -994,10 +1205,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
994 key.type = BTRFS_INODE_REF_KEY; 1205 key.type = BTRFS_INODE_REF_KEY;
995 key.offset = (u64)-1; 1206 key.offset = (u64)-1;
996 1207
997 path = btrfs_alloc_path();
998 if (!path)
999 return -ENOMEM;
1000
1001 while (1) { 1208 while (1) {
1002 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1209 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1003 if (ret < 0) 1210 if (ret < 0)
@@ -1031,6 +1238,50 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1031 btrfs_release_path(path); 1238 btrfs_release_path(path);
1032 } 1239 }
1033 btrfs_release_path(path); 1240 btrfs_release_path(path);
1241
1242 return nlink;
1243}
1244
1245/*
1246 * There are a few corners where the link count of the file can't
1247 * be properly maintained during replay. So, instead of adding
1248 * lots of complexity to the log code, we just scan the backrefs
1249 * for any file that has been through replay.
1250 *
1251 * The scan will update the link count on the inode to reflect the
1252 * number of back refs found. If it goes down to zero, the iput
1253 * will free the inode.
1254 */
1255static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1256 struct btrfs_root *root,
1257 struct inode *inode)
1258{
1259 struct btrfs_path *path;
1260 int ret;
1261 u64 nlink = 0;
1262 u64 ino = btrfs_ino(inode);
1263
1264 path = btrfs_alloc_path();
1265 if (!path)
1266 return -ENOMEM;
1267
1268 ret = count_inode_refs(root, inode, path);
1269 if (ret < 0)
1270 goto out;
1271
1272 nlink = ret;
1273
1274 ret = count_inode_extrefs(root, inode, path);
1275 if (ret == -ENOENT)
1276 ret = 0;
1277
1278 if (ret < 0)
1279 goto out;
1280
1281 nlink += ret;
1282
1283 ret = 0;
1284
1034 if (nlink != inode->i_nlink) { 1285 if (nlink != inode->i_nlink) {
1035 set_nlink(inode, nlink); 1286 set_nlink(inode, nlink);
1036 btrfs_update_inode(trans, root, inode); 1287 btrfs_update_inode(trans, root, inode);
@@ -1046,9 +1297,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1046 ret = insert_orphan_item(trans, root, ino); 1297 ret = insert_orphan_item(trans, root, ino);
1047 BUG_ON(ret); 1298 BUG_ON(ret);
1048 } 1299 }
1049 btrfs_free_path(path);
1050 1300
1051 return 0; 1301out:
1302 btrfs_free_path(path);
1303 return ret;
1052} 1304}
1053 1305
1054static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, 1306static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
@@ -1695,6 +1947,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1695 ret = add_inode_ref(wc->trans, root, log, path, 1947 ret = add_inode_ref(wc->trans, root, log, path,
1696 eb, i, &key); 1948 eb, i, &key);
1697 BUG_ON(ret && ret != -ENOENT); 1949 BUG_ON(ret && ret != -ENOENT);
1950 } else if (key.type == BTRFS_INODE_EXTREF_KEY) {
1951 ret = add_inode_ref(wc->trans, root, log, path,
1952 eb, i, &key);
1953 BUG_ON(ret && ret != -ENOENT);
1698 } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 1954 } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
1699 ret = replay_one_extent(wc->trans, root, path, 1955 ret = replay_one_extent(wc->trans, root, path,
1700 eb, i, &key); 1956 eb, i, &key);
@@ -2037,7 +2293,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2037 if (atomic_read(&root->log_commit[(index1 + 1) % 2])) 2293 if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
2038 wait_log_commit(trans, root, root->log_transid - 1); 2294 wait_log_commit(trans, root, root->log_transid - 1);
2039 while (1) { 2295 while (1) {
2040 unsigned long batch = root->log_batch; 2296 int batch = atomic_read(&root->log_batch);
2041 /* when we're on an ssd, just kick the log commit out */ 2297 /* when we're on an ssd, just kick the log commit out */
2042 if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { 2298 if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) {
2043 mutex_unlock(&root->log_mutex); 2299 mutex_unlock(&root->log_mutex);
@@ -2045,7 +2301,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2045 mutex_lock(&root->log_mutex); 2301 mutex_lock(&root->log_mutex);
2046 } 2302 }
2047 wait_for_writer(trans, root); 2303 wait_for_writer(trans, root);
2048 if (batch == root->log_batch) 2304 if (batch == atomic_read(&root->log_batch))
2049 break; 2305 break;
2050 } 2306 }
2051 2307
@@ -2074,7 +2330,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2074 2330
2075 btrfs_set_root_node(&log->root_item, log->node); 2331 btrfs_set_root_node(&log->root_item, log->node);
2076 2332
2077 root->log_batch = 0;
2078 root->log_transid++; 2333 root->log_transid++;
2079 log->log_transid = root->log_transid; 2334 log->log_transid = root->log_transid;
2080 root->log_start_pid = 0; 2335 root->log_start_pid = 0;
@@ -2087,7 +2342,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2087 mutex_unlock(&root->log_mutex); 2342 mutex_unlock(&root->log_mutex);
2088 2343
2089 mutex_lock(&log_root_tree->log_mutex); 2344 mutex_lock(&log_root_tree->log_mutex);
2090 log_root_tree->log_batch++; 2345 atomic_inc(&log_root_tree->log_batch);
2091 atomic_inc(&log_root_tree->log_writers); 2346 atomic_inc(&log_root_tree->log_writers);
2092 mutex_unlock(&log_root_tree->log_mutex); 2347 mutex_unlock(&log_root_tree->log_mutex);
2093 2348
@@ -2157,7 +2412,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2157 btrfs_set_super_log_root_level(root->fs_info->super_for_commit, 2412 btrfs_set_super_log_root_level(root->fs_info->super_for_commit,
2158 btrfs_header_level(log_root_tree->node)); 2413 btrfs_header_level(log_root_tree->node));
2159 2414
2160 log_root_tree->log_batch = 0;
2161 log_root_tree->log_transid++; 2415 log_root_tree->log_transid++;
2162 smp_mb(); 2416 smp_mb();
2163 2417
@@ -2171,9 +2425,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2171 * in and cause problems either. 2425 * in and cause problems either.
2172 */ 2426 */
2173 btrfs_scrub_pause_super(root); 2427 btrfs_scrub_pause_super(root);
2174 write_ctree_super(trans, root->fs_info->tree_root, 1); 2428 ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
2175 btrfs_scrub_continue_super(root); 2429 btrfs_scrub_continue_super(root);
2176 ret = 0; 2430 if (ret) {
2431 btrfs_abort_transaction(trans, root, ret);
2432 goto out_wake_log_root;
2433 }
2177 2434
2178 mutex_lock(&root->log_mutex); 2435 mutex_lock(&root->log_mutex);
2179 if (root->last_log_commit < log_transid) 2436 if (root->last_log_commit < log_transid)
@@ -2209,7 +2466,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2209 2466
2210 while (1) { 2467 while (1) {
2211 ret = find_first_extent_bit(&log->dirty_log_pages, 2468 ret = find_first_extent_bit(&log->dirty_log_pages,
2212 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); 2469 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
2470 NULL);
2213 if (ret) 2471 if (ret)
2214 break; 2472 break;
2215 2473
@@ -2646,6 +2904,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2646 int ret; 2904 int ret;
2647 struct btrfs_key key; 2905 struct btrfs_key key;
2648 struct btrfs_key found_key; 2906 struct btrfs_key found_key;
2907 int start_slot;
2649 2908
2650 key.objectid = objectid; 2909 key.objectid = objectid;
2651 key.type = max_key_type; 2910 key.type = max_key_type;
@@ -2667,8 +2926,18 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2667 if (found_key.objectid != objectid) 2926 if (found_key.objectid != objectid)
2668 break; 2927 break;
2669 2928
2670 ret = btrfs_del_item(trans, log, path); 2929 found_key.offset = 0;
2671 if (ret) 2930 found_key.type = 0;
2931 ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
2932 &start_slot);
2933
2934 ret = btrfs_del_items(trans, log, path, start_slot,
2935 path->slots[0] - start_slot + 1);
2936 /*
2937 * If start slot isn't 0 then we don't need to re-search, we've
2938 * found the last guy with the objectid in this tree.
2939 */
2940 if (ret || start_slot != 0)
2672 break; 2941 break;
2673 btrfs_release_path(path); 2942 btrfs_release_path(path);
2674 } 2943 }
@@ -2678,14 +2947,64 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2678 return ret; 2947 return ret;
2679} 2948}
2680 2949
2950static void fill_inode_item(struct btrfs_trans_handle *trans,
2951 struct extent_buffer *leaf,
2952 struct btrfs_inode_item *item,
2953 struct inode *inode, int log_inode_only)
2954{
2955 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2956 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2957 btrfs_set_inode_mode(leaf, item, inode->i_mode);
2958 btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
2959
2960 btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
2961 inode->i_atime.tv_sec);
2962 btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
2963 inode->i_atime.tv_nsec);
2964
2965 btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
2966 inode->i_mtime.tv_sec);
2967 btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
2968 inode->i_mtime.tv_nsec);
2969
2970 btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
2971 inode->i_ctime.tv_sec);
2972 btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
2973 inode->i_ctime.tv_nsec);
2974
2975 btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
2976
2977 btrfs_set_inode_sequence(leaf, item, inode->i_version);
2978 btrfs_set_inode_transid(leaf, item, trans->transid);
2979 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2980 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2981 btrfs_set_inode_block_group(leaf, item, 0);
2982
2983 if (log_inode_only) {
2984 /* set the generation to zero so the recover code
2985 * can tell the difference between an logging
2986 * just to say 'this inode exists' and a logging
2987 * to say 'update this inode with these values'
2988 */
2989 btrfs_set_inode_generation(leaf, item, 0);
2990 btrfs_set_inode_size(leaf, item, 0);
2991 } else {
2992 btrfs_set_inode_generation(leaf, item,
2993 BTRFS_I(inode)->generation);
2994 btrfs_set_inode_size(leaf, item, inode->i_size);
2995 }
2996
2997}
2998
2681static noinline int copy_items(struct btrfs_trans_handle *trans, 2999static noinline int copy_items(struct btrfs_trans_handle *trans,
2682 struct btrfs_root *log, 3000 struct inode *inode,
2683 struct btrfs_path *dst_path, 3001 struct btrfs_path *dst_path,
2684 struct extent_buffer *src, 3002 struct extent_buffer *src,
2685 int start_slot, int nr, int inode_only) 3003 int start_slot, int nr, int inode_only)
2686{ 3004{
2687 unsigned long src_offset; 3005 unsigned long src_offset;
2688 unsigned long dst_offset; 3006 unsigned long dst_offset;
3007 struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
2689 struct btrfs_file_extent_item *extent; 3008 struct btrfs_file_extent_item *extent;
2690 struct btrfs_inode_item *inode_item; 3009 struct btrfs_inode_item *inode_item;
2691 int ret; 3010 int ret;
@@ -2694,6 +3013,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2694 char *ins_data; 3013 char *ins_data;
2695 int i; 3014 int i;
2696 struct list_head ordered_sums; 3015 struct list_head ordered_sums;
3016 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
2697 3017
2698 INIT_LIST_HEAD(&ordered_sums); 3018 INIT_LIST_HEAD(&ordered_sums);
2699 3019
@@ -2722,29 +3042,23 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2722 3042
2723 src_offset = btrfs_item_ptr_offset(src, start_slot + i); 3043 src_offset = btrfs_item_ptr_offset(src, start_slot + i);
2724 3044
2725 copy_extent_buffer(dst_path->nodes[0], src, dst_offset, 3045 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
2726 src_offset, ins_sizes[i]);
2727
2728 if (inode_only == LOG_INODE_EXISTS &&
2729 ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
2730 inode_item = btrfs_item_ptr(dst_path->nodes[0], 3046 inode_item = btrfs_item_ptr(dst_path->nodes[0],
2731 dst_path->slots[0], 3047 dst_path->slots[0],
2732 struct btrfs_inode_item); 3048 struct btrfs_inode_item);
2733 btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); 3049 fill_inode_item(trans, dst_path->nodes[0], inode_item,
2734 3050 inode, inode_only == LOG_INODE_EXISTS);
2735 /* set the generation to zero so the recover code 3051 } else {
2736 * can tell the difference between an logging 3052 copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
2737 * just to say 'this inode exists' and a logging 3053 src_offset, ins_sizes[i]);
2738 * to say 'update this inode with these values'
2739 */
2740 btrfs_set_inode_generation(dst_path->nodes[0],
2741 inode_item, 0);
2742 } 3054 }
3055
2743 /* take a reference on file data extents so that truncates 3056 /* take a reference on file data extents so that truncates
2744 * or deletes of this inode don't have to relog the inode 3057 * or deletes of this inode don't have to relog the inode
2745 * again 3058 * again
2746 */ 3059 */
2747 if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) { 3060 if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY &&
3061 !skip_csum) {
2748 int found_type; 3062 int found_type;
2749 extent = btrfs_item_ptr(src, start_slot + i, 3063 extent = btrfs_item_ptr(src, start_slot + i,
2750 struct btrfs_file_extent_item); 3064 struct btrfs_file_extent_item);
@@ -2753,8 +3067,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2753 continue; 3067 continue;
2754 3068
2755 found_type = btrfs_file_extent_type(src, extent); 3069 found_type = btrfs_file_extent_type(src, extent);
2756 if (found_type == BTRFS_FILE_EXTENT_REG || 3070 if (found_type == BTRFS_FILE_EXTENT_REG) {
2757 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
2758 u64 ds, dl, cs, cl; 3071 u64 ds, dl, cs, cl;
2759 ds = btrfs_file_extent_disk_bytenr(src, 3072 ds = btrfs_file_extent_disk_bytenr(src,
2760 extent); 3073 extent);
@@ -2803,6 +3116,239 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2803 return ret; 3116 return ret;
2804} 3117}
2805 3118
3119static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
3120{
3121 struct extent_map *em1, *em2;
3122
3123 em1 = list_entry(a, struct extent_map, list);
3124 em2 = list_entry(b, struct extent_map, list);
3125
3126 if (em1->start < em2->start)
3127 return -1;
3128 else if (em1->start > em2->start)
3129 return 1;
3130 return 0;
3131}
3132
3133struct log_args {
3134 struct extent_buffer *src;
3135 u64 next_offset;
3136 int start_slot;
3137 int nr;
3138};
3139
3140static int log_one_extent(struct btrfs_trans_handle *trans,
3141 struct inode *inode, struct btrfs_root *root,
3142 struct extent_map *em, struct btrfs_path *path,
3143 struct btrfs_path *dst_path, struct log_args *args)
3144{
3145 struct btrfs_root *log = root->log_root;
3146 struct btrfs_file_extent_item *fi;
3147 struct btrfs_key key;
3148 u64 start = em->mod_start;
3149 u64 search_start = start;
3150 u64 len = em->mod_len;
3151 u64 num_bytes;
3152 int nritems;
3153 int ret;
3154
3155 if (BTRFS_I(inode)->logged_trans == trans->transid) {
3156 ret = __btrfs_drop_extents(trans, log, inode, dst_path, start,
3157 start + len, NULL, 0);
3158 if (ret)
3159 return ret;
3160 }
3161
3162 while (len) {
3163 if (args->nr)
3164 goto next_slot;
3165again:
3166 key.objectid = btrfs_ino(inode);
3167 key.type = BTRFS_EXTENT_DATA_KEY;
3168 key.offset = search_start;
3169
3170 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3171 if (ret < 0)
3172 return ret;
3173
3174 if (ret) {
3175 /*
3176 * A rare case were we can have an em for a section of a
3177 * larger extent so we need to make sure that this em
3178 * falls within the extent we've found. If not we just
3179 * bail and go back to ye-olde way of doing things but
3180 * it happens often enough in testing that we need to do
3181 * this dance to make sure.
3182 */
3183 do {
3184 if (path->slots[0] == 0) {
3185 btrfs_release_path(path);
3186 if (search_start == 0)
3187 return -ENOENT;
3188 search_start--;
3189 goto again;
3190 }
3191
3192 path->slots[0]--;
3193 btrfs_item_key_to_cpu(path->nodes[0], &key,
3194 path->slots[0]);
3195 if (key.objectid != btrfs_ino(inode) ||
3196 key.type != BTRFS_EXTENT_DATA_KEY) {
3197 btrfs_release_path(path);
3198 return -ENOENT;
3199 }
3200 } while (key.offset > start);
3201
3202 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3203 struct btrfs_file_extent_item);
3204 num_bytes = btrfs_file_extent_num_bytes(path->nodes[0],
3205 fi);
3206 if (key.offset + num_bytes <= start) {
3207 btrfs_release_path(path);
3208 return -ENOENT;
3209 }
3210 }
3211 args->src = path->nodes[0];
3212next_slot:
3213 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3214 fi = btrfs_item_ptr(args->src, path->slots[0],
3215 struct btrfs_file_extent_item);
3216 if (args->nr &&
3217 args->start_slot + args->nr == path->slots[0]) {
3218 args->nr++;
3219 } else if (args->nr) {
3220 ret = copy_items(trans, inode, dst_path, args->src,
3221 args->start_slot, args->nr,
3222 LOG_INODE_ALL);
3223 if (ret)
3224 return ret;
3225 args->nr = 1;
3226 args->start_slot = path->slots[0];
3227 } else if (!args->nr) {
3228 args->nr = 1;
3229 args->start_slot = path->slots[0];
3230 }
3231 nritems = btrfs_header_nritems(path->nodes[0]);
3232 path->slots[0]++;
3233 num_bytes = btrfs_file_extent_num_bytes(args->src, fi);
3234 if (len < num_bytes) {
3235 /* I _think_ this is ok, envision we write to a
3236 * preallocated space that is adjacent to a previously
3237 * written preallocated space that gets merged when we
3238 * mark this preallocated space written. If we do not
3239 * have the adjacent extent in cache then when we copy
3240 * this extent it could end up being larger than our EM
3241 * thinks it is, which is a-ok, so just set len to 0.
3242 */
3243 len = 0;
3244 } else {
3245 len -= num_bytes;
3246 }
3247 start = key.offset + num_bytes;
3248 args->next_offset = start;
3249 search_start = start;
3250
3251 if (path->slots[0] < nritems) {
3252 if (len)
3253 goto next_slot;
3254 break;
3255 }
3256
3257 if (args->nr) {
3258 ret = copy_items(trans, inode, dst_path, args->src,
3259 args->start_slot, args->nr,
3260 LOG_INODE_ALL);
3261 if (ret)
3262 return ret;
3263 args->nr = 0;
3264 btrfs_release_path(path);
3265 }
3266 }
3267
3268 return 0;
3269}
3270
3271static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3272 struct btrfs_root *root,
3273 struct inode *inode,
3274 struct btrfs_path *path,
3275 struct btrfs_path *dst_path)
3276{
3277 struct log_args args;
3278 struct extent_map *em, *n;
3279 struct list_head extents;
3280 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3281 u64 test_gen;
3282 int ret = 0;
3283
3284 INIT_LIST_HEAD(&extents);
3285
3286 memset(&args, 0, sizeof(args));
3287
3288 write_lock(&tree->lock);
3289 test_gen = root->fs_info->last_trans_committed;
3290
3291 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
3292 list_del_init(&em->list);
3293 if (em->generation <= test_gen)
3294 continue;
3295 /* Need a ref to keep it from getting evicted from cache */
3296 atomic_inc(&em->refs);
3297 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
3298 list_add_tail(&em->list, &extents);
3299 }
3300
3301 list_sort(NULL, &extents, extent_cmp);
3302
3303 while (!list_empty(&extents)) {
3304 em = list_entry(extents.next, struct extent_map, list);
3305
3306 list_del_init(&em->list);
3307 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
3308
3309 /*
3310 * If we had an error we just need to delete everybody from our
3311 * private list.
3312 */
3313 if (ret) {
3314 free_extent_map(em);
3315 continue;
3316 }
3317
3318 write_unlock(&tree->lock);
3319
3320 /*
3321 * If the previous EM and the last extent we left off on aren't
3322 * sequential then we need to copy the items we have and redo
3323 * our search
3324 */
3325 if (args.nr && em->mod_start != args.next_offset) {
3326 ret = copy_items(trans, inode, dst_path, args.src,
3327 args.start_slot, args.nr,
3328 LOG_INODE_ALL);
3329 if (ret) {
3330 free_extent_map(em);
3331 write_lock(&tree->lock);
3332 continue;
3333 }
3334 btrfs_release_path(path);
3335 args.nr = 0;
3336 }
3337
3338 ret = log_one_extent(trans, inode, root, em, path, dst_path, &args);
3339 free_extent_map(em);
3340 write_lock(&tree->lock);
3341 }
3342 WARN_ON(!list_empty(&extents));
3343 write_unlock(&tree->lock);
3344
3345 if (!ret && args.nr)
3346 ret = copy_items(trans, inode, dst_path, args.src,
3347 args.start_slot, args.nr, LOG_INODE_ALL);
3348 btrfs_release_path(path);
3349 return ret;
3350}
3351
2806/* log a single inode in the tree log. 3352/* log a single inode in the tree log.
2807 * At least one parent directory for this inode must exist in the tree 3353 * At least one parent directory for this inode must exist in the tree
2808 * or be logged already. 3354 * or be logged already.
@@ -2832,6 +3378,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2832 int nritems; 3378 int nritems;
2833 int ins_start_slot = 0; 3379 int ins_start_slot = 0;
2834 int ins_nr; 3380 int ins_nr;
3381 bool fast_search = false;
2835 u64 ino = btrfs_ino(inode); 3382 u64 ino = btrfs_ino(inode);
2836 3383
2837 log = root->log_root; 3384 log = root->log_root;
@@ -2851,21 +3398,23 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2851 3398
2852 max_key.objectid = ino; 3399 max_key.objectid = ino;
2853 3400
2854 /* today the code can only do partial logging of directories */
2855 if (!S_ISDIR(inode->i_mode))
2856 inode_only = LOG_INODE_ALL;
2857 3401
3402 /* today the code can only do partial logging of directories */
2858 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) 3403 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
2859 max_key.type = BTRFS_XATTR_ITEM_KEY; 3404 max_key.type = BTRFS_XATTR_ITEM_KEY;
2860 else 3405 else
2861 max_key.type = (u8)-1; 3406 max_key.type = (u8)-1;
2862 max_key.offset = (u64)-1; 3407 max_key.offset = (u64)-1;
2863 3408
2864 ret = btrfs_commit_inode_delayed_items(trans, inode); 3409 /* Only run delayed items if we are a dir or a new file */
2865 if (ret) { 3410 if (S_ISDIR(inode->i_mode) ||
2866 btrfs_free_path(path); 3411 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) {
2867 btrfs_free_path(dst_path); 3412 ret = btrfs_commit_inode_delayed_items(trans, inode);
2868 return ret; 3413 if (ret) {
3414 btrfs_free_path(path);
3415 btrfs_free_path(dst_path);
3416 return ret;
3417 }
2869 } 3418 }
2870 3419
2871 mutex_lock(&BTRFS_I(inode)->log_mutex); 3420 mutex_lock(&BTRFS_I(inode)->log_mutex);
@@ -2881,7 +3430,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2881 max_key_type = BTRFS_XATTR_ITEM_KEY; 3430 max_key_type = BTRFS_XATTR_ITEM_KEY;
2882 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 3431 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
2883 } else { 3432 } else {
2884 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); 3433 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3434 &BTRFS_I(inode)->runtime_flags)) {
3435 ret = btrfs_truncate_inode_items(trans, log,
3436 inode, 0, 0);
3437 } else {
3438 fast_search = true;
3439 max_key.type = BTRFS_XATTR_ITEM_KEY;
3440 ret = drop_objectid_items(trans, log, path, ino,
3441 BTRFS_XATTR_ITEM_KEY);
3442 }
2885 } 3443 }
2886 if (ret) { 3444 if (ret) {
2887 err = ret; 3445 err = ret;
@@ -2912,7 +3470,7 @@ again:
2912 goto next_slot; 3470 goto next_slot;
2913 } 3471 }
2914 3472
2915 ret = copy_items(trans, log, dst_path, src, ins_start_slot, 3473 ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
2916 ins_nr, inode_only); 3474 ins_nr, inode_only);
2917 if (ret) { 3475 if (ret) {
2918 err = ret; 3476 err = ret;
@@ -2930,7 +3488,7 @@ next_slot:
2930 goto again; 3488 goto again;
2931 } 3489 }
2932 if (ins_nr) { 3490 if (ins_nr) {
2933 ret = copy_items(trans, log, dst_path, src, 3491 ret = copy_items(trans, inode, dst_path, src,
2934 ins_start_slot, 3492 ins_start_slot,
2935 ins_nr, inode_only); 3493 ins_nr, inode_only);
2936 if (ret) { 3494 if (ret) {
@@ -2951,8 +3509,7 @@ next_slot:
2951 break; 3509 break;
2952 } 3510 }
2953 if (ins_nr) { 3511 if (ins_nr) {
2954 ret = copy_items(trans, log, dst_path, src, 3512 ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
2955 ins_start_slot,
2956 ins_nr, inode_only); 3513 ins_nr, inode_only);
2957 if (ret) { 3514 if (ret) {
2958 err = ret; 3515 err = ret;
@@ -2960,7 +3517,24 @@ next_slot:
2960 } 3517 }
2961 ins_nr = 0; 3518 ins_nr = 0;
2962 } 3519 }
2963 WARN_ON(ins_nr); 3520
3521 if (fast_search) {
3522 btrfs_release_path(path);
3523 btrfs_release_path(dst_path);
3524 ret = btrfs_log_changed_extents(trans, root, inode, path,
3525 dst_path);
3526 if (ret) {
3527 err = ret;
3528 goto out_unlock;
3529 }
3530 } else {
3531 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3532 struct extent_map *em, *n;
3533
3534 list_for_each_entry_safe(em, n, &tree->modified_extents, list)
3535 list_del_init(&em->list);
3536 }
3537
2964 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 3538 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2965 btrfs_release_path(path); 3539 btrfs_release_path(path);
2966 btrfs_release_path(dst_path); 3540 btrfs_release_path(dst_path);
@@ -2971,6 +3545,7 @@ next_slot:
2971 } 3545 }
2972 } 3546 }
2973 BTRFS_I(inode)->logged_trans = trans->transid; 3547 BTRFS_I(inode)->logged_trans = trans->transid;
3548 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
2974out_unlock: 3549out_unlock:
2975 mutex_unlock(&BTRFS_I(inode)->log_mutex); 3550 mutex_unlock(&BTRFS_I(inode)->log_mutex);
2976 3551
@@ -3138,7 +3713,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3138end_trans: 3713end_trans:
3139 dput(old_parent); 3714 dput(old_parent);
3140 if (ret < 0) { 3715 if (ret < 0) {
3141 BUG_ON(ret != -ENOSPC); 3716 WARN_ON(ret != -ENOSPC);
3142 root->fs_info->last_trans_log_full_commit = trans->transid; 3717 root->fs_info->last_trans_log_full_commit = trans->transid;
3143 ret = 1; 3718 ret = 1;
3144 } 3719 }