aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
authorMark Fasheh <mfasheh@suse.de>2012-08-08 14:32:27 -0400
committerChris Mason <chris.mason@fusionio.com>2012-10-09 09:14:45 -0400
commitf186373fef005cee948a4a39e6a14c2e5f517298 (patch)
tree5683c66a7112e56147149f379658517ab18e7689 /fs/btrfs/tree-log.c
parent5a1d7843ca4b3a9009bea87f85ad33854b910aea (diff)
btrfs: extended inode refs
This patch adds basic support for extended inode refs. This includes support for link and unlink of the refs, which basically gets us support for rename as well. Inode creation does not need changing - extended refs are only added after the ref array is full. Signed-off-by: Mark Fasheh <mfasheh@suse.de>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c345
1 files changed, 293 insertions, 52 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 15dae589e59f..1d7b34844323 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -24,8 +24,10 @@
24#include "disk-io.h" 24#include "disk-io.h"
25#include "locking.h" 25#include "locking.h"
26#include "print-tree.h" 26#include "print-tree.h"
27#include "backref.h"
27#include "compat.h" 28#include "compat.h"
28#include "tree-log.h" 29#include "tree-log.h"
30#include "hash.h"
29 31
30/* magic values for the inode_only field in btrfs_log_inode: 32/* magic values for the inode_only field in btrfs_log_inode:
31 * 33 *
@@ -743,6 +745,7 @@ out:
743 */ 745 */
744static noinline int backref_in_log(struct btrfs_root *log, 746static noinline int backref_in_log(struct btrfs_root *log,
745 struct btrfs_key *key, 747 struct btrfs_key *key,
748 u64 ref_objectid,
746 char *name, int namelen) 749 char *name, int namelen)
747{ 750{
748 struct btrfs_path *path; 751 struct btrfs_path *path;
@@ -763,8 +766,17 @@ static noinline int backref_in_log(struct btrfs_root *log,
763 if (ret != 0) 766 if (ret != 0)
764 goto out; 767 goto out;
765 768
766 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
767 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); 769 ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
770
771 if (key->type == BTRFS_INODE_EXTREF_KEY) {
772 if (btrfs_find_name_in_ext_backref(path, ref_objectid,
773 name, namelen, NULL))
774 match = 1;
775
776 goto out;
777 }
778
779 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
768 ptr_end = ptr + item_size; 780 ptr_end = ptr + item_size;
769 while (ptr < ptr_end) { 781 while (ptr < ptr_end) {
770 ref = (struct btrfs_inode_ref *)ptr; 782 ref = (struct btrfs_inode_ref *)ptr;
@@ -790,27 +802,36 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
790 struct btrfs_path *path, 802 struct btrfs_path *path,
791 struct btrfs_root *log_root, 803 struct btrfs_root *log_root,
792 struct inode *dir, struct inode *inode, 804 struct inode *dir, struct inode *inode,
793 struct btrfs_key *key,
794 struct extent_buffer *eb, 805 struct extent_buffer *eb,
795 struct btrfs_inode_ref *ref, 806 u64 inode_objectid, u64 parent_objectid,
796 char *name, int namelen, int *search_done) 807 u64 ref_index, char *name, int namelen,
808 int *search_done)
797{ 809{
798 int ret; 810 int ret;
811 char *victim_name;
812 int victim_name_len;
813 struct extent_buffer *leaf;
799 struct btrfs_dir_item *di; 814 struct btrfs_dir_item *di;
815 struct btrfs_key search_key;
816 struct btrfs_inode_extref *extref;
800 817
801 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 818again:
819 /* Search old style refs */
820 search_key.objectid = inode_objectid;
821 search_key.type = BTRFS_INODE_REF_KEY;
822 search_key.offset = parent_objectid;
823 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
802 if (ret == 0) { 824 if (ret == 0) {
803 char *victim_name;
804 int victim_name_len;
805 struct btrfs_inode_ref *victim_ref; 825 struct btrfs_inode_ref *victim_ref;
806 unsigned long ptr; 826 unsigned long ptr;
807 unsigned long ptr_end; 827 unsigned long ptr_end;
808 struct extent_buffer *leaf = path->nodes[0]; 828
829 leaf = path->nodes[0];
809 830
810 /* are we trying to overwrite a back ref for the root directory 831 /* are we trying to overwrite a back ref for the root directory
811 * if so, just jump out, we're done 832 * if so, just jump out, we're done
812 */ 833 */
813 if (key->objectid == key->offset) 834 if (search_key.objectid == search_key.offset)
814 return 1; 835 return 1;
815 836
816 /* check all the names in this back reference to see 837 /* check all the names in this back reference to see
@@ -830,7 +851,9 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
830 (unsigned long)(victim_ref + 1), 851 (unsigned long)(victim_ref + 1),
831 victim_name_len); 852 victim_name_len);
832 853
833 if (!backref_in_log(log_root, key, victim_name, 854 if (!backref_in_log(log_root, &search_key,
855 parent_objectid,
856 victim_name,
834 victim_name_len)) { 857 victim_name_len)) {
835 btrfs_inc_nlink(inode); 858 btrfs_inc_nlink(inode);
836 btrfs_release_path(path); 859 btrfs_release_path(path);
@@ -838,9 +861,14 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
838 ret = btrfs_unlink_inode(trans, root, dir, 861 ret = btrfs_unlink_inode(trans, root, dir,
839 inode, victim_name, 862 inode, victim_name,
840 victim_name_len); 863 victim_name_len);
864 BUG_ON(ret);
841 btrfs_run_delayed_items(trans, root); 865 btrfs_run_delayed_items(trans, root);
866 kfree(victim_name);
867 *search_done = 1;
868 goto again;
842 } 869 }
843 kfree(victim_name); 870 kfree(victim_name);
871
844 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 872 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
845 } 873 }
846 BUG_ON(ret); 874 BUG_ON(ret);
@@ -853,10 +881,74 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
853 } 881 }
854 btrfs_release_path(path); 882 btrfs_release_path(path);
855 883
884 /* Same search but for extended refs */
885 extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
886 inode_objectid, parent_objectid, 0,
887 0);
888 if (!IS_ERR_OR_NULL(extref)) {
889 u32 item_size;
890 u32 cur_offset = 0;
891 unsigned long base;
892 struct inode *victim_parent;
893
894 leaf = path->nodes[0];
895
896 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
897 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
898
899 while (cur_offset < item_size) {
900 extref = (struct btrfs_inode_extref *)base + cur_offset;
901
902 victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
903
904 if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
905 goto next;
906
907 victim_name = kmalloc(victim_name_len, GFP_NOFS);
908 read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
909 victim_name_len);
910
911 search_key.objectid = inode_objectid;
912 search_key.type = BTRFS_INODE_EXTREF_KEY;
913 search_key.offset = btrfs_extref_hash(parent_objectid,
914 victim_name,
915 victim_name_len);
916 ret = 0;
917 if (!backref_in_log(log_root, &search_key,
918 parent_objectid, victim_name,
919 victim_name_len)) {
920 ret = -ENOENT;
921 victim_parent = read_one_inode(root,
922 parent_objectid);
923 if (victim_parent) {
924 btrfs_inc_nlink(inode);
925 btrfs_release_path(path);
926
927 ret = btrfs_unlink_inode(trans, root,
928 victim_parent,
929 inode,
930 victim_name,
931 victim_name_len);
932 btrfs_run_delayed_items(trans, root);
933 }
934 BUG_ON(ret);
935 iput(victim_parent);
936 kfree(victim_name);
937 *search_done = 1;
938 goto again;
939 }
940 kfree(victim_name);
941 BUG_ON(ret);
942next:
943 cur_offset += victim_name_len + sizeof(*extref);
944 }
945 *search_done = 1;
946 }
947 btrfs_release_path(path);
948
856 /* look for a conflicting sequence number */ 949 /* look for a conflicting sequence number */
857 di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), 950 di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
858 btrfs_inode_ref_index(eb, ref), 951 ref_index, name, namelen, 0);
859 name, namelen, 0);
860 if (di && !IS_ERR(di)) { 952 if (di && !IS_ERR(di)) {
861 ret = drop_one_dir_item(trans, root, path, dir, di); 953 ret = drop_one_dir_item(trans, root, path, dir, di);
862 BUG_ON(ret); 954 BUG_ON(ret);
@@ -875,6 +967,48 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
875 return 0; 967 return 0;
876} 968}
877 969
970static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
971 u32 *namelen, char **name, u64 *index,
972 u64 *parent_objectid)
973{
974 struct btrfs_inode_extref *extref;
975
976 extref = (struct btrfs_inode_extref *)ref_ptr;
977
978 *namelen = btrfs_inode_extref_name_len(eb, extref);
979 *name = kmalloc(*namelen, GFP_NOFS);
980 if (*name == NULL)
981 return -ENOMEM;
982
983 read_extent_buffer(eb, *name, (unsigned long)&extref->name,
984 *namelen);
985
986 *index = btrfs_inode_extref_index(eb, extref);
987 if (parent_objectid)
988 *parent_objectid = btrfs_inode_extref_parent(eb, extref);
989
990 return 0;
991}
992
993static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
994 u32 *namelen, char **name, u64 *index)
995{
996 struct btrfs_inode_ref *ref;
997
998 ref = (struct btrfs_inode_ref *)ref_ptr;
999
1000 *namelen = btrfs_inode_ref_name_len(eb, ref);
1001 *name = kmalloc(*namelen, GFP_NOFS);
1002 if (*name == NULL)
1003 return -ENOMEM;
1004
1005 read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
1006
1007 *index = btrfs_inode_ref_index(eb, ref);
1008
1009 return 0;
1010}
1011
878/* 1012/*
879 * replay one inode back reference item found in the log tree. 1013 * replay one inode back reference item found in the log tree.
880 * eb, slot and key refer to the buffer and key found in the log tree. 1014 * eb, slot and key refer to the buffer and key found in the log tree.
@@ -888,7 +1022,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
888 struct extent_buffer *eb, int slot, 1022 struct extent_buffer *eb, int slot,
889 struct btrfs_key *key) 1023 struct btrfs_key *key)
890{ 1024{
891 struct btrfs_inode_ref *ref;
892 struct inode *dir; 1025 struct inode *dir;
893 struct inode *inode; 1026 struct inode *inode;
894 unsigned long ref_ptr; 1027 unsigned long ref_ptr;
@@ -897,6 +1030,27 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
897 int namelen; 1030 int namelen;
898 int ret; 1031 int ret;
899 int search_done = 0; 1032 int search_done = 0;
1033 int log_ref_ver = 0;
1034 u64 parent_objectid;
1035 u64 inode_objectid;
1036 u64 ref_index;
1037 int ref_struct_size;
1038
1039 ref_ptr = btrfs_item_ptr_offset(eb, slot);
1040 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
1041
1042 if (key->type == BTRFS_INODE_EXTREF_KEY) {
1043 struct btrfs_inode_extref *r;
1044
1045 ref_struct_size = sizeof(struct btrfs_inode_extref);
1046 log_ref_ver = 1;
1047 r = (struct btrfs_inode_extref *)ref_ptr;
1048 parent_objectid = btrfs_inode_extref_parent(eb, r);
1049 } else {
1050 ref_struct_size = sizeof(struct btrfs_inode_ref);
1051 parent_objectid = key->offset;
1052 }
1053 inode_objectid = key->objectid;
900 1054
901 /* 1055 /*
902 * it is possible that we didn't log all the parent directories 1056 * it is possible that we didn't log all the parent directories
@@ -904,32 +1058,38 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
904 * copy the back ref in. The link count fixup code will take 1058 * copy the back ref in. The link count fixup code will take
905 * care of the rest 1059 * care of the rest
906 */ 1060 */
907 dir = read_one_inode(root, key->offset); 1061 dir = read_one_inode(root, parent_objectid);
908 if (!dir) 1062 if (!dir)
909 return -ENOENT; 1063 return -ENOENT;
910 1064
911 inode = read_one_inode(root, key->objectid); 1065 inode = read_one_inode(root, inode_objectid);
912 if (!inode) { 1066 if (!inode) {
913 iput(dir); 1067 iput(dir);
914 return -EIO; 1068 return -EIO;
915 } 1069 }
916 1070
917 ref_ptr = btrfs_item_ptr_offset(eb, slot);
918 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
919
920 while (ref_ptr < ref_end) { 1071 while (ref_ptr < ref_end) {
921 ref = (struct btrfs_inode_ref *)ref_ptr; 1072 if (log_ref_ver) {
922 1073 ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
923 namelen = btrfs_inode_ref_name_len(eb, ref); 1074 &ref_index, &parent_objectid);
924 name = kmalloc(namelen, GFP_NOFS); 1075 /*
925 BUG_ON(!name); 1076 * parent object can change from one array
926 1077 * item to another.
927 read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); 1078 */
1079 if (!dir)
1080 dir = read_one_inode(root, parent_objectid);
1081 if (!dir)
1082 return -ENOENT;
1083 } else {
1084 ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
1085 &ref_index);
1086 }
1087 if (ret)
1088 return ret;
928 1089
929 /* if we already have a perfect match, we're done */ 1090 /* if we already have a perfect match, we're done */
930 if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), 1091 if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
931 btrfs_inode_ref_index(eb, ref), 1092 ref_index, name, namelen)) {
932 name, namelen)) {
933 /* 1093 /*
934 * look for a conflicting back reference in the 1094 * look for a conflicting back reference in the
935 * metadata. if we find one we have to unlink that name 1095 * metadata. if we find one we have to unlink that name
@@ -940,8 +1100,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
940 1100
941 if (!search_done) { 1101 if (!search_done) {
942 ret = __add_inode_ref(trans, root, path, log, 1102 ret = __add_inode_ref(trans, root, path, log,
943 dir, inode, key, eb, ref, 1103 dir, inode, eb,
944 name, namelen, 1104 inode_objectid,
1105 parent_objectid,
1106 ref_index, name, namelen,
945 &search_done); 1107 &search_done);
946 if (ret == 1) 1108 if (ret == 1)
947 goto out; 1109 goto out;
@@ -950,14 +1112,18 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
950 1112
951 /* insert our name */ 1113 /* insert our name */
952 ret = btrfs_add_link(trans, dir, inode, name, namelen, 1114 ret = btrfs_add_link(trans, dir, inode, name, namelen,
953 0, btrfs_inode_ref_index(eb, ref)); 1115 0, ref_index);
954 BUG_ON(ret); 1116 BUG_ON(ret);
955 1117
956 btrfs_update_inode(trans, root, inode); 1118 btrfs_update_inode(trans, root, inode);
957 } 1119 }
958 1120
959 ref_ptr = (unsigned long)(ref + 1) + namelen; 1121 ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
960 kfree(name); 1122 kfree(name);
1123 if (log_ref_ver) {
1124 iput(dir);
1125 dir = NULL;
1126 }
961 } 1127 }
962 1128
963 /* finally write the back reference in the inode */ 1129 /* finally write the back reference in the inode */
@@ -981,25 +1147,55 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
981 return ret; 1147 return ret;
982} 1148}
983 1149
1150static int count_inode_extrefs(struct btrfs_root *root,
1151 struct inode *inode, struct btrfs_path *path)
1152{
1153 int ret = 0;
1154 int name_len;
1155 unsigned int nlink = 0;
1156 u32 item_size;
1157 u32 cur_offset = 0;
1158 u64 inode_objectid = btrfs_ino(inode);
1159 u64 offset = 0;
1160 unsigned long ptr;
1161 struct btrfs_inode_extref *extref;
1162 struct extent_buffer *leaf;
984 1163
985/* 1164 while (1) {
986 * There are a few corners where the link count of the file can't 1165 ret = btrfs_find_one_extref(root, inode_objectid, offset, path,
987 * be properly maintained during replay. So, instead of adding 1166 &extref, &offset);
988 * lots of complexity to the log code, we just scan the backrefs 1167 if (ret)
989 * for any file that has been through replay. 1168 break;
990 * 1169
991 * The scan will update the link count on the inode to reflect the 1170 leaf = path->nodes[0];
992 * number of back refs found. If it goes down to zero, the iput 1171 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
993 * will free the inode. 1172 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
994 */ 1173
995static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, 1174 while (cur_offset < item_size) {
996 struct btrfs_root *root, 1175 extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
997 struct inode *inode) 1176 name_len = btrfs_inode_extref_name_len(leaf, extref);
1177
1178 nlink++;
1179
1180 cur_offset += name_len + sizeof(*extref);
1181 }
1182
1183 offset++;
1184 btrfs_release_path(path);
1185 }
1186 btrfs_release_path(path);
1187
1188 if (ret < 0)
1189 return ret;
1190 return nlink;
1191}
1192
1193static int count_inode_refs(struct btrfs_root *root,
1194 struct inode *inode, struct btrfs_path *path)
998{ 1195{
999 struct btrfs_path *path;
1000 int ret; 1196 int ret;
1001 struct btrfs_key key; 1197 struct btrfs_key key;
1002 u64 nlink = 0; 1198 unsigned int nlink = 0;
1003 unsigned long ptr; 1199 unsigned long ptr;
1004 unsigned long ptr_end; 1200 unsigned long ptr_end;
1005 int name_len; 1201 int name_len;
@@ -1009,10 +1205,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1009 key.type = BTRFS_INODE_REF_KEY; 1205 key.type = BTRFS_INODE_REF_KEY;
1010 key.offset = (u64)-1; 1206 key.offset = (u64)-1;
1011 1207
1012 path = btrfs_alloc_path();
1013 if (!path)
1014 return -ENOMEM;
1015
1016 while (1) { 1208 while (1) {
1017 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1209 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1018 if (ret < 0) 1210 if (ret < 0)
@@ -1046,6 +1238,50 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1046 btrfs_release_path(path); 1238 btrfs_release_path(path);
1047 } 1239 }
1048 btrfs_release_path(path); 1240 btrfs_release_path(path);
1241
1242 return nlink;
1243}
1244
1245/*
1246 * There are a few corners where the link count of the file can't
1247 * be properly maintained during replay. So, instead of adding
1248 * lots of complexity to the log code, we just scan the backrefs
1249 * for any file that has been through replay.
1250 *
1251 * The scan will update the link count on the inode to reflect the
1252 * number of back refs found. If it goes down to zero, the iput
1253 * will free the inode.
1254 */
1255static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1256 struct btrfs_root *root,
1257 struct inode *inode)
1258{
1259 struct btrfs_path *path;
1260 int ret;
1261 u64 nlink = 0;
1262 u64 ino = btrfs_ino(inode);
1263
1264 path = btrfs_alloc_path();
1265 if (!path)
1266 return -ENOMEM;
1267
1268 ret = count_inode_refs(root, inode, path);
1269 if (ret < 0)
1270 goto out;
1271
1272 nlink = ret;
1273
1274 ret = count_inode_extrefs(root, inode, path);
1275 if (ret == -ENOENT)
1276 ret = 0;
1277
1278 if (ret < 0)
1279 goto out;
1280
1281 nlink += ret;
1282
1283 ret = 0;
1284
1049 if (nlink != inode->i_nlink) { 1285 if (nlink != inode->i_nlink) {
1050 set_nlink(inode, nlink); 1286 set_nlink(inode, nlink);
1051 btrfs_update_inode(trans, root, inode); 1287 btrfs_update_inode(trans, root, inode);
@@ -1061,9 +1297,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1061 ret = insert_orphan_item(trans, root, ino); 1297 ret = insert_orphan_item(trans, root, ino);
1062 BUG_ON(ret); 1298 BUG_ON(ret);
1063 } 1299 }
1064 btrfs_free_path(path);
1065 1300
1066 return 0; 1301out:
1302 btrfs_free_path(path);
1303 return ret;
1067} 1304}
1068 1305
1069static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, 1306static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
@@ -1710,6 +1947,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1710 ret = add_inode_ref(wc->trans, root, log, path, 1947 ret = add_inode_ref(wc->trans, root, log, path,
1711 eb, i, &key); 1948 eb, i, &key);
1712 BUG_ON(ret && ret != -ENOENT); 1949 BUG_ON(ret && ret != -ENOENT);
1950 } else if (key.type == BTRFS_INODE_EXTREF_KEY) {
1951 ret = add_inode_ref(wc->trans, root, log, path,
1952 eb, i, &key);
1953 BUG_ON(ret && ret != -ENOENT);
1713 } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 1954 } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
1714 ret = replay_one_extent(wc->trans, root, path, 1955 ret = replay_one_extent(wc->trans, root, path,
1715 eb, i, &key); 1956 eb, i, &key);