diff options
Diffstat (limited to 'fs/btrfs')
41 files changed, 3617 insertions, 1663 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 761e2cd8fed1..0c16e3dbfd56 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
| @@ -61,7 +61,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
| 61 | size = __btrfs_getxattr(inode, name, value, size); | 61 | size = __btrfs_getxattr(inode, name, value, size); |
| 62 | } | 62 | } |
| 63 | if (size > 0) { | 63 | if (size > 0) { |
| 64 | acl = posix_acl_from_xattr(value, size); | 64 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
| 65 | } else if (size == -ENOENT || size == -ENODATA || size == 0) { | 65 | } else if (size == -ENOENT || size == -ENODATA || size == 0) { |
| 66 | /* FIXME, who returns -ENOENT? I think nobody */ | 66 | /* FIXME, who returns -ENOENT? I think nobody */ |
| 67 | acl = NULL; | 67 | acl = NULL; |
| @@ -91,7 +91,7 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name, | |||
| 91 | return PTR_ERR(acl); | 91 | return PTR_ERR(acl); |
| 92 | if (acl == NULL) | 92 | if (acl == NULL) |
| 93 | return -ENODATA; | 93 | return -ENODATA; |
| 94 | ret = posix_acl_to_xattr(acl, value, size); | 94 | ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); |
| 95 | posix_acl_release(acl); | 95 | posix_acl_release(acl); |
| 96 | 96 | ||
| 97 | return ret; | 97 | return ret; |
| @@ -141,7 +141,7 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, | |||
| 141 | goto out; | 141 | goto out; |
| 142 | } | 142 | } |
| 143 | 143 | ||
| 144 | ret = posix_acl_to_xattr(acl, value, size); | 144 | ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); |
| 145 | if (ret < 0) | 145 | if (ret < 0) |
| 146 | goto out; | 146 | goto out; |
| 147 | } | 147 | } |
| @@ -169,7 +169,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
| 169 | return -EOPNOTSUPP; | 169 | return -EOPNOTSUPP; |
| 170 | 170 | ||
| 171 | if (value) { | 171 | if (value) { |
| 172 | acl = posix_acl_from_xattr(value, size); | 172 | acl = posix_acl_from_xattr(&init_user_ns, value, size); |
| 173 | if (IS_ERR(acl)) | 173 | if (IS_ERR(acl)) |
| 174 | return PTR_ERR(acl); | 174 | return PTR_ERR(acl); |
| 175 | 175 | ||
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ff6475f409d6..f3187938e081 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/vmalloc.h> | ||
| 19 | #include "ctree.h" | 20 | #include "ctree.h" |
| 20 | #include "disk-io.h" | 21 | #include "disk-io.h" |
| 21 | #include "backref.h" | 22 | #include "backref.h" |
| @@ -231,7 +232,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
| 231 | } | 232 | } |
| 232 | if (!ret) { | 233 | if (!ret) { |
| 233 | ret = ulist_add(parents, eb->start, | 234 | ret = ulist_add(parents, eb->start, |
| 234 | (unsigned long)eie, GFP_NOFS); | 235 | (uintptr_t)eie, GFP_NOFS); |
| 235 | if (ret < 0) | 236 | if (ret < 0) |
| 236 | break; | 237 | break; |
| 237 | if (!extent_item_pos) { | 238 | if (!extent_item_pos) { |
| @@ -363,8 +364,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 363 | ULIST_ITER_INIT(&uiter); | 364 | ULIST_ITER_INIT(&uiter); |
| 364 | node = ulist_next(parents, &uiter); | 365 | node = ulist_next(parents, &uiter); |
| 365 | ref->parent = node ? node->val : 0; | 366 | ref->parent = node ? node->val : 0; |
| 366 | ref->inode_list = | 367 | ref->inode_list = node ? |
| 367 | node ? (struct extent_inode_elem *)node->aux : 0; | 368 | (struct extent_inode_elem *)(uintptr_t)node->aux : 0; |
| 368 | 369 | ||
| 369 | /* additional parents require new refs being added here */ | 370 | /* additional parents require new refs being added here */ |
| 370 | while ((node = ulist_next(parents, &uiter))) { | 371 | while ((node = ulist_next(parents, &uiter))) { |
| @@ -375,8 +376,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 375 | } | 376 | } |
| 376 | memcpy(new_ref, ref, sizeof(*ref)); | 377 | memcpy(new_ref, ref, sizeof(*ref)); |
| 377 | new_ref->parent = node->val; | 378 | new_ref->parent = node->val; |
| 378 | new_ref->inode_list = | 379 | new_ref->inode_list = (struct extent_inode_elem *) |
| 379 | (struct extent_inode_elem *)node->aux; | 380 | (uintptr_t)node->aux; |
| 380 | list_add(&new_ref->list, &ref->list); | 381 | list_add(&new_ref->list, &ref->list); |
| 381 | } | 382 | } |
| 382 | ulist_reinit(parents); | 383 | ulist_reinit(parents); |
| @@ -914,8 +915,8 @@ again: | |||
| 914 | free_extent_buffer(eb); | 915 | free_extent_buffer(eb); |
| 915 | } | 916 | } |
| 916 | ret = ulist_add_merge(refs, ref->parent, | 917 | ret = ulist_add_merge(refs, ref->parent, |
| 917 | (unsigned long)ref->inode_list, | 918 | (uintptr_t)ref->inode_list, |
| 918 | (unsigned long *)&eie, GFP_NOFS); | 919 | (u64 *)&eie, GFP_NOFS); |
| 919 | if (!ret && extent_item_pos) { | 920 | if (!ret && extent_item_pos) { |
| 920 | /* | 921 | /* |
| 921 | * we've recorded that parent, so we must extend | 922 | * we've recorded that parent, so we must extend |
| @@ -959,7 +960,7 @@ static void free_leaf_list(struct ulist *blocks) | |||
| 959 | while ((node = ulist_next(blocks, &uiter))) { | 960 | while ((node = ulist_next(blocks, &uiter))) { |
| 960 | if (!node->aux) | 961 | if (!node->aux) |
| 961 | continue; | 962 | continue; |
| 962 | eie = (struct extent_inode_elem *)node->aux; | 963 | eie = (struct extent_inode_elem *)(uintptr_t)node->aux; |
| 963 | for (; eie; eie = eie_next) { | 964 | for (; eie; eie = eie_next) { |
| 964 | eie_next = eie->next; | 965 | eie_next = eie->next; |
| 965 | kfree(eie); | 966 | kfree(eie); |
| @@ -1108,26 +1109,80 @@ static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | |||
| 1108 | found_key); | 1109 | found_key); |
| 1109 | } | 1110 | } |
| 1110 | 1111 | ||
| 1111 | /* | 1112 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, |
| 1112 | * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements | 1113 | u64 start_off, struct btrfs_path *path, |
| 1113 | * of the path are separated by '/' and the path is guaranteed to be | 1114 | struct btrfs_inode_extref **ret_extref, |
| 1114 | * 0-terminated. the path is only given within the current file system. | 1115 | u64 *found_off) |
| 1115 | * Therefore, it never starts with a '/'. the caller is responsible to provide | 1116 | { |
| 1116 | * "size" bytes in "dest". the dest buffer will be filled backwards. finally, | 1117 | int ret, slot; |
| 1117 | * the start point of the resulting string is returned. this pointer is within | 1118 | struct btrfs_key key; |
| 1118 | * dest, normally. | 1119 | struct btrfs_key found_key; |
| 1119 | * in case the path buffer would overflow, the pointer is decremented further | 1120 | struct btrfs_inode_extref *extref; |
| 1120 | * as if output was written to the buffer, though no more output is actually | 1121 | struct extent_buffer *leaf; |
| 1121 | * generated. that way, the caller can determine how much space would be | 1122 | unsigned long ptr; |
| 1122 | * required for the path to fit into the buffer. in that case, the returned | 1123 | |
| 1123 | * value will be smaller than dest. callers must check this! | 1124 | key.objectid = inode_objectid; |
| 1124 | */ | 1125 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); |
| 1125 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | 1126 | key.offset = start_off; |
| 1126 | struct btrfs_inode_ref *iref, | 1127 | |
| 1128 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 1129 | if (ret < 0) | ||
| 1130 | return ret; | ||
| 1131 | |||
| 1132 | while (1) { | ||
| 1133 | leaf = path->nodes[0]; | ||
| 1134 | slot = path->slots[0]; | ||
| 1135 | if (slot >= btrfs_header_nritems(leaf)) { | ||
| 1136 | /* | ||
| 1137 | * If the item at offset is not found, | ||
| 1138 | * btrfs_search_slot will point us to the slot | ||
| 1139 | * where it should be inserted. In our case | ||
| 1140 | * that will be the slot directly before the | ||
| 1141 | * next INODE_REF_KEY_V2 item. In the case | ||
| 1142 | * that we're pointing to the last slot in a | ||
| 1143 | * leaf, we must move one leaf over. | ||
| 1144 | */ | ||
| 1145 | ret = btrfs_next_leaf(root, path); | ||
| 1146 | if (ret) { | ||
| 1147 | if (ret >= 1) | ||
| 1148 | ret = -ENOENT; | ||
| 1149 | break; | ||
| 1150 | } | ||
| 1151 | continue; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 1155 | |||
| 1156 | /* | ||
| 1157 | * Check that we're still looking at an extended ref key for | ||
| 1158 | * this particular objectid. If we have different | ||
| 1159 | * objectid or type then there are no more to be found | ||
| 1160 | * in the tree and we can exit. | ||
| 1161 | */ | ||
| 1162 | ret = -ENOENT; | ||
| 1163 | if (found_key.objectid != inode_objectid) | ||
| 1164 | break; | ||
| 1165 | if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY) | ||
| 1166 | break; | ||
| 1167 | |||
| 1168 | ret = 0; | ||
| 1169 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 1170 | extref = (struct btrfs_inode_extref *)ptr; | ||
| 1171 | *ret_extref = extref; | ||
| 1172 | if (found_off) | ||
| 1173 | *found_off = found_key.offset; | ||
| 1174 | break; | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | return ret; | ||
| 1178 | } | ||
| 1179 | |||
| 1180 | static char *ref_to_path(struct btrfs_root *fs_root, | ||
| 1181 | struct btrfs_path *path, | ||
| 1182 | u32 name_len, unsigned long name_off, | ||
| 1127 | struct extent_buffer *eb_in, u64 parent, | 1183 | struct extent_buffer *eb_in, u64 parent, |
| 1128 | char *dest, u32 size) | 1184 | char *dest, u32 size) |
| 1129 | { | 1185 | { |
| 1130 | u32 len; | ||
| 1131 | int slot; | 1186 | int slot; |
| 1132 | u64 next_inum; | 1187 | u64 next_inum; |
| 1133 | int ret; | 1188 | int ret; |
| @@ -1135,17 +1190,17 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
| 1135 | struct extent_buffer *eb = eb_in; | 1190 | struct extent_buffer *eb = eb_in; |
| 1136 | struct btrfs_key found_key; | 1191 | struct btrfs_key found_key; |
| 1137 | int leave_spinning = path->leave_spinning; | 1192 | int leave_spinning = path->leave_spinning; |
| 1193 | struct btrfs_inode_ref *iref; | ||
| 1138 | 1194 | ||
| 1139 | if (bytes_left >= 0) | 1195 | if (bytes_left >= 0) |
| 1140 | dest[bytes_left] = '\0'; | 1196 | dest[bytes_left] = '\0'; |
| 1141 | 1197 | ||
| 1142 | path->leave_spinning = 1; | 1198 | path->leave_spinning = 1; |
| 1143 | while (1) { | 1199 | while (1) { |
| 1144 | len = btrfs_inode_ref_name_len(eb, iref); | 1200 | bytes_left -= name_len; |
| 1145 | bytes_left -= len; | ||
| 1146 | if (bytes_left >= 0) | 1201 | if (bytes_left >= 0) |
| 1147 | read_extent_buffer(eb, dest + bytes_left, | 1202 | read_extent_buffer(eb, dest + bytes_left, |
| 1148 | (unsigned long)(iref + 1), len); | 1203 | name_off, name_len); |
| 1149 | if (eb != eb_in) { | 1204 | if (eb != eb_in) { |
| 1150 | btrfs_tree_read_unlock_blocking(eb); | 1205 | btrfs_tree_read_unlock_blocking(eb); |
| 1151 | free_extent_buffer(eb); | 1206 | free_extent_buffer(eb); |
| @@ -1155,6 +1210,7 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
| 1155 | ret = -ENOENT; | 1210 | ret = -ENOENT; |
| 1156 | if (ret) | 1211 | if (ret) |
| 1157 | break; | 1212 | break; |
| 1213 | |||
| 1158 | next_inum = found_key.offset; | 1214 | next_inum = found_key.offset; |
| 1159 | 1215 | ||
| 1160 | /* regular exit ahead */ | 1216 | /* regular exit ahead */ |
| @@ -1170,8 +1226,11 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
| 1170 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | 1226 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); |
| 1171 | } | 1227 | } |
| 1172 | btrfs_release_path(path); | 1228 | btrfs_release_path(path); |
| 1173 | |||
| 1174 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); | 1229 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); |
| 1230 | |||
| 1231 | name_len = btrfs_inode_ref_name_len(eb, iref); | ||
| 1232 | name_off = (unsigned long)(iref + 1); | ||
| 1233 | |||
| 1175 | parent = next_inum; | 1234 | parent = next_inum; |
| 1176 | --bytes_left; | 1235 | --bytes_left; |
| 1177 | if (bytes_left >= 0) | 1236 | if (bytes_left >= 0) |
| @@ -1188,12 +1247,39 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
| 1188 | } | 1247 | } |
| 1189 | 1248 | ||
| 1190 | /* | 1249 | /* |
| 1250 | * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements | ||
| 1251 | * of the path are separated by '/' and the path is guaranteed to be | ||
| 1252 | * 0-terminated. the path is only given within the current file system. | ||
| 1253 | * Therefore, it never starts with a '/'. the caller is responsible to provide | ||
| 1254 | * "size" bytes in "dest". the dest buffer will be filled backwards. finally, | ||
| 1255 | * the start point of the resulting string is returned. this pointer is within | ||
| 1256 | * dest, normally. | ||
| 1257 | * in case the path buffer would overflow, the pointer is decremented further | ||
| 1258 | * as if output was written to the buffer, though no more output is actually | ||
| 1259 | * generated. that way, the caller can determine how much space would be | ||
| 1260 | * required for the path to fit into the buffer. in that case, the returned | ||
| 1261 | * value will be smaller than dest. callers must check this! | ||
| 1262 | */ | ||
| 1263 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, | ||
| 1264 | struct btrfs_path *path, | ||
| 1265 | struct btrfs_inode_ref *iref, | ||
| 1266 | struct extent_buffer *eb_in, u64 parent, | ||
| 1267 | char *dest, u32 size) | ||
| 1268 | { | ||
| 1269 | return ref_to_path(fs_root, path, | ||
| 1270 | btrfs_inode_ref_name_len(eb_in, iref), | ||
| 1271 | (unsigned long)(iref + 1), | ||
| 1272 | eb_in, parent, dest, size); | ||
| 1273 | } | ||
| 1274 | |||
| 1275 | /* | ||
| 1191 | * this makes the path point to (logical EXTENT_ITEM *) | 1276 | * this makes the path point to (logical EXTENT_ITEM *) |
| 1192 | * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for | 1277 | * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for |
| 1193 | * tree blocks and <0 on error. | 1278 | * tree blocks and <0 on error. |
| 1194 | */ | 1279 | */ |
| 1195 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | 1280 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, |
| 1196 | struct btrfs_path *path, struct btrfs_key *found_key) | 1281 | struct btrfs_path *path, struct btrfs_key *found_key, |
| 1282 | u64 *flags_ret) | ||
| 1197 | { | 1283 | { |
| 1198 | int ret; | 1284 | int ret; |
| 1199 | u64 flags; | 1285 | u64 flags; |
| @@ -1237,10 +1323,17 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
| 1237 | (unsigned long long)found_key->objectid, | 1323 | (unsigned long long)found_key->objectid, |
| 1238 | (unsigned long long)found_key->offset, | 1324 | (unsigned long long)found_key->offset, |
| 1239 | (unsigned long long)flags, item_size); | 1325 | (unsigned long long)flags, item_size); |
| 1240 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 1326 | |
| 1241 | return BTRFS_EXTENT_FLAG_TREE_BLOCK; | 1327 | WARN_ON(!flags_ret); |
| 1242 | if (flags & BTRFS_EXTENT_FLAG_DATA) | 1328 | if (flags_ret) { |
| 1243 | return BTRFS_EXTENT_FLAG_DATA; | 1329 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
| 1330 | *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK; | ||
| 1331 | else if (flags & BTRFS_EXTENT_FLAG_DATA) | ||
| 1332 | *flags_ret = BTRFS_EXTENT_FLAG_DATA; | ||
| 1333 | else | ||
| 1334 | BUG_ON(1); | ||
| 1335 | return 0; | ||
| 1336 | } | ||
| 1244 | 1337 | ||
| 1245 | return -EIO; | 1338 | return -EIO; |
| 1246 | } | 1339 | } |
| @@ -1404,12 +1497,13 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
| 1404 | ULIST_ITER_INIT(&root_uiter); | 1497 | ULIST_ITER_INIT(&root_uiter); |
| 1405 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { | 1498 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { |
| 1406 | pr_debug("root %llu references leaf %llu, data list " | 1499 | pr_debug("root %llu references leaf %llu, data list " |
| 1407 | "%#lx\n", root_node->val, ref_node->val, | 1500 | "%#llx\n", root_node->val, ref_node->val, |
| 1408 | ref_node->aux); | 1501 | (long long)ref_node->aux); |
| 1409 | ret = iterate_leaf_refs( | 1502 | ret = iterate_leaf_refs((struct extent_inode_elem *) |
| 1410 | (struct extent_inode_elem *)ref_node->aux, | 1503 | (uintptr_t)ref_node->aux, |
| 1411 | root_node->val, extent_item_objectid, | 1504 | root_node->val, |
| 1412 | iterate, ctx); | 1505 | extent_item_objectid, |
| 1506 | iterate, ctx); | ||
| 1413 | } | 1507 | } |
| 1414 | ulist_free(roots); | 1508 | ulist_free(roots); |
| 1415 | roots = NULL; | 1509 | roots = NULL; |
| @@ -1432,15 +1526,15 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
| 1432 | { | 1526 | { |
| 1433 | int ret; | 1527 | int ret; |
| 1434 | u64 extent_item_pos; | 1528 | u64 extent_item_pos; |
| 1529 | u64 flags = 0; | ||
| 1435 | struct btrfs_key found_key; | 1530 | struct btrfs_key found_key; |
| 1436 | int search_commit_root = path->search_commit_root; | 1531 | int search_commit_root = path->search_commit_root; |
| 1437 | 1532 | ||
| 1438 | ret = extent_from_logical(fs_info, logical, path, | 1533 | ret = extent_from_logical(fs_info, logical, path, &found_key, &flags); |
| 1439 | &found_key); | ||
| 1440 | btrfs_release_path(path); | 1534 | btrfs_release_path(path); |
| 1441 | if (ret < 0) | 1535 | if (ret < 0) |
| 1442 | return ret; | 1536 | return ret; |
| 1443 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 1537 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
| 1444 | return -EINVAL; | 1538 | return -EINVAL; |
| 1445 | 1539 | ||
| 1446 | extent_item_pos = logical - found_key.objectid; | 1540 | extent_item_pos = logical - found_key.objectid; |
| @@ -1451,9 +1545,12 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
| 1451 | return ret; | 1545 | return ret; |
| 1452 | } | 1546 | } |
| 1453 | 1547 | ||
| 1454 | static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | 1548 | typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off, |
| 1455 | struct btrfs_path *path, | 1549 | struct extent_buffer *eb, void *ctx); |
| 1456 | iterate_irefs_t *iterate, void *ctx) | 1550 | |
| 1551 | static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, | ||
| 1552 | struct btrfs_path *path, | ||
| 1553 | iterate_irefs_t *iterate, void *ctx) | ||
| 1457 | { | 1554 | { |
| 1458 | int ret = 0; | 1555 | int ret = 0; |
| 1459 | int slot; | 1556 | int slot; |
| @@ -1470,7 +1567,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
| 1470 | while (!ret) { | 1567 | while (!ret) { |
| 1471 | path->leave_spinning = 1; | 1568 | path->leave_spinning = 1; |
| 1472 | ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, | 1569 | ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, |
| 1473 | &found_key); | 1570 | &found_key); |
| 1474 | if (ret < 0) | 1571 | if (ret < 0) |
| 1475 | break; | 1572 | break; |
| 1476 | if (ret) { | 1573 | if (ret) { |
| @@ -1498,7 +1595,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
| 1498 | "tree %llu\n", cur, | 1595 | "tree %llu\n", cur, |
| 1499 | (unsigned long long)found_key.objectid, | 1596 | (unsigned long long)found_key.objectid, |
| 1500 | (unsigned long long)fs_root->objectid); | 1597 | (unsigned long long)fs_root->objectid); |
| 1501 | ret = iterate(parent, iref, eb, ctx); | 1598 | ret = iterate(parent, name_len, |
| 1599 | (unsigned long)(iref + 1), eb, ctx); | ||
| 1502 | if (ret) | 1600 | if (ret) |
| 1503 | break; | 1601 | break; |
| 1504 | len = sizeof(*iref) + name_len; | 1602 | len = sizeof(*iref) + name_len; |
| @@ -1513,12 +1611,98 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
| 1513 | return ret; | 1611 | return ret; |
| 1514 | } | 1612 | } |
| 1515 | 1613 | ||
| 1614 | static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, | ||
| 1615 | struct btrfs_path *path, | ||
| 1616 | iterate_irefs_t *iterate, void *ctx) | ||
| 1617 | { | ||
| 1618 | int ret; | ||
| 1619 | int slot; | ||
| 1620 | u64 offset = 0; | ||
| 1621 | u64 parent; | ||
| 1622 | int found = 0; | ||
| 1623 | struct extent_buffer *eb; | ||
| 1624 | struct btrfs_inode_extref *extref; | ||
| 1625 | struct extent_buffer *leaf; | ||
| 1626 | u32 item_size; | ||
| 1627 | u32 cur_offset; | ||
| 1628 | unsigned long ptr; | ||
| 1629 | |||
| 1630 | while (1) { | ||
| 1631 | ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref, | ||
| 1632 | &offset); | ||
| 1633 | if (ret < 0) | ||
| 1634 | break; | ||
| 1635 | if (ret) { | ||
| 1636 | ret = found ? 0 : -ENOENT; | ||
| 1637 | break; | ||
| 1638 | } | ||
| 1639 | ++found; | ||
| 1640 | |||
| 1641 | slot = path->slots[0]; | ||
| 1642 | eb = path->nodes[0]; | ||
| 1643 | /* make sure we can use eb after releasing the path */ | ||
| 1644 | atomic_inc(&eb->refs); | ||
| 1645 | |||
| 1646 | btrfs_tree_read_lock(eb); | ||
| 1647 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | ||
| 1648 | btrfs_release_path(path); | ||
| 1649 | |||
| 1650 | leaf = path->nodes[0]; | ||
| 1651 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 1652 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 1653 | cur_offset = 0; | ||
| 1654 | |||
| 1655 | while (cur_offset < item_size) { | ||
| 1656 | u32 name_len; | ||
| 1657 | |||
| 1658 | extref = (struct btrfs_inode_extref *)(ptr + cur_offset); | ||
| 1659 | parent = btrfs_inode_extref_parent(eb, extref); | ||
| 1660 | name_len = btrfs_inode_extref_name_len(eb, extref); | ||
| 1661 | ret = iterate(parent, name_len, | ||
| 1662 | (unsigned long)&extref->name, eb, ctx); | ||
| 1663 | if (ret) | ||
| 1664 | break; | ||
| 1665 | |||
| 1666 | cur_offset += btrfs_inode_extref_name_len(leaf, extref); | ||
| 1667 | cur_offset += sizeof(*extref); | ||
| 1668 | } | ||
| 1669 | btrfs_tree_read_unlock_blocking(eb); | ||
| 1670 | free_extent_buffer(eb); | ||
| 1671 | |||
| 1672 | offset++; | ||
| 1673 | } | ||
| 1674 | |||
| 1675 | btrfs_release_path(path); | ||
| 1676 | |||
| 1677 | return ret; | ||
| 1678 | } | ||
| 1679 | |||
| 1680 | static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | ||
| 1681 | struct btrfs_path *path, iterate_irefs_t *iterate, | ||
| 1682 | void *ctx) | ||
| 1683 | { | ||
| 1684 | int ret; | ||
| 1685 | int found_refs = 0; | ||
| 1686 | |||
| 1687 | ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx); | ||
| 1688 | if (!ret) | ||
| 1689 | ++found_refs; | ||
| 1690 | else if (ret != -ENOENT) | ||
| 1691 | return ret; | ||
| 1692 | |||
| 1693 | ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx); | ||
| 1694 | if (ret == -ENOENT && found_refs) | ||
| 1695 | return 0; | ||
| 1696 | |||
| 1697 | return ret; | ||
| 1698 | } | ||
| 1699 | |||
| 1516 | /* | 1700 | /* |
| 1517 | * returns 0 if the path could be dumped (probably truncated) | 1701 | * returns 0 if the path could be dumped (probably truncated) |
| 1518 | * returns <0 in case of an error | 1702 | * returns <0 in case of an error |
| 1519 | */ | 1703 | */ |
| 1520 | static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | 1704 | static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, |
| 1521 | struct extent_buffer *eb, void *ctx) | 1705 | struct extent_buffer *eb, void *ctx) |
| 1522 | { | 1706 | { |
| 1523 | struct inode_fs_paths *ipath = ctx; | 1707 | struct inode_fs_paths *ipath = ctx; |
| 1524 | char *fspath; | 1708 | char *fspath; |
| @@ -1531,20 +1715,17 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | |||
| 1531 | ipath->fspath->bytes_left - s_ptr : 0; | 1715 | ipath->fspath->bytes_left - s_ptr : 0; |
| 1532 | 1716 | ||
| 1533 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; | 1717 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; |
| 1534 | fspath = btrfs_iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb, | 1718 | fspath = ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len, |
| 1535 | inum, fspath_min, bytes_left); | 1719 | name_off, eb, inum, fspath_min, |
| 1720 | bytes_left); | ||
| 1536 | if (IS_ERR(fspath)) | 1721 | if (IS_ERR(fspath)) |
| 1537 | return PTR_ERR(fspath); | 1722 | return PTR_ERR(fspath); |
| 1538 | 1723 | ||
| 1539 | if (fspath > fspath_min) { | 1724 | if (fspath > fspath_min) { |
| 1540 | pr_debug("path resolved: %s\n", fspath); | ||
| 1541 | ipath->fspath->val[i] = (u64)(unsigned long)fspath; | 1725 | ipath->fspath->val[i] = (u64)(unsigned long)fspath; |
| 1542 | ++ipath->fspath->elem_cnt; | 1726 | ++ipath->fspath->elem_cnt; |
| 1543 | ipath->fspath->bytes_left = fspath - fspath_min; | 1727 | ipath->fspath->bytes_left = fspath - fspath_min; |
| 1544 | } else { | 1728 | } else { |
| 1545 | pr_debug("missed path, not enough space. missing bytes: %lu, " | ||
| 1546 | "constructed so far: %s\n", | ||
| 1547 | (unsigned long)(fspath_min - fspath), fspath_min); | ||
| 1548 | ++ipath->fspath->elem_missed; | 1729 | ++ipath->fspath->elem_missed; |
| 1549 | ipath->fspath->bytes_missing += fspath_min - fspath; | 1730 | ipath->fspath->bytes_missing += fspath_min - fspath; |
| 1550 | ipath->fspath->bytes_left = 0; | 1731 | ipath->fspath->bytes_left = 0; |
| @@ -1566,7 +1747,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | |||
| 1566 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) | 1747 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) |
| 1567 | { | 1748 | { |
| 1568 | return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path, | 1749 | return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path, |
| 1569 | inode_to_path, ipath); | 1750 | inode_to_path, ipath); |
| 1570 | } | 1751 | } |
| 1571 | 1752 | ||
| 1572 | struct btrfs_data_container *init_data_container(u32 total_bytes) | 1753 | struct btrfs_data_container *init_data_container(u32 total_bytes) |
| @@ -1575,7 +1756,7 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) | |||
| 1575 | size_t alloc_bytes; | 1756 | size_t alloc_bytes; |
| 1576 | 1757 | ||
| 1577 | alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); | 1758 | alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); |
| 1578 | data = kmalloc(alloc_bytes, GFP_NOFS); | 1759 | data = vmalloc(alloc_bytes); |
| 1579 | if (!data) | 1760 | if (!data) |
| 1580 | return ERR_PTR(-ENOMEM); | 1761 | return ERR_PTR(-ENOMEM); |
| 1581 | 1762 | ||
| @@ -1626,6 +1807,6 @@ void free_ipath(struct inode_fs_paths *ipath) | |||
| 1626 | { | 1807 | { |
| 1627 | if (!ipath) | 1808 | if (!ipath) |
| 1628 | return; | 1809 | return; |
| 1629 | kfree(ipath->fspath); | 1810 | vfree(ipath->fspath); |
| 1630 | kfree(ipath); | 1811 | kfree(ipath); |
| 1631 | } | 1812 | } |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 032f4dc7eab8..e75533043a5f 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
| @@ -33,14 +33,13 @@ struct inode_fs_paths { | |||
| 33 | 33 | ||
| 34 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, | 34 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, |
| 35 | void *ctx); | 35 | void *ctx); |
| 36 | typedef int (iterate_irefs_t)(u64 parent, struct btrfs_inode_ref *iref, | ||
| 37 | struct extent_buffer *eb, void *ctx); | ||
| 38 | 36 | ||
| 39 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | 37 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, |
| 40 | struct btrfs_path *path); | 38 | struct btrfs_path *path); |
| 41 | 39 | ||
| 42 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | 40 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, |
| 43 | struct btrfs_path *path, struct btrfs_key *found_key); | 41 | struct btrfs_path *path, struct btrfs_key *found_key, |
| 42 | u64 *flags); | ||
| 44 | 43 | ||
| 45 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | 44 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, |
| 46 | struct btrfs_extent_item *ei, u32 item_size, | 45 | struct btrfs_extent_item *ei, u32 item_size, |
| @@ -69,4 +68,9 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | |||
| 69 | struct btrfs_path *path); | 68 | struct btrfs_path *path); |
| 70 | void free_ipath(struct inode_fs_paths *ipath); | 69 | void free_ipath(struct inode_fs_paths *ipath); |
| 71 | 70 | ||
| 71 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, | ||
| 72 | u64 start_off, struct btrfs_path *path, | ||
| 73 | struct btrfs_inode_extref **ret_extref, | ||
| 74 | u64 *found_off); | ||
| 75 | |||
| 72 | #endif | 76 | #endif |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 5b2ad6bc4fe7..ed8ca7ca5eff 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 | 38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 |
| 39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 | 39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 |
| 40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 | 40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 |
| 41 | #define BTRFS_INODE_NEEDS_FULL_SYNC 7 | ||
| 41 | 42 | ||
| 42 | /* in memory btrfs inode */ | 43 | /* in memory btrfs inode */ |
| 43 | struct btrfs_inode { | 44 | struct btrfs_inode { |
| @@ -143,6 +144,9 @@ struct btrfs_inode { | |||
| 143 | /* flags field from the on disk inode */ | 144 | /* flags field from the on disk inode */ |
| 144 | u32 flags; | 145 | u32 flags; |
| 145 | 146 | ||
| 147 | /* a local copy of root's last_log_commit */ | ||
| 148 | unsigned long last_log_commit; | ||
| 149 | |||
| 146 | /* | 150 | /* |
| 147 | * Counters to keep track of the number of extent item's we may use due | 151 | * Counters to keep track of the number of extent item's we may use due |
| 148 | * to delalloc and such. outstanding_extents is the number of extent | 152 | * to delalloc and such. outstanding_extents is the number of extent |
| @@ -202,15 +206,10 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode) | |||
| 202 | 206 | ||
| 203 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) | 207 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) |
| 204 | { | 208 | { |
| 205 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 206 | int ret = 0; | ||
| 207 | |||
| 208 | mutex_lock(&root->log_mutex); | ||
| 209 | if (BTRFS_I(inode)->logged_trans == generation && | 209 | if (BTRFS_I(inode)->logged_trans == generation && |
| 210 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | 210 | BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit) |
| 211 | ret = 1; | 211 | return 1; |
| 212 | mutex_unlock(&root->log_mutex); | 212 | return 0; |
| 213 | return ret; | ||
| 214 | } | 213 | } |
| 215 | 214 | ||
| 216 | #endif | 215 | #endif |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 9197e2e33407..5a3e45db642a 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
| @@ -37,8 +37,9 @@ | |||
| 37 | * the file system was mounted, (i.e., they have been | 37 | * the file system was mounted, (i.e., they have been |
| 38 | * referenced by the super block) or they have been | 38 | * referenced by the super block) or they have been |
| 39 | * written since then and the write completion callback | 39 | * written since then and the write completion callback |
| 40 | * was called and a FLUSH request to the device where | 40 | * was called and no write error was indicated and a |
| 41 | * these blocks are located was received and completed. | 41 | * FLUSH request to the device where these blocks are |
| 42 | * located was received and completed. | ||
| 42 | * 2b. All referenced blocks need to have a generation | 43 | * 2b. All referenced blocks need to have a generation |
| 43 | * number which is equal to the parent's number. | 44 | * number which is equal to the parent's number. |
| 44 | * | 45 | * |
| @@ -2601,6 +2602,17 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, | |||
| 2601 | (unsigned long long)l->block_ref_to->dev_bytenr, | 2602 | (unsigned long long)l->block_ref_to->dev_bytenr, |
| 2602 | l->block_ref_to->mirror_num); | 2603 | l->block_ref_to->mirror_num); |
| 2603 | ret = -1; | 2604 | ret = -1; |
| 2605 | } else if (l->block_ref_to->iodone_w_error) { | ||
| 2606 | printk(KERN_INFO "btrfs: attempt to write superblock" | ||
| 2607 | " which references block %c @%llu (%s/%llu/%d)" | ||
| 2608 | " which has write error!\n", | ||
| 2609 | btrfsic_get_block_type(state, l->block_ref_to), | ||
| 2610 | (unsigned long long) | ||
| 2611 | l->block_ref_to->logical_bytenr, | ||
| 2612 | l->block_ref_to->dev_state->name, | ||
| 2613 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
| 2614 | l->block_ref_to->mirror_num); | ||
| 2615 | ret = -1; | ||
| 2604 | } else if (l->parent_generation != | 2616 | } else if (l->parent_generation != |
| 2605 | l->block_ref_to->generation && | 2617 | l->block_ref_to->generation && |
| 2606 | BTRFSIC_GENERATION_UNKNOWN != | 2618 | BTRFSIC_GENERATION_UNKNOWN != |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 43d1c5a3a030..c6467aa88bee 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -577,6 +577,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 577 | u64 em_start; | 577 | u64 em_start; |
| 578 | struct extent_map *em; | 578 | struct extent_map *em; |
| 579 | int ret = -ENOMEM; | 579 | int ret = -ENOMEM; |
| 580 | int faili = 0; | ||
| 580 | u32 *sums; | 581 | u32 *sums; |
| 581 | 582 | ||
| 582 | tree = &BTRFS_I(inode)->io_tree; | 583 | tree = &BTRFS_I(inode)->io_tree; |
| @@ -626,9 +627,13 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 626 | for (pg_index = 0; pg_index < nr_pages; pg_index++) { | 627 | for (pg_index = 0; pg_index < nr_pages; pg_index++) { |
| 627 | cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS | | 628 | cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS | |
| 628 | __GFP_HIGHMEM); | 629 | __GFP_HIGHMEM); |
| 629 | if (!cb->compressed_pages[pg_index]) | 630 | if (!cb->compressed_pages[pg_index]) { |
| 631 | faili = pg_index - 1; | ||
| 632 | ret = -ENOMEM; | ||
| 630 | goto fail2; | 633 | goto fail2; |
| 634 | } | ||
| 631 | } | 635 | } |
| 636 | faili = nr_pages - 1; | ||
| 632 | cb->nr_pages = nr_pages; | 637 | cb->nr_pages = nr_pages; |
| 633 | 638 | ||
| 634 | add_ra_bio_pages(inode, em_start + em_len, cb); | 639 | add_ra_bio_pages(inode, em_start + em_len, cb); |
| @@ -713,8 +718,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 713 | return 0; | 718 | return 0; |
| 714 | 719 | ||
| 715 | fail2: | 720 | fail2: |
| 716 | for (pg_index = 0; pg_index < nr_pages; pg_index++) | 721 | while (faili >= 0) { |
| 717 | free_page((unsigned long)cb->compressed_pages[pg_index]); | 722 | __free_page(cb->compressed_pages[faili]); |
| 723 | faili--; | ||
| 724 | } | ||
| 718 | 725 | ||
| 719 | kfree(cb->compressed_pages); | 726 | kfree(cb->compressed_pages); |
| 720 | fail1: | 727 | fail1: |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6d183f60d63a..b33436211000 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -4402,149 +4402,6 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
| 4402 | } | 4402 | } |
| 4403 | 4403 | ||
| 4404 | /* | 4404 | /* |
| 4405 | * Given a key and some data, insert items into the tree. | ||
| 4406 | * This does all the path init required, making room in the tree if needed. | ||
| 4407 | * Returns the number of keys that were inserted. | ||
| 4408 | */ | ||
| 4409 | int btrfs_insert_some_items(struct btrfs_trans_handle *trans, | ||
| 4410 | struct btrfs_root *root, | ||
| 4411 | struct btrfs_path *path, | ||
| 4412 | struct btrfs_key *cpu_key, u32 *data_size, | ||
| 4413 | int nr) | ||
| 4414 | { | ||
| 4415 | struct extent_buffer *leaf; | ||
| 4416 | struct btrfs_item *item; | ||
| 4417 | int ret = 0; | ||
| 4418 | int slot; | ||
| 4419 | int i; | ||
| 4420 | u32 nritems; | ||
| 4421 | u32 total_data = 0; | ||
| 4422 | u32 total_size = 0; | ||
| 4423 | unsigned int data_end; | ||
| 4424 | struct btrfs_disk_key disk_key; | ||
| 4425 | struct btrfs_key found_key; | ||
| 4426 | struct btrfs_map_token token; | ||
| 4427 | |||
| 4428 | btrfs_init_map_token(&token); | ||
| 4429 | |||
| 4430 | for (i = 0; i < nr; i++) { | ||
| 4431 | if (total_size + data_size[i] + sizeof(struct btrfs_item) > | ||
| 4432 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 4433 | break; | ||
| 4434 | nr = i; | ||
| 4435 | } | ||
| 4436 | total_data += data_size[i]; | ||
| 4437 | total_size += data_size[i] + sizeof(struct btrfs_item); | ||
| 4438 | } | ||
| 4439 | BUG_ON(nr == 0); | ||
| 4440 | |||
| 4441 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
| 4442 | if (ret == 0) | ||
| 4443 | return -EEXIST; | ||
| 4444 | if (ret < 0) | ||
| 4445 | goto out; | ||
| 4446 | |||
| 4447 | leaf = path->nodes[0]; | ||
| 4448 | |||
| 4449 | nritems = btrfs_header_nritems(leaf); | ||
| 4450 | data_end = leaf_data_end(root, leaf); | ||
| 4451 | |||
| 4452 | if (btrfs_leaf_free_space(root, leaf) < total_size) { | ||
| 4453 | for (i = nr; i >= 0; i--) { | ||
| 4454 | total_data -= data_size[i]; | ||
| 4455 | total_size -= data_size[i] + sizeof(struct btrfs_item); | ||
| 4456 | if (total_size < btrfs_leaf_free_space(root, leaf)) | ||
| 4457 | break; | ||
| 4458 | } | ||
| 4459 | nr = i; | ||
| 4460 | } | ||
| 4461 | |||
| 4462 | slot = path->slots[0]; | ||
| 4463 | BUG_ON(slot < 0); | ||
| 4464 | |||
| 4465 | if (slot != nritems) { | ||
| 4466 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); | ||
| 4467 | |||
| 4468 | item = btrfs_item_nr(leaf, slot); | ||
| 4469 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 4470 | |||
| 4471 | /* figure out how many keys we can insert in here */ | ||
| 4472 | total_data = data_size[0]; | ||
| 4473 | for (i = 1; i < nr; i++) { | ||
| 4474 | if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0) | ||
| 4475 | break; | ||
| 4476 | total_data += data_size[i]; | ||
| 4477 | } | ||
| 4478 | nr = i; | ||
| 4479 | |||
| 4480 | if (old_data < data_end) { | ||
| 4481 | btrfs_print_leaf(root, leaf); | ||
| 4482 | printk(KERN_CRIT "slot %d old_data %d data_end %d\n", | ||
| 4483 | slot, old_data, data_end); | ||
| 4484 | BUG_ON(1); | ||
| 4485 | } | ||
| 4486 | /* | ||
| 4487 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | ||
| 4488 | */ | ||
| 4489 | /* first correct the data pointers */ | ||
| 4490 | for (i = slot; i < nritems; i++) { | ||
| 4491 | u32 ioff; | ||
| 4492 | |||
| 4493 | item = btrfs_item_nr(leaf, i); | ||
| 4494 | ioff = btrfs_token_item_offset(leaf, item, &token); | ||
| 4495 | btrfs_set_token_item_offset(leaf, item, | ||
| 4496 | ioff - total_data, &token); | ||
| 4497 | } | ||
| 4498 | /* shift the items */ | ||
| 4499 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), | ||
| 4500 | btrfs_item_nr_offset(slot), | ||
| 4501 | (nritems - slot) * sizeof(struct btrfs_item)); | ||
| 4502 | |||
| 4503 | /* shift the data */ | ||
| 4504 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
| 4505 | data_end - total_data, btrfs_leaf_data(leaf) + | ||
| 4506 | data_end, old_data - data_end); | ||
| 4507 | data_end = old_data; | ||
| 4508 | } else { | ||
| 4509 | /* | ||
| 4510 | * this sucks but it has to be done, if we are inserting at | ||
| 4511 | * the end of the leaf only insert 1 of the items, since we | ||
| 4512 | * have no way of knowing whats on the next leaf and we'd have | ||
| 4513 | * to drop our current locks to figure it out | ||
| 4514 | */ | ||
| 4515 | nr = 1; | ||
| 4516 | } | ||
| 4517 | |||
| 4518 | /* setup the item for the new data */ | ||
| 4519 | for (i = 0; i < nr; i++) { | ||
| 4520 | btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); | ||
| 4521 | btrfs_set_item_key(leaf, &disk_key, slot + i); | ||
| 4522 | item = btrfs_item_nr(leaf, slot + i); | ||
| 4523 | btrfs_set_token_item_offset(leaf, item, | ||
| 4524 | data_end - data_size[i], &token); | ||
| 4525 | data_end -= data_size[i]; | ||
| 4526 | btrfs_set_token_item_size(leaf, item, data_size[i], &token); | ||
| 4527 | } | ||
| 4528 | btrfs_set_header_nritems(leaf, nritems + nr); | ||
| 4529 | btrfs_mark_buffer_dirty(leaf); | ||
| 4530 | |||
| 4531 | ret = 0; | ||
| 4532 | if (slot == 0) { | ||
| 4533 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
| 4534 | fixup_low_keys(trans, root, path, &disk_key, 1); | ||
| 4535 | } | ||
| 4536 | |||
| 4537 | if (btrfs_leaf_free_space(root, leaf) < 0) { | ||
| 4538 | btrfs_print_leaf(root, leaf); | ||
| 4539 | BUG(); | ||
| 4540 | } | ||
| 4541 | out: | ||
| 4542 | if (!ret) | ||
| 4543 | ret = nr; | ||
| 4544 | return ret; | ||
| 4545 | } | ||
| 4546 | |||
| 4547 | /* | ||
| 4548 | * this is a helper for btrfs_insert_empty_items, the main goal here is | 4405 | * this is a helper for btrfs_insert_empty_items, the main goal here is |
| 4549 | * to save stack depth by doing the bulk of the work in a function | 4406 | * to save stack depth by doing the bulk of the work in a function |
| 4550 | * that doesn't call btrfs_search_slot | 4407 | * that doesn't call btrfs_search_slot |
| @@ -5073,6 +4930,7 @@ static void tree_move_down(struct btrfs_root *root, | |||
| 5073 | struct btrfs_path *path, | 4930 | struct btrfs_path *path, |
| 5074 | int *level, int root_level) | 4931 | int *level, int root_level) |
| 5075 | { | 4932 | { |
| 4933 | BUG_ON(*level == 0); | ||
| 5076 | path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], | 4934 | path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], |
| 5077 | path->slots[*level]); | 4935 | path->slots[*level]); |
| 5078 | path->slots[*level - 1] = 0; | 4936 | path->slots[*level - 1] = 0; |
| @@ -5089,7 +4947,7 @@ static int tree_move_next_or_upnext(struct btrfs_root *root, | |||
| 5089 | 4947 | ||
| 5090 | path->slots[*level]++; | 4948 | path->slots[*level]++; |
| 5091 | 4949 | ||
| 5092 | while (path->slots[*level] == nritems) { | 4950 | while (path->slots[*level] >= nritems) { |
| 5093 | if (*level == root_level) | 4951 | if (*level == root_level) |
| 5094 | return -1; | 4952 | return -1; |
| 5095 | 4953 | ||
| @@ -5433,9 +5291,11 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5433 | goto out; | 5291 | goto out; |
| 5434 | advance_right = ADVANCE; | 5292 | advance_right = ADVANCE; |
| 5435 | } else { | 5293 | } else { |
| 5294 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); | ||
| 5436 | ret = tree_compare_item(left_root, left_path, | 5295 | ret = tree_compare_item(left_root, left_path, |
| 5437 | right_path, tmp_buf); | 5296 | right_path, tmp_buf); |
| 5438 | if (ret) { | 5297 | if (ret) { |
| 5298 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); | ||
| 5439 | ret = changed_cb(left_root, right_root, | 5299 | ret = changed_cb(left_root, right_root, |
| 5440 | left_path, right_path, | 5300 | left_path, right_path, |
| 5441 | &left_key, | 5301 | &left_key, |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0d195b507660..926c9ffc66d9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -116,7 +116,7 @@ struct btrfs_ordered_sum; | |||
| 116 | #define BTRFS_FREE_SPACE_OBJECTID -11ULL | 116 | #define BTRFS_FREE_SPACE_OBJECTID -11ULL |
| 117 | 117 | ||
| 118 | /* | 118 | /* |
| 119 | * The inode number assigned to the special inode for sotring | 119 | * The inode number assigned to the special inode for storing |
| 120 | * free ino cache | 120 | * free ino cache |
| 121 | */ | 121 | */ |
| 122 | #define BTRFS_FREE_INO_OBJECTID -12ULL | 122 | #define BTRFS_FREE_INO_OBJECTID -12ULL |
| @@ -154,6 +154,13 @@ struct btrfs_ordered_sum; | |||
| 154 | */ | 154 | */ |
| 155 | #define BTRFS_NAME_LEN 255 | 155 | #define BTRFS_NAME_LEN 255 |
| 156 | 156 | ||
| 157 | /* | ||
| 158 | * Theoretical limit is larger, but we keep this down to a sane | ||
| 159 | * value. That should limit greatly the possibility of collisions on | ||
| 160 | * inode ref items. | ||
| 161 | */ | ||
| 162 | #define BTRFS_LINK_MAX 65535U | ||
| 163 | |||
| 157 | /* 32 bytes in various csum fields */ | 164 | /* 32 bytes in various csum fields */ |
| 158 | #define BTRFS_CSUM_SIZE 32 | 165 | #define BTRFS_CSUM_SIZE 32 |
| 159 | 166 | ||
| @@ -489,6 +496,8 @@ struct btrfs_super_block { | |||
| 489 | */ | 496 | */ |
| 490 | #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) | 497 | #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) |
| 491 | 498 | ||
| 499 | #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) | ||
| 500 | |||
| 492 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 501 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
| 493 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 502 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
| 494 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 503 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
| @@ -496,7 +505,8 @@ struct btrfs_super_block { | |||
| 496 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | 505 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
| 497 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ | 506 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
| 498 | BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ | 507 | BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ |
| 499 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | 508 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ |
| 509 | BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) | ||
| 500 | 510 | ||
| 501 | /* | 511 | /* |
| 502 | * A leaf is full of items. offset and size tell us where to find | 512 | * A leaf is full of items. offset and size tell us where to find |
| @@ -643,6 +653,14 @@ struct btrfs_inode_ref { | |||
| 643 | /* name goes here */ | 653 | /* name goes here */ |
| 644 | } __attribute__ ((__packed__)); | 654 | } __attribute__ ((__packed__)); |
| 645 | 655 | ||
| 656 | struct btrfs_inode_extref { | ||
| 657 | __le64 parent_objectid; | ||
| 658 | __le64 index; | ||
| 659 | __le16 name_len; | ||
| 660 | __u8 name[0]; | ||
| 661 | /* name goes here */ | ||
| 662 | } __attribute__ ((__packed__)); | ||
| 663 | |||
| 646 | struct btrfs_timespec { | 664 | struct btrfs_timespec { |
| 647 | __le64 sec; | 665 | __le64 sec; |
| 648 | __le32 nsec; | 666 | __le32 nsec; |
| @@ -1028,12 +1046,22 @@ struct btrfs_space_info { | |||
| 1028 | wait_queue_head_t wait; | 1046 | wait_queue_head_t wait; |
| 1029 | }; | 1047 | }; |
| 1030 | 1048 | ||
| 1049 | #define BTRFS_BLOCK_RSV_GLOBAL 1 | ||
| 1050 | #define BTRFS_BLOCK_RSV_DELALLOC 2 | ||
| 1051 | #define BTRFS_BLOCK_RSV_TRANS 3 | ||
| 1052 | #define BTRFS_BLOCK_RSV_CHUNK 4 | ||
| 1053 | #define BTRFS_BLOCK_RSV_DELOPS 5 | ||
| 1054 | #define BTRFS_BLOCK_RSV_EMPTY 6 | ||
| 1055 | #define BTRFS_BLOCK_RSV_TEMP 7 | ||
| 1056 | |||
| 1031 | struct btrfs_block_rsv { | 1057 | struct btrfs_block_rsv { |
| 1032 | u64 size; | 1058 | u64 size; |
| 1033 | u64 reserved; | 1059 | u64 reserved; |
| 1034 | struct btrfs_space_info *space_info; | 1060 | struct btrfs_space_info *space_info; |
| 1035 | spinlock_t lock; | 1061 | spinlock_t lock; |
| 1036 | unsigned int full; | 1062 | unsigned short full; |
| 1063 | unsigned short type; | ||
| 1064 | unsigned short failfast; | ||
| 1037 | }; | 1065 | }; |
| 1038 | 1066 | ||
| 1039 | /* | 1067 | /* |
| @@ -1127,6 +1155,9 @@ struct btrfs_block_group_cache { | |||
| 1127 | * Today it will only have one thing on it, but that may change | 1155 | * Today it will only have one thing on it, but that may change |
| 1128 | */ | 1156 | */ |
| 1129 | struct list_head cluster_list; | 1157 | struct list_head cluster_list; |
| 1158 | |||
| 1159 | /* For delayed block group creation */ | ||
| 1160 | struct list_head new_bg_list; | ||
| 1130 | }; | 1161 | }; |
| 1131 | 1162 | ||
| 1132 | /* delayed seq elem */ | 1163 | /* delayed seq elem */ |
| @@ -1240,7 +1271,6 @@ struct btrfs_fs_info { | |||
| 1240 | struct mutex reloc_mutex; | 1271 | struct mutex reloc_mutex; |
| 1241 | 1272 | ||
| 1242 | struct list_head trans_list; | 1273 | struct list_head trans_list; |
| 1243 | struct list_head hashers; | ||
| 1244 | struct list_head dead_roots; | 1274 | struct list_head dead_roots; |
| 1245 | struct list_head caching_block_groups; | 1275 | struct list_head caching_block_groups; |
| 1246 | 1276 | ||
| @@ -1366,9 +1396,6 @@ struct btrfs_fs_info { | |||
| 1366 | struct rb_root defrag_inodes; | 1396 | struct rb_root defrag_inodes; |
| 1367 | atomic_t defrag_running; | 1397 | atomic_t defrag_running; |
| 1368 | 1398 | ||
| 1369 | spinlock_t ref_cache_lock; | ||
| 1370 | u64 total_ref_cache_size; | ||
| 1371 | |||
| 1372 | /* | 1399 | /* |
| 1373 | * these three are in extended format (availability of single | 1400 | * these three are in extended format (availability of single |
| 1374 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other | 1401 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other |
| @@ -1441,6 +1468,8 @@ struct btrfs_fs_info { | |||
| 1441 | 1468 | ||
| 1442 | /* next backup root to be overwritten */ | 1469 | /* next backup root to be overwritten */ |
| 1443 | int backup_root_index; | 1470 | int backup_root_index; |
| 1471 | |||
| 1472 | int num_tolerated_disk_barrier_failures; | ||
| 1444 | }; | 1473 | }; |
| 1445 | 1474 | ||
| 1446 | /* | 1475 | /* |
| @@ -1481,9 +1510,9 @@ struct btrfs_root { | |||
| 1481 | wait_queue_head_t log_commit_wait[2]; | 1510 | wait_queue_head_t log_commit_wait[2]; |
| 1482 | atomic_t log_writers; | 1511 | atomic_t log_writers; |
| 1483 | atomic_t log_commit[2]; | 1512 | atomic_t log_commit[2]; |
| 1513 | atomic_t log_batch; | ||
| 1484 | unsigned long log_transid; | 1514 | unsigned long log_transid; |
| 1485 | unsigned long last_log_commit; | 1515 | unsigned long last_log_commit; |
| 1486 | unsigned long log_batch; | ||
| 1487 | pid_t log_start_pid; | 1516 | pid_t log_start_pid; |
| 1488 | bool log_multiple_pids; | 1517 | bool log_multiple_pids; |
| 1489 | 1518 | ||
| @@ -1592,6 +1621,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
| 1592 | */ | 1621 | */ |
| 1593 | #define BTRFS_INODE_ITEM_KEY 1 | 1622 | #define BTRFS_INODE_ITEM_KEY 1 |
| 1594 | #define BTRFS_INODE_REF_KEY 12 | 1623 | #define BTRFS_INODE_REF_KEY 12 |
| 1624 | #define BTRFS_INODE_EXTREF_KEY 13 | ||
| 1595 | #define BTRFS_XATTR_ITEM_KEY 24 | 1625 | #define BTRFS_XATTR_ITEM_KEY 24 |
| 1596 | #define BTRFS_ORPHAN_ITEM_KEY 48 | 1626 | #define BTRFS_ORPHAN_ITEM_KEY 48 |
| 1597 | /* reserve 2-15 close to the inode for later flexibility */ | 1627 | /* reserve 2-15 close to the inode for later flexibility */ |
| @@ -1978,6 +2008,13 @@ BTRFS_SETGET_STACK_FUNCS(block_group_flags, | |||
| 1978 | BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); | 2008 | BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); |
| 1979 | BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); | 2009 | BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); |
| 1980 | 2010 | ||
| 2011 | /* struct btrfs_inode_extref */ | ||
| 2012 | BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, | ||
| 2013 | parent_objectid, 64); | ||
| 2014 | BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref, | ||
| 2015 | name_len, 16); | ||
| 2016 | BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); | ||
| 2017 | |||
| 1981 | /* struct btrfs_inode_item */ | 2018 | /* struct btrfs_inode_item */ |
| 1982 | BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); | 2019 | BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); |
| 1983 | BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); | 2020 | BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); |
| @@ -2858,6 +2895,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 2858 | u64 size); | 2895 | u64 size); |
| 2859 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2896 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
| 2860 | struct btrfs_root *root, u64 group_start); | 2897 | struct btrfs_root *root, u64 group_start); |
| 2898 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | ||
| 2899 | struct btrfs_root *root); | ||
| 2861 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2900 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
| 2862 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | 2901 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); |
| 2863 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2902 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| @@ -2874,8 +2913,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); | |||
| 2874 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); | 2913 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); |
| 2875 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); | 2914 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); |
| 2876 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | 2915 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); |
| 2877 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | 2916 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); |
| 2878 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | 2917 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root, |
| 2918 | unsigned short type); | ||
| 2879 | void btrfs_free_block_rsv(struct btrfs_root *root, | 2919 | void btrfs_free_block_rsv(struct btrfs_root *root, |
| 2880 | struct btrfs_block_rsv *rsv); | 2920 | struct btrfs_block_rsv *rsv); |
| 2881 | int btrfs_block_rsv_add(struct btrfs_root *root, | 2921 | int btrfs_block_rsv_add(struct btrfs_root *root, |
| @@ -3172,12 +3212,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 3172 | struct btrfs_root *root, | 3212 | struct btrfs_root *root, |
| 3173 | const char *name, int name_len, | 3213 | const char *name, int name_len, |
| 3174 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 3214 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
| 3175 | struct btrfs_inode_ref * | 3215 | int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans, |
| 3176 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | 3216 | struct btrfs_root *root, |
| 3177 | struct btrfs_root *root, | 3217 | struct btrfs_path *path, |
| 3178 | struct btrfs_path *path, | 3218 | const char *name, int name_len, |
| 3179 | const char *name, int name_len, | 3219 | u64 inode_objectid, u64 ref_objectid, int mod, |
| 3180 | u64 inode_objectid, u64 ref_objectid, int mod); | 3220 | u64 *ret_index); |
| 3181 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 3221 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
| 3182 | struct btrfs_root *root, | 3222 | struct btrfs_root *root, |
| 3183 | struct btrfs_path *path, u64 objectid); | 3223 | struct btrfs_path *path, u64 objectid); |
| @@ -3185,6 +3225,19 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 3185 | *root, struct btrfs_path *path, | 3225 | *root, struct btrfs_path *path, |
| 3186 | struct btrfs_key *location, int mod); | 3226 | struct btrfs_key *location, int mod); |
| 3187 | 3227 | ||
| 3228 | struct btrfs_inode_extref * | ||
| 3229 | btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, | ||
| 3230 | struct btrfs_root *root, | ||
| 3231 | struct btrfs_path *path, | ||
| 3232 | const char *name, int name_len, | ||
| 3233 | u64 inode_objectid, u64 ref_objectid, int ins_len, | ||
| 3234 | int cow); | ||
| 3235 | |||
| 3236 | int btrfs_find_name_in_ext_backref(struct btrfs_path *path, | ||
| 3237 | u64 ref_objectid, const char *name, | ||
| 3238 | int name_len, | ||
| 3239 | struct btrfs_inode_extref **extref_ret); | ||
| 3240 | |||
| 3188 | /* file-item.c */ | 3241 | /* file-item.c */ |
| 3189 | int btrfs_del_csums(struct btrfs_trans_handle *trans, | 3242 | int btrfs_del_csums(struct btrfs_trans_handle *trans, |
| 3190 | struct btrfs_root *root, u64 bytenr, u64 len); | 3243 | struct btrfs_root *root, u64 bytenr, u64 len); |
| @@ -3249,6 +3302,8 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
| 3249 | struct btrfs_root *root, | 3302 | struct btrfs_root *root, |
| 3250 | struct inode *dir, u64 objectid, | 3303 | struct inode *dir, u64 objectid, |
| 3251 | const char *name, int name_len); | 3304 | const char *name, int name_len); |
| 3305 | int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, | ||
| 3306 | int front); | ||
| 3252 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | 3307 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, |
| 3253 | struct btrfs_root *root, | 3308 | struct btrfs_root *root, |
| 3254 | struct inode *inode, u64 new_size, | 3309 | struct inode *inode, u64 new_size, |
| @@ -3308,16 +3363,27 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | |||
| 3308 | int btrfs_defrag_file(struct inode *inode, struct file *file, | 3363 | int btrfs_defrag_file(struct inode *inode, struct file *file, |
| 3309 | struct btrfs_ioctl_defrag_range_args *range, | 3364 | struct btrfs_ioctl_defrag_range_args *range, |
| 3310 | u64 newer_than, unsigned long max_pages); | 3365 | u64 newer_than, unsigned long max_pages); |
| 3366 | void btrfs_get_block_group_info(struct list_head *groups_list, | ||
| 3367 | struct btrfs_ioctl_space_info *space); | ||
| 3368 | |||
| 3311 | /* file.c */ | 3369 | /* file.c */ |
| 3312 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | 3370 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, |
| 3313 | struct inode *inode); | 3371 | struct inode *inode); |
| 3314 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | 3372 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); |
| 3315 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); | 3373 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
| 3316 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 3374 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 3317 | int skip_pinned); | 3375 | int skip_pinned); |
| 3376 | int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, | ||
| 3377 | u64 start, u64 end, int skip_pinned, | ||
| 3378 | int modified); | ||
| 3318 | extern const struct file_operations btrfs_file_operations; | 3379 | extern const struct file_operations btrfs_file_operations; |
| 3319 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | 3380 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 3320 | u64 start, u64 end, u64 *hint_byte, int drop_cache); | 3381 | struct btrfs_root *root, struct inode *inode, |
| 3382 | struct btrfs_path *path, u64 start, u64 end, | ||
| 3383 | u64 *drop_end, int drop_cache); | ||
| 3384 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||
| 3385 | struct btrfs_root *root, struct inode *inode, u64 start, | ||
| 3386 | u64 end, int drop_cache); | ||
| 3321 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 3387 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
| 3322 | struct inode *inode, u64 start, u64 end); | 3388 | struct inode *inode, u64 start, u64 end); |
| 3323 | int btrfs_release_file(struct inode *inode, struct file *file); | 3389 | int btrfs_release_file(struct inode *inode, struct file *file); |
| @@ -3378,6 +3444,11 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, | |||
| 3378 | } | 3444 | } |
| 3379 | } | 3445 | } |
| 3380 | 3446 | ||
| 3447 | /* | ||
| 3448 | * Call btrfs_abort_transaction as early as possible when an error condition is | ||
| 3449 | * detected, that way the exact line number is reported. | ||
| 3450 | */ | ||
| 3451 | |||
| 3381 | #define btrfs_abort_transaction(trans, root, errno) \ | 3452 | #define btrfs_abort_transaction(trans, root, errno) \ |
| 3382 | do { \ | 3453 | do { \ |
| 3383 | __btrfs_abort_transaction(trans, root, __func__, \ | 3454 | __btrfs_abort_transaction(trans, root, __func__, \ |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 07d5eeb1e6f1..478f66bdc57b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -29,7 +29,7 @@ static struct kmem_cache *delayed_node_cache; | |||
| 29 | 29 | ||
| 30 | int __init btrfs_delayed_inode_init(void) | 30 | int __init btrfs_delayed_inode_init(void) |
| 31 | { | 31 | { |
| 32 | delayed_node_cache = kmem_cache_create("delayed_node", | 32 | delayed_node_cache = kmem_cache_create("btrfs_delayed_node", |
| 33 | sizeof(struct btrfs_delayed_node), | 33 | sizeof(struct btrfs_delayed_node), |
| 34 | 0, | 34 | 0, |
| 35 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | 35 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, |
| @@ -650,7 +650,7 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 650 | * we're accounted for. | 650 | * we're accounted for. |
| 651 | */ | 651 | */ |
| 652 | if (!src_rsv || (!trans->bytes_reserved && | 652 | if (!src_rsv || (!trans->bytes_reserved && |
| 653 | src_rsv != &root->fs_info->delalloc_block_rsv)) { | 653 | src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { |
| 654 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); | 654 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); |
| 655 | /* | 655 | /* |
| 656 | * Since we're under a transaction reserve_metadata_bytes could | 656 | * Since we're under a transaction reserve_metadata_bytes could |
| @@ -668,7 +668,7 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 668 | num_bytes, 1); | 668 | num_bytes, 1); |
| 669 | } | 669 | } |
| 670 | return ret; | 670 | return ret; |
| 671 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { | 671 | } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { |
| 672 | spin_lock(&BTRFS_I(inode)->lock); | 672 | spin_lock(&BTRFS_I(inode)->lock); |
| 673 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | 673 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
| 674 | &BTRFS_I(inode)->runtime_flags)) { | 674 | &BTRFS_I(inode)->runtime_flags)) { |
| @@ -1715,8 +1715,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | |||
| 1715 | struct btrfs_inode_item *inode_item, | 1715 | struct btrfs_inode_item *inode_item, |
| 1716 | struct inode *inode) | 1716 | struct inode *inode) |
| 1717 | { | 1717 | { |
| 1718 | btrfs_set_stack_inode_uid(inode_item, inode->i_uid); | 1718 | btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode)); |
| 1719 | btrfs_set_stack_inode_gid(inode_item, inode->i_gid); | 1719 | btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode)); |
| 1720 | btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); | 1720 | btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); |
| 1721 | btrfs_set_stack_inode_mode(inode_item, inode->i_mode); | 1721 | btrfs_set_stack_inode_mode(inode_item, inode->i_mode); |
| 1722 | btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); | 1722 | btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); |
| @@ -1764,8 +1764,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) | |||
| 1764 | 1764 | ||
| 1765 | inode_item = &delayed_node->inode_item; | 1765 | inode_item = &delayed_node->inode_item; |
| 1766 | 1766 | ||
| 1767 | inode->i_uid = btrfs_stack_inode_uid(inode_item); | 1767 | i_uid_write(inode, btrfs_stack_inode_uid(inode_item)); |
| 1768 | inode->i_gid = btrfs_stack_inode_gid(inode_item); | 1768 | i_gid_write(inode, btrfs_stack_inode_gid(inode_item)); |
| 1769 | btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); | 1769 | btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); |
| 1770 | inode->i_mode = btrfs_stack_inode_mode(inode_item); | 1770 | inode->i_mode = btrfs_stack_inode_mode(inode_item); |
| 1771 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); | 1771 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ab5300595847..c9d703693df0 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | #ifndef __DELAYED_REF__ | 18 | #ifndef __DELAYED_REF__ |
| 19 | #define __DELAYED_REF__ | 19 | #define __DELAYED_REF__ |
| 20 | 20 | ||
| 21 | /* these are the possible values of struct btrfs_delayed_ref->action */ | 21 | /* these are the possible values of struct btrfs_delayed_ref_node->action */ |
| 22 | #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ | 22 | #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ |
| 23 | #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ | 23 | #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ |
| 24 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ | 24 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 22e98e04c2ea..7cda51995c1e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -46,6 +46,10 @@ | |||
| 46 | #include "check-integrity.h" | 46 | #include "check-integrity.h" |
| 47 | #include "rcu-string.h" | 47 | #include "rcu-string.h" |
| 48 | 48 | ||
| 49 | #ifdef CONFIG_X86 | ||
| 50 | #include <asm/cpufeature.h> | ||
| 51 | #endif | ||
| 52 | |||
| 49 | static struct extent_io_ops btree_extent_io_ops; | 53 | static struct extent_io_ops btree_extent_io_ops; |
| 50 | static void end_workqueue_fn(struct btrfs_work *work); | 54 | static void end_workqueue_fn(struct btrfs_work *work); |
| 51 | static void free_fs_root(struct btrfs_root *root); | 55 | static void free_fs_root(struct btrfs_root *root); |
| @@ -217,26 +221,16 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 217 | write_lock(&em_tree->lock); | 221 | write_lock(&em_tree->lock); |
| 218 | ret = add_extent_mapping(em_tree, em); | 222 | ret = add_extent_mapping(em_tree, em); |
| 219 | if (ret == -EEXIST) { | 223 | if (ret == -EEXIST) { |
| 220 | u64 failed_start = em->start; | ||
| 221 | u64 failed_len = em->len; | ||
| 222 | |||
| 223 | free_extent_map(em); | 224 | free_extent_map(em); |
| 224 | em = lookup_extent_mapping(em_tree, start, len); | 225 | em = lookup_extent_mapping(em_tree, start, len); |
| 225 | if (em) { | 226 | if (!em) |
| 226 | ret = 0; | 227 | em = ERR_PTR(-EIO); |
| 227 | } else { | ||
| 228 | em = lookup_extent_mapping(em_tree, failed_start, | ||
| 229 | failed_len); | ||
| 230 | ret = -EIO; | ||
| 231 | } | ||
| 232 | } else if (ret) { | 228 | } else if (ret) { |
| 233 | free_extent_map(em); | 229 | free_extent_map(em); |
| 234 | em = NULL; | 230 | em = ERR_PTR(ret); |
| 235 | } | 231 | } |
| 236 | write_unlock(&em_tree->lock); | 232 | write_unlock(&em_tree->lock); |
| 237 | 233 | ||
| 238 | if (ret) | ||
| 239 | em = ERR_PTR(ret); | ||
| 240 | out: | 234 | out: |
| 241 | return em; | 235 | return em; |
| 242 | } | 236 | } |
| @@ -439,10 +433,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
| 439 | WARN_ON(1); | 433 | WARN_ON(1); |
| 440 | return 0; | 434 | return 0; |
| 441 | } | 435 | } |
| 442 | if (eb->pages[0] != page) { | ||
| 443 | WARN_ON(1); | ||
| 444 | return 0; | ||
| 445 | } | ||
| 446 | if (!PageUptodate(page)) { | 436 | if (!PageUptodate(page)) { |
| 447 | WARN_ON(1); | 437 | WARN_ON(1); |
| 448 | return 0; | 438 | return 0; |
| @@ -869,10 +859,22 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 869 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); | 859 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); |
| 870 | } | 860 | } |
| 871 | 861 | ||
| 862 | static int check_async_write(struct inode *inode, unsigned long bio_flags) | ||
| 863 | { | ||
| 864 | if (bio_flags & EXTENT_BIO_TREE_LOG) | ||
| 865 | return 0; | ||
| 866 | #ifdef CONFIG_X86 | ||
| 867 | if (cpu_has_xmm4_2) | ||
| 868 | return 0; | ||
| 869 | #endif | ||
| 870 | return 1; | ||
| 871 | } | ||
| 872 | |||
| 872 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 873 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 873 | int mirror_num, unsigned long bio_flags, | 874 | int mirror_num, unsigned long bio_flags, |
| 874 | u64 bio_offset) | 875 | u64 bio_offset) |
| 875 | { | 876 | { |
| 877 | int async = check_async_write(inode, bio_flags); | ||
| 876 | int ret; | 878 | int ret; |
| 877 | 879 | ||
| 878 | if (!(rw & REQ_WRITE)) { | 880 | if (!(rw & REQ_WRITE)) { |
| @@ -887,6 +889,12 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 887 | return ret; | 889 | return ret; |
| 888 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 890 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
| 889 | mirror_num, 0); | 891 | mirror_num, 0); |
| 892 | } else if (!async) { | ||
| 893 | ret = btree_csum_one_bio(bio); | ||
| 894 | if (ret) | ||
| 895 | return ret; | ||
| 896 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | ||
| 897 | mirror_num, 0); | ||
| 890 | } | 898 | } |
| 891 | 899 | ||
| 892 | /* | 900 | /* |
| @@ -1168,8 +1176,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1168 | atomic_set(&root->log_commit[0], 0); | 1176 | atomic_set(&root->log_commit[0], 0); |
| 1169 | atomic_set(&root->log_commit[1], 0); | 1177 | atomic_set(&root->log_commit[1], 0); |
| 1170 | atomic_set(&root->log_writers, 0); | 1178 | atomic_set(&root->log_writers, 0); |
| 1179 | atomic_set(&root->log_batch, 0); | ||
| 1171 | atomic_set(&root->orphan_inodes, 0); | 1180 | atomic_set(&root->orphan_inodes, 0); |
| 1172 | root->log_batch = 0; | ||
| 1173 | root->log_transid = 0; | 1181 | root->log_transid = 0; |
| 1174 | root->last_log_commit = 0; | 1182 | root->last_log_commit = 0; |
| 1175 | extent_io_tree_init(&root->dirty_log_pages, | 1183 | extent_io_tree_init(&root->dirty_log_pages, |
| @@ -1667,9 +1675,10 @@ static int transaction_kthread(void *arg) | |||
| 1667 | spin_unlock(&root->fs_info->trans_lock); | 1675 | spin_unlock(&root->fs_info->trans_lock); |
| 1668 | 1676 | ||
| 1669 | /* If the file system is aborted, this will always fail. */ | 1677 | /* If the file system is aborted, this will always fail. */ |
| 1670 | trans = btrfs_join_transaction(root); | 1678 | trans = btrfs_attach_transaction(root); |
| 1671 | if (IS_ERR(trans)) { | 1679 | if (IS_ERR(trans)) { |
| 1672 | cannot_commit = true; | 1680 | if (PTR_ERR(trans) != -ENOENT) |
| 1681 | cannot_commit = true; | ||
| 1673 | goto sleep; | 1682 | goto sleep; |
| 1674 | } | 1683 | } |
| 1675 | if (transid == trans->transid) { | 1684 | if (transid == trans->transid) { |
| @@ -1994,13 +2003,11 @@ int open_ctree(struct super_block *sb, | |||
| 1994 | INIT_LIST_HEAD(&fs_info->trans_list); | 2003 | INIT_LIST_HEAD(&fs_info->trans_list); |
| 1995 | INIT_LIST_HEAD(&fs_info->dead_roots); | 2004 | INIT_LIST_HEAD(&fs_info->dead_roots); |
| 1996 | INIT_LIST_HEAD(&fs_info->delayed_iputs); | 2005 | INIT_LIST_HEAD(&fs_info->delayed_iputs); |
| 1997 | INIT_LIST_HEAD(&fs_info->hashers); | ||
| 1998 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 2006 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
| 1999 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 2007 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
| 2000 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 2008 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
| 2001 | spin_lock_init(&fs_info->delalloc_lock); | 2009 | spin_lock_init(&fs_info->delalloc_lock); |
| 2002 | spin_lock_init(&fs_info->trans_lock); | 2010 | spin_lock_init(&fs_info->trans_lock); |
| 2003 | spin_lock_init(&fs_info->ref_cache_lock); | ||
| 2004 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 2011 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
| 2005 | spin_lock_init(&fs_info->delayed_iput_lock); | 2012 | spin_lock_init(&fs_info->delayed_iput_lock); |
| 2006 | spin_lock_init(&fs_info->defrag_inodes_lock); | 2013 | spin_lock_init(&fs_info->defrag_inodes_lock); |
| @@ -2014,12 +2021,15 @@ int open_ctree(struct super_block *sb, | |||
| 2014 | INIT_LIST_HEAD(&fs_info->space_info); | 2021 | INIT_LIST_HEAD(&fs_info->space_info); |
| 2015 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | 2022 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); |
| 2016 | btrfs_mapping_init(&fs_info->mapping_tree); | 2023 | btrfs_mapping_init(&fs_info->mapping_tree); |
| 2017 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | 2024 | btrfs_init_block_rsv(&fs_info->global_block_rsv, |
| 2018 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | 2025 | BTRFS_BLOCK_RSV_GLOBAL); |
| 2019 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | 2026 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv, |
| 2020 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | 2027 | BTRFS_BLOCK_RSV_DELALLOC); |
| 2021 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | 2028 | btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); |
| 2022 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv); | 2029 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK); |
| 2030 | btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY); | ||
| 2031 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv, | ||
| 2032 | BTRFS_BLOCK_RSV_DELOPS); | ||
| 2023 | atomic_set(&fs_info->nr_async_submits, 0); | 2033 | atomic_set(&fs_info->nr_async_submits, 0); |
| 2024 | atomic_set(&fs_info->async_delalloc_pages, 0); | 2034 | atomic_set(&fs_info->async_delalloc_pages, 0); |
| 2025 | atomic_set(&fs_info->async_submit_draining, 0); | 2035 | atomic_set(&fs_info->async_submit_draining, 0); |
| @@ -2491,6 +2501,8 @@ retry_root_backup: | |||
| 2491 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 2501 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
| 2492 | goto fail_block_groups; | 2502 | goto fail_block_groups; |
| 2493 | } | 2503 | } |
| 2504 | fs_info->num_tolerated_disk_barrier_failures = | ||
| 2505 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
| 2494 | 2506 | ||
| 2495 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 2507 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
| 2496 | "btrfs-cleaner"); | 2508 | "btrfs-cleaner"); |
| @@ -2874,12 +2886,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
| 2874 | printk_in_rcu("btrfs: disabling barriers on dev %s\n", | 2886 | printk_in_rcu("btrfs: disabling barriers on dev %s\n", |
| 2875 | rcu_str_deref(device->name)); | 2887 | rcu_str_deref(device->name)); |
| 2876 | device->nobarriers = 1; | 2888 | device->nobarriers = 1; |
| 2877 | } | 2889 | } else if (!bio_flagged(bio, BIO_UPTODATE)) { |
| 2878 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
| 2879 | ret = -EIO; | 2890 | ret = -EIO; |
| 2880 | if (!bio_flagged(bio, BIO_EOPNOTSUPP)) | 2891 | btrfs_dev_stat_inc_and_print(device, |
| 2881 | btrfs_dev_stat_inc_and_print(device, | 2892 | BTRFS_DEV_STAT_FLUSH_ERRS); |
| 2882 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
| 2883 | } | 2893 | } |
| 2884 | 2894 | ||
| 2885 | /* drop the reference from the wait == 0 run */ | 2895 | /* drop the reference from the wait == 0 run */ |
| @@ -2918,14 +2928,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 2918 | { | 2928 | { |
| 2919 | struct list_head *head; | 2929 | struct list_head *head; |
| 2920 | struct btrfs_device *dev; | 2930 | struct btrfs_device *dev; |
| 2921 | int errors = 0; | 2931 | int errors_send = 0; |
| 2932 | int errors_wait = 0; | ||
| 2922 | int ret; | 2933 | int ret; |
| 2923 | 2934 | ||
| 2924 | /* send down all the barriers */ | 2935 | /* send down all the barriers */ |
| 2925 | head = &info->fs_devices->devices; | 2936 | head = &info->fs_devices->devices; |
| 2926 | list_for_each_entry_rcu(dev, head, dev_list) { | 2937 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 2927 | if (!dev->bdev) { | 2938 | if (!dev->bdev) { |
| 2928 | errors++; | 2939 | errors_send++; |
| 2929 | continue; | 2940 | continue; |
| 2930 | } | 2941 | } |
| 2931 | if (!dev->in_fs_metadata || !dev->writeable) | 2942 | if (!dev->in_fs_metadata || !dev->writeable) |
| @@ -2933,13 +2944,13 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 2933 | 2944 | ||
| 2934 | ret = write_dev_flush(dev, 0); | 2945 | ret = write_dev_flush(dev, 0); |
| 2935 | if (ret) | 2946 | if (ret) |
| 2936 | errors++; | 2947 | errors_send++; |
| 2937 | } | 2948 | } |
| 2938 | 2949 | ||
| 2939 | /* wait for all the barriers */ | 2950 | /* wait for all the barriers */ |
| 2940 | list_for_each_entry_rcu(dev, head, dev_list) { | 2951 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 2941 | if (!dev->bdev) { | 2952 | if (!dev->bdev) { |
| 2942 | errors++; | 2953 | errors_wait++; |
| 2943 | continue; | 2954 | continue; |
| 2944 | } | 2955 | } |
| 2945 | if (!dev->in_fs_metadata || !dev->writeable) | 2956 | if (!dev->in_fs_metadata || !dev->writeable) |
| @@ -2947,13 +2958,87 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 2947 | 2958 | ||
| 2948 | ret = write_dev_flush(dev, 1); | 2959 | ret = write_dev_flush(dev, 1); |
| 2949 | if (ret) | 2960 | if (ret) |
| 2950 | errors++; | 2961 | errors_wait++; |
| 2951 | } | 2962 | } |
| 2952 | if (errors) | 2963 | if (errors_send > info->num_tolerated_disk_barrier_failures || |
| 2964 | errors_wait > info->num_tolerated_disk_barrier_failures) | ||
| 2953 | return -EIO; | 2965 | return -EIO; |
| 2954 | return 0; | 2966 | return 0; |
| 2955 | } | 2967 | } |
| 2956 | 2968 | ||
| 2969 | int btrfs_calc_num_tolerated_disk_barrier_failures( | ||
| 2970 | struct btrfs_fs_info *fs_info) | ||
| 2971 | { | ||
| 2972 | struct btrfs_ioctl_space_info space; | ||
| 2973 | struct btrfs_space_info *sinfo; | ||
| 2974 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, | ||
| 2975 | BTRFS_BLOCK_GROUP_SYSTEM, | ||
| 2976 | BTRFS_BLOCK_GROUP_METADATA, | ||
| 2977 | BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; | ||
| 2978 | int num_types = 4; | ||
| 2979 | int i; | ||
| 2980 | int c; | ||
| 2981 | int num_tolerated_disk_barrier_failures = | ||
| 2982 | (int)fs_info->fs_devices->num_devices; | ||
| 2983 | |||
| 2984 | for (i = 0; i < num_types; i++) { | ||
| 2985 | struct btrfs_space_info *tmp; | ||
| 2986 | |||
| 2987 | sinfo = NULL; | ||
| 2988 | rcu_read_lock(); | ||
| 2989 | list_for_each_entry_rcu(tmp, &fs_info->space_info, list) { | ||
| 2990 | if (tmp->flags == types[i]) { | ||
| 2991 | sinfo = tmp; | ||
| 2992 | break; | ||
| 2993 | } | ||
| 2994 | } | ||
| 2995 | rcu_read_unlock(); | ||
| 2996 | |||
| 2997 | if (!sinfo) | ||
| 2998 | continue; | ||
| 2999 | |||
| 3000 | down_read(&sinfo->groups_sem); | ||
| 3001 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | ||
| 3002 | if (!list_empty(&sinfo->block_groups[c])) { | ||
| 3003 | u64 flags; | ||
| 3004 | |||
| 3005 | btrfs_get_block_group_info( | ||
| 3006 | &sinfo->block_groups[c], &space); | ||
| 3007 | if (space.total_bytes == 0 || | ||
| 3008 | space.used_bytes == 0) | ||
| 3009 | continue; | ||
| 3010 | flags = space.flags; | ||
| 3011 | /* | ||
| 3012 | * return | ||
| 3013 | * 0: if dup, single or RAID0 is configured for | ||
| 3014 | * any of metadata, system or data, else | ||
| 3015 | * 1: if RAID5 is configured, or if RAID1 or | ||
| 3016 | * RAID10 is configured and only two mirrors | ||
| 3017 | * are used, else | ||
| 3018 | * 2: if RAID6 is configured, else | ||
| 3019 | * num_mirrors - 1: if RAID1 or RAID10 is | ||
| 3020 | * configured and more than | ||
| 3021 | * 2 mirrors are used. | ||
| 3022 | */ | ||
| 3023 | if (num_tolerated_disk_barrier_failures > 0 && | ||
| 3024 | ((flags & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3025 | BTRFS_BLOCK_GROUP_RAID0)) || | ||
| 3026 | ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) | ||
| 3027 | == 0))) | ||
| 3028 | num_tolerated_disk_barrier_failures = 0; | ||
| 3029 | else if (num_tolerated_disk_barrier_failures > 1 | ||
| 3030 | && | ||
| 3031 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3032 | BTRFS_BLOCK_GROUP_RAID10))) | ||
| 3033 | num_tolerated_disk_barrier_failures = 1; | ||
| 3034 | } | ||
| 3035 | } | ||
| 3036 | up_read(&sinfo->groups_sem); | ||
| 3037 | } | ||
| 3038 | |||
| 3039 | return num_tolerated_disk_barrier_failures; | ||
| 3040 | } | ||
| 3041 | |||
| 2957 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 3042 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
| 2958 | { | 3043 | { |
| 2959 | struct list_head *head; | 3044 | struct list_head *head; |
| @@ -2976,8 +3061,16 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
| 2976 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 3061 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 2977 | head = &root->fs_info->fs_devices->devices; | 3062 | head = &root->fs_info->fs_devices->devices; |
| 2978 | 3063 | ||
| 2979 | if (do_barriers) | 3064 | if (do_barriers) { |
| 2980 | barrier_all_devices(root->fs_info); | 3065 | ret = barrier_all_devices(root->fs_info); |
| 3066 | if (ret) { | ||
| 3067 | mutex_unlock( | ||
| 3068 | &root->fs_info->fs_devices->device_list_mutex); | ||
| 3069 | btrfs_error(root->fs_info, ret, | ||
| 3070 | "errors while submitting device barriers."); | ||
| 3071 | return ret; | ||
| 3072 | } | ||
| 3073 | } | ||
| 2981 | 3074 | ||
| 2982 | list_for_each_entry_rcu(dev, head, dev_list) { | 3075 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 2983 | if (!dev->bdev) { | 3076 | if (!dev->bdev) { |
| @@ -3211,10 +3304,6 @@ int close_ctree(struct btrfs_root *root) | |||
| 3211 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 3304 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", |
| 3212 | (unsigned long long)fs_info->delalloc_bytes); | 3305 | (unsigned long long)fs_info->delalloc_bytes); |
| 3213 | } | 3306 | } |
| 3214 | if (fs_info->total_ref_cache_size) { | ||
| 3215 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", | ||
| 3216 | (unsigned long long)fs_info->total_ref_cache_size); | ||
| 3217 | } | ||
| 3218 | 3307 | ||
| 3219 | free_extent_buffer(fs_info->extent_root->node); | 3308 | free_extent_buffer(fs_info->extent_root->node); |
| 3220 | free_extent_buffer(fs_info->extent_root->commit_root); | 3309 | free_extent_buffer(fs_info->extent_root->commit_root); |
| @@ -3360,52 +3449,6 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
| 3360 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 3449 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
| 3361 | } | 3450 | } |
| 3362 | 3451 | ||
| 3363 | int btree_lock_page_hook(struct page *page, void *data, | ||
| 3364 | void (*flush_fn)(void *)) | ||
| 3365 | { | ||
| 3366 | struct inode *inode = page->mapping->host; | ||
| 3367 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3368 | struct extent_buffer *eb; | ||
| 3369 | |||
| 3370 | /* | ||
| 3371 | * We culled this eb but the page is still hanging out on the mapping, | ||
| 3372 | * carry on. | ||
| 3373 | */ | ||
| 3374 | if (!PagePrivate(page)) | ||
| 3375 | goto out; | ||
| 3376 | |||
| 3377 | eb = (struct extent_buffer *)page->private; | ||
| 3378 | if (!eb) { | ||
| 3379 | WARN_ON(1); | ||
| 3380 | goto out; | ||
| 3381 | } | ||
| 3382 | if (page != eb->pages[0]) | ||
| 3383 | goto out; | ||
| 3384 | |||
| 3385 | if (!btrfs_try_tree_write_lock(eb)) { | ||
| 3386 | flush_fn(data); | ||
| 3387 | btrfs_tree_lock(eb); | ||
| 3388 | } | ||
| 3389 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | ||
| 3390 | |||
| 3391 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
| 3392 | spin_lock(&root->fs_info->delalloc_lock); | ||
| 3393 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
| 3394 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
| 3395 | else | ||
| 3396 | WARN_ON(1); | ||
| 3397 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 3398 | } | ||
| 3399 | |||
| 3400 | btrfs_tree_unlock(eb); | ||
| 3401 | out: | ||
| 3402 | if (!trylock_page(page)) { | ||
| 3403 | flush_fn(data); | ||
| 3404 | lock_page(page); | ||
| 3405 | } | ||
| 3406 | return 0; | ||
| 3407 | } | ||
| 3408 | |||
| 3409 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 3452 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
| 3410 | int read_only) | 3453 | int read_only) |
| 3411 | { | 3454 | { |
| @@ -3608,7 +3651,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, | |||
| 3608 | 3651 | ||
| 3609 | while (1) { | 3652 | while (1) { |
| 3610 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | 3653 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, |
| 3611 | mark); | 3654 | mark, NULL); |
| 3612 | if (ret) | 3655 | if (ret) |
| 3613 | break; | 3656 | break; |
| 3614 | 3657 | ||
| @@ -3663,7 +3706,7 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | |||
| 3663 | again: | 3706 | again: |
| 3664 | while (1) { | 3707 | while (1) { |
| 3665 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3708 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
| 3666 | EXTENT_DIRTY); | 3709 | EXTENT_DIRTY, NULL); |
| 3667 | if (ret) | 3710 | if (ret) |
| 3668 | break; | 3711 | break; |
| 3669 | 3712 | ||
| @@ -3800,7 +3843,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
| 3800 | } | 3843 | } |
| 3801 | 3844 | ||
| 3802 | static struct extent_io_ops btree_extent_io_ops = { | 3845 | static struct extent_io_ops btree_extent_io_ops = { |
| 3803 | .write_cache_pages_lock_hook = btree_lock_page_hook, | ||
| 3804 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3846 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
| 3805 | .readpage_io_failed_hook = btree_io_failed_hook, | 3847 | .readpage_io_failed_hook = btree_io_failed_hook, |
| 3806 | .submit_bio_hook = btree_submit_bio_hook, | 3848 | .submit_bio_hook = btree_submit_bio_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c5b00a735fef..2025a9132c16 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -95,6 +95,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
| 95 | u64 objectid); | 95 | u64 objectid); |
| 96 | int btree_lock_page_hook(struct page *page, void *data, | 96 | int btree_lock_page_hook(struct page *page, void *data, |
| 97 | void (*flush_fn)(void *)); | 97 | void (*flush_fn)(void *)); |
| 98 | int btrfs_calc_num_tolerated_disk_barrier_failures( | ||
| 99 | struct btrfs_fs_info *fs_info); | ||
| 98 | 100 | ||
| 99 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 101 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 100 | void btrfs_init_lockdep(void); | 102 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ba58024d40d3..3d3e2c17d8d1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -94,8 +94,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 94 | u64 flags, struct btrfs_disk_key *key, | 94 | u64 flags, struct btrfs_disk_key *key, |
| 95 | int level, struct btrfs_key *ins); | 95 | int level, struct btrfs_key *ins); |
| 96 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 96 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 97 | struct btrfs_root *extent_root, u64 alloc_bytes, | 97 | struct btrfs_root *extent_root, u64 flags, |
| 98 | u64 flags, int force); | 98 | int force); |
| 99 | static int find_next_key(struct btrfs_path *path, int level, | 99 | static int find_next_key(struct btrfs_path *path, int level, |
| 100 | struct btrfs_key *key); | 100 | struct btrfs_key *key); |
| 101 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 101 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| @@ -312,7 +312,8 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
| 312 | while (start < end) { | 312 | while (start < end) { |
| 313 | ret = find_first_extent_bit(info->pinned_extents, start, | 313 | ret = find_first_extent_bit(info->pinned_extents, start, |
| 314 | &extent_start, &extent_end, | 314 | &extent_start, &extent_end, |
| 315 | EXTENT_DIRTY | EXTENT_UPTODATE); | 315 | EXTENT_DIRTY | EXTENT_UPTODATE, |
| 316 | NULL); | ||
| 316 | if (ret) | 317 | if (ret) |
| 317 | break; | 318 | break; |
| 318 | 319 | ||
| @@ -2361,10 +2362,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
| 2361 | } | 2362 | } |
| 2362 | 2363 | ||
| 2363 | next: | 2364 | next: |
| 2364 | do_chunk_alloc(trans, fs_info->extent_root, | ||
| 2365 | 2 * 1024 * 1024, | ||
| 2366 | btrfs_get_alloc_profile(root, 0), | ||
| 2367 | CHUNK_ALLOC_NO_FORCE); | ||
| 2368 | cond_resched(); | 2365 | cond_resched(); |
| 2369 | spin_lock(&delayed_refs->lock); | 2366 | spin_lock(&delayed_refs->lock); |
| 2370 | } | 2367 | } |
| @@ -2478,10 +2475,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
| 2478 | if (root == root->fs_info->extent_root) | 2475 | if (root == root->fs_info->extent_root) |
| 2479 | root = root->fs_info->tree_root; | 2476 | root = root->fs_info->tree_root; |
| 2480 | 2477 | ||
| 2481 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 2482 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), | ||
| 2483 | CHUNK_ALLOC_NO_FORCE); | ||
| 2484 | |||
| 2485 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | 2478 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); |
| 2486 | 2479 | ||
| 2487 | delayed_refs = &trans->transaction->delayed_refs; | 2480 | delayed_refs = &trans->transaction->delayed_refs; |
| @@ -2551,6 +2544,12 @@ again: | |||
| 2551 | } | 2544 | } |
| 2552 | 2545 | ||
| 2553 | if (run_all) { | 2546 | if (run_all) { |
| 2547 | if (!list_empty(&trans->new_bgs)) { | ||
| 2548 | spin_unlock(&delayed_refs->lock); | ||
| 2549 | btrfs_create_pending_block_groups(trans, root); | ||
| 2550 | spin_lock(&delayed_refs->lock); | ||
| 2551 | } | ||
| 2552 | |||
| 2554 | node = rb_first(&delayed_refs->root); | 2553 | node = rb_first(&delayed_refs->root); |
| 2555 | if (!node) | 2554 | if (!node) |
| 2556 | goto out; | 2555 | goto out; |
| @@ -3406,7 +3405,6 @@ alloc: | |||
| 3406 | return PTR_ERR(trans); | 3405 | return PTR_ERR(trans); |
| 3407 | 3406 | ||
| 3408 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3407 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
| 3409 | bytes + 2 * 1024 * 1024, | ||
| 3410 | alloc_target, | 3408 | alloc_target, |
| 3411 | CHUNK_ALLOC_NO_FORCE); | 3409 | CHUNK_ALLOC_NO_FORCE); |
| 3412 | btrfs_end_transaction(trans, root); | 3410 | btrfs_end_transaction(trans, root); |
| @@ -3488,8 +3486,7 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
| 3488 | } | 3486 | } |
| 3489 | 3487 | ||
| 3490 | static int should_alloc_chunk(struct btrfs_root *root, | 3488 | static int should_alloc_chunk(struct btrfs_root *root, |
| 3491 | struct btrfs_space_info *sinfo, u64 alloc_bytes, | 3489 | struct btrfs_space_info *sinfo, int force) |
| 3492 | int force) | ||
| 3493 | { | 3490 | { |
| 3494 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | 3491 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; |
| 3495 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3492 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
| @@ -3504,7 +3501,8 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
| 3504 | * and purposes it's used space. Don't worry about locking the | 3501 | * and purposes it's used space. Don't worry about locking the |
| 3505 | * global_rsv, it doesn't change except when the transaction commits. | 3502 | * global_rsv, it doesn't change except when the transaction commits. |
| 3506 | */ | 3503 | */ |
| 3507 | num_allocated += global_rsv->size; | 3504 | if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) |
| 3505 | num_allocated += global_rsv->size; | ||
| 3508 | 3506 | ||
| 3509 | /* | 3507 | /* |
| 3510 | * in limited mode, we want to have some free space up to | 3508 | * in limited mode, we want to have some free space up to |
| @@ -3518,15 +3516,8 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
| 3518 | if (num_bytes - num_allocated < thresh) | 3516 | if (num_bytes - num_allocated < thresh) |
| 3519 | return 1; | 3517 | return 1; |
| 3520 | } | 3518 | } |
| 3521 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); | ||
| 3522 | 3519 | ||
| 3523 | /* 256MB or 2% of the FS */ | 3520 | if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8)) |
| 3524 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2)); | ||
| 3525 | /* system chunks need a much small threshold */ | ||
| 3526 | if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
| 3527 | thresh = 32 * 1024 * 1024; | ||
| 3528 | |||
| 3529 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8)) | ||
| 3530 | return 0; | 3521 | return 0; |
| 3531 | return 1; | 3522 | return 1; |
| 3532 | } | 3523 | } |
| @@ -3576,8 +3567,7 @@ static void check_system_chunk(struct btrfs_trans_handle *trans, | |||
| 3576 | } | 3567 | } |
| 3577 | 3568 | ||
| 3578 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3569 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 3579 | struct btrfs_root *extent_root, u64 alloc_bytes, | 3570 | struct btrfs_root *extent_root, u64 flags, int force) |
| 3580 | u64 flags, int force) | ||
| 3581 | { | 3571 | { |
| 3582 | struct btrfs_space_info *space_info; | 3572 | struct btrfs_space_info *space_info; |
| 3583 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3573 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
| @@ -3601,7 +3591,7 @@ again: | |||
| 3601 | return 0; | 3591 | return 0; |
| 3602 | } | 3592 | } |
| 3603 | 3593 | ||
| 3604 | if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { | 3594 | if (!should_alloc_chunk(extent_root, space_info, force)) { |
| 3605 | spin_unlock(&space_info->lock); | 3595 | spin_unlock(&space_info->lock); |
| 3606 | return 0; | 3596 | return 0; |
| 3607 | } else if (space_info->chunk_alloc) { | 3597 | } else if (space_info->chunk_alloc) { |
| @@ -3669,6 +3659,46 @@ out: | |||
| 3669 | return ret; | 3659 | return ret; |
| 3670 | } | 3660 | } |
| 3671 | 3661 | ||
| 3662 | static int can_overcommit(struct btrfs_root *root, | ||
| 3663 | struct btrfs_space_info *space_info, u64 bytes, | ||
| 3664 | int flush) | ||
| 3665 | { | ||
| 3666 | u64 profile = btrfs_get_alloc_profile(root, 0); | ||
| 3667 | u64 avail; | ||
| 3668 | u64 used; | ||
| 3669 | |||
| 3670 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
| 3671 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
| 3672 | space_info->bytes_may_use; | ||
| 3673 | |||
| 3674 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 3675 | avail = root->fs_info->free_chunk_space; | ||
| 3676 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 3677 | |||
| 3678 | /* | ||
| 3679 | * If we have dup, raid1 or raid10 then only half of the free | ||
| 3680 | * space is actually useable. | ||
| 3681 | */ | ||
| 3682 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3683 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3684 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 3685 | avail >>= 1; | ||
| 3686 | |||
| 3687 | /* | ||
| 3688 | * If we aren't flushing, let us overcommit up to 1/2 of the | ||
| 3689 | * space. If we can flush, don't let us overcommit too much, say | ||
| 3690 | * 1/8th of the space. | ||
| 3691 | */ | ||
| 3692 | if (flush) | ||
| 3693 | avail >>= 3; | ||
| 3694 | else | ||
| 3695 | avail >>= 1; | ||
| 3696 | |||
| 3697 | if (used + bytes < space_info->total_bytes + avail) | ||
| 3698 | return 1; | ||
| 3699 | return 0; | ||
| 3700 | } | ||
| 3701 | |||
| 3672 | /* | 3702 | /* |
| 3673 | * shrink metadata reservation for delalloc | 3703 | * shrink metadata reservation for delalloc |
| 3674 | */ | 3704 | */ |
| @@ -3693,7 +3723,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3693 | if (delalloc_bytes == 0) { | 3723 | if (delalloc_bytes == 0) { |
| 3694 | if (trans) | 3724 | if (trans) |
| 3695 | return; | 3725 | return; |
| 3696 | btrfs_wait_ordered_extents(root, 0, 0); | 3726 | btrfs_wait_ordered_extents(root, 0); |
| 3697 | return; | 3727 | return; |
| 3698 | } | 3728 | } |
| 3699 | 3729 | ||
| @@ -3703,11 +3733,15 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3703 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, | 3733 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, |
| 3704 | WB_REASON_FS_FREE_SPACE); | 3734 | WB_REASON_FS_FREE_SPACE); |
| 3705 | 3735 | ||
| 3736 | /* | ||
| 3737 | * We need to wait for the async pages to actually start before | ||
| 3738 | * we do anything. | ||
| 3739 | */ | ||
| 3740 | wait_event(root->fs_info->async_submit_wait, | ||
| 3741 | !atomic_read(&root->fs_info->async_delalloc_pages)); | ||
| 3742 | |||
| 3706 | spin_lock(&space_info->lock); | 3743 | spin_lock(&space_info->lock); |
| 3707 | if (space_info->bytes_used + space_info->bytes_reserved + | 3744 | if (can_overcommit(root, space_info, orig, !trans)) { |
| 3708 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
| 3709 | space_info->bytes_may_use + orig <= | ||
| 3710 | space_info->total_bytes) { | ||
| 3711 | spin_unlock(&space_info->lock); | 3745 | spin_unlock(&space_info->lock); |
| 3712 | break; | 3746 | break; |
| 3713 | } | 3747 | } |
| @@ -3715,7 +3749,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3715 | 3749 | ||
| 3716 | loops++; | 3750 | loops++; |
| 3717 | if (wait_ordered && !trans) { | 3751 | if (wait_ordered && !trans) { |
| 3718 | btrfs_wait_ordered_extents(root, 0, 0); | 3752 | btrfs_wait_ordered_extents(root, 0); |
| 3719 | } else { | 3753 | } else { |
| 3720 | time_left = schedule_timeout_killable(1); | 3754 | time_left = schedule_timeout_killable(1); |
| 3721 | if (time_left) | 3755 | if (time_left) |
| @@ -3784,11 +3818,12 @@ commit: | |||
| 3784 | } | 3818 | } |
| 3785 | 3819 | ||
| 3786 | enum flush_state { | 3820 | enum flush_state { |
| 3787 | FLUSH_DELALLOC = 1, | 3821 | FLUSH_DELAYED_ITEMS_NR = 1, |
| 3788 | FLUSH_DELALLOC_WAIT = 2, | 3822 | FLUSH_DELAYED_ITEMS = 2, |
| 3789 | FLUSH_DELAYED_ITEMS_NR = 3, | 3823 | FLUSH_DELALLOC = 3, |
| 3790 | FLUSH_DELAYED_ITEMS = 4, | 3824 | FLUSH_DELALLOC_WAIT = 4, |
| 3791 | COMMIT_TRANS = 5, | 3825 | ALLOC_CHUNK = 5, |
| 3826 | COMMIT_TRANS = 6, | ||
| 3792 | }; | 3827 | }; |
| 3793 | 3828 | ||
| 3794 | static int flush_space(struct btrfs_root *root, | 3829 | static int flush_space(struct btrfs_root *root, |
| @@ -3800,11 +3835,6 @@ static int flush_space(struct btrfs_root *root, | |||
| 3800 | int ret = 0; | 3835 | int ret = 0; |
| 3801 | 3836 | ||
| 3802 | switch (state) { | 3837 | switch (state) { |
| 3803 | case FLUSH_DELALLOC: | ||
| 3804 | case FLUSH_DELALLOC_WAIT: | ||
| 3805 | shrink_delalloc(root, num_bytes, orig_bytes, | ||
| 3806 | state == FLUSH_DELALLOC_WAIT); | ||
| 3807 | break; | ||
| 3808 | case FLUSH_DELAYED_ITEMS_NR: | 3838 | case FLUSH_DELAYED_ITEMS_NR: |
| 3809 | case FLUSH_DELAYED_ITEMS: | 3839 | case FLUSH_DELAYED_ITEMS: |
| 3810 | if (state == FLUSH_DELAYED_ITEMS_NR) { | 3840 | if (state == FLUSH_DELAYED_ITEMS_NR) { |
| @@ -3825,6 +3855,24 @@ static int flush_space(struct btrfs_root *root, | |||
| 3825 | ret = btrfs_run_delayed_items_nr(trans, root, nr); | 3855 | ret = btrfs_run_delayed_items_nr(trans, root, nr); |
| 3826 | btrfs_end_transaction(trans, root); | 3856 | btrfs_end_transaction(trans, root); |
| 3827 | break; | 3857 | break; |
| 3858 | case FLUSH_DELALLOC: | ||
| 3859 | case FLUSH_DELALLOC_WAIT: | ||
| 3860 | shrink_delalloc(root, num_bytes, orig_bytes, | ||
| 3861 | state == FLUSH_DELALLOC_WAIT); | ||
| 3862 | break; | ||
| 3863 | case ALLOC_CHUNK: | ||
| 3864 | trans = btrfs_join_transaction(root); | ||
| 3865 | if (IS_ERR(trans)) { | ||
| 3866 | ret = PTR_ERR(trans); | ||
| 3867 | break; | ||
| 3868 | } | ||
| 3869 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 3870 | btrfs_get_alloc_profile(root, 0), | ||
| 3871 | CHUNK_ALLOC_NO_FORCE); | ||
| 3872 | btrfs_end_transaction(trans, root); | ||
| 3873 | if (ret == -ENOSPC) | ||
| 3874 | ret = 0; | ||
| 3875 | break; | ||
| 3828 | case COMMIT_TRANS: | 3876 | case COMMIT_TRANS: |
| 3829 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); | 3877 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); |
| 3830 | break; | 3878 | break; |
| @@ -3856,10 +3904,9 @@ static int reserve_metadata_bytes(struct btrfs_root *root, | |||
| 3856 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3904 | struct btrfs_space_info *space_info = block_rsv->space_info; |
| 3857 | u64 used; | 3905 | u64 used; |
| 3858 | u64 num_bytes = orig_bytes; | 3906 | u64 num_bytes = orig_bytes; |
| 3859 | int flush_state = FLUSH_DELALLOC; | 3907 | int flush_state = FLUSH_DELAYED_ITEMS_NR; |
| 3860 | int ret = 0; | 3908 | int ret = 0; |
| 3861 | bool flushing = false; | 3909 | bool flushing = false; |
| 3862 | bool committed = false; | ||
| 3863 | 3910 | ||
| 3864 | again: | 3911 | again: |
| 3865 | ret = 0; | 3912 | ret = 0; |
| @@ -3922,57 +3969,12 @@ again: | |||
| 3922 | (orig_bytes * 2); | 3969 | (orig_bytes * 2); |
| 3923 | } | 3970 | } |
| 3924 | 3971 | ||
| 3925 | if (ret) { | 3972 | if (ret && can_overcommit(root, space_info, orig_bytes, flush)) { |
| 3926 | u64 profile = btrfs_get_alloc_profile(root, 0); | 3973 | space_info->bytes_may_use += orig_bytes; |
| 3927 | u64 avail; | 3974 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
| 3928 | 3975 | space_info->flags, orig_bytes, | |
| 3929 | /* | 3976 | 1); |
| 3930 | * If we have a lot of space that's pinned, don't bother doing | 3977 | ret = 0; |
| 3931 | * the overcommit dance yet and just commit the transaction. | ||
| 3932 | */ | ||
| 3933 | avail = (space_info->total_bytes - space_info->bytes_used) * 8; | ||
| 3934 | do_div(avail, 10); | ||
| 3935 | if (space_info->bytes_pinned >= avail && flush && !committed) { | ||
| 3936 | space_info->flush = 1; | ||
| 3937 | flushing = true; | ||
| 3938 | spin_unlock(&space_info->lock); | ||
| 3939 | ret = may_commit_transaction(root, space_info, | ||
| 3940 | orig_bytes, 1); | ||
| 3941 | if (ret) | ||
| 3942 | goto out; | ||
| 3943 | committed = true; | ||
| 3944 | goto again; | ||
| 3945 | } | ||
| 3946 | |||
| 3947 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 3948 | avail = root->fs_info->free_chunk_space; | ||
| 3949 | |||
| 3950 | /* | ||
| 3951 | * If we have dup, raid1 or raid10 then only half of the free | ||
| 3952 | * space is actually useable. | ||
| 3953 | */ | ||
| 3954 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3955 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3956 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 3957 | avail >>= 1; | ||
| 3958 | |||
| 3959 | /* | ||
| 3960 | * If we aren't flushing, let us overcommit up to 1/2 of the | ||
| 3961 | * space. If we can flush, don't let us overcommit too much, say | ||
| 3962 | * 1/8th of the space. | ||
| 3963 | */ | ||
| 3964 | if (flush) | ||
| 3965 | avail >>= 3; | ||
| 3966 | else | ||
| 3967 | avail >>= 1; | ||
| 3968 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 3969 | |||
| 3970 | if (used + num_bytes < space_info->total_bytes + avail) { | ||
| 3971 | space_info->bytes_may_use += orig_bytes; | ||
| 3972 | trace_btrfs_space_reservation(root->fs_info, | ||
| 3973 | "space_info", space_info->flags, orig_bytes, 1); | ||
| 3974 | ret = 0; | ||
| 3975 | } | ||
| 3976 | } | 3978 | } |
| 3977 | 3979 | ||
| 3978 | /* | 3980 | /* |
| @@ -4114,13 +4116,15 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | |||
| 4114 | return 0; | 4116 | return 0; |
| 4115 | } | 4117 | } |
| 4116 | 4118 | ||
| 4117 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | 4119 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) |
| 4118 | { | 4120 | { |
| 4119 | memset(rsv, 0, sizeof(*rsv)); | 4121 | memset(rsv, 0, sizeof(*rsv)); |
| 4120 | spin_lock_init(&rsv->lock); | 4122 | spin_lock_init(&rsv->lock); |
| 4123 | rsv->type = type; | ||
| 4121 | } | 4124 | } |
| 4122 | 4125 | ||
| 4123 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | 4126 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root, |
| 4127 | unsigned short type) | ||
| 4124 | { | 4128 | { |
| 4125 | struct btrfs_block_rsv *block_rsv; | 4129 | struct btrfs_block_rsv *block_rsv; |
| 4126 | struct btrfs_fs_info *fs_info = root->fs_info; | 4130 | struct btrfs_fs_info *fs_info = root->fs_info; |
| @@ -4129,7 +4133,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
| 4129 | if (!block_rsv) | 4133 | if (!block_rsv) |
| 4130 | return NULL; | 4134 | return NULL; |
| 4131 | 4135 | ||
| 4132 | btrfs_init_block_rsv(block_rsv); | 4136 | btrfs_init_block_rsv(block_rsv, type); |
| 4133 | block_rsv->space_info = __find_space_info(fs_info, | 4137 | block_rsv->space_info = __find_space_info(fs_info, |
| 4134 | BTRFS_BLOCK_GROUP_METADATA); | 4138 | BTRFS_BLOCK_GROUP_METADATA); |
| 4135 | return block_rsv; | 4139 | return block_rsv; |
| @@ -4138,6 +4142,8 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
| 4138 | void btrfs_free_block_rsv(struct btrfs_root *root, | 4142 | void btrfs_free_block_rsv(struct btrfs_root *root, |
| 4139 | struct btrfs_block_rsv *rsv) | 4143 | struct btrfs_block_rsv *rsv) |
| 4140 | { | 4144 | { |
| 4145 | if (!rsv) | ||
| 4146 | return; | ||
| 4141 | btrfs_block_rsv_release(root, rsv, (u64)-1); | 4147 | btrfs_block_rsv_release(root, rsv, (u64)-1); |
| 4142 | kfree(rsv); | 4148 | kfree(rsv); |
| 4143 | } | 4149 | } |
| @@ -4416,10 +4422,10 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
| 4416 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | 4422 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); |
| 4417 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | 4423 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; |
| 4418 | /* | 4424 | /* |
| 4419 | * two for root back/forward refs, two for directory entries | 4425 | * two for root back/forward refs, two for directory entries, |
| 4420 | * and one for root of the snapshot. | 4426 | * one for root of the snapshot and one for parent inode. |
| 4421 | */ | 4427 | */ |
| 4422 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); | 4428 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 6); |
| 4423 | dst_rsv->space_info = src_rsv->space_info; | 4429 | dst_rsv->space_info = src_rsv->space_info; |
| 4424 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 4430 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
| 4425 | } | 4431 | } |
| @@ -5018,7 +5024,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5018 | 5024 | ||
| 5019 | while (1) { | 5025 | while (1) { |
| 5020 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 5026 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
| 5021 | EXTENT_DIRTY); | 5027 | EXTENT_DIRTY, NULL); |
| 5022 | if (ret) | 5028 | if (ret) |
| 5023 | break; | 5029 | break; |
| 5024 | 5030 | ||
| @@ -5096,8 +5102,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5096 | ret = remove_extent_backref(trans, extent_root, path, | 5102 | ret = remove_extent_backref(trans, extent_root, path, |
| 5097 | NULL, refs_to_drop, | 5103 | NULL, refs_to_drop, |
| 5098 | is_data); | 5104 | is_data); |
| 5099 | if (ret) | 5105 | if (ret) { |
| 5100 | goto abort; | 5106 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5107 | goto out; | ||
| 5108 | } | ||
| 5101 | btrfs_release_path(path); | 5109 | btrfs_release_path(path); |
| 5102 | path->leave_spinning = 1; | 5110 | path->leave_spinning = 1; |
| 5103 | 5111 | ||
| @@ -5115,8 +5123,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5115 | btrfs_print_leaf(extent_root, | 5123 | btrfs_print_leaf(extent_root, |
| 5116 | path->nodes[0]); | 5124 | path->nodes[0]); |
| 5117 | } | 5125 | } |
| 5118 | if (ret < 0) | 5126 | if (ret < 0) { |
| 5119 | goto abort; | 5127 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5128 | goto out; | ||
| 5129 | } | ||
| 5120 | extent_slot = path->slots[0]; | 5130 | extent_slot = path->slots[0]; |
| 5121 | } | 5131 | } |
| 5122 | } else if (ret == -ENOENT) { | 5132 | } else if (ret == -ENOENT) { |
| @@ -5130,7 +5140,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5130 | (unsigned long long)owner_objectid, | 5140 | (unsigned long long)owner_objectid, |
| 5131 | (unsigned long long)owner_offset); | 5141 | (unsigned long long)owner_offset); |
| 5132 | } else { | 5142 | } else { |
| 5133 | goto abort; | 5143 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5144 | goto out; | ||
| 5134 | } | 5145 | } |
| 5135 | 5146 | ||
| 5136 | leaf = path->nodes[0]; | 5147 | leaf = path->nodes[0]; |
| @@ -5140,8 +5151,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5140 | BUG_ON(found_extent || extent_slot != path->slots[0]); | 5151 | BUG_ON(found_extent || extent_slot != path->slots[0]); |
| 5141 | ret = convert_extent_item_v0(trans, extent_root, path, | 5152 | ret = convert_extent_item_v0(trans, extent_root, path, |
| 5142 | owner_objectid, 0); | 5153 | owner_objectid, 0); |
| 5143 | if (ret < 0) | 5154 | if (ret < 0) { |
| 5144 | goto abort; | 5155 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5156 | goto out; | ||
| 5157 | } | ||
| 5145 | 5158 | ||
| 5146 | btrfs_release_path(path); | 5159 | btrfs_release_path(path); |
| 5147 | path->leave_spinning = 1; | 5160 | path->leave_spinning = 1; |
| @@ -5158,8 +5171,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5158 | (unsigned long long)bytenr); | 5171 | (unsigned long long)bytenr); |
| 5159 | btrfs_print_leaf(extent_root, path->nodes[0]); | 5172 | btrfs_print_leaf(extent_root, path->nodes[0]); |
| 5160 | } | 5173 | } |
| 5161 | if (ret < 0) | 5174 | if (ret < 0) { |
| 5162 | goto abort; | 5175 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5176 | goto out; | ||
| 5177 | } | ||
| 5178 | |||
| 5163 | extent_slot = path->slots[0]; | 5179 | extent_slot = path->slots[0]; |
| 5164 | leaf = path->nodes[0]; | 5180 | leaf = path->nodes[0]; |
| 5165 | item_size = btrfs_item_size_nr(leaf, extent_slot); | 5181 | item_size = btrfs_item_size_nr(leaf, extent_slot); |
| @@ -5196,8 +5212,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5196 | ret = remove_extent_backref(trans, extent_root, path, | 5212 | ret = remove_extent_backref(trans, extent_root, path, |
| 5197 | iref, refs_to_drop, | 5213 | iref, refs_to_drop, |
| 5198 | is_data); | 5214 | is_data); |
| 5199 | if (ret) | 5215 | if (ret) { |
| 5200 | goto abort; | 5216 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5217 | goto out; | ||
| 5218 | } | ||
| 5201 | } | 5219 | } |
| 5202 | } else { | 5220 | } else { |
| 5203 | if (found_extent) { | 5221 | if (found_extent) { |
| @@ -5214,27 +5232,29 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5214 | 5232 | ||
| 5215 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 5233 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
| 5216 | num_to_del); | 5234 | num_to_del); |
| 5217 | if (ret) | 5235 | if (ret) { |
| 5218 | goto abort; | 5236 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5237 | goto out; | ||
| 5238 | } | ||
| 5219 | btrfs_release_path(path); | 5239 | btrfs_release_path(path); |
| 5220 | 5240 | ||
| 5221 | if (is_data) { | 5241 | if (is_data) { |
| 5222 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 5242 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
| 5223 | if (ret) | 5243 | if (ret) { |
| 5224 | goto abort; | 5244 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5245 | goto out; | ||
| 5246 | } | ||
| 5225 | } | 5247 | } |
| 5226 | 5248 | ||
| 5227 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); | 5249 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
| 5228 | if (ret) | 5250 | if (ret) { |
| 5229 | goto abort; | 5251 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5252 | goto out; | ||
| 5253 | } | ||
| 5230 | } | 5254 | } |
| 5231 | out: | 5255 | out: |
| 5232 | btrfs_free_path(path); | 5256 | btrfs_free_path(path); |
| 5233 | return ret; | 5257 | return ret; |
| 5234 | |||
| 5235 | abort: | ||
| 5236 | btrfs_abort_transaction(trans, extent_root, ret); | ||
| 5237 | goto out; | ||
| 5238 | } | 5258 | } |
| 5239 | 5259 | ||
| 5240 | /* | 5260 | /* |
| @@ -5497,8 +5517,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 5497 | struct btrfs_block_group_cache *used_block_group; | 5517 | struct btrfs_block_group_cache *used_block_group; |
| 5498 | u64 search_start = 0; | 5518 | u64 search_start = 0; |
| 5499 | int empty_cluster = 2 * 1024 * 1024; | 5519 | int empty_cluster = 2 * 1024 * 1024; |
| 5500 | int allowed_chunk_alloc = 0; | ||
| 5501 | int done_chunk_alloc = 0; | ||
| 5502 | struct btrfs_space_info *space_info; | 5520 | struct btrfs_space_info *space_info; |
| 5503 | int loop = 0; | 5521 | int loop = 0; |
| 5504 | int index = 0; | 5522 | int index = 0; |
| @@ -5530,9 +5548,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 5530 | if (btrfs_mixed_space_info(space_info)) | 5548 | if (btrfs_mixed_space_info(space_info)) |
| 5531 | use_cluster = false; | 5549 | use_cluster = false; |
| 5532 | 5550 | ||
| 5533 | if (orig_root->ref_cows || empty_size) | ||
| 5534 | allowed_chunk_alloc = 1; | ||
| 5535 | |||
| 5536 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { | 5551 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { |
| 5537 | last_ptr = &root->fs_info->meta_alloc_cluster; | 5552 | last_ptr = &root->fs_info->meta_alloc_cluster; |
| 5538 | if (!btrfs_test_opt(root, SSD)) | 5553 | if (!btrfs_test_opt(root, SSD)) |
| @@ -5806,10 +5821,6 @@ checks: | |||
| 5806 | 5821 | ||
| 5807 | trace_btrfs_reserve_extent(orig_root, block_group, | 5822 | trace_btrfs_reserve_extent(orig_root, block_group, |
| 5808 | search_start, num_bytes); | 5823 | search_start, num_bytes); |
| 5809 | if (offset < search_start) | ||
| 5810 | btrfs_add_free_space(used_block_group, offset, | ||
| 5811 | search_start - offset); | ||
| 5812 | BUG_ON(offset > search_start); | ||
| 5813 | if (used_block_group != block_group) | 5824 | if (used_block_group != block_group) |
| 5814 | btrfs_put_block_group(used_block_group); | 5825 | btrfs_put_block_group(used_block_group); |
| 5815 | btrfs_put_block_group(block_group); | 5826 | btrfs_put_block_group(block_group); |
| @@ -5842,34 +5853,17 @@ loop: | |||
| 5842 | index = 0; | 5853 | index = 0; |
| 5843 | loop++; | 5854 | loop++; |
| 5844 | if (loop == LOOP_ALLOC_CHUNK) { | 5855 | if (loop == LOOP_ALLOC_CHUNK) { |
| 5845 | if (allowed_chunk_alloc) { | 5856 | ret = do_chunk_alloc(trans, root, data, |
| 5846 | ret = do_chunk_alloc(trans, root, num_bytes + | 5857 | CHUNK_ALLOC_FORCE); |
| 5847 | 2 * 1024 * 1024, data, | 5858 | /* |
| 5848 | CHUNK_ALLOC_LIMITED); | 5859 | * Do not bail out on ENOSPC since we |
| 5849 | /* | 5860 | * can do more things. |
| 5850 | * Do not bail out on ENOSPC since we | 5861 | */ |
| 5851 | * can do more things. | 5862 | if (ret < 0 && ret != -ENOSPC) { |
| 5852 | */ | 5863 | btrfs_abort_transaction(trans, |
| 5853 | if (ret < 0 && ret != -ENOSPC) { | 5864 | root, ret); |
| 5854 | btrfs_abort_transaction(trans, | 5865 | goto out; |
| 5855 | root, ret); | ||
| 5856 | goto out; | ||
| 5857 | } | ||
| 5858 | allowed_chunk_alloc = 0; | ||
| 5859 | if (ret == 1) | ||
| 5860 | done_chunk_alloc = 1; | ||
| 5861 | } else if (!done_chunk_alloc && | ||
| 5862 | space_info->force_alloc == | ||
| 5863 | CHUNK_ALLOC_NO_FORCE) { | ||
| 5864 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
| 5865 | } | 5866 | } |
| 5866 | |||
| 5867 | /* | ||
| 5868 | * We didn't allocate a chunk, go ahead and drop the | ||
| 5869 | * empty size and loop again. | ||
| 5870 | */ | ||
| 5871 | if (!done_chunk_alloc) | ||
| 5872 | loop = LOOP_NO_EMPTY_SIZE; | ||
| 5873 | } | 5867 | } |
| 5874 | 5868 | ||
| 5875 | if (loop == LOOP_NO_EMPTY_SIZE) { | 5869 | if (loop == LOOP_NO_EMPTY_SIZE) { |
| @@ -5944,20 +5938,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
| 5944 | 5938 | ||
| 5945 | data = btrfs_get_alloc_profile(root, data); | 5939 | data = btrfs_get_alloc_profile(root, data); |
| 5946 | again: | 5940 | again: |
| 5947 | /* | ||
| 5948 | * the only place that sets empty_size is btrfs_realloc_node, which | ||
| 5949 | * is not called recursively on allocations | ||
| 5950 | */ | ||
| 5951 | if (empty_size || root->ref_cows) { | ||
| 5952 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 5953 | num_bytes + 2 * 1024 * 1024, data, | ||
| 5954 | CHUNK_ALLOC_NO_FORCE); | ||
| 5955 | if (ret < 0 && ret != -ENOSPC) { | ||
| 5956 | btrfs_abort_transaction(trans, root, ret); | ||
| 5957 | return ret; | ||
| 5958 | } | ||
| 5959 | } | ||
| 5960 | |||
| 5961 | WARN_ON(num_bytes < root->sectorsize); | 5941 | WARN_ON(num_bytes < root->sectorsize); |
| 5962 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5942 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
| 5963 | hint_byte, ins, data); | 5943 | hint_byte, ins, data); |
| @@ -5967,12 +5947,6 @@ again: | |||
| 5967 | num_bytes = num_bytes >> 1; | 5947 | num_bytes = num_bytes >> 1; |
| 5968 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 5948 | num_bytes = num_bytes & ~(root->sectorsize - 1); |
| 5969 | num_bytes = max(num_bytes, min_alloc_size); | 5949 | num_bytes = max(num_bytes, min_alloc_size); |
| 5970 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 5971 | num_bytes, data, CHUNK_ALLOC_FORCE); | ||
| 5972 | if (ret < 0 && ret != -ENOSPC) { | ||
| 5973 | btrfs_abort_transaction(trans, root, ret); | ||
| 5974 | return ret; | ||
| 5975 | } | ||
| 5976 | if (num_bytes == min_alloc_size) | 5950 | if (num_bytes == min_alloc_size) |
| 5977 | final_tried = true; | 5951 | final_tried = true; |
| 5978 | goto again; | 5952 | goto again; |
| @@ -6314,7 +6288,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
| 6314 | ret = block_rsv_use_bytes(block_rsv, blocksize); | 6288 | ret = block_rsv_use_bytes(block_rsv, blocksize); |
| 6315 | if (!ret) | 6289 | if (!ret) |
| 6316 | return block_rsv; | 6290 | return block_rsv; |
| 6317 | if (ret) { | 6291 | if (ret && !block_rsv->failfast) { |
| 6318 | static DEFINE_RATELIMIT_STATE(_rs, | 6292 | static DEFINE_RATELIMIT_STATE(_rs, |
| 6319 | DEFAULT_RATELIMIT_INTERVAL, | 6293 | DEFAULT_RATELIMIT_INTERVAL, |
| 6320 | /*DEFAULT_RATELIMIT_BURST*/ 2); | 6294 | /*DEFAULT_RATELIMIT_BURST*/ 2); |
| @@ -7279,7 +7253,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
| 7279 | 7253 | ||
| 7280 | alloc_flags = update_block_group_flags(root, cache->flags); | 7254 | alloc_flags = update_block_group_flags(root, cache->flags); |
| 7281 | if (alloc_flags != cache->flags) { | 7255 | if (alloc_flags != cache->flags) { |
| 7282 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7256 | ret = do_chunk_alloc(trans, root, alloc_flags, |
| 7283 | CHUNK_ALLOC_FORCE); | 7257 | CHUNK_ALLOC_FORCE); |
| 7284 | if (ret < 0) | 7258 | if (ret < 0) |
| 7285 | goto out; | 7259 | goto out; |
| @@ -7289,7 +7263,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
| 7289 | if (!ret) | 7263 | if (!ret) |
| 7290 | goto out; | 7264 | goto out; |
| 7291 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 7265 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
| 7292 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7266 | ret = do_chunk_alloc(trans, root, alloc_flags, |
| 7293 | CHUNK_ALLOC_FORCE); | 7267 | CHUNK_ALLOC_FORCE); |
| 7294 | if (ret < 0) | 7268 | if (ret < 0) |
| 7295 | goto out; | 7269 | goto out; |
| @@ -7303,7 +7277,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 7303 | struct btrfs_root *root, u64 type) | 7277 | struct btrfs_root *root, u64 type) |
| 7304 | { | 7278 | { |
| 7305 | u64 alloc_flags = get_alloc_profile(root, type); | 7279 | u64 alloc_flags = get_alloc_profile(root, type); |
| 7306 | return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7280 | return do_chunk_alloc(trans, root, alloc_flags, |
| 7307 | CHUNK_ALLOC_FORCE); | 7281 | CHUNK_ALLOC_FORCE); |
| 7308 | } | 7282 | } |
| 7309 | 7283 | ||
| @@ -7810,6 +7784,34 @@ error: | |||
| 7810 | return ret; | 7784 | return ret; |
| 7811 | } | 7785 | } |
| 7812 | 7786 | ||
| 7787 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | ||
| 7788 | struct btrfs_root *root) | ||
| 7789 | { | ||
| 7790 | struct btrfs_block_group_cache *block_group, *tmp; | ||
| 7791 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
| 7792 | struct btrfs_block_group_item item; | ||
| 7793 | struct btrfs_key key; | ||
| 7794 | int ret = 0; | ||
| 7795 | |||
| 7796 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, | ||
| 7797 | new_bg_list) { | ||
| 7798 | list_del_init(&block_group->new_bg_list); | ||
| 7799 | |||
| 7800 | if (ret) | ||
| 7801 | continue; | ||
| 7802 | |||
| 7803 | spin_lock(&block_group->lock); | ||
| 7804 | memcpy(&item, &block_group->item, sizeof(item)); | ||
| 7805 | memcpy(&key, &block_group->key, sizeof(key)); | ||
| 7806 | spin_unlock(&block_group->lock); | ||
| 7807 | |||
| 7808 | ret = btrfs_insert_item(trans, extent_root, &key, &item, | ||
| 7809 | sizeof(item)); | ||
| 7810 | if (ret) | ||
| 7811 | btrfs_abort_transaction(trans, extent_root, ret); | ||
| 7812 | } | ||
| 7813 | } | ||
| 7814 | |||
| 7813 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | 7815 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, |
| 7814 | struct btrfs_root *root, u64 bytes_used, | 7816 | struct btrfs_root *root, u64 bytes_used, |
| 7815 | u64 type, u64 chunk_objectid, u64 chunk_offset, | 7817 | u64 type, u64 chunk_objectid, u64 chunk_offset, |
| @@ -7843,6 +7845,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7843 | spin_lock_init(&cache->lock); | 7845 | spin_lock_init(&cache->lock); |
| 7844 | INIT_LIST_HEAD(&cache->list); | 7846 | INIT_LIST_HEAD(&cache->list); |
| 7845 | INIT_LIST_HEAD(&cache->cluster_list); | 7847 | INIT_LIST_HEAD(&cache->cluster_list); |
| 7848 | INIT_LIST_HEAD(&cache->new_bg_list); | ||
| 7846 | 7849 | ||
| 7847 | btrfs_init_free_space_ctl(cache); | 7850 | btrfs_init_free_space_ctl(cache); |
| 7848 | 7851 | ||
| @@ -7874,12 +7877,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7874 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 7877 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
| 7875 | BUG_ON(ret); /* Logic error */ | 7878 | BUG_ON(ret); /* Logic error */ |
| 7876 | 7879 | ||
| 7877 | ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, | 7880 | list_add_tail(&cache->new_bg_list, &trans->new_bgs); |
| 7878 | sizeof(cache->item)); | ||
| 7879 | if (ret) { | ||
| 7880 | btrfs_abort_transaction(trans, extent_root, ret); | ||
| 7881 | return ret; | ||
| 7882 | } | ||
| 7883 | 7881 | ||
| 7884 | set_avail_alloc_bits(extent_root->fs_info, type); | 7882 | set_avail_alloc_bits(extent_root->fs_info, type); |
| 7885 | 7883 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4c878476bb91..8036d3a84853 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -45,6 +45,7 @@ struct extent_page_data { | |||
| 45 | struct bio *bio; | 45 | struct bio *bio; |
| 46 | struct extent_io_tree *tree; | 46 | struct extent_io_tree *tree; |
| 47 | get_extent_t *get_extent; | 47 | get_extent_t *get_extent; |
| 48 | unsigned long bio_flags; | ||
| 48 | 49 | ||
| 49 | /* tells writepage not to lock the state bits for this range | 50 | /* tells writepage not to lock the state bits for this range |
| 50 | * it still does the unlocking | 51 | * it still does the unlocking |
| @@ -64,13 +65,13 @@ tree_fs_info(struct extent_io_tree *tree) | |||
| 64 | 65 | ||
| 65 | int __init extent_io_init(void) | 66 | int __init extent_io_init(void) |
| 66 | { | 67 | { |
| 67 | extent_state_cache = kmem_cache_create("extent_state", | 68 | extent_state_cache = kmem_cache_create("btrfs_extent_state", |
| 68 | sizeof(struct extent_state), 0, | 69 | sizeof(struct extent_state), 0, |
| 69 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 70 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 70 | if (!extent_state_cache) | 71 | if (!extent_state_cache) |
| 71 | return -ENOMEM; | 72 | return -ENOMEM; |
| 72 | 73 | ||
| 73 | extent_buffer_cache = kmem_cache_create("extent_buffers", | 74 | extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer", |
| 74 | sizeof(struct extent_buffer), 0, | 75 | sizeof(struct extent_buffer), 0, |
| 75 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 76 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 76 | if (!extent_buffer_cache) | 77 | if (!extent_buffer_cache) |
| @@ -107,6 +108,12 @@ void extent_io_exit(void) | |||
| 107 | list_del(&eb->leak_list); | 108 | list_del(&eb->leak_list); |
| 108 | kmem_cache_free(extent_buffer_cache, eb); | 109 | kmem_cache_free(extent_buffer_cache, eb); |
| 109 | } | 110 | } |
| 111 | |||
| 112 | /* | ||
| 113 | * Make sure all delayed rcu free are flushed before we | ||
| 114 | * destroy caches. | ||
| 115 | */ | ||
| 116 | rcu_barrier(); | ||
| 110 | if (extent_state_cache) | 117 | if (extent_state_cache) |
| 111 | kmem_cache_destroy(extent_state_cache); | 118 | kmem_cache_destroy(extent_state_cache); |
| 112 | if (extent_buffer_cache) | 119 | if (extent_buffer_cache) |
| @@ -936,6 +943,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, | |||
| 936 | * @end: the end offset in bytes (inclusive) | 943 | * @end: the end offset in bytes (inclusive) |
| 937 | * @bits: the bits to set in this range | 944 | * @bits: the bits to set in this range |
| 938 | * @clear_bits: the bits to clear in this range | 945 | * @clear_bits: the bits to clear in this range |
| 946 | * @cached_state: state that we're going to cache | ||
| 939 | * @mask: the allocation mask | 947 | * @mask: the allocation mask |
| 940 | * | 948 | * |
| 941 | * This will go through and set bits for the given range. If any states exist | 949 | * This will go through and set bits for the given range. If any states exist |
| @@ -945,7 +953,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, | |||
| 945 | * boundary bits like LOCK. | 953 | * boundary bits like LOCK. |
| 946 | */ | 954 | */ |
| 947 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 955 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 948 | int bits, int clear_bits, gfp_t mask) | 956 | int bits, int clear_bits, |
| 957 | struct extent_state **cached_state, gfp_t mask) | ||
| 949 | { | 958 | { |
| 950 | struct extent_state *state; | 959 | struct extent_state *state; |
| 951 | struct extent_state *prealloc = NULL; | 960 | struct extent_state *prealloc = NULL; |
| @@ -962,6 +971,15 @@ again: | |||
| 962 | } | 971 | } |
| 963 | 972 | ||
| 964 | spin_lock(&tree->lock); | 973 | spin_lock(&tree->lock); |
| 974 | if (cached_state && *cached_state) { | ||
| 975 | state = *cached_state; | ||
| 976 | if (state->start <= start && state->end > start && | ||
| 977 | state->tree) { | ||
| 978 | node = &state->rb_node; | ||
| 979 | goto hit_next; | ||
| 980 | } | ||
| 981 | } | ||
| 982 | |||
| 965 | /* | 983 | /* |
| 966 | * this search will find all the extents that end after | 984 | * this search will find all the extents that end after |
| 967 | * our range starts. | 985 | * our range starts. |
| @@ -992,6 +1010,7 @@ hit_next: | |||
| 992 | */ | 1010 | */ |
| 993 | if (state->start == start && state->end <= end) { | 1011 | if (state->start == start && state->end <= end) { |
| 994 | set_state_bits(tree, state, &bits); | 1012 | set_state_bits(tree, state, &bits); |
| 1013 | cache_state(state, cached_state); | ||
| 995 | state = clear_state_bit(tree, state, &clear_bits, 0); | 1014 | state = clear_state_bit(tree, state, &clear_bits, 0); |
| 996 | if (last_end == (u64)-1) | 1015 | if (last_end == (u64)-1) |
| 997 | goto out; | 1016 | goto out; |
| @@ -1032,6 +1051,7 @@ hit_next: | |||
| 1032 | goto out; | 1051 | goto out; |
| 1033 | if (state->end <= end) { | 1052 | if (state->end <= end) { |
| 1034 | set_state_bits(tree, state, &bits); | 1053 | set_state_bits(tree, state, &bits); |
| 1054 | cache_state(state, cached_state); | ||
| 1035 | state = clear_state_bit(tree, state, &clear_bits, 0); | 1055 | state = clear_state_bit(tree, state, &clear_bits, 0); |
| 1036 | if (last_end == (u64)-1) | 1056 | if (last_end == (u64)-1) |
| 1037 | goto out; | 1057 | goto out; |
| @@ -1070,6 +1090,7 @@ hit_next: | |||
| 1070 | &bits); | 1090 | &bits); |
| 1071 | if (err) | 1091 | if (err) |
| 1072 | extent_io_tree_panic(tree, err); | 1092 | extent_io_tree_panic(tree, err); |
| 1093 | cache_state(prealloc, cached_state); | ||
| 1073 | prealloc = NULL; | 1094 | prealloc = NULL; |
| 1074 | start = this_end + 1; | 1095 | start = this_end + 1; |
| 1075 | goto search_again; | 1096 | goto search_again; |
| @@ -1092,6 +1113,7 @@ hit_next: | |||
| 1092 | extent_io_tree_panic(tree, err); | 1113 | extent_io_tree_panic(tree, err); |
| 1093 | 1114 | ||
| 1094 | set_state_bits(tree, prealloc, &bits); | 1115 | set_state_bits(tree, prealloc, &bits); |
| 1116 | cache_state(prealloc, cached_state); | ||
| 1095 | clear_state_bit(tree, prealloc, &clear_bits, 0); | 1117 | clear_state_bit(tree, prealloc, &clear_bits, 0); |
| 1096 | prealloc = NULL; | 1118 | prealloc = NULL; |
| 1097 | goto out; | 1119 | goto out; |
| @@ -1144,6 +1166,14 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 1144 | NULL, cached_state, mask); | 1166 | NULL, cached_state, mask); |
| 1145 | } | 1167 | } |
| 1146 | 1168 | ||
| 1169 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 1170 | struct extent_state **cached_state, gfp_t mask) | ||
| 1171 | { | ||
| 1172 | return set_extent_bit(tree, start, end, | ||
| 1173 | EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG, | ||
| 1174 | NULL, cached_state, mask); | ||
| 1175 | } | ||
| 1176 | |||
| 1147 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 1177 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 1148 | gfp_t mask) | 1178 | gfp_t mask) |
| 1149 | { | 1179 | { |
| @@ -1288,18 +1318,42 @@ out: | |||
| 1288 | * If nothing was found, 1 is returned. If something was found, 0 is returned. | 1318 | * If nothing was found, 1 is returned. If something was found, 0 is returned. |
| 1289 | */ | 1319 | */ |
| 1290 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 1320 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
| 1291 | u64 *start_ret, u64 *end_ret, int bits) | 1321 | u64 *start_ret, u64 *end_ret, int bits, |
| 1322 | struct extent_state **cached_state) | ||
| 1292 | { | 1323 | { |
| 1293 | struct extent_state *state; | 1324 | struct extent_state *state; |
| 1325 | struct rb_node *n; | ||
| 1294 | int ret = 1; | 1326 | int ret = 1; |
| 1295 | 1327 | ||
| 1296 | spin_lock(&tree->lock); | 1328 | spin_lock(&tree->lock); |
| 1329 | if (cached_state && *cached_state) { | ||
| 1330 | state = *cached_state; | ||
| 1331 | if (state->end == start - 1 && state->tree) { | ||
| 1332 | n = rb_next(&state->rb_node); | ||
| 1333 | while (n) { | ||
| 1334 | state = rb_entry(n, struct extent_state, | ||
| 1335 | rb_node); | ||
| 1336 | if (state->state & bits) | ||
| 1337 | goto got_it; | ||
| 1338 | n = rb_next(n); | ||
| 1339 | } | ||
| 1340 | free_extent_state(*cached_state); | ||
| 1341 | *cached_state = NULL; | ||
| 1342 | goto out; | ||
| 1343 | } | ||
| 1344 | free_extent_state(*cached_state); | ||
| 1345 | *cached_state = NULL; | ||
| 1346 | } | ||
| 1347 | |||
| 1297 | state = find_first_extent_bit_state(tree, start, bits); | 1348 | state = find_first_extent_bit_state(tree, start, bits); |
| 1349 | got_it: | ||
| 1298 | if (state) { | 1350 | if (state) { |
| 1351 | cache_state(state, cached_state); | ||
| 1299 | *start_ret = state->start; | 1352 | *start_ret = state->start; |
| 1300 | *end_ret = state->end; | 1353 | *end_ret = state->end; |
| 1301 | ret = 0; | 1354 | ret = 0; |
| 1302 | } | 1355 | } |
| 1356 | out: | ||
| 1303 | spin_unlock(&tree->lock); | 1357 | spin_unlock(&tree->lock); |
| 1304 | return ret; | 1358 | return ret; |
| 1305 | } | 1359 | } |
| @@ -2062,7 +2116,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, | |||
| 2062 | } | 2116 | } |
| 2063 | read_unlock(&em_tree->lock); | 2117 | read_unlock(&em_tree->lock); |
| 2064 | 2118 | ||
| 2065 | if (!em || IS_ERR(em)) { | 2119 | if (!em) { |
| 2066 | kfree(failrec); | 2120 | kfree(failrec); |
| 2067 | return -EIO; | 2121 | return -EIO; |
| 2068 | } | 2122 | } |
| @@ -2298,8 +2352,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 2298 | struct extent_state *cached = NULL; | 2352 | struct extent_state *cached = NULL; |
| 2299 | struct extent_state *state; | 2353 | struct extent_state *state; |
| 2300 | 2354 | ||
| 2301 | pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " | 2355 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " |
| 2302 | "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err, | 2356 | "mirror=%ld\n", (u64)bio->bi_sector, err, |
| 2303 | (long int)bio->bi_bdev); | 2357 | (long int)bio->bi_bdev); |
| 2304 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2358 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 2305 | 2359 | ||
| @@ -2703,12 +2757,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2703 | end_bio_extent_readpage, mirror_num, | 2757 | end_bio_extent_readpage, mirror_num, |
| 2704 | *bio_flags, | 2758 | *bio_flags, |
| 2705 | this_bio_flag); | 2759 | this_bio_flag); |
| 2706 | BUG_ON(ret == -ENOMEM); | 2760 | if (!ret) { |
| 2707 | nr++; | 2761 | nr++; |
| 2708 | *bio_flags = this_bio_flag; | 2762 | *bio_flags = this_bio_flag; |
| 2763 | } | ||
| 2709 | } | 2764 | } |
| 2710 | if (ret) | 2765 | if (ret) { |
| 2711 | SetPageError(page); | 2766 | SetPageError(page); |
| 2767 | unlock_extent(tree, cur, cur + iosize - 1); | ||
| 2768 | } | ||
| 2712 | cur = cur + iosize; | 2769 | cur = cur + iosize; |
| 2713 | pg_offset += iosize; | 2770 | pg_offset += iosize; |
| 2714 | } | 2771 | } |
| @@ -3155,12 +3212,16 @@ static int write_one_eb(struct extent_buffer *eb, | |||
| 3155 | struct block_device *bdev = fs_info->fs_devices->latest_bdev; | 3212 | struct block_device *bdev = fs_info->fs_devices->latest_bdev; |
| 3156 | u64 offset = eb->start; | 3213 | u64 offset = eb->start; |
| 3157 | unsigned long i, num_pages; | 3214 | unsigned long i, num_pages; |
| 3215 | unsigned long bio_flags = 0; | ||
| 3158 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); | 3216 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); |
| 3159 | int ret = 0; | 3217 | int ret = 0; |
| 3160 | 3218 | ||
| 3161 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3219 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
| 3162 | num_pages = num_extent_pages(eb->start, eb->len); | 3220 | num_pages = num_extent_pages(eb->start, eb->len); |
| 3163 | atomic_set(&eb->io_pages, num_pages); | 3221 | atomic_set(&eb->io_pages, num_pages); |
| 3222 | if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) | ||
| 3223 | bio_flags = EXTENT_BIO_TREE_LOG; | ||
| 3224 | |||
| 3164 | for (i = 0; i < num_pages; i++) { | 3225 | for (i = 0; i < num_pages; i++) { |
| 3165 | struct page *p = extent_buffer_page(eb, i); | 3226 | struct page *p = extent_buffer_page(eb, i); |
| 3166 | 3227 | ||
| @@ -3169,7 +3230,8 @@ static int write_one_eb(struct extent_buffer *eb, | |||
| 3169 | ret = submit_extent_page(rw, eb->tree, p, offset >> 9, | 3230 | ret = submit_extent_page(rw, eb->tree, p, offset >> 9, |
| 3170 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, | 3231 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, |
| 3171 | -1, end_bio_extent_buffer_writepage, | 3232 | -1, end_bio_extent_buffer_writepage, |
| 3172 | 0, 0, 0); | 3233 | 0, epd->bio_flags, bio_flags); |
| 3234 | epd->bio_flags = bio_flags; | ||
| 3173 | if (ret) { | 3235 | if (ret) { |
| 3174 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3236 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
| 3175 | SetPageError(p); | 3237 | SetPageError(p); |
| @@ -3204,6 +3266,7 @@ int btree_write_cache_pages(struct address_space *mapping, | |||
| 3204 | .tree = tree, | 3266 | .tree = tree, |
| 3205 | .extent_locked = 0, | 3267 | .extent_locked = 0, |
| 3206 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3268 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
| 3269 | .bio_flags = 0, | ||
| 3207 | }; | 3270 | }; |
| 3208 | int ret = 0; | 3271 | int ret = 0; |
| 3209 | int done = 0; | 3272 | int done = 0; |
| @@ -3248,19 +3311,34 @@ retry: | |||
| 3248 | break; | 3311 | break; |
| 3249 | } | 3312 | } |
| 3250 | 3313 | ||
| 3314 | spin_lock(&mapping->private_lock); | ||
| 3315 | if (!PagePrivate(page)) { | ||
| 3316 | spin_unlock(&mapping->private_lock); | ||
| 3317 | continue; | ||
| 3318 | } | ||
| 3319 | |||
| 3251 | eb = (struct extent_buffer *)page->private; | 3320 | eb = (struct extent_buffer *)page->private; |
| 3321 | |||
| 3322 | /* | ||
| 3323 | * Shouldn't happen and normally this would be a BUG_ON | ||
| 3324 | * but no sense in crashing the users box for something | ||
| 3325 | * we can survive anyway. | ||
| 3326 | */ | ||
| 3252 | if (!eb) { | 3327 | if (!eb) { |
| 3328 | spin_unlock(&mapping->private_lock); | ||
| 3253 | WARN_ON(1); | 3329 | WARN_ON(1); |
| 3254 | continue; | 3330 | continue; |
| 3255 | } | 3331 | } |
| 3256 | 3332 | ||
| 3257 | if (eb == prev_eb) | 3333 | if (eb == prev_eb) { |
| 3334 | spin_unlock(&mapping->private_lock); | ||
| 3258 | continue; | 3335 | continue; |
| 3336 | } | ||
| 3259 | 3337 | ||
| 3260 | if (!atomic_inc_not_zero(&eb->refs)) { | 3338 | ret = atomic_inc_not_zero(&eb->refs); |
| 3261 | WARN_ON(1); | 3339 | spin_unlock(&mapping->private_lock); |
| 3340 | if (!ret) | ||
| 3262 | continue; | 3341 | continue; |
| 3263 | } | ||
| 3264 | 3342 | ||
| 3265 | prev_eb = eb; | 3343 | prev_eb = eb; |
| 3266 | ret = lock_extent_buffer_for_io(eb, fs_info, &epd); | 3344 | ret = lock_extent_buffer_for_io(eb, fs_info, &epd); |
| @@ -3451,7 +3529,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd) | |||
| 3451 | if (epd->sync_io) | 3529 | if (epd->sync_io) |
| 3452 | rw = WRITE_SYNC; | 3530 | rw = WRITE_SYNC; |
| 3453 | 3531 | ||
| 3454 | ret = submit_one_bio(rw, epd->bio, 0, 0); | 3532 | ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags); |
| 3455 | BUG_ON(ret < 0); /* -ENOMEM */ | 3533 | BUG_ON(ret < 0); /* -ENOMEM */ |
| 3456 | epd->bio = NULL; | 3534 | epd->bio = NULL; |
| 3457 | } | 3535 | } |
| @@ -3474,6 +3552,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
| 3474 | .get_extent = get_extent, | 3552 | .get_extent = get_extent, |
| 3475 | .extent_locked = 0, | 3553 | .extent_locked = 0, |
| 3476 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3554 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
| 3555 | .bio_flags = 0, | ||
| 3477 | }; | 3556 | }; |
| 3478 | 3557 | ||
| 3479 | ret = __extent_writepage(page, wbc, &epd); | 3558 | ret = __extent_writepage(page, wbc, &epd); |
| @@ -3498,6 +3577,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
| 3498 | .get_extent = get_extent, | 3577 | .get_extent = get_extent, |
| 3499 | .extent_locked = 1, | 3578 | .extent_locked = 1, |
| 3500 | .sync_io = mode == WB_SYNC_ALL, | 3579 | .sync_io = mode == WB_SYNC_ALL, |
| 3580 | .bio_flags = 0, | ||
| 3501 | }; | 3581 | }; |
| 3502 | struct writeback_control wbc_writepages = { | 3582 | struct writeback_control wbc_writepages = { |
| 3503 | .sync_mode = mode, | 3583 | .sync_mode = mode, |
| @@ -3537,6 +3617,7 @@ int extent_writepages(struct extent_io_tree *tree, | |||
| 3537 | .get_extent = get_extent, | 3617 | .get_extent = get_extent, |
| 3538 | .extent_locked = 0, | 3618 | .extent_locked = 0, |
| 3539 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3619 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
| 3620 | .bio_flags = 0, | ||
| 3540 | }; | 3621 | }; |
| 3541 | 3622 | ||
| 3542 | ret = extent_write_cache_pages(tree, mapping, wbc, | 3623 | ret = extent_write_cache_pages(tree, mapping, wbc, |
| @@ -3914,18 +3995,6 @@ out: | |||
| 3914 | return ret; | 3995 | return ret; |
| 3915 | } | 3996 | } |
| 3916 | 3997 | ||
| 3917 | inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
| 3918 | unsigned long i) | ||
| 3919 | { | ||
| 3920 | return eb->pages[i]; | ||
| 3921 | } | ||
| 3922 | |||
| 3923 | inline unsigned long num_extent_pages(u64 start, u64 len) | ||
| 3924 | { | ||
| 3925 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | ||
| 3926 | (start >> PAGE_CACHE_SHIFT); | ||
| 3927 | } | ||
| 3928 | |||
| 3929 | static void __free_extent_buffer(struct extent_buffer *eb) | 3998 | static void __free_extent_buffer(struct extent_buffer *eb) |
| 3930 | { | 3999 | { |
| 3931 | #if LEAK_DEBUG | 4000 | #if LEAK_DEBUG |
| @@ -4041,7 +4110,7 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) | |||
| 4041 | 4110 | ||
| 4042 | return eb; | 4111 | return eb; |
| 4043 | err: | 4112 | err: |
| 4044 | for (i--; i > 0; i--) | 4113 | for (i--; i >= 0; i--) |
| 4045 | __free_page(eb->pages[i]); | 4114 | __free_page(eb->pages[i]); |
| 4046 | __free_extent_buffer(eb); | 4115 | __free_extent_buffer(eb); |
| 4047 | return NULL; | 4116 | return NULL; |
| @@ -4186,10 +4255,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
| 4186 | 4255 | ||
| 4187 | for (i = 0; i < num_pages; i++, index++) { | 4256 | for (i = 0; i < num_pages; i++, index++) { |
| 4188 | p = find_or_create_page(mapping, index, GFP_NOFS); | 4257 | p = find_or_create_page(mapping, index, GFP_NOFS); |
| 4189 | if (!p) { | 4258 | if (!p) |
| 4190 | WARN_ON(1); | ||
| 4191 | goto free_eb; | 4259 | goto free_eb; |
| 4192 | } | ||
| 4193 | 4260 | ||
| 4194 | spin_lock(&mapping->private_lock); | 4261 | spin_lock(&mapping->private_lock); |
| 4195 | if (PagePrivate(p)) { | 4262 | if (PagePrivate(p)) { |
| @@ -4332,7 +4399,6 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | |||
| 4332 | 4399 | ||
| 4333 | /* Should be safe to release our pages at this point */ | 4400 | /* Should be safe to release our pages at this point */ |
| 4334 | btrfs_release_extent_buffer_page(eb, 0); | 4401 | btrfs_release_extent_buffer_page(eb, 0); |
| 4335 | |||
| 4336 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | 4402 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); |
| 4337 | return 1; | 4403 | return 1; |
| 4338 | } | 4404 | } |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 25900af5b15d..711d12b80028 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | * type for this bio | 27 | * type for this bio |
| 28 | */ | 28 | */ |
| 29 | #define EXTENT_BIO_COMPRESSED 1 | 29 | #define EXTENT_BIO_COMPRESSED 1 |
| 30 | #define EXTENT_BIO_TREE_LOG 2 | ||
| 30 | #define EXTENT_BIO_FLAG_SHIFT 16 | 31 | #define EXTENT_BIO_FLAG_SHIFT 16 |
| 31 | 32 | ||
| 32 | /* these are bit numbers for test/set bit */ | 33 | /* these are bit numbers for test/set bit */ |
| @@ -232,11 +233,15 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 232 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 233 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 233 | gfp_t mask); | 234 | gfp_t mask); |
| 234 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 235 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 235 | int bits, int clear_bits, gfp_t mask); | 236 | int bits, int clear_bits, |
| 237 | struct extent_state **cached_state, gfp_t mask); | ||
| 236 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 238 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
| 237 | struct extent_state **cached_state, gfp_t mask); | 239 | struct extent_state **cached_state, gfp_t mask); |
| 240 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 241 | struct extent_state **cached_state, gfp_t mask); | ||
| 238 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 242 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
| 239 | u64 *start_ret, u64 *end_ret, int bits); | 243 | u64 *start_ret, u64 *end_ret, int bits, |
| 244 | struct extent_state **cached_state); | ||
| 240 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, | 245 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, |
| 241 | u64 start, int bits); | 246 | u64 start, int bits); |
| 242 | int extent_invalidatepage(struct extent_io_tree *tree, | 247 | int extent_invalidatepage(struct extent_io_tree *tree, |
| @@ -277,8 +282,18 @@ void free_extent_buffer_stale(struct extent_buffer *eb); | |||
| 277 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 282 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
| 278 | struct extent_buffer *eb, u64 start, int wait, | 283 | struct extent_buffer *eb, u64 start, int wait, |
| 279 | get_extent_t *get_extent, int mirror_num); | 284 | get_extent_t *get_extent, int mirror_num); |
| 280 | unsigned long num_extent_pages(u64 start, u64 len); | 285 | |
| 281 | struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i); | 286 | static inline unsigned long num_extent_pages(u64 start, u64 len) |
| 287 | { | ||
| 288 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | ||
| 289 | (start >> PAGE_CACHE_SHIFT); | ||
| 290 | } | ||
| 291 | |||
| 292 | static inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
| 293 | unsigned long i) | ||
| 294 | { | ||
| 295 | return eb->pages[i]; | ||
| 296 | } | ||
| 282 | 297 | ||
| 283 | static inline void extent_buffer_get(struct extent_buffer *eb) | 298 | static inline void extent_buffer_get(struct extent_buffer *eb) |
| 284 | { | 299 | { |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 7c97b3301459..b8cbc8d5c7f7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -11,7 +11,7 @@ static struct kmem_cache *extent_map_cache; | |||
| 11 | 11 | ||
| 12 | int __init extent_map_init(void) | 12 | int __init extent_map_init(void) |
| 13 | { | 13 | { |
| 14 | extent_map_cache = kmem_cache_create("extent_map", | 14 | extent_map_cache = kmem_cache_create("btrfs_extent_map", |
| 15 | sizeof(struct extent_map), 0, | 15 | sizeof(struct extent_map), 0, |
| 16 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 16 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 17 | if (!extent_map_cache) | 17 | if (!extent_map_cache) |
| @@ -35,6 +35,7 @@ void extent_map_exit(void) | |||
| 35 | void extent_map_tree_init(struct extent_map_tree *tree) | 35 | void extent_map_tree_init(struct extent_map_tree *tree) |
| 36 | { | 36 | { |
| 37 | tree->map = RB_ROOT; | 37 | tree->map = RB_ROOT; |
| 38 | INIT_LIST_HEAD(&tree->modified_extents); | ||
| 38 | rwlock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| @@ -54,7 +55,9 @@ struct extent_map *alloc_extent_map(void) | |||
| 54 | em->in_tree = 0; | 55 | em->in_tree = 0; |
| 55 | em->flags = 0; | 56 | em->flags = 0; |
| 56 | em->compress_type = BTRFS_COMPRESS_NONE; | 57 | em->compress_type = BTRFS_COMPRESS_NONE; |
| 58 | em->generation = 0; | ||
| 57 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
| 60 | INIT_LIST_HEAD(&em->list); | ||
| 58 | return em; | 61 | return em; |
| 59 | } | 62 | } |
| 60 | 63 | ||
| @@ -72,6 +75,7 @@ void free_extent_map(struct extent_map *em) | |||
| 72 | WARN_ON(atomic_read(&em->refs) == 0); | 75 | WARN_ON(atomic_read(&em->refs) == 0); |
| 73 | if (atomic_dec_and_test(&em->refs)) { | 76 | if (atomic_dec_and_test(&em->refs)) { |
| 74 | WARN_ON(em->in_tree); | 77 | WARN_ON(em->in_tree); |
| 78 | WARN_ON(!list_empty(&em->list)); | ||
| 75 | kmem_cache_free(extent_map_cache, em); | 79 | kmem_cache_free(extent_map_cache, em); |
| 76 | } | 80 | } |
| 77 | } | 81 | } |
| @@ -198,6 +202,14 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 198 | em->block_len += merge->block_len; | 202 | em->block_len += merge->block_len; |
| 199 | em->block_start = merge->block_start; | 203 | em->block_start = merge->block_start; |
| 200 | merge->in_tree = 0; | 204 | merge->in_tree = 0; |
| 205 | if (merge->generation > em->generation) { | ||
| 206 | em->mod_start = em->start; | ||
| 207 | em->mod_len = em->len; | ||
| 208 | em->generation = merge->generation; | ||
| 209 | list_move(&em->list, &tree->modified_extents); | ||
| 210 | } | ||
| 211 | |||
| 212 | list_del_init(&merge->list); | ||
| 201 | rb_erase(&merge->rb_node, &tree->map); | 213 | rb_erase(&merge->rb_node, &tree->map); |
| 202 | free_extent_map(merge); | 214 | free_extent_map(merge); |
| 203 | } | 215 | } |
| @@ -211,14 +223,34 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 211 | em->block_len += merge->len; | 223 | em->block_len += merge->len; |
| 212 | rb_erase(&merge->rb_node, &tree->map); | 224 | rb_erase(&merge->rb_node, &tree->map); |
| 213 | merge->in_tree = 0; | 225 | merge->in_tree = 0; |
| 226 | if (merge->generation > em->generation) { | ||
| 227 | em->mod_len = em->len; | ||
| 228 | em->generation = merge->generation; | ||
| 229 | list_move(&em->list, &tree->modified_extents); | ||
| 230 | } | ||
| 231 | list_del_init(&merge->list); | ||
| 214 | free_extent_map(merge); | 232 | free_extent_map(merge); |
| 215 | } | 233 | } |
| 216 | } | 234 | } |
| 217 | 235 | ||
| 218 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | 236 | /** |
| 237 | * unpin_extent_cache - unpin an extent from the cache | ||
| 238 | * @tree: tree to unpin the extent in | ||
| 239 | * @start: logical offset in the file | ||
| 240 | * @len: length of the extent | ||
| 241 | * @gen: generation that this extent has been modified in | ||
| 242 | * @prealloc: if this is set we need to clear the prealloc flag | ||
| 243 | * | ||
| 244 | * Called after an extent has been written to disk properly. Set the generation | ||
| 245 | * to the generation that actually added the file item to the inode so we know | ||
| 246 | * we need to sync this extent when we call fsync(). | ||
| 247 | */ | ||
| 248 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, | ||
| 249 | u64 gen) | ||
| 219 | { | 250 | { |
| 220 | int ret = 0; | 251 | int ret = 0; |
| 221 | struct extent_map *em; | 252 | struct extent_map *em; |
| 253 | bool prealloc = false; | ||
| 222 | 254 | ||
| 223 | write_lock(&tree->lock); | 255 | write_lock(&tree->lock); |
| 224 | em = lookup_extent_mapping(tree, start, len); | 256 | em = lookup_extent_mapping(tree, start, len); |
| @@ -228,10 +260,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | |||
| 228 | if (!em) | 260 | if (!em) |
| 229 | goto out; | 261 | goto out; |
| 230 | 262 | ||
| 263 | list_move(&em->list, &tree->modified_extents); | ||
| 264 | em->generation = gen; | ||
| 231 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 265 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 266 | em->mod_start = em->start; | ||
| 267 | em->mod_len = em->len; | ||
| 268 | |||
| 269 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | ||
| 270 | prealloc = true; | ||
| 271 | clear_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
| 272 | } | ||
| 232 | 273 | ||
| 233 | try_merge_map(tree, em); | 274 | try_merge_map(tree, em); |
| 234 | 275 | ||
| 276 | if (prealloc) { | ||
| 277 | em->mod_start = em->start; | ||
| 278 | em->mod_len = em->len; | ||
| 279 | } | ||
| 280 | |||
| 235 | free_extent_map(em); | 281 | free_extent_map(em); |
| 236 | out: | 282 | out: |
| 237 | write_unlock(&tree->lock); | 283 | write_unlock(&tree->lock); |
| @@ -269,6 +315,9 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 269 | } | 315 | } |
| 270 | atomic_inc(&em->refs); | 316 | atomic_inc(&em->refs); |
| 271 | 317 | ||
| 318 | em->mod_start = em->start; | ||
| 319 | em->mod_len = em->len; | ||
| 320 | |||
| 272 | try_merge_map(tree, em); | 321 | try_merge_map(tree, em); |
| 273 | out: | 322 | out: |
| 274 | return ret; | 323 | return ret; |
| @@ -358,6 +407,8 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
| 358 | 407 | ||
| 359 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | 408 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); |
| 360 | rb_erase(&em->rb_node, &tree->map); | 409 | rb_erase(&em->rb_node, &tree->map); |
| 410 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | ||
| 411 | list_del_init(&em->list); | ||
| 361 | em->in_tree = 0; | 412 | em->in_tree = 0; |
| 362 | return ret; | 413 | return ret; |
| 363 | } | 414 | } |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 1195f09761fe..679225555f7b 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #define EXTENT_FLAG_COMPRESSED 1 | 13 | #define EXTENT_FLAG_COMPRESSED 1 |
| 14 | #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ | 14 | #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ |
| 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ | 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ |
| 16 | #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ | ||
| 16 | 17 | ||
| 17 | struct extent_map { | 18 | struct extent_map { |
| 18 | struct rb_node rb_node; | 19 | struct rb_node rb_node; |
| @@ -20,18 +21,23 @@ struct extent_map { | |||
| 20 | /* all of these are in bytes */ | 21 | /* all of these are in bytes */ |
| 21 | u64 start; | 22 | u64 start; |
| 22 | u64 len; | 23 | u64 len; |
| 24 | u64 mod_start; | ||
| 25 | u64 mod_len; | ||
| 23 | u64 orig_start; | 26 | u64 orig_start; |
| 24 | u64 block_start; | 27 | u64 block_start; |
| 25 | u64 block_len; | 28 | u64 block_len; |
| 29 | u64 generation; | ||
| 26 | unsigned long flags; | 30 | unsigned long flags; |
| 27 | struct block_device *bdev; | 31 | struct block_device *bdev; |
| 28 | atomic_t refs; | 32 | atomic_t refs; |
| 29 | unsigned int in_tree; | 33 | unsigned int in_tree; |
| 30 | unsigned int compress_type; | 34 | unsigned int compress_type; |
| 35 | struct list_head list; | ||
| 31 | }; | 36 | }; |
| 32 | 37 | ||
| 33 | struct extent_map_tree { | 38 | struct extent_map_tree { |
| 34 | struct rb_root map; | 39 | struct rb_root map; |
| 40 | struct list_head modified_extents; | ||
| 35 | rwlock_t lock; | 41 | rwlock_t lock; |
| 36 | }; | 42 | }; |
| 37 | 43 | ||
| @@ -60,7 +66,7 @@ struct extent_map *alloc_extent_map(void); | |||
| 60 | void free_extent_map(struct extent_map *em); | 66 | void free_extent_map(struct extent_map *em); |
| 61 | int __init extent_map_init(void); | 67 | int __init extent_map_init(void); |
| 62 | void extent_map_exit(void); | 68 | void extent_map_exit(void); |
| 63 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); | 69 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); |
| 64 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | 70 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, |
| 65 | u64 start, u64 len); | 71 | u64 start, u64 len); |
| 66 | #endif | 72 | #endif |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 857d93cd01dc..1ad08e4e4a15 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -25,11 +25,12 @@ | |||
| 25 | #include "transaction.h" | 25 | #include "transaction.h" |
| 26 | #include "print-tree.h" | 26 | #include "print-tree.h" |
| 27 | 27 | ||
| 28 | #define __MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ | 28 | #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ |
| 29 | sizeof(struct btrfs_item) * 2) / \ | 29 | sizeof(struct btrfs_item) * 2) / \ |
| 30 | size) - 1)) | 30 | size) - 1)) |
| 31 | 31 | ||
| 32 | #define MAX_CSUM_ITEMS(r, size) (min(__MAX_CSUM_ITEMS(r, size), PAGE_CACHE_SIZE)) | 32 | #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ |
| 33 | PAGE_CACHE_SIZE)) | ||
| 33 | 34 | ||
| 34 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ | 35 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ |
| 35 | sizeof(struct btrfs_ordered_sum)) / \ | 36 | sizeof(struct btrfs_ordered_sum)) / \ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5caf285c6e4d..9ab1bed88116 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include "tree-log.h" | 39 | #include "tree-log.h" |
| 40 | #include "locking.h" | 40 | #include "locking.h" |
| 41 | #include "compat.h" | 41 | #include "compat.h" |
| 42 | #include "volumes.h" | ||
| 42 | 43 | ||
| 43 | /* | 44 | /* |
| 44 | * when auto defrag is enabled we | 45 | * when auto defrag is enabled we |
| @@ -458,14 +459,15 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | |||
| 458 | * this drops all the extents in the cache that intersect the range | 459 | * this drops all the extents in the cache that intersect the range |
| 459 | * [start, end]. Existing extents are split as required. | 460 | * [start, end]. Existing extents are split as required. |
| 460 | */ | 461 | */ |
| 461 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 462 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 462 | int skip_pinned) | 463 | int skip_pinned) |
| 463 | { | 464 | { |
| 464 | struct extent_map *em; | 465 | struct extent_map *em; |
| 465 | struct extent_map *split = NULL; | 466 | struct extent_map *split = NULL; |
| 466 | struct extent_map *split2 = NULL; | 467 | struct extent_map *split2 = NULL; |
| 467 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 468 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
| 468 | u64 len = end - start + 1; | 469 | u64 len = end - start + 1; |
| 470 | u64 gen; | ||
| 469 | int ret; | 471 | int ret; |
| 470 | int testend = 1; | 472 | int testend = 1; |
| 471 | unsigned long flags; | 473 | unsigned long flags; |
| @@ -477,11 +479,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 477 | testend = 0; | 479 | testend = 0; |
| 478 | } | 480 | } |
| 479 | while (1) { | 481 | while (1) { |
| 482 | int no_splits = 0; | ||
| 483 | |||
| 480 | if (!split) | 484 | if (!split) |
| 481 | split = alloc_extent_map(); | 485 | split = alloc_extent_map(); |
| 482 | if (!split2) | 486 | if (!split2) |
| 483 | split2 = alloc_extent_map(); | 487 | split2 = alloc_extent_map(); |
| 484 | BUG_ON(!split || !split2); /* -ENOMEM */ | 488 | if (!split || !split2) |
| 489 | no_splits = 1; | ||
| 485 | 490 | ||
| 486 | write_lock(&em_tree->lock); | 491 | write_lock(&em_tree->lock); |
| 487 | em = lookup_extent_mapping(em_tree, start, len); | 492 | em = lookup_extent_mapping(em_tree, start, len); |
| @@ -490,6 +495,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 490 | break; | 495 | break; |
| 491 | } | 496 | } |
| 492 | flags = em->flags; | 497 | flags = em->flags; |
| 498 | gen = em->generation; | ||
| 493 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | 499 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { |
| 494 | if (testend && em->start + em->len >= start + len) { | 500 | if (testend && em->start + em->len >= start + len) { |
| 495 | free_extent_map(em); | 501 | free_extent_map(em); |
| @@ -506,6 +512,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 506 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 512 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 507 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 513 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 508 | remove_extent_mapping(em_tree, em); | 514 | remove_extent_mapping(em_tree, em); |
| 515 | if (no_splits) | ||
| 516 | goto next; | ||
| 509 | 517 | ||
| 510 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | 518 | if (em->block_start < EXTENT_MAP_LAST_BYTE && |
| 511 | em->start < start) { | 519 | em->start < start) { |
| @@ -518,12 +526,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 518 | split->block_len = em->block_len; | 526 | split->block_len = em->block_len; |
| 519 | else | 527 | else |
| 520 | split->block_len = split->len; | 528 | split->block_len = split->len; |
| 521 | 529 | split->generation = gen; | |
| 522 | split->bdev = em->bdev; | 530 | split->bdev = em->bdev; |
| 523 | split->flags = flags; | 531 | split->flags = flags; |
| 524 | split->compress_type = em->compress_type; | 532 | split->compress_type = em->compress_type; |
| 525 | ret = add_extent_mapping(em_tree, split); | 533 | ret = add_extent_mapping(em_tree, split); |
| 526 | BUG_ON(ret); /* Logic error */ | 534 | BUG_ON(ret); /* Logic error */ |
| 535 | list_move(&split->list, &em_tree->modified_extents); | ||
| 527 | free_extent_map(split); | 536 | free_extent_map(split); |
| 528 | split = split2; | 537 | split = split2; |
| 529 | split2 = NULL; | 538 | split2 = NULL; |
| @@ -537,6 +546,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 537 | split->bdev = em->bdev; | 546 | split->bdev = em->bdev; |
| 538 | split->flags = flags; | 547 | split->flags = flags; |
| 539 | split->compress_type = em->compress_type; | 548 | split->compress_type = em->compress_type; |
| 549 | split->generation = gen; | ||
| 540 | 550 | ||
| 541 | if (compressed) { | 551 | if (compressed) { |
| 542 | split->block_len = em->block_len; | 552 | split->block_len = em->block_len; |
| @@ -550,9 +560,11 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 550 | 560 | ||
| 551 | ret = add_extent_mapping(em_tree, split); | 561 | ret = add_extent_mapping(em_tree, split); |
| 552 | BUG_ON(ret); /* Logic error */ | 562 | BUG_ON(ret); /* Logic error */ |
| 563 | list_move(&split->list, &em_tree->modified_extents); | ||
| 553 | free_extent_map(split); | 564 | free_extent_map(split); |
| 554 | split = NULL; | 565 | split = NULL; |
| 555 | } | 566 | } |
| 567 | next: | ||
| 556 | write_unlock(&em_tree->lock); | 568 | write_unlock(&em_tree->lock); |
| 557 | 569 | ||
| 558 | /* once for us */ | 570 | /* once for us */ |
| @@ -564,7 +576,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 564 | free_extent_map(split); | 576 | free_extent_map(split); |
| 565 | if (split2) | 577 | if (split2) |
| 566 | free_extent_map(split2); | 578 | free_extent_map(split2); |
| 567 | return 0; | ||
| 568 | } | 579 | } |
| 569 | 580 | ||
| 570 | /* | 581 | /* |
| @@ -576,13 +587,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 576 | * it is either truncated or split. Anything entirely inside the range | 587 | * it is either truncated or split. Anything entirely inside the range |
| 577 | * is deleted from the tree. | 588 | * is deleted from the tree. |
| 578 | */ | 589 | */ |
| 579 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | 590 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 580 | u64 start, u64 end, u64 *hint_byte, int drop_cache) | 591 | struct btrfs_root *root, struct inode *inode, |
| 592 | struct btrfs_path *path, u64 start, u64 end, | ||
| 593 | u64 *drop_end, int drop_cache) | ||
| 581 | { | 594 | { |
| 582 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 583 | struct extent_buffer *leaf; | 595 | struct extent_buffer *leaf; |
| 584 | struct btrfs_file_extent_item *fi; | 596 | struct btrfs_file_extent_item *fi; |
| 585 | struct btrfs_path *path; | ||
| 586 | struct btrfs_key key; | 597 | struct btrfs_key key; |
| 587 | struct btrfs_key new_key; | 598 | struct btrfs_key new_key; |
| 588 | u64 ino = btrfs_ino(inode); | 599 | u64 ino = btrfs_ino(inode); |
| @@ -597,14 +608,12 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | |||
| 597 | int recow; | 608 | int recow; |
| 598 | int ret; | 609 | int ret; |
| 599 | int modify_tree = -1; | 610 | int modify_tree = -1; |
| 611 | int update_refs = (root->ref_cows || root == root->fs_info->tree_root); | ||
| 612 | int found = 0; | ||
| 600 | 613 | ||
| 601 | if (drop_cache) | 614 | if (drop_cache) |
| 602 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 615 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
| 603 | 616 | ||
| 604 | path = btrfs_alloc_path(); | ||
| 605 | if (!path) | ||
| 606 | return -ENOMEM; | ||
| 607 | |||
| 608 | if (start >= BTRFS_I(inode)->disk_i_size) | 617 | if (start >= BTRFS_I(inode)->disk_i_size) |
| 609 | modify_tree = 0; | 618 | modify_tree = 0; |
| 610 | 619 | ||
| @@ -666,6 +675,7 @@ next_slot: | |||
| 666 | goto next_slot; | 675 | goto next_slot; |
| 667 | } | 676 | } |
| 668 | 677 | ||
| 678 | found = 1; | ||
| 669 | search_start = max(key.offset, start); | 679 | search_start = max(key.offset, start); |
| 670 | if (recow || !modify_tree) { | 680 | if (recow || !modify_tree) { |
| 671 | modify_tree = -1; | 681 | modify_tree = -1; |
| @@ -707,14 +717,13 @@ next_slot: | |||
| 707 | extent_end - start); | 717 | extent_end - start); |
| 708 | btrfs_mark_buffer_dirty(leaf); | 718 | btrfs_mark_buffer_dirty(leaf); |
| 709 | 719 | ||
| 710 | if (disk_bytenr > 0) { | 720 | if (update_refs && disk_bytenr > 0) { |
| 711 | ret = btrfs_inc_extent_ref(trans, root, | 721 | ret = btrfs_inc_extent_ref(trans, root, |
| 712 | disk_bytenr, num_bytes, 0, | 722 | disk_bytenr, num_bytes, 0, |
| 713 | root->root_key.objectid, | 723 | root->root_key.objectid, |
| 714 | new_key.objectid, | 724 | new_key.objectid, |
| 715 | start - extent_offset, 0); | 725 | start - extent_offset, 0); |
| 716 | BUG_ON(ret); /* -ENOMEM */ | 726 | BUG_ON(ret); /* -ENOMEM */ |
| 717 | *hint_byte = disk_bytenr; | ||
| 718 | } | 727 | } |
| 719 | key.offset = start; | 728 | key.offset = start; |
| 720 | } | 729 | } |
| @@ -734,10 +743,8 @@ next_slot: | |||
| 734 | btrfs_set_file_extent_num_bytes(leaf, fi, | 743 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 735 | extent_end - end); | 744 | extent_end - end); |
| 736 | btrfs_mark_buffer_dirty(leaf); | 745 | btrfs_mark_buffer_dirty(leaf); |
| 737 | if (disk_bytenr > 0) { | 746 | if (update_refs && disk_bytenr > 0) |
| 738 | inode_sub_bytes(inode, end - key.offset); | 747 | inode_sub_bytes(inode, end - key.offset); |
| 739 | *hint_byte = disk_bytenr; | ||
| 740 | } | ||
| 741 | break; | 748 | break; |
| 742 | } | 749 | } |
| 743 | 750 | ||
| @@ -753,10 +760,8 @@ next_slot: | |||
| 753 | btrfs_set_file_extent_num_bytes(leaf, fi, | 760 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 754 | start - key.offset); | 761 | start - key.offset); |
| 755 | btrfs_mark_buffer_dirty(leaf); | 762 | btrfs_mark_buffer_dirty(leaf); |
| 756 | if (disk_bytenr > 0) { | 763 | if (update_refs && disk_bytenr > 0) |
| 757 | inode_sub_bytes(inode, extent_end - start); | 764 | inode_sub_bytes(inode, extent_end - start); |
| 758 | *hint_byte = disk_bytenr; | ||
| 759 | } | ||
| 760 | if (end == extent_end) | 765 | if (end == extent_end) |
| 761 | break; | 766 | break; |
| 762 | 767 | ||
| @@ -777,12 +782,13 @@ next_slot: | |||
| 777 | del_nr++; | 782 | del_nr++; |
| 778 | } | 783 | } |
| 779 | 784 | ||
| 780 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 785 | if (update_refs && |
| 786 | extent_type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 781 | inode_sub_bytes(inode, | 787 | inode_sub_bytes(inode, |
| 782 | extent_end - key.offset); | 788 | extent_end - key.offset); |
| 783 | extent_end = ALIGN(extent_end, | 789 | extent_end = ALIGN(extent_end, |
| 784 | root->sectorsize); | 790 | root->sectorsize); |
| 785 | } else if (disk_bytenr > 0) { | 791 | } else if (update_refs && disk_bytenr > 0) { |
| 786 | ret = btrfs_free_extent(trans, root, | 792 | ret = btrfs_free_extent(trans, root, |
| 787 | disk_bytenr, num_bytes, 0, | 793 | disk_bytenr, num_bytes, 0, |
| 788 | root->root_key.objectid, | 794 | root->root_key.objectid, |
| @@ -791,7 +797,6 @@ next_slot: | |||
| 791 | BUG_ON(ret); /* -ENOMEM */ | 797 | BUG_ON(ret); /* -ENOMEM */ |
| 792 | inode_sub_bytes(inode, | 798 | inode_sub_bytes(inode, |
| 793 | extent_end - key.offset); | 799 | extent_end - key.offset); |
| 794 | *hint_byte = disk_bytenr; | ||
| 795 | } | 800 | } |
| 796 | 801 | ||
| 797 | if (end == extent_end) | 802 | if (end == extent_end) |
| @@ -806,7 +811,7 @@ next_slot: | |||
| 806 | del_nr); | 811 | del_nr); |
| 807 | if (ret) { | 812 | if (ret) { |
| 808 | btrfs_abort_transaction(trans, root, ret); | 813 | btrfs_abort_transaction(trans, root, ret); |
| 809 | goto out; | 814 | break; |
| 810 | } | 815 | } |
| 811 | 816 | ||
| 812 | del_nr = 0; | 817 | del_nr = 0; |
| @@ -825,7 +830,24 @@ next_slot: | |||
| 825 | btrfs_abort_transaction(trans, root, ret); | 830 | btrfs_abort_transaction(trans, root, ret); |
| 826 | } | 831 | } |
| 827 | 832 | ||
| 828 | out: | 833 | if (drop_end) |
| 834 | *drop_end = found ? min(end, extent_end) : end; | ||
| 835 | btrfs_release_path(path); | ||
| 836 | return ret; | ||
| 837 | } | ||
| 838 | |||
| 839 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||
| 840 | struct btrfs_root *root, struct inode *inode, u64 start, | ||
| 841 | u64 end, int drop_cache) | ||
| 842 | { | ||
| 843 | struct btrfs_path *path; | ||
| 844 | int ret; | ||
| 845 | |||
| 846 | path = btrfs_alloc_path(); | ||
| 847 | if (!path) | ||
| 848 | return -ENOMEM; | ||
| 849 | ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, | ||
| 850 | drop_cache); | ||
| 829 | btrfs_free_path(path); | 851 | btrfs_free_path(path); |
| 830 | return ret; | 852 | return ret; |
| 831 | } | 853 | } |
| @@ -892,8 +914,6 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
| 892 | int ret; | 914 | int ret; |
| 893 | u64 ino = btrfs_ino(inode); | 915 | u64 ino = btrfs_ino(inode); |
| 894 | 916 | ||
| 895 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
| 896 | |||
| 897 | path = btrfs_alloc_path(); | 917 | path = btrfs_alloc_path(); |
| 898 | if (!path) | 918 | if (!path) |
| 899 | return -ENOMEM; | 919 | return -ENOMEM; |
| @@ -935,12 +955,16 @@ again: | |||
| 935 | btrfs_set_item_key_safe(trans, root, path, &new_key); | 955 | btrfs_set_item_key_safe(trans, root, path, &new_key); |
| 936 | fi = btrfs_item_ptr(leaf, path->slots[0], | 956 | fi = btrfs_item_ptr(leaf, path->slots[0], |
| 937 | struct btrfs_file_extent_item); | 957 | struct btrfs_file_extent_item); |
| 958 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 959 | trans->transid); | ||
| 938 | btrfs_set_file_extent_num_bytes(leaf, fi, | 960 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 939 | extent_end - end); | 961 | extent_end - end); |
| 940 | btrfs_set_file_extent_offset(leaf, fi, | 962 | btrfs_set_file_extent_offset(leaf, fi, |
| 941 | end - orig_offset); | 963 | end - orig_offset); |
| 942 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, | 964 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, |
| 943 | struct btrfs_file_extent_item); | 965 | struct btrfs_file_extent_item); |
| 966 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 967 | trans->transid); | ||
| 944 | btrfs_set_file_extent_num_bytes(leaf, fi, | 968 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 945 | end - other_start); | 969 | end - other_start); |
| 946 | btrfs_mark_buffer_dirty(leaf); | 970 | btrfs_mark_buffer_dirty(leaf); |
| @@ -958,12 +982,16 @@ again: | |||
| 958 | struct btrfs_file_extent_item); | 982 | struct btrfs_file_extent_item); |
| 959 | btrfs_set_file_extent_num_bytes(leaf, fi, | 983 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 960 | start - key.offset); | 984 | start - key.offset); |
| 985 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 986 | trans->transid); | ||
| 961 | path->slots[0]++; | 987 | path->slots[0]++; |
| 962 | new_key.offset = start; | 988 | new_key.offset = start; |
| 963 | btrfs_set_item_key_safe(trans, root, path, &new_key); | 989 | btrfs_set_item_key_safe(trans, root, path, &new_key); |
| 964 | 990 | ||
| 965 | fi = btrfs_item_ptr(leaf, path->slots[0], | 991 | fi = btrfs_item_ptr(leaf, path->slots[0], |
| 966 | struct btrfs_file_extent_item); | 992 | struct btrfs_file_extent_item); |
| 993 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 994 | trans->transid); | ||
| 967 | btrfs_set_file_extent_num_bytes(leaf, fi, | 995 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 968 | other_end - start); | 996 | other_end - start); |
| 969 | btrfs_set_file_extent_offset(leaf, fi, | 997 | btrfs_set_file_extent_offset(leaf, fi, |
| @@ -991,12 +1019,14 @@ again: | |||
| 991 | leaf = path->nodes[0]; | 1019 | leaf = path->nodes[0]; |
| 992 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, | 1020 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, |
| 993 | struct btrfs_file_extent_item); | 1021 | struct btrfs_file_extent_item); |
| 1022 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 994 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1023 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 995 | split - key.offset); | 1024 | split - key.offset); |
| 996 | 1025 | ||
| 997 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1026 | fi = btrfs_item_ptr(leaf, path->slots[0], |
| 998 | struct btrfs_file_extent_item); | 1027 | struct btrfs_file_extent_item); |
| 999 | 1028 | ||
| 1029 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 1000 | btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); | 1030 | btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); |
| 1001 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1031 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 1002 | extent_end - split); | 1032 | extent_end - split); |
| @@ -1056,12 +1086,14 @@ again: | |||
| 1056 | struct btrfs_file_extent_item); | 1086 | struct btrfs_file_extent_item); |
| 1057 | btrfs_set_file_extent_type(leaf, fi, | 1087 | btrfs_set_file_extent_type(leaf, fi, |
| 1058 | BTRFS_FILE_EXTENT_REG); | 1088 | BTRFS_FILE_EXTENT_REG); |
| 1089 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 1059 | btrfs_mark_buffer_dirty(leaf); | 1090 | btrfs_mark_buffer_dirty(leaf); |
| 1060 | } else { | 1091 | } else { |
| 1061 | fi = btrfs_item_ptr(leaf, del_slot - 1, | 1092 | fi = btrfs_item_ptr(leaf, del_slot - 1, |
| 1062 | struct btrfs_file_extent_item); | 1093 | struct btrfs_file_extent_item); |
| 1063 | btrfs_set_file_extent_type(leaf, fi, | 1094 | btrfs_set_file_extent_type(leaf, fi, |
| 1064 | BTRFS_FILE_EXTENT_REG); | 1095 | BTRFS_FILE_EXTENT_REG); |
| 1096 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 1065 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1097 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 1066 | extent_end - key.offset); | 1098 | extent_end - key.offset); |
| 1067 | btrfs_mark_buffer_dirty(leaf); | 1099 | btrfs_mark_buffer_dirty(leaf); |
| @@ -1173,8 +1205,8 @@ again: | |||
| 1173 | 1205 | ||
| 1174 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, | 1206 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, |
| 1175 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 1207 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
| 1176 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | 1208 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
| 1177 | GFP_NOFS); | 1209 | 0, 0, &cached_state, GFP_NOFS); |
| 1178 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1210 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
| 1179 | start_pos, last_pos - 1, &cached_state, | 1211 | start_pos, last_pos - 1, &cached_state, |
| 1180 | GFP_NOFS); | 1212 | GFP_NOFS); |
| @@ -1514,16 +1546,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1514 | 1546 | ||
| 1515 | trace_btrfs_sync_file(file, datasync); | 1547 | trace_btrfs_sync_file(file, datasync); |
| 1516 | 1548 | ||
| 1549 | /* | ||
| 1550 | * We write the dirty pages in the range and wait until they complete | ||
| 1551 | * out of the ->i_mutex. If so, we can flush the dirty pages by | ||
| 1552 | * multi-task, and make the performance up. | ||
| 1553 | */ | ||
| 1554 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
| 1555 | if (ret) | ||
| 1556 | return ret; | ||
| 1557 | |||
| 1517 | mutex_lock(&inode->i_mutex); | 1558 | mutex_lock(&inode->i_mutex); |
| 1518 | 1559 | ||
| 1519 | /* | 1560 | /* |
| 1520 | * we wait first, since the writeback may change the inode, also wait | 1561 | * We flush the dirty pages again to avoid some dirty pages in the |
| 1521 | * ordered range does a filemape_write_and_wait_range which is why we | 1562 | * range being left. |
| 1522 | * don't do it above like other file systems. | ||
| 1523 | */ | 1563 | */ |
| 1524 | root->log_batch++; | 1564 | atomic_inc(&root->log_batch); |
| 1525 | btrfs_wait_ordered_range(inode, start, end); | 1565 | btrfs_wait_ordered_range(inode, start, end); |
| 1526 | root->log_batch++; | 1566 | atomic_inc(&root->log_batch); |
| 1527 | 1567 | ||
| 1528 | /* | 1568 | /* |
| 1529 | * check the transaction that last modified this inode | 1569 | * check the transaction that last modified this inode |
| @@ -1544,6 +1584,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1544 | BTRFS_I(inode)->last_trans <= | 1584 | BTRFS_I(inode)->last_trans <= |
| 1545 | root->fs_info->last_trans_committed) { | 1585 | root->fs_info->last_trans_committed) { |
| 1546 | BTRFS_I(inode)->last_trans = 0; | 1586 | BTRFS_I(inode)->last_trans = 0; |
| 1587 | |||
| 1588 | /* | ||
| 1589 | * We'v had everything committed since the last time we were | ||
| 1590 | * modified so clear this flag in case it was set for whatever | ||
| 1591 | * reason, it's no longer relevant. | ||
| 1592 | */ | ||
| 1593 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 1594 | &BTRFS_I(inode)->runtime_flags); | ||
| 1547 | mutex_unlock(&inode->i_mutex); | 1595 | mutex_unlock(&inode->i_mutex); |
| 1548 | goto out; | 1596 | goto out; |
| 1549 | } | 1597 | } |
| @@ -1599,6 +1647,7 @@ out: | |||
| 1599 | static const struct vm_operations_struct btrfs_file_vm_ops = { | 1647 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
| 1600 | .fault = filemap_fault, | 1648 | .fault = filemap_fault, |
| 1601 | .page_mkwrite = btrfs_page_mkwrite, | 1649 | .page_mkwrite = btrfs_page_mkwrite, |
| 1650 | .remap_pages = generic_file_remap_pages, | ||
| 1602 | }; | 1651 | }; |
| 1603 | 1652 | ||
| 1604 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | 1653 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) |
| @@ -1610,11 +1659,328 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
| 1610 | 1659 | ||
| 1611 | file_accessed(filp); | 1660 | file_accessed(filp); |
| 1612 | vma->vm_ops = &btrfs_file_vm_ops; | 1661 | vma->vm_ops = &btrfs_file_vm_ops; |
| 1613 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
| 1614 | 1662 | ||
| 1615 | return 0; | 1663 | return 0; |
| 1616 | } | 1664 | } |
| 1617 | 1665 | ||
| 1666 | static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, | ||
| 1667 | int slot, u64 start, u64 end) | ||
| 1668 | { | ||
| 1669 | struct btrfs_file_extent_item *fi; | ||
| 1670 | struct btrfs_key key; | ||
| 1671 | |||
| 1672 | if (slot < 0 || slot >= btrfs_header_nritems(leaf)) | ||
| 1673 | return 0; | ||
| 1674 | |||
| 1675 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 1676 | if (key.objectid != btrfs_ino(inode) || | ||
| 1677 | key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 1678 | return 0; | ||
| 1679 | |||
| 1680 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
| 1681 | |||
| 1682 | if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) | ||
| 1683 | return 0; | ||
| 1684 | |||
| 1685 | if (btrfs_file_extent_disk_bytenr(leaf, fi)) | ||
| 1686 | return 0; | ||
| 1687 | |||
| 1688 | if (key.offset == end) | ||
| 1689 | return 1; | ||
| 1690 | if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) | ||
| 1691 | return 1; | ||
| 1692 | return 0; | ||
| 1693 | } | ||
| 1694 | |||
| 1695 | static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, | ||
| 1696 | struct btrfs_path *path, u64 offset, u64 end) | ||
| 1697 | { | ||
| 1698 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1699 | struct extent_buffer *leaf; | ||
| 1700 | struct btrfs_file_extent_item *fi; | ||
| 1701 | struct extent_map *hole_em; | ||
| 1702 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 1703 | struct btrfs_key key; | ||
| 1704 | int ret; | ||
| 1705 | |||
| 1706 | key.objectid = btrfs_ino(inode); | ||
| 1707 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
| 1708 | key.offset = offset; | ||
| 1709 | |||
| 1710 | |||
| 1711 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
| 1712 | if (ret < 0) | ||
| 1713 | return ret; | ||
| 1714 | BUG_ON(!ret); | ||
| 1715 | |||
| 1716 | leaf = path->nodes[0]; | ||
| 1717 | if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) { | ||
| 1718 | u64 num_bytes; | ||
| 1719 | |||
| 1720 | path->slots[0]--; | ||
| 1721 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 1722 | struct btrfs_file_extent_item); | ||
| 1723 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + | ||
| 1724 | end - offset; | ||
| 1725 | btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); | ||
| 1726 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | ||
| 1727 | btrfs_set_file_extent_offset(leaf, fi, 0); | ||
| 1728 | btrfs_mark_buffer_dirty(leaf); | ||
| 1729 | goto out; | ||
| 1730 | } | ||
| 1731 | |||
| 1732 | if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { | ||
| 1733 | u64 num_bytes; | ||
| 1734 | |||
| 1735 | path->slots[0]++; | ||
| 1736 | key.offset = offset; | ||
| 1737 | btrfs_set_item_key_safe(trans, root, path, &key); | ||
| 1738 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 1739 | struct btrfs_file_extent_item); | ||
| 1740 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - | ||
| 1741 | offset; | ||
| 1742 | btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); | ||
| 1743 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | ||
| 1744 | btrfs_set_file_extent_offset(leaf, fi, 0); | ||
| 1745 | btrfs_mark_buffer_dirty(leaf); | ||
| 1746 | goto out; | ||
| 1747 | } | ||
| 1748 | btrfs_release_path(path); | ||
| 1749 | |||
| 1750 | ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, | ||
| 1751 | 0, 0, end - offset, 0, end - offset, | ||
| 1752 | 0, 0, 0); | ||
| 1753 | if (ret) | ||
| 1754 | return ret; | ||
| 1755 | |||
| 1756 | out: | ||
| 1757 | btrfs_release_path(path); | ||
| 1758 | |||
| 1759 | hole_em = alloc_extent_map(); | ||
| 1760 | if (!hole_em) { | ||
| 1761 | btrfs_drop_extent_cache(inode, offset, end - 1, 0); | ||
| 1762 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 1763 | &BTRFS_I(inode)->runtime_flags); | ||
| 1764 | } else { | ||
| 1765 | hole_em->start = offset; | ||
| 1766 | hole_em->len = end - offset; | ||
| 1767 | hole_em->orig_start = offset; | ||
| 1768 | |||
| 1769 | hole_em->block_start = EXTENT_MAP_HOLE; | ||
| 1770 | hole_em->block_len = 0; | ||
| 1771 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 1772 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | ||
| 1773 | hole_em->generation = trans->transid; | ||
| 1774 | |||
| 1775 | do { | ||
| 1776 | btrfs_drop_extent_cache(inode, offset, end - 1, 0); | ||
| 1777 | write_lock(&em_tree->lock); | ||
| 1778 | ret = add_extent_mapping(em_tree, hole_em); | ||
| 1779 | if (!ret) | ||
| 1780 | list_move(&hole_em->list, | ||
| 1781 | &em_tree->modified_extents); | ||
| 1782 | write_unlock(&em_tree->lock); | ||
| 1783 | } while (ret == -EEXIST); | ||
| 1784 | free_extent_map(hole_em); | ||
| 1785 | if (ret) | ||
| 1786 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 1787 | &BTRFS_I(inode)->runtime_flags); | ||
| 1788 | } | ||
| 1789 | |||
| 1790 | return 0; | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | ||
| 1794 | { | ||
| 1795 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1796 | struct extent_state *cached_state = NULL; | ||
| 1797 | struct btrfs_path *path; | ||
| 1798 | struct btrfs_block_rsv *rsv; | ||
| 1799 | struct btrfs_trans_handle *trans; | ||
| 1800 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
| 1801 | u64 lockstart = (offset + mask) & ~mask; | ||
| 1802 | u64 lockend = ((offset + len) & ~mask) - 1; | ||
| 1803 | u64 cur_offset = lockstart; | ||
| 1804 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
| 1805 | u64 drop_end; | ||
| 1806 | unsigned long nr; | ||
| 1807 | int ret = 0; | ||
| 1808 | int err = 0; | ||
| 1809 | bool same_page = (offset >> PAGE_CACHE_SHIFT) == | ||
| 1810 | ((offset + len) >> PAGE_CACHE_SHIFT); | ||
| 1811 | |||
| 1812 | btrfs_wait_ordered_range(inode, offset, len); | ||
| 1813 | |||
| 1814 | mutex_lock(&inode->i_mutex); | ||
| 1815 | if (offset >= inode->i_size) { | ||
| 1816 | mutex_unlock(&inode->i_mutex); | ||
| 1817 | return 0; | ||
| 1818 | } | ||
| 1819 | |||
| 1820 | /* | ||
| 1821 | * Only do this if we are in the same page and we aren't doing the | ||
| 1822 | * entire page. | ||
| 1823 | */ | ||
| 1824 | if (same_page && len < PAGE_CACHE_SIZE) { | ||
| 1825 | ret = btrfs_truncate_page(inode, offset, len, 0); | ||
| 1826 | mutex_unlock(&inode->i_mutex); | ||
| 1827 | return ret; | ||
| 1828 | } | ||
| 1829 | |||
| 1830 | /* zero back part of the first page */ | ||
| 1831 | ret = btrfs_truncate_page(inode, offset, 0, 0); | ||
| 1832 | if (ret) { | ||
| 1833 | mutex_unlock(&inode->i_mutex); | ||
| 1834 | return ret; | ||
| 1835 | } | ||
| 1836 | |||
| 1837 | /* zero the front end of the last page */ | ||
| 1838 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | ||
| 1839 | if (ret) { | ||
| 1840 | mutex_unlock(&inode->i_mutex); | ||
| 1841 | return ret; | ||
| 1842 | } | ||
| 1843 | |||
| 1844 | if (lockend < lockstart) { | ||
| 1845 | mutex_unlock(&inode->i_mutex); | ||
| 1846 | return 0; | ||
| 1847 | } | ||
| 1848 | |||
| 1849 | while (1) { | ||
| 1850 | struct btrfs_ordered_extent *ordered; | ||
| 1851 | |||
| 1852 | truncate_pagecache_range(inode, lockstart, lockend); | ||
| 1853 | |||
| 1854 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
| 1855 | 0, &cached_state); | ||
| 1856 | ordered = btrfs_lookup_first_ordered_extent(inode, lockend); | ||
| 1857 | |||
| 1858 | /* | ||
| 1859 | * We need to make sure we have no ordered extents in this range | ||
| 1860 | * and nobody raced in and read a page in this range, if we did | ||
| 1861 | * we need to try again. | ||
| 1862 | */ | ||
| 1863 | if ((!ordered || | ||
| 1864 | (ordered->file_offset + ordered->len < lockstart || | ||
| 1865 | ordered->file_offset > lockend)) && | ||
| 1866 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
| 1867 | lockend, EXTENT_UPTODATE, 0, | ||
| 1868 | cached_state)) { | ||
| 1869 | if (ordered) | ||
| 1870 | btrfs_put_ordered_extent(ordered); | ||
| 1871 | break; | ||
| 1872 | } | ||
| 1873 | if (ordered) | ||
| 1874 | btrfs_put_ordered_extent(ordered); | ||
| 1875 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, | ||
| 1876 | lockend, &cached_state, GFP_NOFS); | ||
| 1877 | btrfs_wait_ordered_range(inode, lockstart, | ||
| 1878 | lockend - lockstart + 1); | ||
| 1879 | } | ||
| 1880 | |||
| 1881 | path = btrfs_alloc_path(); | ||
| 1882 | if (!path) { | ||
| 1883 | ret = -ENOMEM; | ||
| 1884 | goto out; | ||
| 1885 | } | ||
| 1886 | |||
| 1887 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); | ||
| 1888 | if (!rsv) { | ||
| 1889 | ret = -ENOMEM; | ||
| 1890 | goto out_free; | ||
| 1891 | } | ||
| 1892 | rsv->size = btrfs_calc_trunc_metadata_size(root, 1); | ||
| 1893 | rsv->failfast = 1; | ||
| 1894 | |||
| 1895 | /* | ||
| 1896 | * 1 - update the inode | ||
| 1897 | * 1 - removing the extents in the range | ||
| 1898 | * 1 - adding the hole extent | ||
| 1899 | */ | ||
| 1900 | trans = btrfs_start_transaction(root, 3); | ||
| 1901 | if (IS_ERR(trans)) { | ||
| 1902 | err = PTR_ERR(trans); | ||
| 1903 | goto out_free; | ||
| 1904 | } | ||
| 1905 | |||
| 1906 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, | ||
| 1907 | min_size); | ||
| 1908 | BUG_ON(ret); | ||
| 1909 | trans->block_rsv = rsv; | ||
| 1910 | |||
| 1911 | while (cur_offset < lockend) { | ||
| 1912 | ret = __btrfs_drop_extents(trans, root, inode, path, | ||
| 1913 | cur_offset, lockend + 1, | ||
| 1914 | &drop_end, 1); | ||
| 1915 | if (ret != -ENOSPC) | ||
| 1916 | break; | ||
| 1917 | |||
| 1918 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 1919 | |||
| 1920 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | ||
| 1921 | if (ret) { | ||
| 1922 | err = ret; | ||
| 1923 | break; | ||
| 1924 | } | ||
| 1925 | |||
| 1926 | cur_offset = drop_end; | ||
| 1927 | |||
| 1928 | ret = btrfs_update_inode(trans, root, inode); | ||
| 1929 | if (ret) { | ||
| 1930 | err = ret; | ||
| 1931 | break; | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | nr = trans->blocks_used; | ||
| 1935 | btrfs_end_transaction(trans, root); | ||
| 1936 | btrfs_btree_balance_dirty(root, nr); | ||
| 1937 | |||
| 1938 | trans = btrfs_start_transaction(root, 3); | ||
| 1939 | if (IS_ERR(trans)) { | ||
| 1940 | ret = PTR_ERR(trans); | ||
| 1941 | trans = NULL; | ||
| 1942 | break; | ||
| 1943 | } | ||
| 1944 | |||
| 1945 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | ||
| 1946 | rsv, min_size); | ||
| 1947 | BUG_ON(ret); /* shouldn't happen */ | ||
| 1948 | trans->block_rsv = rsv; | ||
| 1949 | } | ||
| 1950 | |||
| 1951 | if (ret) { | ||
| 1952 | err = ret; | ||
| 1953 | goto out_trans; | ||
| 1954 | } | ||
| 1955 | |||
| 1956 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 1957 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | ||
| 1958 | if (ret) { | ||
| 1959 | err = ret; | ||
| 1960 | goto out_trans; | ||
| 1961 | } | ||
| 1962 | |||
| 1963 | out_trans: | ||
| 1964 | if (!trans) | ||
| 1965 | goto out_free; | ||
| 1966 | |||
| 1967 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 1968 | ret = btrfs_update_inode(trans, root, inode); | ||
| 1969 | nr = trans->blocks_used; | ||
| 1970 | btrfs_end_transaction(trans, root); | ||
| 1971 | btrfs_btree_balance_dirty(root, nr); | ||
| 1972 | out_free: | ||
| 1973 | btrfs_free_path(path); | ||
| 1974 | btrfs_free_block_rsv(root, rsv); | ||
| 1975 | out: | ||
| 1976 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
| 1977 | &cached_state, GFP_NOFS); | ||
| 1978 | mutex_unlock(&inode->i_mutex); | ||
| 1979 | if (ret && !err) | ||
| 1980 | err = ret; | ||
| 1981 | return err; | ||
| 1982 | } | ||
| 1983 | |||
| 1618 | static long btrfs_fallocate(struct file *file, int mode, | 1984 | static long btrfs_fallocate(struct file *file, int mode, |
| 1619 | loff_t offset, loff_t len) | 1985 | loff_t offset, loff_t len) |
| 1620 | { | 1986 | { |
| @@ -1633,15 +1999,18 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1633 | alloc_start = offset & ~mask; | 1999 | alloc_start = offset & ~mask; |
| 1634 | alloc_end = (offset + len + mask) & ~mask; | 2000 | alloc_end = (offset + len + mask) & ~mask; |
| 1635 | 2001 | ||
| 1636 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | 2002 | /* Make sure we aren't being give some crap mode */ |
| 1637 | if (mode & ~FALLOC_FL_KEEP_SIZE) | 2003 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
| 1638 | return -EOPNOTSUPP; | 2004 | return -EOPNOTSUPP; |
| 1639 | 2005 | ||
| 2006 | if (mode & FALLOC_FL_PUNCH_HOLE) | ||
| 2007 | return btrfs_punch_hole(inode, offset, len); | ||
| 2008 | |||
| 1640 | /* | 2009 | /* |
| 1641 | * Make sure we have enough space before we do the | 2010 | * Make sure we have enough space before we do the |
| 1642 | * allocation. | 2011 | * allocation. |
| 1643 | */ | 2012 | */ |
| 1644 | ret = btrfs_check_data_free_space(inode, len); | 2013 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start + 1); |
| 1645 | if (ret) | 2014 | if (ret) |
| 1646 | return ret; | 2015 | return ret; |
| 1647 | 2016 | ||
| @@ -1748,7 +2117,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1748 | out: | 2117 | out: |
| 1749 | mutex_unlock(&inode->i_mutex); | 2118 | mutex_unlock(&inode->i_mutex); |
| 1750 | /* Let go of our reservation. */ | 2119 | /* Let go of our reservation. */ |
| 1751 | btrfs_free_reserved_data_space(inode, len); | 2120 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start + 1); |
| 1752 | return ret; | 2121 | return ret; |
| 1753 | } | 2122 | } |
| 1754 | 2123 | ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6b10acfc2f5c..1027b854b90c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -966,7 +966,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 966 | block_group->key.offset)) { | 966 | block_group->key.offset)) { |
| 967 | ret = find_first_extent_bit(unpin, start, | 967 | ret = find_first_extent_bit(unpin, start, |
| 968 | &extent_start, &extent_end, | 968 | &extent_start, &extent_end, |
| 969 | EXTENT_DIRTY); | 969 | EXTENT_DIRTY, NULL); |
| 970 | if (ret) { | 970 | if (ret) { |
| 971 | ret = 0; | 971 | ret = 0; |
| 972 | break; | 972 | break; |
| @@ -1454,9 +1454,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl, | |||
| 1454 | max_t(u64, *offset, bitmap_info->offset)); | 1454 | max_t(u64, *offset, bitmap_info->offset)); |
| 1455 | bits = bytes_to_bits(*bytes, ctl->unit); | 1455 | bits = bytes_to_bits(*bytes, ctl->unit); |
| 1456 | 1456 | ||
| 1457 | for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); | 1457 | for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { |
| 1458 | i < BITS_PER_BITMAP; | ||
| 1459 | i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) { | ||
| 1460 | next_zero = find_next_zero_bit(bitmap_info->bitmap, | 1458 | next_zero = find_next_zero_bit(bitmap_info->bitmap, |
| 1461 | BITS_PER_BITMAP, i); | 1459 | BITS_PER_BITMAP, i); |
| 1462 | if ((next_zero - i) >= bits) { | 1460 | if ((next_zero - i) >= bits) { |
| @@ -2307,9 +2305,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
| 2307 | 2305 | ||
| 2308 | again: | 2306 | again: |
| 2309 | found_bits = 0; | 2307 | found_bits = 0; |
| 2310 | for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); | 2308 | for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { |
| 2311 | i < BITS_PER_BITMAP; | ||
| 2312 | i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { | ||
| 2313 | next_zero = find_next_zero_bit(entry->bitmap, | 2309 | next_zero = find_next_zero_bit(entry->bitmap, |
| 2314 | BITS_PER_BITMAP, i); | 2310 | BITS_PER_BITMAP, i); |
| 2315 | if (next_zero - i >= min_bits) { | 2311 | if (next_zero - i >= min_bits) { |
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index db2ff9773b99..1d982812ab67 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h | |||
| @@ -24,4 +24,14 @@ static inline u64 btrfs_name_hash(const char *name, int len) | |||
| 24 | { | 24 | { |
| 25 | return crc32c((u32)~1, name, len); | 25 | return crc32c((u32)~1, name, len); |
| 26 | } | 26 | } |
| 27 | |||
| 28 | /* | ||
| 29 | * Figure the key offset of an extended inode ref | ||
| 30 | */ | ||
| 31 | static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, | ||
| 32 | int len) | ||
| 33 | { | ||
| 34 | return (u64) crc32c(parent_objectid, name, len); | ||
| 35 | } | ||
| 36 | |||
| 27 | #endif | 37 | #endif |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index a13cf1a96c73..48b8fda93132 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | #include "ctree.h" | 19 | #include "ctree.h" |
| 20 | #include "disk-io.h" | 20 | #include "disk-io.h" |
| 21 | #include "hash.h" | ||
| 21 | #include "transaction.h" | 22 | #include "transaction.h" |
| 22 | #include "print-tree.h" | 23 | #include "print-tree.h" |
| 23 | 24 | ||
| @@ -50,18 +51,57 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
| 50 | return 0; | 51 | return 0; |
| 51 | } | 52 | } |
| 52 | 53 | ||
| 53 | struct btrfs_inode_ref * | 54 | int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid, |
| 55 | const char *name, int name_len, | ||
| 56 | struct btrfs_inode_extref **extref_ret) | ||
| 57 | { | ||
| 58 | struct extent_buffer *leaf; | ||
| 59 | struct btrfs_inode_extref *extref; | ||
| 60 | unsigned long ptr; | ||
| 61 | unsigned long name_ptr; | ||
| 62 | u32 item_size; | ||
| 63 | u32 cur_offset = 0; | ||
| 64 | int ref_name_len; | ||
| 65 | |||
| 66 | leaf = path->nodes[0]; | ||
| 67 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 68 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 69 | |||
| 70 | /* | ||
| 71 | * Search all extended backrefs in this item. We're only | ||
| 72 | * looking through any collisions so most of the time this is | ||
| 73 | * just going to compare against one buffer. If all is well, | ||
| 74 | * we'll return success and the inode ref object. | ||
| 75 | */ | ||
| 76 | while (cur_offset < item_size) { | ||
| 77 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); | ||
| 78 | name_ptr = (unsigned long)(&extref->name); | ||
| 79 | ref_name_len = btrfs_inode_extref_name_len(leaf, extref); | ||
| 80 | |||
| 81 | if (ref_name_len == name_len && | ||
| 82 | btrfs_inode_extref_parent(leaf, extref) == ref_objectid && | ||
| 83 | (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)) { | ||
| 84 | if (extref_ret) | ||
| 85 | *extref_ret = extref; | ||
| 86 | return 1; | ||
| 87 | } | ||
| 88 | |||
| 89 | cur_offset += ref_name_len + sizeof(*extref); | ||
| 90 | } | ||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | |||
| 94 | static struct btrfs_inode_ref * | ||
| 54 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | 95 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, |
| 55 | struct btrfs_root *root, | 96 | struct btrfs_root *root, |
| 56 | struct btrfs_path *path, | 97 | struct btrfs_path *path, |
| 57 | const char *name, int name_len, | 98 | const char *name, int name_len, |
| 58 | u64 inode_objectid, u64 ref_objectid, int mod) | 99 | u64 inode_objectid, u64 ref_objectid, int ins_len, |
| 100 | int cow) | ||
| 59 | { | 101 | { |
| 102 | int ret; | ||
| 60 | struct btrfs_key key; | 103 | struct btrfs_key key; |
| 61 | struct btrfs_inode_ref *ref; | 104 | struct btrfs_inode_ref *ref; |
| 62 | int ins_len = mod < 0 ? -1 : 0; | ||
| 63 | int cow = mod != 0; | ||
| 64 | int ret; | ||
| 65 | 105 | ||
| 66 | key.objectid = inode_objectid; | 106 | key.objectid = inode_objectid; |
| 67 | key.type = BTRFS_INODE_REF_KEY; | 107 | key.type = BTRFS_INODE_REF_KEY; |
| @@ -77,13 +117,150 @@ btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | |||
| 77 | return ref; | 117 | return ref; |
| 78 | } | 118 | } |
| 79 | 119 | ||
| 80 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 120 | /* Returns NULL if no extref found */ |
| 121 | struct btrfs_inode_extref * | ||
| 122 | btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, | ||
| 123 | struct btrfs_root *root, | ||
| 124 | struct btrfs_path *path, | ||
| 125 | const char *name, int name_len, | ||
| 126 | u64 inode_objectid, u64 ref_objectid, int ins_len, | ||
| 127 | int cow) | ||
| 128 | { | ||
| 129 | int ret; | ||
| 130 | struct btrfs_key key; | ||
| 131 | struct btrfs_inode_extref *extref; | ||
| 132 | |||
| 133 | key.objectid = inode_objectid; | ||
| 134 | key.type = BTRFS_INODE_EXTREF_KEY; | ||
| 135 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
| 136 | |||
| 137 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
| 138 | if (ret < 0) | ||
| 139 | return ERR_PTR(ret); | ||
| 140 | if (ret > 0) | ||
| 141 | return NULL; | ||
| 142 | if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref)) | ||
| 143 | return NULL; | ||
| 144 | return extref; | ||
| 145 | } | ||
| 146 | |||
| 147 | int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans, | ||
| 148 | struct btrfs_root *root, | ||
| 149 | struct btrfs_path *path, | ||
| 150 | const char *name, int name_len, | ||
| 151 | u64 inode_objectid, u64 ref_objectid, int mod, | ||
| 152 | u64 *ret_index) | ||
| 153 | { | ||
| 154 | struct btrfs_inode_ref *ref; | ||
| 155 | struct btrfs_inode_extref *extref; | ||
| 156 | int ins_len = mod < 0 ? -1 : 0; | ||
| 157 | int cow = mod != 0; | ||
| 158 | |||
| 159 | ref = btrfs_lookup_inode_ref(trans, root, path, name, name_len, | ||
| 160 | inode_objectid, ref_objectid, ins_len, | ||
| 161 | cow); | ||
| 162 | if (IS_ERR(ref)) | ||
| 163 | return PTR_ERR(ref); | ||
| 164 | |||
| 165 | if (ref != NULL) { | ||
| 166 | *ret_index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | |||
| 170 | btrfs_release_path(path); | ||
| 171 | |||
| 172 | extref = btrfs_lookup_inode_extref(trans, root, path, name, | ||
| 173 | name_len, inode_objectid, | ||
| 174 | ref_objectid, ins_len, cow); | ||
| 175 | if (IS_ERR(extref)) | ||
| 176 | return PTR_ERR(extref); | ||
| 177 | |||
| 178 | if (extref) { | ||
| 179 | *ret_index = btrfs_inode_extref_index(path->nodes[0], extref); | ||
| 180 | return 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | return -ENOENT; | ||
| 184 | } | ||
| 185 | |||
| 186 | int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, | ||
| 81 | struct btrfs_root *root, | 187 | struct btrfs_root *root, |
| 82 | const char *name, int name_len, | 188 | const char *name, int name_len, |
| 83 | u64 inode_objectid, u64 ref_objectid, u64 *index) | 189 | u64 inode_objectid, u64 ref_objectid, u64 *index) |
| 84 | { | 190 | { |
| 85 | struct btrfs_path *path; | 191 | struct btrfs_path *path; |
| 86 | struct btrfs_key key; | 192 | struct btrfs_key key; |
| 193 | struct btrfs_inode_extref *extref; | ||
| 194 | struct extent_buffer *leaf; | ||
| 195 | int ret; | ||
| 196 | int del_len = name_len + sizeof(*extref); | ||
| 197 | unsigned long ptr; | ||
| 198 | unsigned long item_start; | ||
| 199 | u32 item_size; | ||
| 200 | |||
| 201 | key.objectid = inode_objectid; | ||
| 202 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); | ||
| 203 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
| 204 | |||
| 205 | path = btrfs_alloc_path(); | ||
| 206 | if (!path) | ||
| 207 | return -ENOMEM; | ||
| 208 | |||
| 209 | path->leave_spinning = 1; | ||
| 210 | |||
| 211 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 212 | if (ret > 0) | ||
| 213 | ret = -ENOENT; | ||
| 214 | if (ret < 0) | ||
| 215 | goto out; | ||
| 216 | |||
| 217 | /* | ||
| 218 | * Sanity check - did we find the right item for this name? | ||
| 219 | * This should always succeed so error here will make the FS | ||
| 220 | * readonly. | ||
| 221 | */ | ||
| 222 | if (!btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
| 223 | name, name_len, &extref)) { | ||
| 224 | btrfs_std_error(root->fs_info, -ENOENT); | ||
| 225 | ret = -EROFS; | ||
| 226 | goto out; | ||
| 227 | } | ||
| 228 | |||
| 229 | leaf = path->nodes[0]; | ||
| 230 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 231 | if (index) | ||
| 232 | *index = btrfs_inode_extref_index(leaf, extref); | ||
| 233 | |||
| 234 | if (del_len == item_size) { | ||
| 235 | /* | ||
| 236 | * Common case only one ref in the item, remove the | ||
| 237 | * whole item. | ||
| 238 | */ | ||
| 239 | ret = btrfs_del_item(trans, root, path); | ||
| 240 | goto out; | ||
| 241 | } | ||
| 242 | |||
| 243 | ptr = (unsigned long)extref; | ||
| 244 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 245 | |||
| 246 | memmove_extent_buffer(leaf, ptr, ptr + del_len, | ||
| 247 | item_size - (ptr + del_len - item_start)); | ||
| 248 | |||
| 249 | btrfs_truncate_item(trans, root, path, item_size - del_len, 1); | ||
| 250 | |||
| 251 | out: | ||
| 252 | btrfs_free_path(path); | ||
| 253 | |||
| 254 | return ret; | ||
| 255 | } | ||
| 256 | |||
| 257 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | ||
| 258 | struct btrfs_root *root, | ||
| 259 | const char *name, int name_len, | ||
| 260 | u64 inode_objectid, u64 ref_objectid, u64 *index) | ||
| 261 | { | ||
| 262 | struct btrfs_path *path; | ||
| 263 | struct btrfs_key key; | ||
| 87 | struct btrfs_inode_ref *ref; | 264 | struct btrfs_inode_ref *ref; |
| 88 | struct extent_buffer *leaf; | 265 | struct extent_buffer *leaf; |
| 89 | unsigned long ptr; | 266 | unsigned long ptr; |
| @@ -91,6 +268,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 91 | u32 item_size; | 268 | u32 item_size; |
| 92 | u32 sub_item_len; | 269 | u32 sub_item_len; |
| 93 | int ret; | 270 | int ret; |
| 271 | int search_ext_refs = 0; | ||
| 94 | int del_len = name_len + sizeof(*ref); | 272 | int del_len = name_len + sizeof(*ref); |
| 95 | 273 | ||
| 96 | key.objectid = inode_objectid; | 274 | key.objectid = inode_objectid; |
| @@ -106,12 +284,14 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 106 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 284 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
| 107 | if (ret > 0) { | 285 | if (ret > 0) { |
| 108 | ret = -ENOENT; | 286 | ret = -ENOENT; |
| 287 | search_ext_refs = 1; | ||
| 109 | goto out; | 288 | goto out; |
| 110 | } else if (ret < 0) { | 289 | } else if (ret < 0) { |
| 111 | goto out; | 290 | goto out; |
| 112 | } | 291 | } |
| 113 | if (!find_name_in_backref(path, name, name_len, &ref)) { | 292 | if (!find_name_in_backref(path, name, name_len, &ref)) { |
| 114 | ret = -ENOENT; | 293 | ret = -ENOENT; |
| 294 | search_ext_refs = 1; | ||
| 115 | goto out; | 295 | goto out; |
| 116 | } | 296 | } |
| 117 | leaf = path->nodes[0]; | 297 | leaf = path->nodes[0]; |
| @@ -129,8 +309,78 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 129 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); | 309 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); |
| 130 | memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, | 310 | memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, |
| 131 | item_size - (ptr + sub_item_len - item_start)); | 311 | item_size - (ptr + sub_item_len - item_start)); |
| 132 | btrfs_truncate_item(trans, root, path, | 312 | btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1); |
| 133 | item_size - sub_item_len, 1); | 313 | out: |
| 314 | btrfs_free_path(path); | ||
| 315 | |||
| 316 | if (search_ext_refs) { | ||
| 317 | /* | ||
| 318 | * No refs were found, or we could not find the | ||
| 319 | * name in our ref array. Find and remove the extended | ||
| 320 | * inode ref then. | ||
| 321 | */ | ||
| 322 | return btrfs_del_inode_extref(trans, root, name, name_len, | ||
| 323 | inode_objectid, ref_objectid, index); | ||
| 324 | } | ||
| 325 | |||
| 326 | return ret; | ||
| 327 | } | ||
| 328 | |||
| 329 | /* | ||
| 330 | * btrfs_insert_inode_extref() - Inserts an extended inode ref into a tree. | ||
| 331 | * | ||
| 332 | * The caller must have checked against BTRFS_LINK_MAX already. | ||
| 333 | */ | ||
| 334 | static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, | ||
| 335 | struct btrfs_root *root, | ||
| 336 | const char *name, int name_len, | ||
| 337 | u64 inode_objectid, u64 ref_objectid, u64 index) | ||
| 338 | { | ||
| 339 | struct btrfs_inode_extref *extref; | ||
| 340 | int ret; | ||
| 341 | int ins_len = name_len + sizeof(*extref); | ||
| 342 | unsigned long ptr; | ||
| 343 | struct btrfs_path *path; | ||
| 344 | struct btrfs_key key; | ||
| 345 | struct extent_buffer *leaf; | ||
| 346 | struct btrfs_item *item; | ||
| 347 | |||
| 348 | key.objectid = inode_objectid; | ||
| 349 | key.type = BTRFS_INODE_EXTREF_KEY; | ||
| 350 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
| 351 | |||
| 352 | path = btrfs_alloc_path(); | ||
| 353 | if (!path) | ||
| 354 | return -ENOMEM; | ||
| 355 | |||
| 356 | path->leave_spinning = 1; | ||
| 357 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 358 | ins_len); | ||
| 359 | if (ret == -EEXIST) { | ||
| 360 | if (btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
| 361 | name, name_len, NULL)) | ||
| 362 | goto out; | ||
| 363 | |||
| 364 | btrfs_extend_item(trans, root, path, ins_len); | ||
| 365 | ret = 0; | ||
| 366 | } | ||
| 367 | if (ret < 0) | ||
| 368 | goto out; | ||
| 369 | |||
| 370 | leaf = path->nodes[0]; | ||
| 371 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
| 372 | ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char); | ||
| 373 | ptr += btrfs_item_size(leaf, item) - ins_len; | ||
| 374 | extref = (struct btrfs_inode_extref *)ptr; | ||
| 375 | |||
| 376 | btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len); | ||
| 377 | btrfs_set_inode_extref_index(path->nodes[0], extref, index); | ||
| 378 | btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid); | ||
| 379 | |||
| 380 | ptr = (unsigned long)&extref->name; | ||
| 381 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | ||
| 382 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 383 | |||
| 134 | out: | 384 | out: |
| 135 | btrfs_free_path(path); | 385 | btrfs_free_path(path); |
| 136 | return ret; | 386 | return ret; |
| @@ -191,6 +441,19 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
| 191 | 441 | ||
| 192 | out: | 442 | out: |
| 193 | btrfs_free_path(path); | 443 | btrfs_free_path(path); |
| 444 | |||
| 445 | if (ret == -EMLINK) { | ||
| 446 | struct btrfs_super_block *disk_super = root->fs_info->super_copy; | ||
| 447 | /* We ran out of space in the ref array. Need to | ||
| 448 | * add an extended ref. */ | ||
| 449 | if (btrfs_super_incompat_flags(disk_super) | ||
| 450 | & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) | ||
| 451 | ret = btrfs_insert_inode_extref(trans, root, name, | ||
| 452 | name_len, | ||
| 453 | inode_objectid, | ||
| 454 | ref_objectid, index); | ||
| 455 | } | ||
| 456 | |||
| 194 | return ret; | 457 | return ret; |
| 195 | } | 458 | } |
| 196 | 459 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ec154f954646..85a1e5053fe6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -230,7 +230,6 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 230 | u64 inline_len = actual_end - start; | 230 | u64 inline_len = actual_end - start; |
| 231 | u64 aligned_end = (end + root->sectorsize - 1) & | 231 | u64 aligned_end = (end + root->sectorsize - 1) & |
| 232 | ~((u64)root->sectorsize - 1); | 232 | ~((u64)root->sectorsize - 1); |
| 233 | u64 hint_byte; | ||
| 234 | u64 data_len = inline_len; | 233 | u64 data_len = inline_len; |
| 235 | int ret; | 234 | int ret; |
| 236 | 235 | ||
| @@ -247,8 +246,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 247 | return 1; | 246 | return 1; |
| 248 | } | 247 | } |
| 249 | 248 | ||
| 250 | ret = btrfs_drop_extents(trans, inode, start, aligned_end, | 249 | ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); |
| 251 | &hint_byte, 1); | ||
| 252 | if (ret) | 250 | if (ret) |
| 253 | return ret; | 251 | return ret; |
| 254 | 252 | ||
| @@ -664,7 +662,7 @@ retry: | |||
| 664 | async_extent->compressed_size, | 662 | async_extent->compressed_size, |
| 665 | async_extent->compressed_size, | 663 | async_extent->compressed_size, |
| 666 | 0, alloc_hint, &ins, 1); | 664 | 0, alloc_hint, &ins, 1); |
| 667 | if (ret) | 665 | if (ret && ret != -ENOSPC) |
| 668 | btrfs_abort_transaction(trans, root, ret); | 666 | btrfs_abort_transaction(trans, root, ret); |
| 669 | btrfs_end_transaction(trans, root); | 667 | btrfs_end_transaction(trans, root); |
| 670 | } | 668 | } |
| @@ -1308,6 +1306,7 @@ out_check: | |||
| 1308 | em->block_start = disk_bytenr; | 1306 | em->block_start = disk_bytenr; |
| 1309 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1307 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 1310 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1308 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 1309 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
| 1311 | while (1) { | 1310 | while (1) { |
| 1312 | write_lock(&em_tree->lock); | 1311 | write_lock(&em_tree->lock); |
| 1313 | ret = add_extent_mapping(em_tree, em); | 1312 | ret = add_extent_mapping(em_tree, em); |
| @@ -1364,11 +1363,7 @@ out_check: | |||
| 1364 | } | 1363 | } |
| 1365 | 1364 | ||
| 1366 | error: | 1365 | error: |
| 1367 | if (nolock) { | 1366 | err = btrfs_end_transaction(trans, root); |
| 1368 | err = btrfs_end_transaction_nolock(trans, root); | ||
| 1369 | } else { | ||
| 1370 | err = btrfs_end_transaction(trans, root); | ||
| 1371 | } | ||
| 1372 | if (!ret) | 1367 | if (!ret) |
| 1373 | ret = err; | 1368 | ret = err; |
| 1374 | 1369 | ||
| @@ -1785,7 +1780,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1785 | struct btrfs_path *path; | 1780 | struct btrfs_path *path; |
| 1786 | struct extent_buffer *leaf; | 1781 | struct extent_buffer *leaf; |
| 1787 | struct btrfs_key ins; | 1782 | struct btrfs_key ins; |
| 1788 | u64 hint; | ||
| 1789 | int ret; | 1783 | int ret; |
| 1790 | 1784 | ||
| 1791 | path = btrfs_alloc_path(); | 1785 | path = btrfs_alloc_path(); |
| @@ -1803,8 +1797,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1803 | * the caller is expected to unpin it and allow it to be merged | 1797 | * the caller is expected to unpin it and allow it to be merged |
| 1804 | * with the others. | 1798 | * with the others. |
| 1805 | */ | 1799 | */ |
| 1806 | ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, | 1800 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
| 1807 | &hint, 0); | 1801 | file_pos + num_bytes, 0); |
| 1808 | if (ret) | 1802 | if (ret) |
| 1809 | goto out; | 1803 | goto out; |
| 1810 | 1804 | ||
| @@ -1828,10 +1822,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1828 | btrfs_set_file_extent_encryption(leaf, fi, encryption); | 1822 | btrfs_set_file_extent_encryption(leaf, fi, encryption); |
| 1829 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); | 1823 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); |
| 1830 | 1824 | ||
| 1831 | btrfs_unlock_up_safe(path, 1); | ||
| 1832 | btrfs_set_lock_blocking(leaf); | ||
| 1833 | |||
| 1834 | btrfs_mark_buffer_dirty(leaf); | 1825 | btrfs_mark_buffer_dirty(leaf); |
| 1826 | btrfs_release_path(path); | ||
| 1835 | 1827 | ||
| 1836 | inode_add_bytes(inode, num_bytes); | 1828 | inode_add_bytes(inode, num_bytes); |
| 1837 | 1829 | ||
| @@ -1929,11 +1921,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 1929 | ordered_extent->len, | 1921 | ordered_extent->len, |
| 1930 | compress_type, 0, 0, | 1922 | compress_type, 0, 0, |
| 1931 | BTRFS_FILE_EXTENT_REG); | 1923 | BTRFS_FILE_EXTENT_REG); |
| 1932 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
| 1933 | ordered_extent->file_offset, | ||
| 1934 | ordered_extent->len); | ||
| 1935 | } | 1924 | } |
| 1936 | 1925 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | |
| 1926 | ordered_extent->file_offset, ordered_extent->len, | ||
| 1927 | trans->transid); | ||
| 1937 | if (ret < 0) { | 1928 | if (ret < 0) { |
| 1938 | btrfs_abort_transaction(trans, root, ret); | 1929 | btrfs_abort_transaction(trans, root, ret); |
| 1939 | goto out_unlock; | 1930 | goto out_unlock; |
| @@ -1949,6 +1940,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 1949 | btrfs_abort_transaction(trans, root, ret); | 1940 | btrfs_abort_transaction(trans, root, ret); |
| 1950 | goto out_unlock; | 1941 | goto out_unlock; |
| 1951 | } | 1942 | } |
| 1943 | } else { | ||
| 1944 | btrfs_set_inode_last_trans(trans, inode); | ||
| 1952 | } | 1945 | } |
| 1953 | ret = 0; | 1946 | ret = 0; |
| 1954 | out_unlock: | 1947 | out_unlock: |
| @@ -1958,12 +1951,8 @@ out_unlock: | |||
| 1958 | out: | 1951 | out: |
| 1959 | if (root != root->fs_info->tree_root) | 1952 | if (root != root->fs_info->tree_root) |
| 1960 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1953 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
| 1961 | if (trans) { | 1954 | if (trans) |
| 1962 | if (nolock) | 1955 | btrfs_end_transaction(trans, root); |
| 1963 | btrfs_end_transaction_nolock(trans, root); | ||
| 1964 | else | ||
| 1965 | btrfs_end_transaction(trans, root); | ||
| 1966 | } | ||
| 1967 | 1956 | ||
| 1968 | if (ret) | 1957 | if (ret) |
| 1969 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, | 1958 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, |
| @@ -1971,8 +1960,8 @@ out: | |||
| 1971 | ordered_extent->len - 1, NULL, GFP_NOFS); | 1960 | ordered_extent->len - 1, NULL, GFP_NOFS); |
| 1972 | 1961 | ||
| 1973 | /* | 1962 | /* |
| 1974 | * This needs to be dont to make sure anybody waiting knows we are done | 1963 | * This needs to be done to make sure anybody waiting knows we are done |
| 1975 | * upating everything for this ordered extent. | 1964 | * updating everything for this ordered extent. |
| 1976 | */ | 1965 | */ |
| 1977 | btrfs_remove_ordered_extent(inode, ordered_extent); | 1966 | btrfs_remove_ordered_extent(inode, ordered_extent); |
| 1978 | 1967 | ||
| @@ -2119,7 +2108,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
| 2119 | if (empty) | 2108 | if (empty) |
| 2120 | return; | 2109 | return; |
| 2121 | 2110 | ||
| 2122 | down_read(&root->fs_info->cleanup_work_sem); | ||
| 2123 | spin_lock(&fs_info->delayed_iput_lock); | 2111 | spin_lock(&fs_info->delayed_iput_lock); |
| 2124 | list_splice_init(&fs_info->delayed_iputs, &list); | 2112 | list_splice_init(&fs_info->delayed_iputs, &list); |
| 2125 | spin_unlock(&fs_info->delayed_iput_lock); | 2113 | spin_unlock(&fs_info->delayed_iput_lock); |
| @@ -2130,7 +2118,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
| 2130 | iput(delayed->inode); | 2118 | iput(delayed->inode); |
| 2131 | kfree(delayed); | 2119 | kfree(delayed); |
| 2132 | } | 2120 | } |
| 2133 | up_read(&root->fs_info->cleanup_work_sem); | ||
| 2134 | } | 2121 | } |
| 2135 | 2122 | ||
| 2136 | enum btrfs_orphan_cleanup_state { | 2123 | enum btrfs_orphan_cleanup_state { |
| @@ -2198,7 +2185,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2198 | int ret; | 2185 | int ret; |
| 2199 | 2186 | ||
| 2200 | if (!root->orphan_block_rsv) { | 2187 | if (!root->orphan_block_rsv) { |
| 2201 | block_rsv = btrfs_alloc_block_rsv(root); | 2188 | block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
| 2202 | if (!block_rsv) | 2189 | if (!block_rsv) |
| 2203 | return -ENOMEM; | 2190 | return -ENOMEM; |
| 2204 | } | 2191 | } |
| @@ -2225,7 +2212,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2225 | insert = 1; | 2212 | insert = 1; |
| 2226 | #endif | 2213 | #endif |
| 2227 | insert = 1; | 2214 | insert = 1; |
| 2228 | atomic_dec(&root->orphan_inodes); | 2215 | atomic_inc(&root->orphan_inodes); |
| 2229 | } | 2216 | } |
| 2230 | 2217 | ||
| 2231 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, | 2218 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
| @@ -2572,8 +2559,8 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
| 2572 | struct btrfs_inode_item); | 2559 | struct btrfs_inode_item); |
| 2573 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2560 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
| 2574 | set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); | 2561 | set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); |
| 2575 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); | 2562 | i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); |
| 2576 | inode->i_gid = btrfs_inode_gid(leaf, inode_item); | 2563 | i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); |
| 2577 | btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); | 2564 | btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); |
| 2578 | 2565 | ||
| 2579 | tspec = btrfs_inode_atime(inode_item); | 2566 | tspec = btrfs_inode_atime(inode_item); |
| @@ -2590,6 +2577,18 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
| 2590 | 2577 | ||
| 2591 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); | 2578 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); |
| 2592 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); | 2579 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); |
| 2580 | BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); | ||
| 2581 | |||
| 2582 | /* | ||
| 2583 | * If we were modified in the current generation and evicted from memory | ||
| 2584 | * and then re-read we need to do a full sync since we don't have any | ||
| 2585 | * idea about which extents were modified before we were evicted from | ||
| 2586 | * cache. | ||
| 2587 | */ | ||
| 2588 | if (BTRFS_I(inode)->last_trans == root->fs_info->generation) | ||
| 2589 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 2590 | &BTRFS_I(inode)->runtime_flags); | ||
| 2591 | |||
| 2593 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); | 2592 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); |
| 2594 | inode->i_generation = BTRFS_I(inode)->generation; | 2593 | inode->i_generation = BTRFS_I(inode)->generation; |
| 2595 | inode->i_rdev = 0; | 2594 | inode->i_rdev = 0; |
| @@ -2651,8 +2650,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
| 2651 | struct btrfs_inode_item *item, | 2650 | struct btrfs_inode_item *item, |
| 2652 | struct inode *inode) | 2651 | struct inode *inode) |
| 2653 | { | 2652 | { |
| 2654 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2653 | btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); |
| 2655 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2654 | btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); |
| 2656 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2655 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
| 2657 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | 2656 | btrfs_set_inode_mode(leaf, item, inode->i_mode); |
| 2658 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | 2657 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); |
| @@ -2894,7 +2893,6 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 2894 | struct btrfs_trans_handle *trans; | 2893 | struct btrfs_trans_handle *trans; |
| 2895 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2894 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 2896 | struct btrfs_path *path; | 2895 | struct btrfs_path *path; |
| 2897 | struct btrfs_inode_ref *ref; | ||
| 2898 | struct btrfs_dir_item *di; | 2896 | struct btrfs_dir_item *di; |
| 2899 | struct inode *inode = dentry->d_inode; | 2897 | struct inode *inode = dentry->d_inode; |
| 2900 | u64 index; | 2898 | u64 index; |
| @@ -3008,17 +3006,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 3008 | } | 3006 | } |
| 3009 | btrfs_release_path(path); | 3007 | btrfs_release_path(path); |
| 3010 | 3008 | ||
| 3011 | ref = btrfs_lookup_inode_ref(trans, root, path, | 3009 | ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name, |
| 3012 | dentry->d_name.name, dentry->d_name.len, | 3010 | dentry->d_name.len, ino, dir_ino, 0, |
| 3013 | ino, dir_ino, 0); | 3011 | &index); |
| 3014 | if (IS_ERR(ref)) { | 3012 | if (ret) { |
| 3015 | err = PTR_ERR(ref); | 3013 | err = ret; |
| 3016 | goto out; | 3014 | goto out; |
| 3017 | } | 3015 | } |
| 3018 | BUG_ON(!ref); /* Logic error */ | 3016 | |
| 3019 | if (check_path_shared(root, path)) | 3017 | if (check_path_shared(root, path)) |
| 3020 | goto out; | 3018 | goto out; |
| 3021 | index = btrfs_inode_ref_index(path->nodes[0], ref); | 3019 | |
| 3022 | btrfs_release_path(path); | 3020 | btrfs_release_path(path); |
| 3023 | 3021 | ||
| 3024 | /* | 3022 | /* |
| @@ -3061,7 +3059,7 @@ out: | |||
| 3061 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | 3059 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, |
| 3062 | struct btrfs_root *root) | 3060 | struct btrfs_root *root) |
| 3063 | { | 3061 | { |
| 3064 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | 3062 | if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) { |
| 3065 | btrfs_block_rsv_release(root, trans->block_rsv, | 3063 | btrfs_block_rsv_release(root, trans->block_rsv, |
| 3066 | trans->bytes_reserved); | 3064 | trans->bytes_reserved); |
| 3067 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 3065 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| @@ -3191,9 +3189,10 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 3191 | struct btrfs_trans_handle *trans; | 3189 | struct btrfs_trans_handle *trans; |
| 3192 | unsigned long nr = 0; | 3190 | unsigned long nr = 0; |
| 3193 | 3191 | ||
| 3194 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || | 3192 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 3195 | btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | ||
| 3196 | return -ENOTEMPTY; | 3193 | return -ENOTEMPTY; |
| 3194 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | ||
| 3195 | return -EPERM; | ||
| 3197 | 3196 | ||
| 3198 | trans = __unlink_start_trans(dir, dentry); | 3197 | trans = __unlink_start_trans(dir, dentry); |
| 3199 | if (IS_ERR(trans)) | 3198 | if (IS_ERR(trans)) |
| @@ -3267,8 +3266,13 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 3267 | return -ENOMEM; | 3266 | return -ENOMEM; |
| 3268 | path->reada = -1; | 3267 | path->reada = -1; |
| 3269 | 3268 | ||
| 3269 | /* | ||
| 3270 | * We want to drop from the next block forward in case this new size is | ||
| 3271 | * not block aligned since we will be keeping the last block of the | ||
| 3272 | * extent just the way it is. | ||
| 3273 | */ | ||
| 3270 | if (root->ref_cows || root == root->fs_info->tree_root) | 3274 | if (root->ref_cows || root == root->fs_info->tree_root) |
| 3271 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 3275 | btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0); |
| 3272 | 3276 | ||
| 3273 | /* | 3277 | /* |
| 3274 | * This function is also used to drop the items in the log tree before | 3278 | * This function is also used to drop the items in the log tree before |
| @@ -3429,12 +3433,6 @@ delete: | |||
| 3429 | 3433 | ||
| 3430 | if (path->slots[0] == 0 || | 3434 | if (path->slots[0] == 0 || |
| 3431 | path->slots[0] != pending_del_slot) { | 3435 | path->slots[0] != pending_del_slot) { |
| 3432 | if (root->ref_cows && | ||
| 3433 | BTRFS_I(inode)->location.objectid != | ||
| 3434 | BTRFS_FREE_INO_OBJECTID) { | ||
| 3435 | err = -EAGAIN; | ||
| 3436 | goto out; | ||
| 3437 | } | ||
| 3438 | if (pending_del_nr) { | 3436 | if (pending_del_nr) { |
| 3439 | ret = btrfs_del_items(trans, root, path, | 3437 | ret = btrfs_del_items(trans, root, path, |
| 3440 | pending_del_slot, | 3438 | pending_del_slot, |
| @@ -3465,12 +3463,20 @@ error: | |||
| 3465 | } | 3463 | } |
| 3466 | 3464 | ||
| 3467 | /* | 3465 | /* |
| 3468 | * taken from block_truncate_page, but does cow as it zeros out | 3466 | * btrfs_truncate_page - read, zero a chunk and write a page |
| 3469 | * any bytes left in the last page in the file. | 3467 | * @inode - inode that we're zeroing |
| 3468 | * @from - the offset to start zeroing | ||
| 3469 | * @len - the length to zero, 0 to zero the entire range respective to the | ||
| 3470 | * offset | ||
| 3471 | * @front - zero up to the offset instead of from the offset on | ||
| 3472 | * | ||
| 3473 | * This will find the page for the "from" offset and cow the page and zero the | ||
| 3474 | * part we want to zero. This is used with truncate and hole punching. | ||
| 3470 | */ | 3475 | */ |
| 3471 | static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | 3476 | int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, |
| 3477 | int front) | ||
| 3472 | { | 3478 | { |
| 3473 | struct inode *inode = mapping->host; | 3479 | struct address_space *mapping = inode->i_mapping; |
| 3474 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3480 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3475 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3481 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 3476 | struct btrfs_ordered_extent *ordered; | 3482 | struct btrfs_ordered_extent *ordered; |
| @@ -3485,7 +3491,8 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 3485 | u64 page_start; | 3491 | u64 page_start; |
| 3486 | u64 page_end; | 3492 | u64 page_end; |
| 3487 | 3493 | ||
| 3488 | if ((offset & (blocksize - 1)) == 0) | 3494 | if ((offset & (blocksize - 1)) == 0 && |
| 3495 | (!len || ((len & (blocksize - 1)) == 0))) | ||
| 3489 | goto out; | 3496 | goto out; |
| 3490 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 3497 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 3491 | if (ret) | 3498 | if (ret) |
| @@ -3532,7 +3539,8 @@ again: | |||
| 3532 | } | 3539 | } |
| 3533 | 3540 | ||
| 3534 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 3541 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, |
| 3535 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | 3542 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 3543 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, | ||
| 3536 | 0, 0, &cached_state, GFP_NOFS); | 3544 | 0, 0, &cached_state, GFP_NOFS); |
| 3537 | 3545 | ||
| 3538 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, | 3546 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, |
| @@ -3545,8 +3553,13 @@ again: | |||
| 3545 | 3553 | ||
| 3546 | ret = 0; | 3554 | ret = 0; |
| 3547 | if (offset != PAGE_CACHE_SIZE) { | 3555 | if (offset != PAGE_CACHE_SIZE) { |
| 3556 | if (!len) | ||
| 3557 | len = PAGE_CACHE_SIZE - offset; | ||
| 3548 | kaddr = kmap(page); | 3558 | kaddr = kmap(page); |
| 3549 | memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); | 3559 | if (front) |
| 3560 | memset(kaddr, 0, offset); | ||
| 3561 | else | ||
| 3562 | memset(kaddr + offset, 0, len); | ||
| 3550 | flush_dcache_page(page); | 3563 | flush_dcache_page(page); |
| 3551 | kunmap(page); | 3564 | kunmap(page); |
| 3552 | } | 3565 | } |
| @@ -3577,6 +3590,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3577 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3590 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 3578 | struct extent_map *em = NULL; | 3591 | struct extent_map *em = NULL; |
| 3579 | struct extent_state *cached_state = NULL; | 3592 | struct extent_state *cached_state = NULL; |
| 3593 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 3580 | u64 mask = root->sectorsize - 1; | 3594 | u64 mask = root->sectorsize - 1; |
| 3581 | u64 hole_start = (oldsize + mask) & ~mask; | 3595 | u64 hole_start = (oldsize + mask) & ~mask; |
| 3582 | u64 block_end = (size + mask) & ~mask; | 3596 | u64 block_end = (size + mask) & ~mask; |
| @@ -3613,7 +3627,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3613 | last_byte = min(extent_map_end(em), block_end); | 3627 | last_byte = min(extent_map_end(em), block_end); |
| 3614 | last_byte = (last_byte + mask) & ~mask; | 3628 | last_byte = (last_byte + mask) & ~mask; |
| 3615 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | 3629 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { |
| 3616 | u64 hint_byte = 0; | 3630 | struct extent_map *hole_em; |
| 3617 | hole_size = last_byte - cur_offset; | 3631 | hole_size = last_byte - cur_offset; |
| 3618 | 3632 | ||
| 3619 | trans = btrfs_start_transaction(root, 3); | 3633 | trans = btrfs_start_transaction(root, 3); |
| @@ -3622,9 +3636,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3622 | break; | 3636 | break; |
| 3623 | } | 3637 | } |
| 3624 | 3638 | ||
| 3625 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3639 | err = btrfs_drop_extents(trans, root, inode, |
| 3626 | cur_offset + hole_size, | 3640 | cur_offset, |
| 3627 | &hint_byte, 1); | 3641 | cur_offset + hole_size, 1); |
| 3628 | if (err) { | 3642 | if (err) { |
| 3629 | btrfs_abort_transaction(trans, root, err); | 3643 | btrfs_abort_transaction(trans, root, err); |
| 3630 | btrfs_end_transaction(trans, root); | 3644 | btrfs_end_transaction(trans, root); |
| @@ -3641,9 +3655,39 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3641 | break; | 3655 | break; |
| 3642 | } | 3656 | } |
| 3643 | 3657 | ||
| 3644 | btrfs_drop_extent_cache(inode, hole_start, | 3658 | btrfs_drop_extent_cache(inode, cur_offset, |
| 3645 | last_byte - 1, 0); | 3659 | cur_offset + hole_size - 1, 0); |
| 3660 | hole_em = alloc_extent_map(); | ||
| 3661 | if (!hole_em) { | ||
| 3662 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 3663 | &BTRFS_I(inode)->runtime_flags); | ||
| 3664 | goto next; | ||
| 3665 | } | ||
| 3666 | hole_em->start = cur_offset; | ||
| 3667 | hole_em->len = hole_size; | ||
| 3668 | hole_em->orig_start = cur_offset; | ||
| 3669 | |||
| 3670 | hole_em->block_start = EXTENT_MAP_HOLE; | ||
| 3671 | hole_em->block_len = 0; | ||
| 3672 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 3673 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | ||
| 3674 | hole_em->generation = trans->transid; | ||
| 3646 | 3675 | ||
| 3676 | while (1) { | ||
| 3677 | write_lock(&em_tree->lock); | ||
| 3678 | err = add_extent_mapping(em_tree, hole_em); | ||
| 3679 | if (!err) | ||
| 3680 | list_move(&hole_em->list, | ||
| 3681 | &em_tree->modified_extents); | ||
| 3682 | write_unlock(&em_tree->lock); | ||
| 3683 | if (err != -EEXIST) | ||
| 3684 | break; | ||
| 3685 | btrfs_drop_extent_cache(inode, cur_offset, | ||
| 3686 | cur_offset + | ||
| 3687 | hole_size - 1, 0); | ||
| 3688 | } | ||
| 3689 | free_extent_map(hole_em); | ||
| 3690 | next: | ||
| 3647 | btrfs_update_inode(trans, root, inode); | 3691 | btrfs_update_inode(trans, root, inode); |
| 3648 | btrfs_end_transaction(trans, root); | 3692 | btrfs_end_transaction(trans, root); |
| 3649 | } | 3693 | } |
| @@ -3768,26 +3812,22 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3768 | goto no_delete; | 3812 | goto no_delete; |
| 3769 | } | 3813 | } |
| 3770 | 3814 | ||
| 3771 | rsv = btrfs_alloc_block_rsv(root); | 3815 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
| 3772 | if (!rsv) { | 3816 | if (!rsv) { |
| 3773 | btrfs_orphan_del(NULL, inode); | 3817 | btrfs_orphan_del(NULL, inode); |
| 3774 | goto no_delete; | 3818 | goto no_delete; |
| 3775 | } | 3819 | } |
| 3776 | rsv->size = min_size; | 3820 | rsv->size = min_size; |
| 3821 | rsv->failfast = 1; | ||
| 3777 | global_rsv = &root->fs_info->global_block_rsv; | 3822 | global_rsv = &root->fs_info->global_block_rsv; |
| 3778 | 3823 | ||
| 3779 | btrfs_i_size_write(inode, 0); | 3824 | btrfs_i_size_write(inode, 0); |
| 3780 | 3825 | ||
| 3781 | /* | 3826 | /* |
| 3782 | * This is a bit simpler than btrfs_truncate since | 3827 | * This is a bit simpler than btrfs_truncate since we've already |
| 3783 | * | 3828 | * reserved our space for our orphan item in the unlink, so we just |
| 3784 | * 1) We've already reserved our space for our orphan item in the | 3829 | * need to reserve some slack space in case we add bytes and update |
| 3785 | * unlink. | 3830 | * inode item when doing the truncate. |
| 3786 | * 2) We're going to delete the inode item, so we don't need to update | ||
| 3787 | * it at all. | ||
| 3788 | * | ||
| 3789 | * So we just need to reserve some slack space in case we add bytes when | ||
| 3790 | * doing the truncate. | ||
| 3791 | */ | 3831 | */ |
| 3792 | while (1) { | 3832 | while (1) { |
| 3793 | ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); | 3833 | ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); |
| @@ -3808,7 +3848,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3808 | goto no_delete; | 3848 | goto no_delete; |
| 3809 | } | 3849 | } |
| 3810 | 3850 | ||
| 3811 | trans = btrfs_start_transaction(root, 0); | 3851 | trans = btrfs_start_transaction_noflush(root, 1); |
| 3812 | if (IS_ERR(trans)) { | 3852 | if (IS_ERR(trans)) { |
| 3813 | btrfs_orphan_del(NULL, inode); | 3853 | btrfs_orphan_del(NULL, inode); |
| 3814 | btrfs_free_block_rsv(root, rsv); | 3854 | btrfs_free_block_rsv(root, rsv); |
| @@ -3818,9 +3858,13 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3818 | trans->block_rsv = rsv; | 3858 | trans->block_rsv = rsv; |
| 3819 | 3859 | ||
| 3820 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3860 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); |
| 3821 | if (ret != -EAGAIN) | 3861 | if (ret != -ENOSPC) |
| 3822 | break; | 3862 | break; |
| 3823 | 3863 | ||
| 3864 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 3865 | ret = btrfs_update_inode(trans, root, inode); | ||
| 3866 | BUG_ON(ret); | ||
| 3867 | |||
| 3824 | nr = trans->blocks_used; | 3868 | nr = trans->blocks_used; |
| 3825 | btrfs_end_transaction(trans, root); | 3869 | btrfs_end_transaction(trans, root); |
| 3826 | trans = NULL; | 3870 | trans = NULL; |
| @@ -4470,10 +4514,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 4470 | trans = btrfs_join_transaction(root); | 4514 | trans = btrfs_join_transaction(root); |
| 4471 | if (IS_ERR(trans)) | 4515 | if (IS_ERR(trans)) |
| 4472 | return PTR_ERR(trans); | 4516 | return PTR_ERR(trans); |
| 4473 | if (nolock) | 4517 | ret = btrfs_commit_transaction(trans, root); |
| 4474 | ret = btrfs_end_transaction_nolock(trans, root); | ||
| 4475 | else | ||
| 4476 | ret = btrfs_commit_transaction(trans, root); | ||
| 4477 | } | 4518 | } |
| 4478 | return ret; | 4519 | return ret; |
| 4479 | } | 4520 | } |
| @@ -4671,6 +4712,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4671 | BTRFS_I(inode)->generation = trans->transid; | 4712 | BTRFS_I(inode)->generation = trans->transid; |
| 4672 | inode->i_generation = BTRFS_I(inode)->generation; | 4713 | inode->i_generation = BTRFS_I(inode)->generation; |
| 4673 | 4714 | ||
| 4715 | /* | ||
| 4716 | * We could have gotten an inode number from somebody who was fsynced | ||
| 4717 | * and then removed in this same transaction, so let's just set full | ||
| 4718 | * sync since it will be a full sync anyway and this will blow away the | ||
| 4719 | * old info in the log. | ||
| 4720 | */ | ||
| 4721 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); | ||
| 4722 | |||
| 4674 | if (S_ISDIR(mode)) | 4723 | if (S_ISDIR(mode)) |
| 4675 | owner = 0; | 4724 | owner = 0; |
| 4676 | else | 4725 | else |
| @@ -4680,6 +4729,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4680 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 4729 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |
| 4681 | key[0].offset = 0; | 4730 | key[0].offset = 0; |
| 4682 | 4731 | ||
| 4732 | /* | ||
| 4733 | * Start new inodes with an inode_ref. This is slightly more | ||
| 4734 | * efficient for small numbers of hard links since they will | ||
| 4735 | * be packed into one item. Extended refs will kick in if we | ||
| 4736 | * add more hard links than can fit in the ref item. | ||
| 4737 | */ | ||
| 4683 | key[1].objectid = objectid; | 4738 | key[1].objectid = objectid; |
| 4684 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | 4739 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); |
| 4685 | key[1].offset = ref_objectid; | 4740 | key[1].offset = ref_objectid; |
| @@ -4986,7 +5041,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4986 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 5041 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
| 4987 | return -EXDEV; | 5042 | return -EXDEV; |
| 4988 | 5043 | ||
| 4989 | if (inode->i_nlink == ~0U) | 5044 | if (inode->i_nlink >= BTRFS_LINK_MAX) |
| 4990 | return -EMLINK; | 5045 | return -EMLINK; |
| 4991 | 5046 | ||
| 4992 | err = btrfs_set_inode_index(dir, &index); | 5047 | err = btrfs_set_inode_index(dir, &index); |
| @@ -5450,7 +5505,8 @@ insert: | |||
| 5450 | write_unlock(&em_tree->lock); | 5505 | write_unlock(&em_tree->lock); |
| 5451 | out: | 5506 | out: |
| 5452 | 5507 | ||
| 5453 | trace_btrfs_get_extent(root, em); | 5508 | if (em) |
| 5509 | trace_btrfs_get_extent(root, em); | ||
| 5454 | 5510 | ||
| 5455 | if (path) | 5511 | if (path) |
| 5456 | btrfs_free_path(path); | 5512 | btrfs_free_path(path); |
| @@ -5836,6 +5892,48 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
| 5836 | return ret; | 5892 | return ret; |
| 5837 | } | 5893 | } |
| 5838 | 5894 | ||
| 5895 | static struct extent_map *create_pinned_em(struct inode *inode, u64 start, | ||
| 5896 | u64 len, u64 orig_start, | ||
| 5897 | u64 block_start, u64 block_len, | ||
| 5898 | int type) | ||
| 5899 | { | ||
| 5900 | struct extent_map_tree *em_tree; | ||
| 5901 | struct extent_map *em; | ||
| 5902 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5903 | int ret; | ||
| 5904 | |||
| 5905 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 5906 | em = alloc_extent_map(); | ||
| 5907 | if (!em) | ||
| 5908 | return ERR_PTR(-ENOMEM); | ||
| 5909 | |||
| 5910 | em->start = start; | ||
| 5911 | em->orig_start = orig_start; | ||
| 5912 | em->len = len; | ||
| 5913 | em->block_len = block_len; | ||
| 5914 | em->block_start = block_start; | ||
| 5915 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 5916 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 5917 | if (type == BTRFS_ORDERED_PREALLOC) | ||
| 5918 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
| 5919 | |||
| 5920 | do { | ||
| 5921 | btrfs_drop_extent_cache(inode, em->start, | ||
| 5922 | em->start + em->len - 1, 0); | ||
| 5923 | write_lock(&em_tree->lock); | ||
| 5924 | ret = add_extent_mapping(em_tree, em); | ||
| 5925 | write_unlock(&em_tree->lock); | ||
| 5926 | } while (ret == -EEXIST); | ||
| 5927 | |||
| 5928 | if (ret) { | ||
| 5929 | free_extent_map(em); | ||
| 5930 | return ERR_PTR(ret); | ||
| 5931 | } | ||
| 5932 | |||
| 5933 | return em; | ||
| 5934 | } | ||
| 5935 | |||
| 5936 | |||
| 5839 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | 5937 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, |
| 5840 | struct buffer_head *bh_result, int create) | 5938 | struct buffer_head *bh_result, int create) |
| 5841 | { | 5939 | { |
| @@ -5950,6 +6048,19 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
| 5950 | goto must_cow; | 6048 | goto must_cow; |
| 5951 | 6049 | ||
| 5952 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | 6050 | if (can_nocow_odirect(trans, inode, start, len) == 1) { |
| 6051 | u64 orig_start = em->start; | ||
| 6052 | |||
| 6053 | if (type == BTRFS_ORDERED_PREALLOC) { | ||
| 6054 | free_extent_map(em); | ||
| 6055 | em = create_pinned_em(inode, start, len, | ||
| 6056 | orig_start, | ||
| 6057 | block_start, len, type); | ||
| 6058 | if (IS_ERR(em)) { | ||
| 6059 | btrfs_end_transaction(trans, root); | ||
| 6060 | goto unlock_err; | ||
| 6061 | } | ||
| 6062 | } | ||
| 6063 | |||
| 5953 | ret = btrfs_add_ordered_extent_dio(inode, start, | 6064 | ret = btrfs_add_ordered_extent_dio(inode, start, |
| 5954 | block_start, len, len, type); | 6065 | block_start, len, len, type); |
| 5955 | btrfs_end_transaction(trans, root); | 6066 | btrfs_end_transaction(trans, root); |
| @@ -5999,7 +6110,8 @@ unlock: | |||
| 5999 | if (lockstart < lockend) { | 6110 | if (lockstart < lockend) { |
| 6000 | if (create && len < lockend - lockstart) { | 6111 | if (create && len < lockend - lockstart) { |
| 6001 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 6112 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
| 6002 | lockstart + len - 1, unlock_bits, 1, 0, | 6113 | lockstart + len - 1, |
| 6114 | unlock_bits | EXTENT_DEFRAG, 1, 0, | ||
| 6003 | &cached_state, GFP_NOFS); | 6115 | &cached_state, GFP_NOFS); |
| 6004 | /* | 6116 | /* |
| 6005 | * Beside unlock, we also need to cleanup reserved space | 6117 | * Beside unlock, we also need to cleanup reserved space |
| @@ -6007,8 +6119,8 @@ unlock: | |||
| 6007 | */ | 6119 | */ |
| 6008 | clear_extent_bit(&BTRFS_I(inode)->io_tree, | 6120 | clear_extent_bit(&BTRFS_I(inode)->io_tree, |
| 6009 | lockstart + len, lockend, | 6121 | lockstart + len, lockend, |
| 6010 | unlock_bits | EXTENT_DO_ACCOUNTING, | 6122 | unlock_bits | EXTENT_DO_ACCOUNTING | |
| 6011 | 1, 0, NULL, GFP_NOFS); | 6123 | EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS); |
| 6012 | } else { | 6124 | } else { |
| 6013 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 6125 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
| 6014 | lockend, unlock_bits, 1, 0, | 6126 | lockend, unlock_bits, 1, 0, |
| @@ -6573,8 +6685,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 6573 | */ | 6685 | */ |
| 6574 | clear_extent_bit(tree, page_start, page_end, | 6686 | clear_extent_bit(tree, page_start, page_end, |
| 6575 | EXTENT_DIRTY | EXTENT_DELALLOC | | 6687 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 6576 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, | 6688 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | |
| 6577 | &cached_state, GFP_NOFS); | 6689 | EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS); |
| 6578 | /* | 6690 | /* |
| 6579 | * whoever cleared the private bit is responsible | 6691 | * whoever cleared the private bit is responsible |
| 6580 | * for the finish_ordered_io | 6692 | * for the finish_ordered_io |
| @@ -6590,7 +6702,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 6590 | } | 6702 | } |
| 6591 | clear_extent_bit(tree, page_start, page_end, | 6703 | clear_extent_bit(tree, page_start, page_end, |
| 6592 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 6704 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 6593 | EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS); | 6705 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, |
| 6706 | &cached_state, GFP_NOFS); | ||
| 6594 | __btrfs_releasepage(page, GFP_NOFS); | 6707 | __btrfs_releasepage(page, GFP_NOFS); |
| 6595 | 6708 | ||
| 6596 | ClearPageChecked(page); | 6709 | ClearPageChecked(page); |
| @@ -6687,7 +6800,8 @@ again: | |||
| 6687 | * prepare_pages in the normal write path. | 6800 | * prepare_pages in the normal write path. |
| 6688 | */ | 6801 | */ |
| 6689 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 6802 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, |
| 6690 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | 6803 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 6804 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, | ||
| 6691 | 0, 0, &cached_state, GFP_NOFS); | 6805 | 0, 0, &cached_state, GFP_NOFS); |
| 6692 | 6806 | ||
| 6693 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, | 6807 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, |
| @@ -6718,6 +6832,7 @@ again: | |||
| 6718 | 6832 | ||
| 6719 | BTRFS_I(inode)->last_trans = root->fs_info->generation; | 6833 | BTRFS_I(inode)->last_trans = root->fs_info->generation; |
| 6720 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | 6834 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
| 6835 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; | ||
| 6721 | 6836 | ||
| 6722 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6837 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
| 6723 | 6838 | ||
| @@ -6745,7 +6860,7 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6745 | u64 mask = root->sectorsize - 1; | 6860 | u64 mask = root->sectorsize - 1; |
| 6746 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 6861 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
| 6747 | 6862 | ||
| 6748 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6863 | ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); |
| 6749 | if (ret) | 6864 | if (ret) |
| 6750 | return ret; | 6865 | return ret; |
| 6751 | 6866 | ||
| @@ -6788,10 +6903,11 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6788 | * 3) fs_info->trans_block_rsv - this will have 1 items worth left for | 6903 | * 3) fs_info->trans_block_rsv - this will have 1 items worth left for |
| 6789 | * updating the inode. | 6904 | * updating the inode. |
| 6790 | */ | 6905 | */ |
| 6791 | rsv = btrfs_alloc_block_rsv(root); | 6906 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
| 6792 | if (!rsv) | 6907 | if (!rsv) |
| 6793 | return -ENOMEM; | 6908 | return -ENOMEM; |
| 6794 | rsv->size = min_size; | 6909 | rsv->size = min_size; |
| 6910 | rsv->failfast = 1; | ||
| 6795 | 6911 | ||
| 6796 | /* | 6912 | /* |
| 6797 | * 1 for the truncate slack space | 6913 | * 1 for the truncate slack space |
| @@ -6837,36 +6953,21 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6837 | &BTRFS_I(inode)->runtime_flags)) | 6953 | &BTRFS_I(inode)->runtime_flags)) |
| 6838 | btrfs_add_ordered_operation(trans, root, inode); | 6954 | btrfs_add_ordered_operation(trans, root, inode); |
| 6839 | 6955 | ||
| 6840 | while (1) { | 6956 | /* |
| 6841 | ret = btrfs_block_rsv_refill(root, rsv, min_size); | 6957 | * So if we truncate and then write and fsync we normally would just |
| 6842 | if (ret) { | 6958 | * write the extents that changed, which is a problem if we need to |
| 6843 | /* | 6959 | * first truncate that entire inode. So set this flag so we write out |
| 6844 | * This can only happen with the original transaction we | 6960 | * all of the extents in the inode to the sync log so we're completely |
| 6845 | * started above, every other time we shouldn't have a | 6961 | * safe. |
| 6846 | * transaction started yet. | 6962 | */ |
| 6847 | */ | 6963 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); |
| 6848 | if (ret == -EAGAIN) | 6964 | trans->block_rsv = rsv; |
| 6849 | goto end_trans; | ||
| 6850 | err = ret; | ||
| 6851 | break; | ||
| 6852 | } | ||
| 6853 | |||
| 6854 | if (!trans) { | ||
| 6855 | /* Just need the 1 for updating the inode */ | ||
| 6856 | trans = btrfs_start_transaction(root, 1); | ||
| 6857 | if (IS_ERR(trans)) { | ||
| 6858 | ret = err = PTR_ERR(trans); | ||
| 6859 | trans = NULL; | ||
| 6860 | break; | ||
| 6861 | } | ||
| 6862 | } | ||
| 6863 | |||
| 6864 | trans->block_rsv = rsv; | ||
| 6865 | 6965 | ||
| 6966 | while (1) { | ||
| 6866 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6967 | ret = btrfs_truncate_inode_items(trans, root, inode, |
| 6867 | inode->i_size, | 6968 | inode->i_size, |
| 6868 | BTRFS_EXTENT_DATA_KEY); | 6969 | BTRFS_EXTENT_DATA_KEY); |
| 6869 | if (ret != -EAGAIN) { | 6970 | if (ret != -ENOSPC) { |
| 6870 | err = ret; | 6971 | err = ret; |
| 6871 | break; | 6972 | break; |
| 6872 | } | 6973 | } |
| @@ -6877,11 +6978,22 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6877 | err = ret; | 6978 | err = ret; |
| 6878 | break; | 6979 | break; |
| 6879 | } | 6980 | } |
| 6880 | end_trans: | 6981 | |
| 6881 | nr = trans->blocks_used; | 6982 | nr = trans->blocks_used; |
| 6882 | btrfs_end_transaction(trans, root); | 6983 | btrfs_end_transaction(trans, root); |
| 6883 | trans = NULL; | ||
| 6884 | btrfs_btree_balance_dirty(root, nr); | 6984 | btrfs_btree_balance_dirty(root, nr); |
| 6985 | |||
| 6986 | trans = btrfs_start_transaction(root, 2); | ||
| 6987 | if (IS_ERR(trans)) { | ||
| 6988 | ret = err = PTR_ERR(trans); | ||
| 6989 | trans = NULL; | ||
| 6990 | break; | ||
| 6991 | } | ||
| 6992 | |||
| 6993 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | ||
| 6994 | rsv, min_size); | ||
| 6995 | BUG_ON(ret); /* shouldn't happen */ | ||
| 6996 | trans->block_rsv = rsv; | ||
| 6885 | } | 6997 | } |
| 6886 | 6998 | ||
| 6887 | if (ret == 0 && inode->i_nlink > 0) { | 6999 | if (ret == 0 && inode->i_nlink > 0) { |
| @@ -6965,6 +7077,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6965 | ei->csum_bytes = 0; | 7077 | ei->csum_bytes = 0; |
| 6966 | ei->index_cnt = (u64)-1; | 7078 | ei->index_cnt = (u64)-1; |
| 6967 | ei->last_unlink_trans = 0; | 7079 | ei->last_unlink_trans = 0; |
| 7080 | ei->last_log_commit = 0; | ||
| 6968 | 7081 | ||
| 6969 | spin_lock_init(&ei->lock); | 7082 | spin_lock_init(&ei->lock); |
| 6970 | ei->outstanding_extents = 0; | 7083 | ei->outstanding_extents = 0; |
| @@ -7076,6 +7189,11 @@ static void init_once(void *foo) | |||
| 7076 | 7189 | ||
| 7077 | void btrfs_destroy_cachep(void) | 7190 | void btrfs_destroy_cachep(void) |
| 7078 | { | 7191 | { |
| 7192 | /* | ||
| 7193 | * Make sure all delayed rcu free inodes are flushed before we | ||
| 7194 | * destroy cache. | ||
| 7195 | */ | ||
| 7196 | rcu_barrier(); | ||
| 7079 | if (btrfs_inode_cachep) | 7197 | if (btrfs_inode_cachep) |
| 7080 | kmem_cache_destroy(btrfs_inode_cachep); | 7198 | kmem_cache_destroy(btrfs_inode_cachep); |
| 7081 | if (btrfs_trans_handle_cachep) | 7199 | if (btrfs_trans_handle_cachep) |
| @@ -7090,31 +7208,31 @@ void btrfs_destroy_cachep(void) | |||
| 7090 | 7208 | ||
| 7091 | int btrfs_init_cachep(void) | 7209 | int btrfs_init_cachep(void) |
| 7092 | { | 7210 | { |
| 7093 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", | 7211 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode", |
| 7094 | sizeof(struct btrfs_inode), 0, | 7212 | sizeof(struct btrfs_inode), 0, |
| 7095 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); | 7213 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); |
| 7096 | if (!btrfs_inode_cachep) | 7214 | if (!btrfs_inode_cachep) |
| 7097 | goto fail; | 7215 | goto fail; |
| 7098 | 7216 | ||
| 7099 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", | 7217 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle", |
| 7100 | sizeof(struct btrfs_trans_handle), 0, | 7218 | sizeof(struct btrfs_trans_handle), 0, |
| 7101 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7219 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 7102 | if (!btrfs_trans_handle_cachep) | 7220 | if (!btrfs_trans_handle_cachep) |
| 7103 | goto fail; | 7221 | goto fail; |
| 7104 | 7222 | ||
| 7105 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", | 7223 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction", |
| 7106 | sizeof(struct btrfs_transaction), 0, | 7224 | sizeof(struct btrfs_transaction), 0, |
| 7107 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7225 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 7108 | if (!btrfs_transaction_cachep) | 7226 | if (!btrfs_transaction_cachep) |
| 7109 | goto fail; | 7227 | goto fail; |
| 7110 | 7228 | ||
| 7111 | btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", | 7229 | btrfs_path_cachep = kmem_cache_create("btrfs_path", |
| 7112 | sizeof(struct btrfs_path), 0, | 7230 | sizeof(struct btrfs_path), 0, |
| 7113 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7231 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 7114 | if (!btrfs_path_cachep) | 7232 | if (!btrfs_path_cachep) |
| 7115 | goto fail; | 7233 | goto fail; |
| 7116 | 7234 | ||
| 7117 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache", | 7235 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space", |
| 7118 | sizeof(struct btrfs_free_space), 0, | 7236 | sizeof(struct btrfs_free_space), 0, |
| 7119 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7237 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 7120 | if (!btrfs_free_space_cachep) | 7238 | if (!btrfs_free_space_cachep) |
| @@ -7508,6 +7626,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
| 7508 | loff_t actual_len, u64 *alloc_hint, | 7626 | loff_t actual_len, u64 *alloc_hint, |
| 7509 | struct btrfs_trans_handle *trans) | 7627 | struct btrfs_trans_handle *trans) |
| 7510 | { | 7628 | { |
| 7629 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 7630 | struct extent_map *em; | ||
| 7511 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7631 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 7512 | struct btrfs_key ins; | 7632 | struct btrfs_key ins; |
| 7513 | u64 cur_offset = start; | 7633 | u64 cur_offset = start; |
| @@ -7548,6 +7668,37 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
| 7548 | btrfs_drop_extent_cache(inode, cur_offset, | 7668 | btrfs_drop_extent_cache(inode, cur_offset, |
| 7549 | cur_offset + ins.offset -1, 0); | 7669 | cur_offset + ins.offset -1, 0); |
| 7550 | 7670 | ||
| 7671 | em = alloc_extent_map(); | ||
| 7672 | if (!em) { | ||
| 7673 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 7674 | &BTRFS_I(inode)->runtime_flags); | ||
| 7675 | goto next; | ||
| 7676 | } | ||
| 7677 | |||
| 7678 | em->start = cur_offset; | ||
| 7679 | em->orig_start = cur_offset; | ||
| 7680 | em->len = ins.offset; | ||
| 7681 | em->block_start = ins.objectid; | ||
| 7682 | em->block_len = ins.offset; | ||
| 7683 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 7684 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
| 7685 | em->generation = trans->transid; | ||
| 7686 | |||
| 7687 | while (1) { | ||
| 7688 | write_lock(&em_tree->lock); | ||
| 7689 | ret = add_extent_mapping(em_tree, em); | ||
| 7690 | if (!ret) | ||
| 7691 | list_move(&em->list, | ||
| 7692 | &em_tree->modified_extents); | ||
| 7693 | write_unlock(&em_tree->lock); | ||
| 7694 | if (ret != -EEXIST) | ||
| 7695 | break; | ||
| 7696 | btrfs_drop_extent_cache(inode, cur_offset, | ||
| 7697 | cur_offset + ins.offset - 1, | ||
| 7698 | 0); | ||
| 7699 | } | ||
| 7700 | free_extent_map(em); | ||
| 7701 | next: | ||
| 7551 | num_bytes -= ins.offset; | 7702 | num_bytes -= ins.offset; |
| 7552 | cur_offset += ins.offset; | 7703 | cur_offset += ins.offset; |
| 7553 | *alloc_hint = ins.objectid + ins.offset; | 7704 | *alloc_hint = ins.objectid + ins.offset; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9df50fa8a078..e568c472f807 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -181,6 +181,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
| 181 | int ret; | 181 | int ret; |
| 182 | u64 ip_oldflags; | 182 | u64 ip_oldflags; |
| 183 | unsigned int i_oldflags; | 183 | unsigned int i_oldflags; |
| 184 | umode_t mode; | ||
| 184 | 185 | ||
| 185 | if (btrfs_root_readonly(root)) | 186 | if (btrfs_root_readonly(root)) |
| 186 | return -EROFS; | 187 | return -EROFS; |
| @@ -203,6 +204,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
| 203 | 204 | ||
| 204 | ip_oldflags = ip->flags; | 205 | ip_oldflags = ip->flags; |
| 205 | i_oldflags = inode->i_flags; | 206 | i_oldflags = inode->i_flags; |
| 207 | mode = inode->i_mode; | ||
| 206 | 208 | ||
| 207 | flags = btrfs_mask_flags(inode->i_mode, flags); | 209 | flags = btrfs_mask_flags(inode->i_mode, flags); |
| 208 | oldflags = btrfs_flags_to_ioctl(ip->flags); | 210 | oldflags = btrfs_flags_to_ioctl(ip->flags); |
| @@ -237,10 +239,31 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
| 237 | ip->flags |= BTRFS_INODE_DIRSYNC; | 239 | ip->flags |= BTRFS_INODE_DIRSYNC; |
| 238 | else | 240 | else |
| 239 | ip->flags &= ~BTRFS_INODE_DIRSYNC; | 241 | ip->flags &= ~BTRFS_INODE_DIRSYNC; |
| 240 | if (flags & FS_NOCOW_FL) | 242 | if (flags & FS_NOCOW_FL) { |
| 241 | ip->flags |= BTRFS_INODE_NODATACOW; | 243 | if (S_ISREG(mode)) { |
| 242 | else | 244 | /* |
| 243 | ip->flags &= ~BTRFS_INODE_NODATACOW; | 245 | * It's safe to turn csums off here, no extents exist. |
| 246 | * Otherwise we want the flag to reflect the real COW | ||
| 247 | * status of the file and will not set it. | ||
| 248 | */ | ||
| 249 | if (inode->i_size == 0) | ||
| 250 | ip->flags |= BTRFS_INODE_NODATACOW | ||
| 251 | | BTRFS_INODE_NODATASUM; | ||
| 252 | } else { | ||
| 253 | ip->flags |= BTRFS_INODE_NODATACOW; | ||
| 254 | } | ||
| 255 | } else { | ||
| 256 | /* | ||
| 257 | * Revert back under same assumptions as above | ||
| 258 | */ | ||
| 259 | if (S_ISREG(mode)) { | ||
| 260 | if (inode->i_size == 0) | ||
| 261 | ip->flags &= ~(BTRFS_INODE_NODATACOW | ||
| 262 | | BTRFS_INODE_NODATASUM); | ||
| 263 | } else { | ||
| 264 | ip->flags &= ~BTRFS_INODE_NODATACOW; | ||
| 265 | } | ||
| 266 | } | ||
| 244 | 267 | ||
| 245 | /* | 268 | /* |
| 246 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS | 269 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS |
| @@ -516,7 +539,8 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 516 | if (!pending_snapshot) | 539 | if (!pending_snapshot) |
| 517 | return -ENOMEM; | 540 | return -ENOMEM; |
| 518 | 541 | ||
| 519 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 542 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
| 543 | BTRFS_BLOCK_RSV_TEMP); | ||
| 520 | pending_snapshot->dentry = dentry; | 544 | pending_snapshot->dentry = dentry; |
| 521 | pending_snapshot->root = root; | 545 | pending_snapshot->root = root; |
| 522 | pending_snapshot->readonly = readonly; | 546 | pending_snapshot->readonly = readonly; |
| @@ -525,7 +549,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 525 | *inherit = NULL; /* take responsibility to free it */ | 549 | *inherit = NULL; /* take responsibility to free it */ |
| 526 | } | 550 | } |
| 527 | 551 | ||
| 528 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 552 | trans = btrfs_start_transaction(root->fs_info->extent_root, 6); |
| 529 | if (IS_ERR(trans)) { | 553 | if (IS_ERR(trans)) { |
| 530 | ret = PTR_ERR(trans); | 554 | ret = PTR_ERR(trans); |
| 531 | goto fail; | 555 | goto fail; |
| @@ -575,13 +599,13 @@ fail: | |||
| 575 | */ | 599 | */ |
| 576 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) | 600 | static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) |
| 577 | { | 601 | { |
| 578 | uid_t fsuid = current_fsuid(); | 602 | kuid_t fsuid = current_fsuid(); |
| 579 | 603 | ||
| 580 | if (!(dir->i_mode & S_ISVTX)) | 604 | if (!(dir->i_mode & S_ISVTX)) |
| 581 | return 0; | 605 | return 0; |
| 582 | if (inode->i_uid == fsuid) | 606 | if (uid_eq(inode->i_uid, fsuid)) |
| 583 | return 0; | 607 | return 0; |
| 584 | if (dir->i_uid == fsuid) | 608 | if (uid_eq(dir->i_uid, fsuid)) |
| 585 | return 0; | 609 | return 0; |
| 586 | return !capable(CAP_FOWNER); | 610 | return !capable(CAP_FOWNER); |
| 587 | } | 611 | } |
| @@ -1022,8 +1046,8 @@ again: | |||
| 1022 | page_start, page_end - 1, 0, &cached_state); | 1046 | page_start, page_end - 1, 0, &cached_state); |
| 1023 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, | 1047 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, |
| 1024 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 1048 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
| 1025 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | 1049 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, |
| 1026 | GFP_NOFS); | 1050 | &cached_state, GFP_NOFS); |
| 1027 | 1051 | ||
| 1028 | if (i_done != page_cnt) { | 1052 | if (i_done != page_cnt) { |
| 1029 | spin_lock(&BTRFS_I(inode)->lock); | 1053 | spin_lock(&BTRFS_I(inode)->lock); |
| @@ -1034,8 +1058,8 @@ again: | |||
| 1034 | } | 1058 | } |
| 1035 | 1059 | ||
| 1036 | 1060 | ||
| 1037 | btrfs_set_extent_delalloc(inode, page_start, page_end - 1, | 1061 | set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, |
| 1038 | &cached_state); | 1062 | &cached_state, GFP_NOFS); |
| 1039 | 1063 | ||
| 1040 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1064 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
| 1041 | page_start, page_end - 1, &cached_state, | 1065 | page_start, page_end - 1, &cached_state, |
| @@ -1397,7 +1421,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
| 1397 | u64 *transid, bool readonly, | 1421 | u64 *transid, bool readonly, |
| 1398 | struct btrfs_qgroup_inherit **inherit) | 1422 | struct btrfs_qgroup_inherit **inherit) |
| 1399 | { | 1423 | { |
| 1400 | struct file *src_file; | ||
| 1401 | int namelen; | 1424 | int namelen; |
| 1402 | int ret = 0; | 1425 | int ret = 0; |
| 1403 | 1426 | ||
| @@ -1421,25 +1444,24 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
| 1421 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1444 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
| 1422 | NULL, transid, readonly, inherit); | 1445 | NULL, transid, readonly, inherit); |
| 1423 | } else { | 1446 | } else { |
| 1447 | struct fd src = fdget(fd); | ||
| 1424 | struct inode *src_inode; | 1448 | struct inode *src_inode; |
| 1425 | src_file = fget(fd); | 1449 | if (!src.file) { |
| 1426 | if (!src_file) { | ||
| 1427 | ret = -EINVAL; | 1450 | ret = -EINVAL; |
| 1428 | goto out_drop_write; | 1451 | goto out_drop_write; |
| 1429 | } | 1452 | } |
| 1430 | 1453 | ||
| 1431 | src_inode = src_file->f_path.dentry->d_inode; | 1454 | src_inode = src.file->f_path.dentry->d_inode; |
| 1432 | if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { | 1455 | if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { |
| 1433 | printk(KERN_INFO "btrfs: Snapshot src from " | 1456 | printk(KERN_INFO "btrfs: Snapshot src from " |
| 1434 | "another FS\n"); | 1457 | "another FS\n"); |
| 1435 | ret = -EINVAL; | 1458 | ret = -EINVAL; |
| 1436 | fput(src_file); | 1459 | } else { |
| 1437 | goto out_drop_write; | 1460 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
| 1461 | BTRFS_I(src_inode)->root, | ||
| 1462 | transid, readonly, inherit); | ||
| 1438 | } | 1463 | } |
| 1439 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 1464 | fdput(src); |
| 1440 | BTRFS_I(src_inode)->root, | ||
| 1441 | transid, readonly, inherit); | ||
| 1442 | fput(src_file); | ||
| 1443 | } | 1465 | } |
| 1444 | out_drop_write: | 1466 | out_drop_write: |
| 1445 | mnt_drop_write_file(file); | 1467 | mnt_drop_write_file(file); |
| @@ -2341,7 +2363,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2341 | { | 2363 | { |
| 2342 | struct inode *inode = fdentry(file)->d_inode; | 2364 | struct inode *inode = fdentry(file)->d_inode; |
| 2343 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2365 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2344 | struct file *src_file; | 2366 | struct fd src_file; |
| 2345 | struct inode *src; | 2367 | struct inode *src; |
| 2346 | struct btrfs_trans_handle *trans; | 2368 | struct btrfs_trans_handle *trans; |
| 2347 | struct btrfs_path *path; | 2369 | struct btrfs_path *path; |
| @@ -2353,7 +2375,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2353 | int ret; | 2375 | int ret; |
| 2354 | u64 len = olen; | 2376 | u64 len = olen; |
| 2355 | u64 bs = root->fs_info->sb->s_blocksize; | 2377 | u64 bs = root->fs_info->sb->s_blocksize; |
| 2356 | u64 hint_byte; | ||
| 2357 | 2378 | ||
| 2358 | /* | 2379 | /* |
| 2359 | * TODO: | 2380 | * TODO: |
| @@ -2376,24 +2397,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2376 | if (ret) | 2397 | if (ret) |
| 2377 | return ret; | 2398 | return ret; |
| 2378 | 2399 | ||
| 2379 | src_file = fget(srcfd); | 2400 | src_file = fdget(srcfd); |
| 2380 | if (!src_file) { | 2401 | if (!src_file.file) { |
| 2381 | ret = -EBADF; | 2402 | ret = -EBADF; |
| 2382 | goto out_drop_write; | 2403 | goto out_drop_write; |
| 2383 | } | 2404 | } |
| 2384 | 2405 | ||
| 2385 | ret = -EXDEV; | 2406 | ret = -EXDEV; |
| 2386 | if (src_file->f_path.mnt != file->f_path.mnt) | 2407 | if (src_file.file->f_path.mnt != file->f_path.mnt) |
| 2387 | goto out_fput; | 2408 | goto out_fput; |
| 2388 | 2409 | ||
| 2389 | src = src_file->f_dentry->d_inode; | 2410 | src = src_file.file->f_dentry->d_inode; |
| 2390 | 2411 | ||
| 2391 | ret = -EINVAL; | 2412 | ret = -EINVAL; |
| 2392 | if (src == inode) | 2413 | if (src == inode) |
| 2393 | goto out_fput; | 2414 | goto out_fput; |
| 2394 | 2415 | ||
| 2395 | /* the src must be open for reading */ | 2416 | /* the src must be open for reading */ |
| 2396 | if (!(src_file->f_mode & FMODE_READ)) | 2417 | if (!(src_file.file->f_mode & FMODE_READ)) |
| 2397 | goto out_fput; | 2418 | goto out_fput; |
| 2398 | 2419 | ||
| 2399 | /* don't make the dst file partly checksummed */ | 2420 | /* don't make the dst file partly checksummed */ |
| @@ -2458,13 +2479,13 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2458 | another, and lock file content */ | 2479 | another, and lock file content */ |
| 2459 | while (1) { | 2480 | while (1) { |
| 2460 | struct btrfs_ordered_extent *ordered; | 2481 | struct btrfs_ordered_extent *ordered; |
| 2461 | lock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2482 | lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
| 2462 | ordered = btrfs_lookup_first_ordered_extent(src, off+len); | 2483 | ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); |
| 2463 | if (!ordered && | 2484 | if (!ordered && |
| 2464 | !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, | 2485 | !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, |
| 2465 | EXTENT_DELALLOC, 0, NULL)) | 2486 | EXTENT_DELALLOC, 0, NULL)) |
| 2466 | break; | 2487 | break; |
| 2467 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2488 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
| 2468 | if (ordered) | 2489 | if (ordered) |
| 2469 | btrfs_put_ordered_extent(ordered); | 2490 | btrfs_put_ordered_extent(ordered); |
| 2470 | btrfs_wait_ordered_range(src, off, len); | 2491 | btrfs_wait_ordered_range(src, off, len); |
| @@ -2538,7 +2559,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2538 | btrfs_release_path(path); | 2559 | btrfs_release_path(path); |
| 2539 | 2560 | ||
| 2540 | if (key.offset + datal <= off || | 2561 | if (key.offset + datal <= off || |
| 2541 | key.offset >= off+len) | 2562 | key.offset >= off + len - 1) |
| 2542 | goto next; | 2563 | goto next; |
| 2543 | 2564 | ||
| 2544 | memcpy(&new_key, &key, sizeof(new_key)); | 2565 | memcpy(&new_key, &key, sizeof(new_key)); |
| @@ -2576,10 +2597,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2576 | datal -= off - key.offset; | 2597 | datal -= off - key.offset; |
| 2577 | } | 2598 | } |
| 2578 | 2599 | ||
| 2579 | ret = btrfs_drop_extents(trans, inode, | 2600 | ret = btrfs_drop_extents(trans, root, inode, |
| 2580 | new_key.offset, | 2601 | new_key.offset, |
| 2581 | new_key.offset + datal, | 2602 | new_key.offset + datal, |
| 2582 | &hint_byte, 1); | 2603 | 1); |
| 2583 | if (ret) { | 2604 | if (ret) { |
| 2584 | btrfs_abort_transaction(trans, root, | 2605 | btrfs_abort_transaction(trans, root, |
| 2585 | ret); | 2606 | ret); |
| @@ -2639,8 +2660,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2639 | new_key.offset += skip; | 2660 | new_key.offset += skip; |
| 2640 | } | 2661 | } |
| 2641 | 2662 | ||
| 2642 | if (key.offset + datal > off+len) | 2663 | if (key.offset + datal > off + len) |
| 2643 | trim = key.offset + datal - (off+len); | 2664 | trim = key.offset + datal - (off + len); |
| 2644 | 2665 | ||
| 2645 | if (comp && (skip || trim)) { | 2666 | if (comp && (skip || trim)) { |
| 2646 | ret = -EINVAL; | 2667 | ret = -EINVAL; |
| @@ -2650,10 +2671,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2650 | size -= skip + trim; | 2671 | size -= skip + trim; |
| 2651 | datal -= skip + trim; | 2672 | datal -= skip + trim; |
| 2652 | 2673 | ||
| 2653 | ret = btrfs_drop_extents(trans, inode, | 2674 | ret = btrfs_drop_extents(trans, root, inode, |
| 2654 | new_key.offset, | 2675 | new_key.offset, |
| 2655 | new_key.offset + datal, | 2676 | new_key.offset + datal, |
| 2656 | &hint_byte, 1); | 2677 | 1); |
| 2657 | if (ret) { | 2678 | if (ret) { |
| 2658 | btrfs_abort_transaction(trans, root, | 2679 | btrfs_abort_transaction(trans, root, |
| 2659 | ret); | 2680 | ret); |
| @@ -2717,14 +2738,14 @@ next: | |||
| 2717 | ret = 0; | 2738 | ret = 0; |
| 2718 | out: | 2739 | out: |
| 2719 | btrfs_release_path(path); | 2740 | btrfs_release_path(path); |
| 2720 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2741 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
| 2721 | out_unlock: | 2742 | out_unlock: |
| 2722 | mutex_unlock(&src->i_mutex); | 2743 | mutex_unlock(&src->i_mutex); |
| 2723 | mutex_unlock(&inode->i_mutex); | 2744 | mutex_unlock(&inode->i_mutex); |
| 2724 | vfree(buf); | 2745 | vfree(buf); |
| 2725 | btrfs_free_path(path); | 2746 | btrfs_free_path(path); |
| 2726 | out_fput: | 2747 | out_fput: |
| 2727 | fput(src_file); | 2748 | fdput(src_file); |
| 2728 | out_drop_write: | 2749 | out_drop_write: |
| 2729 | mnt_drop_write_file(file); | 2750 | mnt_drop_write_file(file); |
| 2730 | return ret; | 2751 | return ret; |
| @@ -2852,8 +2873,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2852 | return 0; | 2873 | return 0; |
| 2853 | } | 2874 | } |
| 2854 | 2875 | ||
| 2855 | static void get_block_group_info(struct list_head *groups_list, | 2876 | void btrfs_get_block_group_info(struct list_head *groups_list, |
| 2856 | struct btrfs_ioctl_space_info *space) | 2877 | struct btrfs_ioctl_space_info *space) |
| 2857 | { | 2878 | { |
| 2858 | struct btrfs_block_group_cache *block_group; | 2879 | struct btrfs_block_group_cache *block_group; |
| 2859 | 2880 | ||
| @@ -2961,8 +2982,8 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
| 2961 | down_read(&info->groups_sem); | 2982 | down_read(&info->groups_sem); |
| 2962 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | 2983 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { |
| 2963 | if (!list_empty(&info->block_groups[c])) { | 2984 | if (!list_empty(&info->block_groups[c])) { |
| 2964 | get_block_group_info(&info->block_groups[c], | 2985 | btrfs_get_block_group_info( |
| 2965 | &space); | 2986 | &info->block_groups[c], &space); |
| 2966 | memcpy(dest, &space, sizeof(space)); | 2987 | memcpy(dest, &space, sizeof(space)); |
| 2967 | dest++; | 2988 | dest++; |
| 2968 | space_args.total_spaces++; | 2989 | space_args.total_spaces++; |
| @@ -3210,11 +3231,9 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
| 3210 | { | 3231 | { |
| 3211 | int ret = 0; | 3232 | int ret = 0; |
| 3212 | int size; | 3233 | int size; |
| 3213 | u64 extent_item_pos; | ||
| 3214 | struct btrfs_ioctl_logical_ino_args *loi; | 3234 | struct btrfs_ioctl_logical_ino_args *loi; |
| 3215 | struct btrfs_data_container *inodes = NULL; | 3235 | struct btrfs_data_container *inodes = NULL; |
| 3216 | struct btrfs_path *path = NULL; | 3236 | struct btrfs_path *path = NULL; |
| 3217 | struct btrfs_key key; | ||
| 3218 | 3237 | ||
| 3219 | if (!capable(CAP_SYS_ADMIN)) | 3238 | if (!capable(CAP_SYS_ADMIN)) |
| 3220 | return -EPERM; | 3239 | return -EPERM; |
| @@ -3232,7 +3251,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
| 3232 | goto out; | 3251 | goto out; |
| 3233 | } | 3252 | } |
| 3234 | 3253 | ||
| 3235 | size = min_t(u32, loi->size, 4096); | 3254 | size = min_t(u32, loi->size, 64 * 1024); |
| 3236 | inodes = init_data_container(size); | 3255 | inodes = init_data_container(size); |
| 3237 | if (IS_ERR(inodes)) { | 3256 | if (IS_ERR(inodes)) { |
| 3238 | ret = PTR_ERR(inodes); | 3257 | ret = PTR_ERR(inodes); |
| @@ -3240,22 +3259,13 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
| 3240 | goto out; | 3259 | goto out; |
| 3241 | } | 3260 | } |
| 3242 | 3261 | ||
| 3243 | ret = extent_from_logical(root->fs_info, loi->logical, path, &key); | 3262 | ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path, |
| 3244 | btrfs_release_path(path); | 3263 | build_ino_list, inodes); |
| 3245 | 3264 | if (ret == -EINVAL) | |
| 3246 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | ||
| 3247 | ret = -ENOENT; | 3265 | ret = -ENOENT; |
| 3248 | if (ret < 0) | 3266 | if (ret < 0) |
| 3249 | goto out; | 3267 | goto out; |
| 3250 | 3268 | ||
| 3251 | extent_item_pos = loi->logical - key.objectid; | ||
| 3252 | ret = iterate_extent_inodes(root->fs_info, key.objectid, | ||
| 3253 | extent_item_pos, 0, build_ino_list, | ||
| 3254 | inodes); | ||
| 3255 | |||
| 3256 | if (ret < 0) | ||
| 3257 | goto out; | ||
| 3258 | |||
| 3259 | ret = copy_to_user((void *)(unsigned long)loi->inodes, | 3269 | ret = copy_to_user((void *)(unsigned long)loi->inodes, |
| 3260 | (void *)(unsigned long)inodes, size); | 3270 | (void *)(unsigned long)inodes, size); |
| 3261 | if (ret) | 3271 | if (ret) |
| @@ -3263,7 +3273,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
| 3263 | 3273 | ||
| 3264 | out: | 3274 | out: |
| 3265 | btrfs_free_path(path); | 3275 | btrfs_free_path(path); |
| 3266 | kfree(inodes); | 3276 | vfree(inodes); |
| 3267 | kfree(loi); | 3277 | kfree(loi); |
| 3268 | 3278 | ||
| 3269 | return ret; | 3279 | return ret; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 051c7fe551dd..7772f02ba28e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | #include "btrfs_inode.h" | 25 | #include "btrfs_inode.h" |
| 26 | #include "extent_io.h" | 26 | #include "extent_io.h" |
| 27 | 27 | ||
| 28 | static struct kmem_cache *btrfs_ordered_extent_cache; | ||
| 29 | |||
| 28 | static u64 entry_end(struct btrfs_ordered_extent *entry) | 30 | static u64 entry_end(struct btrfs_ordered_extent *entry) |
| 29 | { | 31 | { |
| 30 | if (entry->file_offset + entry->len < entry->file_offset) | 32 | if (entry->file_offset + entry->len < entry->file_offset) |
| @@ -187,7 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 187 | struct btrfs_ordered_extent *entry; | 189 | struct btrfs_ordered_extent *entry; |
| 188 | 190 | ||
| 189 | tree = &BTRFS_I(inode)->ordered_tree; | 191 | tree = &BTRFS_I(inode)->ordered_tree; |
| 190 | entry = kzalloc(sizeof(*entry), GFP_NOFS); | 192 | entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS); |
| 191 | if (!entry) | 193 | if (!entry) |
| 192 | return -ENOMEM; | 194 | return -ENOMEM; |
| 193 | 195 | ||
| @@ -421,7 +423,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
| 421 | list_del(&sum->list); | 423 | list_del(&sum->list); |
| 422 | kfree(sum); | 424 | kfree(sum); |
| 423 | } | 425 | } |
| 424 | kfree(entry); | 426 | kmem_cache_free(btrfs_ordered_extent_cache, entry); |
| 425 | } | 427 | } |
| 426 | } | 428 | } |
| 427 | 429 | ||
| @@ -466,8 +468,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
| 466 | * wait for all the ordered extents in a root. This is done when balancing | 468 | * wait for all the ordered extents in a root. This is done when balancing |
| 467 | * space between drives. | 469 | * space between drives. |
| 468 | */ | 470 | */ |
| 469 | void btrfs_wait_ordered_extents(struct btrfs_root *root, | 471 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) |
| 470 | int nocow_only, int delay_iput) | ||
| 471 | { | 472 | { |
| 472 | struct list_head splice; | 473 | struct list_head splice; |
| 473 | struct list_head *cur; | 474 | struct list_head *cur; |
| @@ -482,15 +483,6 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, | |||
| 482 | cur = splice.next; | 483 | cur = splice.next; |
| 483 | ordered = list_entry(cur, struct btrfs_ordered_extent, | 484 | ordered = list_entry(cur, struct btrfs_ordered_extent, |
| 484 | root_extent_list); | 485 | root_extent_list); |
| 485 | if (nocow_only && | ||
| 486 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && | ||
| 487 | !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
| 488 | list_move(&ordered->root_extent_list, | ||
| 489 | &root->fs_info->ordered_extents); | ||
| 490 | cond_resched_lock(&root->fs_info->ordered_extent_lock); | ||
| 491 | continue; | ||
| 492 | } | ||
| 493 | |||
| 494 | list_del_init(&ordered->root_extent_list); | 486 | list_del_init(&ordered->root_extent_list); |
| 495 | atomic_inc(&ordered->refs); | 487 | atomic_inc(&ordered->refs); |
| 496 | 488 | ||
| @@ -775,7 +767,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 775 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 767 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
| 776 | u64 disk_i_size; | 768 | u64 disk_i_size; |
| 777 | u64 new_i_size; | 769 | u64 new_i_size; |
| 778 | u64 i_size_test; | ||
| 779 | u64 i_size = i_size_read(inode); | 770 | u64 i_size = i_size_read(inode); |
| 780 | struct rb_node *node; | 771 | struct rb_node *node; |
| 781 | struct rb_node *prev = NULL; | 772 | struct rb_node *prev = NULL; |
| @@ -835,55 +826,30 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 835 | break; | 826 | break; |
| 836 | if (test->file_offset >= i_size) | 827 | if (test->file_offset >= i_size) |
| 837 | break; | 828 | break; |
| 838 | if (test->file_offset >= disk_i_size) | 829 | if (test->file_offset >= disk_i_size) { |
| 830 | /* | ||
| 831 | * we don't update disk_i_size now, so record this | ||
| 832 | * undealt i_size. Or we will not know the real | ||
| 833 | * i_size. | ||
| 834 | */ | ||
| 835 | if (test->outstanding_isize < offset) | ||
| 836 | test->outstanding_isize = offset; | ||
| 837 | if (ordered && | ||
| 838 | ordered->outstanding_isize > | ||
| 839 | test->outstanding_isize) | ||
| 840 | test->outstanding_isize = | ||
| 841 | ordered->outstanding_isize; | ||
| 839 | goto out; | 842 | goto out; |
| 840 | } | ||
| 841 | new_i_size = min_t(u64, offset, i_size); | ||
| 842 | |||
| 843 | /* | ||
| 844 | * at this point, we know we can safely update i_size to at least | ||
| 845 | * the offset from this ordered extent. But, we need to | ||
| 846 | * walk forward and see if ios from higher up in the file have | ||
| 847 | * finished. | ||
| 848 | */ | ||
| 849 | if (ordered) { | ||
| 850 | node = rb_next(&ordered->rb_node); | ||
| 851 | } else { | ||
| 852 | if (prev) | ||
| 853 | node = rb_next(prev); | ||
| 854 | else | ||
| 855 | node = rb_first(&tree->tree); | ||
| 856 | } | ||
| 857 | |||
| 858 | /* | ||
| 859 | * We are looking for an area between our current extent and the next | ||
| 860 | * ordered extent to update the i_size to. There are 3 cases here | ||
| 861 | * | ||
| 862 | * 1) We don't actually have anything and we can update to i_size. | ||
| 863 | * 2) We have stuff but they already did their i_size update so again we | ||
| 864 | * can just update to i_size. | ||
| 865 | * 3) We have an outstanding ordered extent so the most we can update | ||
| 866 | * our disk_i_size to is the start of the next offset. | ||
| 867 | */ | ||
| 868 | i_size_test = i_size; | ||
| 869 | for (; node; node = rb_next(node)) { | ||
| 870 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 871 | |||
| 872 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
| 873 | continue; | ||
| 874 | if (test->file_offset > offset) { | ||
| 875 | i_size_test = test->file_offset; | ||
| 876 | break; | ||
| 877 | } | 843 | } |
| 878 | } | 844 | } |
| 845 | new_i_size = min_t(u64, offset, i_size); | ||
| 879 | 846 | ||
| 880 | /* | 847 | /* |
| 881 | * i_size_test is the end of a region after this ordered | 848 | * Some ordered extents may have completed before the current one, and |
| 882 | * extent where there are no ordered extents, we can safely set | 849 | * we hold the real i_size in ->outstanding_isize. |
| 883 | * disk_i_size to this. | ||
| 884 | */ | 850 | */ |
| 885 | if (i_size_test > offset) | 851 | if (ordered && ordered->outstanding_isize > new_i_size) |
| 886 | new_i_size = min_t(u64, i_size_test, i_size); | 852 | new_i_size = min_t(u64, ordered->outstanding_isize, i_size); |
| 887 | BTRFS_I(inode)->disk_i_size = new_i_size; | 853 | BTRFS_I(inode)->disk_i_size = new_i_size; |
| 888 | ret = 0; | 854 | ret = 0; |
| 889 | out: | 855 | out: |
| @@ -984,3 +950,20 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 984 | } | 950 | } |
| 985 | spin_unlock(&root->fs_info->ordered_extent_lock); | 951 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 986 | } | 952 | } |
| 953 | |||
| 954 | int __init ordered_data_init(void) | ||
| 955 | { | ||
| 956 | btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", | ||
| 957 | sizeof(struct btrfs_ordered_extent), 0, | ||
| 958 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
| 959 | NULL); | ||
| 960 | if (!btrfs_ordered_extent_cache) | ||
| 961 | return -ENOMEM; | ||
| 962 | return 0; | ||
| 963 | } | ||
| 964 | |||
| 965 | void ordered_data_exit(void) | ||
| 966 | { | ||
| 967 | if (btrfs_ordered_extent_cache) | ||
| 968 | kmem_cache_destroy(btrfs_ordered_extent_cache); | ||
| 969 | } | ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index e03c560d2997..dd27a0b46a37 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -96,6 +96,13 @@ struct btrfs_ordered_extent { | |||
| 96 | /* number of bytes that still need writing */ | 96 | /* number of bytes that still need writing */ |
| 97 | u64 bytes_left; | 97 | u64 bytes_left; |
| 98 | 98 | ||
| 99 | /* | ||
| 100 | * the end of the ordered extent which is behind it but | ||
| 101 | * didn't update disk_i_size. Please see the comment of | ||
| 102 | * btrfs_ordered_update_i_size(); | ||
| 103 | */ | ||
| 104 | u64 outstanding_isize; | ||
| 105 | |||
| 99 | /* flags (described above) */ | 106 | /* flags (described above) */ |
| 100 | unsigned long flags; | 107 | unsigned long flags; |
| 101 | 108 | ||
| @@ -183,6 +190,7 @@ void btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | |||
| 183 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 190 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
| 184 | struct btrfs_root *root, | 191 | struct btrfs_root *root, |
| 185 | struct inode *inode); | 192 | struct inode *inode); |
| 186 | void btrfs_wait_ordered_extents(struct btrfs_root *root, | 193 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); |
| 187 | int nocow_only, int delay_iput); | 194 | int __init ordered_data_init(void); |
| 195 | void ordered_data_exit(void); | ||
| 188 | #endif | 196 | #endif |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b65015581744..5039686df6ae 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -1145,12 +1145,12 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1145 | 1145 | ||
| 1146 | ulist_reinit(tmp); | 1146 | ulist_reinit(tmp); |
| 1147 | /* XXX id not needed */ | 1147 | /* XXX id not needed */ |
| 1148 | ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); | 1148 | ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC); |
| 1149 | ULIST_ITER_INIT(&tmp_uiter); | 1149 | ULIST_ITER_INIT(&tmp_uiter); |
| 1150 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1150 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { |
| 1151 | struct btrfs_qgroup_list *glist; | 1151 | struct btrfs_qgroup_list *glist; |
| 1152 | 1152 | ||
| 1153 | qg = (struct btrfs_qgroup *)tmp_unode->aux; | 1153 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; |
| 1154 | if (qg->refcnt < seq) | 1154 | if (qg->refcnt < seq) |
| 1155 | qg->refcnt = seq + 1; | 1155 | qg->refcnt = seq + 1; |
| 1156 | else | 1156 | else |
| @@ -1158,7 +1158,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1158 | 1158 | ||
| 1159 | list_for_each_entry(glist, &qg->groups, next_group) { | 1159 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1160 | ulist_add(tmp, glist->group->qgroupid, | 1160 | ulist_add(tmp, glist->group->qgroupid, |
| 1161 | (unsigned long)glist->group, | 1161 | (u64)(uintptr_t)glist->group, |
| 1162 | GFP_ATOMIC); | 1162 | GFP_ATOMIC); |
| 1163 | } | 1163 | } |
| 1164 | } | 1164 | } |
| @@ -1168,13 +1168,13 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1168 | * step 2: walk from the new root | 1168 | * step 2: walk from the new root |
| 1169 | */ | 1169 | */ |
| 1170 | ulist_reinit(tmp); | 1170 | ulist_reinit(tmp); |
| 1171 | ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1171 | ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); |
| 1172 | ULIST_ITER_INIT(&uiter); | 1172 | ULIST_ITER_INIT(&uiter); |
| 1173 | while ((unode = ulist_next(tmp, &uiter))) { | 1173 | while ((unode = ulist_next(tmp, &uiter))) { |
| 1174 | struct btrfs_qgroup *qg; | 1174 | struct btrfs_qgroup *qg; |
| 1175 | struct btrfs_qgroup_list *glist; | 1175 | struct btrfs_qgroup_list *glist; |
| 1176 | 1176 | ||
| 1177 | qg = (struct btrfs_qgroup *)unode->aux; | 1177 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| 1178 | if (qg->refcnt < seq) { | 1178 | if (qg->refcnt < seq) { |
| 1179 | /* not visited by step 1 */ | 1179 | /* not visited by step 1 */ |
| 1180 | qg->rfer += sgn * node->num_bytes; | 1180 | qg->rfer += sgn * node->num_bytes; |
| @@ -1190,7 +1190,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1190 | 1190 | ||
| 1191 | list_for_each_entry(glist, &qg->groups, next_group) { | 1191 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1192 | ulist_add(tmp, glist->group->qgroupid, | 1192 | ulist_add(tmp, glist->group->qgroupid, |
| 1193 | (unsigned long)glist->group, GFP_ATOMIC); | 1193 | (uintptr_t)glist->group, GFP_ATOMIC); |
| 1194 | } | 1194 | } |
| 1195 | } | 1195 | } |
| 1196 | 1196 | ||
| @@ -1208,12 +1208,12 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1208 | continue; | 1208 | continue; |
| 1209 | 1209 | ||
| 1210 | ulist_reinit(tmp); | 1210 | ulist_reinit(tmp); |
| 1211 | ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); | 1211 | ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); |
| 1212 | ULIST_ITER_INIT(&tmp_uiter); | 1212 | ULIST_ITER_INIT(&tmp_uiter); |
| 1213 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1213 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { |
| 1214 | struct btrfs_qgroup_list *glist; | 1214 | struct btrfs_qgroup_list *glist; |
| 1215 | 1215 | ||
| 1216 | qg = (struct btrfs_qgroup *)tmp_unode->aux; | 1216 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; |
| 1217 | if (qg->tag == seq) | 1217 | if (qg->tag == seq) |
| 1218 | continue; | 1218 | continue; |
| 1219 | 1219 | ||
| @@ -1225,7 +1225,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1225 | 1225 | ||
| 1226 | list_for_each_entry(glist, &qg->groups, next_group) { | 1226 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1227 | ulist_add(tmp, glist->group->qgroupid, | 1227 | ulist_add(tmp, glist->group->qgroupid, |
| 1228 | (unsigned long)glist->group, | 1228 | (uintptr_t)glist->group, |
| 1229 | GFP_ATOMIC); | 1229 | GFP_ATOMIC); |
| 1230 | } | 1230 | } |
| 1231 | } | 1231 | } |
| @@ -1469,13 +1469,17 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1469 | * be exceeded | 1469 | * be exceeded |
| 1470 | */ | 1470 | */ |
| 1471 | ulist = ulist_alloc(GFP_ATOMIC); | 1471 | ulist = ulist_alloc(GFP_ATOMIC); |
| 1472 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1472 | if (!ulist) { |
| 1473 | ret = -ENOMEM; | ||
| 1474 | goto out; | ||
| 1475 | } | ||
| 1476 | ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); | ||
| 1473 | ULIST_ITER_INIT(&uiter); | 1477 | ULIST_ITER_INIT(&uiter); |
| 1474 | while ((unode = ulist_next(ulist, &uiter))) { | 1478 | while ((unode = ulist_next(ulist, &uiter))) { |
| 1475 | struct btrfs_qgroup *qg; | 1479 | struct btrfs_qgroup *qg; |
| 1476 | struct btrfs_qgroup_list *glist; | 1480 | struct btrfs_qgroup_list *glist; |
| 1477 | 1481 | ||
| 1478 | qg = (struct btrfs_qgroup *)unode->aux; | 1482 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| 1479 | 1483 | ||
| 1480 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && | 1484 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && |
| 1481 | qg->reserved + qg->rfer + num_bytes > | 1485 | qg->reserved + qg->rfer + num_bytes > |
| @@ -1489,7 +1493,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1489 | 1493 | ||
| 1490 | list_for_each_entry(glist, &qg->groups, next_group) { | 1494 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1491 | ulist_add(ulist, glist->group->qgroupid, | 1495 | ulist_add(ulist, glist->group->qgroupid, |
| 1492 | (unsigned long)glist->group, GFP_ATOMIC); | 1496 | (uintptr_t)glist->group, GFP_ATOMIC); |
| 1493 | } | 1497 | } |
| 1494 | } | 1498 | } |
| 1495 | if (ret) | 1499 | if (ret) |
| @@ -1502,7 +1506,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1502 | while ((unode = ulist_next(ulist, &uiter))) { | 1506 | while ((unode = ulist_next(ulist, &uiter))) { |
| 1503 | struct btrfs_qgroup *qg; | 1507 | struct btrfs_qgroup *qg; |
| 1504 | 1508 | ||
| 1505 | qg = (struct btrfs_qgroup *)unode->aux; | 1509 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| 1506 | 1510 | ||
| 1507 | qg->reserved += num_bytes; | 1511 | qg->reserved += num_bytes; |
| 1508 | } | 1512 | } |
| @@ -1541,19 +1545,23 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
| 1541 | goto out; | 1545 | goto out; |
| 1542 | 1546 | ||
| 1543 | ulist = ulist_alloc(GFP_ATOMIC); | 1547 | ulist = ulist_alloc(GFP_ATOMIC); |
| 1544 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1548 | if (!ulist) { |
| 1549 | btrfs_std_error(fs_info, -ENOMEM); | ||
| 1550 | goto out; | ||
| 1551 | } | ||
| 1552 | ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); | ||
| 1545 | ULIST_ITER_INIT(&uiter); | 1553 | ULIST_ITER_INIT(&uiter); |
| 1546 | while ((unode = ulist_next(ulist, &uiter))) { | 1554 | while ((unode = ulist_next(ulist, &uiter))) { |
| 1547 | struct btrfs_qgroup *qg; | 1555 | struct btrfs_qgroup *qg; |
| 1548 | struct btrfs_qgroup_list *glist; | 1556 | struct btrfs_qgroup_list *glist; |
| 1549 | 1557 | ||
| 1550 | qg = (struct btrfs_qgroup *)unode->aux; | 1558 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| 1551 | 1559 | ||
| 1552 | qg->reserved -= num_bytes; | 1560 | qg->reserved -= num_bytes; |
| 1553 | 1561 | ||
| 1554 | list_for_each_entry(glist, &qg->groups, next_group) { | 1562 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1555 | ulist_add(ulist, glist->group->qgroupid, | 1563 | ulist_add(ulist, glist->group->qgroupid, |
| 1556 | (unsigned long)glist->group, GFP_ATOMIC); | 1564 | (uintptr_t)glist->group, GFP_ATOMIC); |
| 1557 | } | 1565 | } |
| 1558 | } | 1566 | } |
| 1559 | 1567 | ||
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 48a4882d8ad5..a955669519a2 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -68,7 +68,7 @@ struct reada_extent { | |||
| 68 | u32 blocksize; | 68 | u32 blocksize; |
| 69 | int err; | 69 | int err; |
| 70 | struct list_head extctl; | 70 | struct list_head extctl; |
| 71 | struct kref refcnt; | 71 | int refcnt; |
| 72 | spinlock_t lock; | 72 | spinlock_t lock; |
| 73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; | 73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; |
| 74 | int nzones; | 74 | int nzones; |
| @@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | |||
| 126 | spin_lock(&fs_info->reada_lock); | 126 | spin_lock(&fs_info->reada_lock); |
| 127 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 127 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
| 128 | if (re) | 128 | if (re) |
| 129 | kref_get(&re->refcnt); | 129 | re->refcnt++; |
| 130 | spin_unlock(&fs_info->reada_lock); | 130 | spin_unlock(&fs_info->reada_lock); |
| 131 | 131 | ||
| 132 | if (!re) | 132 | if (!re) |
| @@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 336 | spin_lock(&fs_info->reada_lock); | 336 | spin_lock(&fs_info->reada_lock); |
| 337 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 337 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
| 338 | if (re) | 338 | if (re) |
| 339 | kref_get(&re->refcnt); | 339 | re->refcnt++; |
| 340 | spin_unlock(&fs_info->reada_lock); | 340 | spin_unlock(&fs_info->reada_lock); |
| 341 | 341 | ||
| 342 | if (re) | 342 | if (re) |
| @@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 352 | re->top = *top; | 352 | re->top = *top; |
| 353 | INIT_LIST_HEAD(&re->extctl); | 353 | INIT_LIST_HEAD(&re->extctl); |
| 354 | spin_lock_init(&re->lock); | 354 | spin_lock_init(&re->lock); |
| 355 | kref_init(&re->refcnt); | 355 | re->refcnt = 1; |
| 356 | 356 | ||
| 357 | /* | 357 | /* |
| 358 | * map block | 358 | * map block |
| @@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 398 | if (ret == -EEXIST) { | 398 | if (ret == -EEXIST) { |
| 399 | re_exist = radix_tree_lookup(&fs_info->reada_tree, index); | 399 | re_exist = radix_tree_lookup(&fs_info->reada_tree, index); |
| 400 | BUG_ON(!re_exist); | 400 | BUG_ON(!re_exist); |
| 401 | kref_get(&re_exist->refcnt); | 401 | re_exist->refcnt++; |
| 402 | spin_unlock(&fs_info->reada_lock); | 402 | spin_unlock(&fs_info->reada_lock); |
| 403 | goto error; | 403 | goto error; |
| 404 | } | 404 | } |
| @@ -465,10 +465,6 @@ error: | |||
| 465 | return re_exist; | 465 | return re_exist; |
| 466 | } | 466 | } |
| 467 | 467 | ||
| 468 | static void reada_kref_dummy(struct kref *kr) | ||
| 469 | { | ||
| 470 | } | ||
| 471 | |||
| 472 | static void reada_extent_put(struct btrfs_fs_info *fs_info, | 468 | static void reada_extent_put(struct btrfs_fs_info *fs_info, |
| 473 | struct reada_extent *re) | 469 | struct reada_extent *re) |
| 474 | { | 470 | { |
| @@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, | |||
| 476 | unsigned long index = re->logical >> PAGE_CACHE_SHIFT; | 472 | unsigned long index = re->logical >> PAGE_CACHE_SHIFT; |
| 477 | 473 | ||
| 478 | spin_lock(&fs_info->reada_lock); | 474 | spin_lock(&fs_info->reada_lock); |
| 479 | if (!kref_put(&re->refcnt, reada_kref_dummy)) { | 475 | if (--re->refcnt) { |
| 480 | spin_unlock(&fs_info->reada_lock); | 476 | spin_unlock(&fs_info->reada_lock); |
| 481 | return; | 477 | return; |
| 482 | } | 478 | } |
| @@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
| 671 | return 0; | 667 | return 0; |
| 672 | } | 668 | } |
| 673 | dev->reada_next = re->logical + re->blocksize; | 669 | dev->reada_next = re->logical + re->blocksize; |
| 674 | kref_get(&re->refcnt); | 670 | re->refcnt++; |
| 675 | 671 | ||
| 676 | spin_unlock(&fs_info->reada_lock); | 672 | spin_unlock(&fs_info->reada_lock); |
| 677 | 673 | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4da08652004d..776f0aa128fc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -3270,8 +3270,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, | |||
| 3270 | key.offset = 0; | 3270 | key.offset = 0; |
| 3271 | 3271 | ||
| 3272 | inode = btrfs_iget(fs_info->sb, &key, root, NULL); | 3272 | inode = btrfs_iget(fs_info->sb, &key, root, NULL); |
| 3273 | if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) { | 3273 | if (IS_ERR(inode) || is_bad_inode(inode)) { |
| 3274 | if (inode && !IS_ERR(inode)) | 3274 | if (!IS_ERR(inode)) |
| 3275 | iput(inode); | 3275 | iput(inode); |
| 3276 | return -ENOENT; | 3276 | return -ENOENT; |
| 3277 | } | 3277 | } |
| @@ -3621,7 +3621,7 @@ next: | |||
| 3621 | 3621 | ||
| 3622 | ret = find_first_extent_bit(&rc->processed_blocks, | 3622 | ret = find_first_extent_bit(&rc->processed_blocks, |
| 3623 | key.objectid, &start, &end, | 3623 | key.objectid, &start, &end, |
| 3624 | EXTENT_DIRTY); | 3624 | EXTENT_DIRTY, NULL); |
| 3625 | 3625 | ||
| 3626 | if (ret == 0 && start <= key.objectid) { | 3626 | if (ret == 0 && start <= key.objectid) { |
| 3627 | btrfs_release_path(path); | 3627 | btrfs_release_path(path); |
| @@ -3674,7 +3674,8 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
| 3674 | struct btrfs_trans_handle *trans; | 3674 | struct btrfs_trans_handle *trans; |
| 3675 | int ret; | 3675 | int ret; |
| 3676 | 3676 | ||
| 3677 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | 3677 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, |
| 3678 | BTRFS_BLOCK_RSV_TEMP); | ||
| 3678 | if (!rc->block_rsv) | 3679 | if (!rc->block_rsv) |
| 3679 | return -ENOMEM; | 3680 | return -ENOMEM; |
| 3680 | 3681 | ||
| @@ -4057,7 +4058,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 4057 | (unsigned long long)rc->block_group->flags); | 4058 | (unsigned long long)rc->block_group->flags); |
| 4058 | 4059 | ||
| 4059 | btrfs_start_delalloc_inodes(fs_info->tree_root, 0); | 4060 | btrfs_start_delalloc_inodes(fs_info->tree_root, 0); |
| 4060 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 4061 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); |
| 4061 | 4062 | ||
| 4062 | while (1) { | 4063 | while (1) { |
| 4063 | mutex_lock(&fs_info->cleaner_mutex); | 4064 | mutex_lock(&fs_info->cleaner_mutex); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 10d8e4d88071..eb923d087da7 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -141,8 +141,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 141 | return -ENOMEM; | 141 | return -ENOMEM; |
| 142 | 142 | ||
| 143 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 143 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
| 144 | if (ret < 0) | 144 | if (ret < 0) { |
| 145 | goto out_abort; | 145 | btrfs_abort_transaction(trans, root, ret); |
| 146 | goto out; | ||
| 147 | } | ||
| 146 | 148 | ||
| 147 | if (ret != 0) { | 149 | if (ret != 0) { |
| 148 | btrfs_print_leaf(root, path->nodes[0]); | 150 | btrfs_print_leaf(root, path->nodes[0]); |
| @@ -166,16 +168,23 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 166 | btrfs_release_path(path); | 168 | btrfs_release_path(path); |
| 167 | ret = btrfs_search_slot(trans, root, key, path, | 169 | ret = btrfs_search_slot(trans, root, key, path, |
| 168 | -1, 1); | 170 | -1, 1); |
| 169 | if (ret < 0) | 171 | if (ret < 0) { |
| 170 | goto out_abort; | 172 | btrfs_abort_transaction(trans, root, ret); |
| 173 | goto out; | ||
| 174 | } | ||
| 175 | |||
| 171 | ret = btrfs_del_item(trans, root, path); | 176 | ret = btrfs_del_item(trans, root, path); |
| 172 | if (ret < 0) | 177 | if (ret < 0) { |
| 173 | goto out_abort; | 178 | btrfs_abort_transaction(trans, root, ret); |
| 179 | goto out; | ||
| 180 | } | ||
| 174 | btrfs_release_path(path); | 181 | btrfs_release_path(path); |
| 175 | ret = btrfs_insert_empty_item(trans, root, path, | 182 | ret = btrfs_insert_empty_item(trans, root, path, |
| 176 | key, sizeof(*item)); | 183 | key, sizeof(*item)); |
| 177 | if (ret < 0) | 184 | if (ret < 0) { |
| 178 | goto out_abort; | 185 | btrfs_abort_transaction(trans, root, ret); |
| 186 | goto out; | ||
| 187 | } | ||
| 179 | l = path->nodes[0]; | 188 | l = path->nodes[0]; |
| 180 | slot = path->slots[0]; | 189 | slot = path->slots[0]; |
| 181 | ptr = btrfs_item_ptr_offset(l, slot); | 190 | ptr = btrfs_item_ptr_offset(l, slot); |
| @@ -192,10 +201,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 192 | out: | 201 | out: |
| 193 | btrfs_free_path(path); | 202 | btrfs_free_path(path); |
| 194 | return ret; | 203 | return ret; |
| 195 | |||
| 196 | out_abort: | ||
| 197 | btrfs_abort_transaction(trans, root, ret); | ||
| 198 | goto out; | ||
| 199 | } | 204 | } |
| 200 | 205 | ||
| 201 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 206 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b223620cd5a6..27892f67e69b 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -352,13 +352,14 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 352 | struct extent_buffer *eb; | 352 | struct extent_buffer *eb; |
| 353 | struct btrfs_extent_item *ei; | 353 | struct btrfs_extent_item *ei; |
| 354 | struct scrub_warning swarn; | 354 | struct scrub_warning swarn; |
| 355 | u32 item_size; | 355 | unsigned long ptr = 0; |
| 356 | int ret; | 356 | u64 extent_item_pos; |
| 357 | u64 flags = 0; | ||
| 357 | u64 ref_root; | 358 | u64 ref_root; |
| 359 | u32 item_size; | ||
| 358 | u8 ref_level; | 360 | u8 ref_level; |
| 359 | unsigned long ptr = 0; | ||
| 360 | const int bufsize = 4096; | 361 | const int bufsize = 4096; |
| 361 | u64 extent_item_pos; | 362 | int ret; |
| 362 | 363 | ||
| 363 | path = btrfs_alloc_path(); | 364 | path = btrfs_alloc_path(); |
| 364 | 365 | ||
| @@ -375,7 +376,8 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 375 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) | 376 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) |
| 376 | goto out; | 377 | goto out; |
| 377 | 378 | ||
| 378 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key); | 379 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, |
| 380 | &flags); | ||
| 379 | if (ret < 0) | 381 | if (ret < 0) |
| 380 | goto out; | 382 | goto out; |
| 381 | 383 | ||
| @@ -387,7 +389,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 387 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | 389 | item_size = btrfs_item_size_nr(eb, path->slots[0]); |
| 388 | btrfs_release_path(path); | 390 | btrfs_release_path(path); |
| 389 | 391 | ||
| 390 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 392 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 391 | do { | 393 | do { |
| 392 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, | 394 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, |
| 393 | &ref_root, &ref_level); | 395 | &ref_root, &ref_level); |
| @@ -1029,6 +1031,7 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 1029 | spin_lock(&sdev->stat_lock); | 1031 | spin_lock(&sdev->stat_lock); |
| 1030 | sdev->stat.malloc_errors++; | 1032 | sdev->stat.malloc_errors++; |
| 1031 | spin_unlock(&sdev->stat_lock); | 1033 | spin_unlock(&sdev->stat_lock); |
| 1034 | kfree(bbio); | ||
| 1032 | return -ENOMEM; | 1035 | return -ENOMEM; |
| 1033 | } | 1036 | } |
| 1034 | sblock->page_count++; | 1037 | sblock->page_count++; |
| @@ -1666,21 +1669,6 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
| 1666 | scrub_block_put(sblock); | 1669 | scrub_block_put(sblock); |
| 1667 | } | 1670 | } |
| 1668 | 1671 | ||
| 1669 | if (sbio->err) { | ||
| 1670 | /* what is this good for??? */ | ||
| 1671 | sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
| 1672 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | ||
| 1673 | sbio->bio->bi_phys_segments = 0; | ||
| 1674 | sbio->bio->bi_idx = 0; | ||
| 1675 | |||
| 1676 | for (i = 0; i < sbio->page_count; i++) { | ||
| 1677 | struct bio_vec *bi; | ||
| 1678 | bi = &sbio->bio->bi_io_vec[i]; | ||
| 1679 | bi->bv_offset = 0; | ||
| 1680 | bi->bv_len = PAGE_SIZE; | ||
| 1681 | } | ||
| 1682 | } | ||
| 1683 | |||
| 1684 | bio_put(sbio->bio); | 1672 | bio_put(sbio->bio); |
| 1685 | sbio->bio = NULL; | 1673 | sbio->bio = NULL; |
| 1686 | spin_lock(&sdev->list_lock); | 1674 | spin_lock(&sdev->list_lock); |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fb5ffe95f869..c7beb543a4a8 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -107,7 +107,6 @@ struct send_ctx { | |||
| 107 | int cur_inode_new; | 107 | int cur_inode_new; |
| 108 | int cur_inode_new_gen; | 108 | int cur_inode_new_gen; |
| 109 | int cur_inode_deleted; | 109 | int cur_inode_deleted; |
| 110 | int cur_inode_first_ref_orphan; | ||
| 111 | u64 cur_inode_size; | 110 | u64 cur_inode_size; |
| 112 | u64 cur_inode_mode; | 111 | u64 cur_inode_mode; |
| 113 | 112 | ||
| @@ -126,7 +125,15 @@ struct send_ctx { | |||
| 126 | 125 | ||
| 127 | struct name_cache_entry { | 126 | struct name_cache_entry { |
| 128 | struct list_head list; | 127 | struct list_head list; |
| 129 | struct list_head use_list; | 128 | /* |
| 129 | * radix_tree has only 32bit entries but we need to handle 64bit inums. | ||
| 130 | * We use the lower 32bit of the 64bit inum to store it in the tree. If | ||
| 131 | * more than one inum would fall into the same entry, we use radix_list | ||
| 132 | * to store the additional entries. radix_list is also used to store | ||
| 133 | * entries where two entries have the same inum but different | ||
| 134 | * generations. | ||
| 135 | */ | ||
| 136 | struct list_head radix_list; | ||
| 130 | u64 ino; | 137 | u64 ino; |
| 131 | u64 gen; | 138 | u64 gen; |
| 132 | u64 parent_ino; | 139 | u64 parent_ino; |
| @@ -328,6 +335,7 @@ out: | |||
| 328 | return ret; | 335 | return ret; |
| 329 | } | 336 | } |
| 330 | 337 | ||
| 338 | #if 0 | ||
| 331 | static void fs_path_remove(struct fs_path *p) | 339 | static void fs_path_remove(struct fs_path *p) |
| 332 | { | 340 | { |
| 333 | BUG_ON(p->reversed); | 341 | BUG_ON(p->reversed); |
| @@ -335,6 +343,7 @@ static void fs_path_remove(struct fs_path *p) | |||
| 335 | p->end--; | 343 | p->end--; |
| 336 | *p->end = 0; | 344 | *p->end = 0; |
| 337 | } | 345 | } |
| 346 | #endif | ||
| 338 | 347 | ||
| 339 | static int fs_path_copy(struct fs_path *p, struct fs_path *from) | 348 | static int fs_path_copy(struct fs_path *p, struct fs_path *from) |
| 340 | { | 349 | { |
| @@ -377,7 +386,7 @@ static struct btrfs_path *alloc_path_for_send(void) | |||
| 377 | return path; | 386 | return path; |
| 378 | } | 387 | } |
| 379 | 388 | ||
| 380 | static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) | 389 | int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) |
| 381 | { | 390 | { |
| 382 | int ret; | 391 | int ret; |
| 383 | mm_segment_t old_fs; | 392 | mm_segment_t old_fs; |
| @@ -387,8 +396,7 @@ static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) | |||
| 387 | set_fs(KERNEL_DS); | 396 | set_fs(KERNEL_DS); |
| 388 | 397 | ||
| 389 | while (pos < len) { | 398 | while (pos < len) { |
| 390 | ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos, | 399 | ret = vfs_write(filp, (char *)buf + pos, len - pos, off); |
| 391 | &sctx->send_off); | ||
| 392 | /* TODO handle that correctly */ | 400 | /* TODO handle that correctly */ |
| 393 | /*if (ret == -ERESTARTSYS) { | 401 | /*if (ret == -ERESTARTSYS) { |
| 394 | continue; | 402 | continue; |
| @@ -544,7 +552,8 @@ static int send_header(struct send_ctx *sctx) | |||
| 544 | strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); | 552 | strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); |
| 545 | hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); | 553 | hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); |
| 546 | 554 | ||
| 547 | return write_buf(sctx, &hdr, sizeof(hdr)); | 555 | return write_buf(sctx->send_filp, &hdr, sizeof(hdr), |
| 556 | &sctx->send_off); | ||
| 548 | } | 557 | } |
| 549 | 558 | ||
| 550 | /* | 559 | /* |
| @@ -581,7 +590,8 @@ static int send_cmd(struct send_ctx *sctx) | |||
| 581 | crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); | 590 | crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); |
| 582 | hdr->crc = cpu_to_le32(crc); | 591 | hdr->crc = cpu_to_le32(crc); |
| 583 | 592 | ||
| 584 | ret = write_buf(sctx, sctx->send_buf, sctx->send_size); | 593 | ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, |
| 594 | &sctx->send_off); | ||
| 585 | 595 | ||
| 586 | sctx->total_send_size += sctx->send_size; | 596 | sctx->total_send_size += sctx->send_size; |
| 587 | sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; | 597 | sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; |
| @@ -687,7 +697,8 @@ out: | |||
| 687 | */ | 697 | */ |
| 688 | static int get_inode_info(struct btrfs_root *root, | 698 | static int get_inode_info(struct btrfs_root *root, |
| 689 | u64 ino, u64 *size, u64 *gen, | 699 | u64 ino, u64 *size, u64 *gen, |
| 690 | u64 *mode, u64 *uid, u64 *gid) | 700 | u64 *mode, u64 *uid, u64 *gid, |
| 701 | u64 *rdev) | ||
| 691 | { | 702 | { |
| 692 | int ret; | 703 | int ret; |
| 693 | struct btrfs_inode_item *ii; | 704 | struct btrfs_inode_item *ii; |
| @@ -721,6 +732,8 @@ static int get_inode_info(struct btrfs_root *root, | |||
| 721 | *uid = btrfs_inode_uid(path->nodes[0], ii); | 732 | *uid = btrfs_inode_uid(path->nodes[0], ii); |
| 722 | if (gid) | 733 | if (gid) |
| 723 | *gid = btrfs_inode_gid(path->nodes[0], ii); | 734 | *gid = btrfs_inode_gid(path->nodes[0], ii); |
| 735 | if (rdev) | ||
| 736 | *rdev = btrfs_inode_rdev(path->nodes[0], ii); | ||
| 724 | 737 | ||
| 725 | out: | 738 | out: |
| 726 | btrfs_free_path(path); | 739 | btrfs_free_path(path); |
| @@ -852,7 +865,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
| 852 | struct extent_buffer *eb; | 865 | struct extent_buffer *eb; |
| 853 | struct btrfs_item *item; | 866 | struct btrfs_item *item; |
| 854 | struct btrfs_dir_item *di; | 867 | struct btrfs_dir_item *di; |
| 855 | struct btrfs_path *tmp_path = NULL; | ||
| 856 | struct btrfs_key di_key; | 868 | struct btrfs_key di_key; |
| 857 | char *buf = NULL; | 869 | char *buf = NULL; |
| 858 | char *buf2 = NULL; | 870 | char *buf2 = NULL; |
| @@ -874,12 +886,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
| 874 | goto out; | 886 | goto out; |
| 875 | } | 887 | } |
| 876 | 888 | ||
| 877 | tmp_path = alloc_path_for_send(); | ||
| 878 | if (!tmp_path) { | ||
| 879 | ret = -ENOMEM; | ||
| 880 | goto out; | ||
| 881 | } | ||
| 882 | |||
| 883 | eb = path->nodes[0]; | 889 | eb = path->nodes[0]; |
| 884 | slot = path->slots[0]; | 890 | slot = path->slots[0]; |
| 885 | item = btrfs_item_nr(eb, slot); | 891 | item = btrfs_item_nr(eb, slot); |
| @@ -941,7 +947,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
| 941 | } | 947 | } |
| 942 | 948 | ||
| 943 | out: | 949 | out: |
| 944 | btrfs_free_path(tmp_path); | ||
| 945 | if (buf_virtual) | 950 | if (buf_virtual) |
| 946 | vfree(buf); | 951 | vfree(buf); |
| 947 | else | 952 | else |
| @@ -1026,12 +1031,12 @@ struct backref_ctx { | |||
| 1026 | u64 extent_len; | 1031 | u64 extent_len; |
| 1027 | 1032 | ||
| 1028 | /* Just to check for bugs in backref resolving */ | 1033 | /* Just to check for bugs in backref resolving */ |
| 1029 | int found_in_send_root; | 1034 | int found_itself; |
| 1030 | }; | 1035 | }; |
| 1031 | 1036 | ||
| 1032 | static int __clone_root_cmp_bsearch(const void *key, const void *elt) | 1037 | static int __clone_root_cmp_bsearch(const void *key, const void *elt) |
| 1033 | { | 1038 | { |
| 1034 | u64 root = (u64)key; | 1039 | u64 root = (u64)(uintptr_t)key; |
| 1035 | struct clone_root *cr = (struct clone_root *)elt; | 1040 | struct clone_root *cr = (struct clone_root *)elt; |
| 1036 | 1041 | ||
| 1037 | if (root < cr->root->objectid) | 1042 | if (root < cr->root->objectid) |
| @@ -1055,6 +1060,7 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2) | |||
| 1055 | 1060 | ||
| 1056 | /* | 1061 | /* |
| 1057 | * Called for every backref that is found for the current extent. | 1062 | * Called for every backref that is found for the current extent. |
| 1063 | * Results are collected in sctx->clone_roots->ino/offset/found_refs | ||
| 1058 | */ | 1064 | */ |
| 1059 | static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | 1065 | static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) |
| 1060 | { | 1066 | { |
| @@ -1064,7 +1070,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1064 | u64 i_size; | 1070 | u64 i_size; |
| 1065 | 1071 | ||
| 1066 | /* First check if the root is in the list of accepted clone sources */ | 1072 | /* First check if the root is in the list of accepted clone sources */ |
| 1067 | found = bsearch((void *)root, bctx->sctx->clone_roots, | 1073 | found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, |
| 1068 | bctx->sctx->clone_roots_cnt, | 1074 | bctx->sctx->clone_roots_cnt, |
| 1069 | sizeof(struct clone_root), | 1075 | sizeof(struct clone_root), |
| 1070 | __clone_root_cmp_bsearch); | 1076 | __clone_root_cmp_bsearch); |
| @@ -1074,14 +1080,15 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1074 | if (found->root == bctx->sctx->send_root && | 1080 | if (found->root == bctx->sctx->send_root && |
| 1075 | ino == bctx->cur_objectid && | 1081 | ino == bctx->cur_objectid && |
| 1076 | offset == bctx->cur_offset) { | 1082 | offset == bctx->cur_offset) { |
| 1077 | bctx->found_in_send_root = 1; | 1083 | bctx->found_itself = 1; |
| 1078 | } | 1084 | } |
| 1079 | 1085 | ||
| 1080 | /* | 1086 | /* |
| 1081 | * There are inodes that have extents that lie behind it's i_size. Don't | 1087 | * There are inodes that have extents that lie behind its i_size. Don't |
| 1082 | * accept clones from these extents. | 1088 | * accept clones from these extents. |
| 1083 | */ | 1089 | */ |
| 1084 | ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL); | 1090 | ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL, |
| 1091 | NULL); | ||
| 1085 | if (ret < 0) | 1092 | if (ret < 0) |
| 1086 | return ret; | 1093 | return ret; |
| 1087 | 1094 | ||
| @@ -1101,16 +1108,12 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1101 | */ | 1108 | */ |
| 1102 | if (ino >= bctx->cur_objectid) | 1109 | if (ino >= bctx->cur_objectid) |
| 1103 | return 0; | 1110 | return 0; |
| 1104 | /*if (ino > ctx->cur_objectid) | 1111 | #if 0 |
| 1112 | if (ino > bctx->cur_objectid) | ||
| 1105 | return 0; | 1113 | return 0; |
| 1106 | if (offset + ctx->extent_len > ctx->cur_offset) | 1114 | if (offset + bctx->extent_len > bctx->cur_offset) |
| 1107 | return 0;*/ | 1115 | return 0; |
| 1108 | 1116 | #endif | |
| 1109 | bctx->found++; | ||
| 1110 | found->found_refs++; | ||
| 1111 | found->ino = ino; | ||
| 1112 | found->offset = offset; | ||
| 1113 | return 0; | ||
| 1114 | } | 1117 | } |
| 1115 | 1118 | ||
| 1116 | bctx->found++; | 1119 | bctx->found++; |
| @@ -1130,6 +1133,12 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1130 | } | 1133 | } |
| 1131 | 1134 | ||
| 1132 | /* | 1135 | /* |
| 1136 | * Given an inode, offset and extent item, it finds a good clone for a clone | ||
| 1137 | * instruction. Returns -ENOENT when none could be found. The function makes | ||
| 1138 | * sure that the returned clone is usable at the point where sending is at the | ||
| 1139 | * moment. This means that no clones are accepted which lie behind the current | ||
| 1140 | * inode+offset. | ||
| 1141 | * | ||
| 1133 | * path must point to the extent item when called. | 1142 | * path must point to the extent item when called. |
| 1134 | */ | 1143 | */ |
| 1135 | static int find_extent_clone(struct send_ctx *sctx, | 1144 | static int find_extent_clone(struct send_ctx *sctx, |
| @@ -1141,20 +1150,29 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1141 | int ret; | 1150 | int ret; |
| 1142 | int extent_type; | 1151 | int extent_type; |
| 1143 | u64 logical; | 1152 | u64 logical; |
| 1153 | u64 disk_byte; | ||
| 1144 | u64 num_bytes; | 1154 | u64 num_bytes; |
| 1145 | u64 extent_item_pos; | 1155 | u64 extent_item_pos; |
| 1156 | u64 flags = 0; | ||
| 1146 | struct btrfs_file_extent_item *fi; | 1157 | struct btrfs_file_extent_item *fi; |
| 1147 | struct extent_buffer *eb = path->nodes[0]; | 1158 | struct extent_buffer *eb = path->nodes[0]; |
| 1148 | struct backref_ctx backref_ctx; | 1159 | struct backref_ctx *backref_ctx = NULL; |
| 1149 | struct clone_root *cur_clone_root; | 1160 | struct clone_root *cur_clone_root; |
| 1150 | struct btrfs_key found_key; | 1161 | struct btrfs_key found_key; |
| 1151 | struct btrfs_path *tmp_path; | 1162 | struct btrfs_path *tmp_path; |
| 1163 | int compressed; | ||
| 1152 | u32 i; | 1164 | u32 i; |
| 1153 | 1165 | ||
| 1154 | tmp_path = alloc_path_for_send(); | 1166 | tmp_path = alloc_path_for_send(); |
| 1155 | if (!tmp_path) | 1167 | if (!tmp_path) |
| 1156 | return -ENOMEM; | 1168 | return -ENOMEM; |
| 1157 | 1169 | ||
| 1170 | backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); | ||
| 1171 | if (!backref_ctx) { | ||
| 1172 | ret = -ENOMEM; | ||
| 1173 | goto out; | ||
| 1174 | } | ||
| 1175 | |||
| 1158 | if (data_offset >= ino_size) { | 1176 | if (data_offset >= ino_size) { |
| 1159 | /* | 1177 | /* |
| 1160 | * There may be extents that lie behind the file's size. | 1178 | * There may be extents that lie behind the file's size. |
| @@ -1172,22 +1190,23 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1172 | ret = -ENOENT; | 1190 | ret = -ENOENT; |
| 1173 | goto out; | 1191 | goto out; |
| 1174 | } | 1192 | } |
| 1193 | compressed = btrfs_file_extent_compression(eb, fi); | ||
| 1175 | 1194 | ||
| 1176 | num_bytes = btrfs_file_extent_num_bytes(eb, fi); | 1195 | num_bytes = btrfs_file_extent_num_bytes(eb, fi); |
| 1177 | logical = btrfs_file_extent_disk_bytenr(eb, fi); | 1196 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); |
| 1178 | if (logical == 0) { | 1197 | if (disk_byte == 0) { |
| 1179 | ret = -ENOENT; | 1198 | ret = -ENOENT; |
| 1180 | goto out; | 1199 | goto out; |
| 1181 | } | 1200 | } |
| 1182 | logical += btrfs_file_extent_offset(eb, fi); | 1201 | logical = disk_byte + btrfs_file_extent_offset(eb, fi); |
| 1183 | 1202 | ||
| 1184 | ret = extent_from_logical(sctx->send_root->fs_info, | 1203 | ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, |
| 1185 | logical, tmp_path, &found_key); | 1204 | &found_key, &flags); |
| 1186 | btrfs_release_path(tmp_path); | 1205 | btrfs_release_path(tmp_path); |
| 1187 | 1206 | ||
| 1188 | if (ret < 0) | 1207 | if (ret < 0) |
| 1189 | goto out; | 1208 | goto out; |
| 1190 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 1209 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 1191 | ret = -EIO; | 1210 | ret = -EIO; |
| 1192 | goto out; | 1211 | goto out; |
| 1193 | } | 1212 | } |
| @@ -1202,12 +1221,12 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1202 | cur_clone_root->found_refs = 0; | 1221 | cur_clone_root->found_refs = 0; |
| 1203 | } | 1222 | } |
| 1204 | 1223 | ||
| 1205 | backref_ctx.sctx = sctx; | 1224 | backref_ctx->sctx = sctx; |
| 1206 | backref_ctx.found = 0; | 1225 | backref_ctx->found = 0; |
| 1207 | backref_ctx.cur_objectid = ino; | 1226 | backref_ctx->cur_objectid = ino; |
| 1208 | backref_ctx.cur_offset = data_offset; | 1227 | backref_ctx->cur_offset = data_offset; |
| 1209 | backref_ctx.found_in_send_root = 0; | 1228 | backref_ctx->found_itself = 0; |
| 1210 | backref_ctx.extent_len = num_bytes; | 1229 | backref_ctx->extent_len = num_bytes; |
| 1211 | 1230 | ||
| 1212 | /* | 1231 | /* |
| 1213 | * The last extent of a file may be too large due to page alignment. | 1232 | * The last extent of a file may be too large due to page alignment. |
| @@ -1215,25 +1234,31 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1215 | * __iterate_backrefs work. | 1234 | * __iterate_backrefs work. |
| 1216 | */ | 1235 | */ |
| 1217 | if (data_offset + num_bytes >= ino_size) | 1236 | if (data_offset + num_bytes >= ino_size) |
| 1218 | backref_ctx.extent_len = ino_size - data_offset; | 1237 | backref_ctx->extent_len = ino_size - data_offset; |
| 1219 | 1238 | ||
| 1220 | /* | 1239 | /* |
| 1221 | * Now collect all backrefs. | 1240 | * Now collect all backrefs. |
| 1222 | */ | 1241 | */ |
| 1242 | if (compressed == BTRFS_COMPRESS_NONE) | ||
| 1243 | extent_item_pos = logical - found_key.objectid; | ||
| 1244 | else | ||
| 1245 | extent_item_pos = 0; | ||
| 1246 | |||
| 1223 | extent_item_pos = logical - found_key.objectid; | 1247 | extent_item_pos = logical - found_key.objectid; |
| 1224 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | 1248 | ret = iterate_extent_inodes(sctx->send_root->fs_info, |
| 1225 | found_key.objectid, extent_item_pos, 1, | 1249 | found_key.objectid, extent_item_pos, 1, |
| 1226 | __iterate_backrefs, &backref_ctx); | 1250 | __iterate_backrefs, backref_ctx); |
| 1251 | |||
| 1227 | if (ret < 0) | 1252 | if (ret < 0) |
| 1228 | goto out; | 1253 | goto out; |
| 1229 | 1254 | ||
| 1230 | if (!backref_ctx.found_in_send_root) { | 1255 | if (!backref_ctx->found_itself) { |
| 1231 | /* found a bug in backref code? */ | 1256 | /* found a bug in backref code? */ |
| 1232 | ret = -EIO; | 1257 | ret = -EIO; |
| 1233 | printk(KERN_ERR "btrfs: ERROR did not find backref in " | 1258 | printk(KERN_ERR "btrfs: ERROR did not find backref in " |
| 1234 | "send_root. inode=%llu, offset=%llu, " | 1259 | "send_root. inode=%llu, offset=%llu, " |
| 1235 | "logical=%llu\n", | 1260 | "disk_byte=%llu found extent=%llu\n", |
| 1236 | ino, data_offset, logical); | 1261 | ino, data_offset, disk_byte, found_key.objectid); |
| 1237 | goto out; | 1262 | goto out; |
| 1238 | } | 1263 | } |
| 1239 | 1264 | ||
| @@ -1242,7 +1267,7 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
| 1242 | "num_bytes=%llu, logical=%llu\n", | 1267 | "num_bytes=%llu, logical=%llu\n", |
| 1243 | data_offset, ino, num_bytes, logical); | 1268 | data_offset, ino, num_bytes, logical); |
| 1244 | 1269 | ||
| 1245 | if (!backref_ctx.found) | 1270 | if (!backref_ctx->found) |
| 1246 | verbose_printk("btrfs: no clones found\n"); | 1271 | verbose_printk("btrfs: no clones found\n"); |
| 1247 | 1272 | ||
| 1248 | cur_clone_root = NULL; | 1273 | cur_clone_root = NULL; |
| @@ -1253,7 +1278,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
| 1253 | else if (sctx->clone_roots[i].root == sctx->send_root) | 1278 | else if (sctx->clone_roots[i].root == sctx->send_root) |
| 1254 | /* prefer clones from send_root over others */ | 1279 | /* prefer clones from send_root over others */ |
| 1255 | cur_clone_root = sctx->clone_roots + i; | 1280 | cur_clone_root = sctx->clone_roots + i; |
| 1256 | break; | ||
| 1257 | } | 1281 | } |
| 1258 | 1282 | ||
| 1259 | } | 1283 | } |
| @@ -1267,6 +1291,7 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
| 1267 | 1291 | ||
| 1268 | out: | 1292 | out: |
| 1269 | btrfs_free_path(tmp_path); | 1293 | btrfs_free_path(tmp_path); |
| 1294 | kfree(backref_ctx); | ||
| 1270 | return ret; | 1295 | return ret; |
| 1271 | } | 1296 | } |
| 1272 | 1297 | ||
| @@ -1307,8 +1332,6 @@ static int read_symlink(struct send_ctx *sctx, | |||
| 1307 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); | 1332 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); |
| 1308 | 1333 | ||
| 1309 | ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); | 1334 | ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); |
| 1310 | if (ret < 0) | ||
| 1311 | goto out; | ||
| 1312 | 1335 | ||
| 1313 | out: | 1336 | out: |
| 1314 | btrfs_free_path(path); | 1337 | btrfs_free_path(path); |
| @@ -1404,7 +1427,7 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 1404 | u64 right_gen; | 1427 | u64 right_gen; |
| 1405 | 1428 | ||
| 1406 | ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, | 1429 | ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, |
| 1407 | NULL); | 1430 | NULL, NULL); |
| 1408 | if (ret < 0 && ret != -ENOENT) | 1431 | if (ret < 0 && ret != -ENOENT) |
| 1409 | goto out; | 1432 | goto out; |
| 1410 | left_ret = ret; | 1433 | left_ret = ret; |
| @@ -1413,16 +1436,16 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 1413 | right_ret = -ENOENT; | 1436 | right_ret = -ENOENT; |
| 1414 | } else { | 1437 | } else { |
| 1415 | ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, | 1438 | ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, |
| 1416 | NULL, NULL, NULL); | 1439 | NULL, NULL, NULL, NULL); |
| 1417 | if (ret < 0 && ret != -ENOENT) | 1440 | if (ret < 0 && ret != -ENOENT) |
| 1418 | goto out; | 1441 | goto out; |
| 1419 | right_ret = ret; | 1442 | right_ret = ret; |
| 1420 | } | 1443 | } |
| 1421 | 1444 | ||
| 1422 | if (!left_ret && !right_ret) { | 1445 | if (!left_ret && !right_ret) { |
| 1423 | if (left_gen == gen && right_gen == gen) | 1446 | if (left_gen == gen && right_gen == gen) { |
| 1424 | ret = inode_state_no_change; | 1447 | ret = inode_state_no_change; |
| 1425 | else if (left_gen == gen) { | 1448 | } else if (left_gen == gen) { |
| 1426 | if (ino < sctx->send_progress) | 1449 | if (ino < sctx->send_progress) |
| 1427 | ret = inode_state_did_create; | 1450 | ret = inode_state_did_create; |
| 1428 | else | 1451 | else |
| @@ -1516,6 +1539,10 @@ out: | |||
| 1516 | return ret; | 1539 | return ret; |
| 1517 | } | 1540 | } |
| 1518 | 1541 | ||
| 1542 | /* | ||
| 1543 | * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, | ||
| 1544 | * generation of the parent dir and the name of the dir entry. | ||
| 1545 | */ | ||
| 1519 | static int get_first_ref(struct send_ctx *sctx, | 1546 | static int get_first_ref(struct send_ctx *sctx, |
| 1520 | struct btrfs_root *root, u64 ino, | 1547 | struct btrfs_root *root, u64 ino, |
| 1521 | u64 *dir, u64 *dir_gen, struct fs_path *name) | 1548 | u64 *dir, u64 *dir_gen, struct fs_path *name) |
| @@ -1557,7 +1584,7 @@ static int get_first_ref(struct send_ctx *sctx, | |||
| 1557 | btrfs_release_path(path); | 1584 | btrfs_release_path(path); |
| 1558 | 1585 | ||
| 1559 | ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, | 1586 | ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, |
| 1560 | NULL); | 1587 | NULL, NULL); |
| 1561 | if (ret < 0) | 1588 | if (ret < 0) |
| 1562 | goto out; | 1589 | goto out; |
| 1563 | 1590 | ||
| @@ -1586,22 +1613,28 @@ static int is_first_ref(struct send_ctx *sctx, | |||
| 1586 | if (ret < 0) | 1613 | if (ret < 0) |
| 1587 | goto out; | 1614 | goto out; |
| 1588 | 1615 | ||
| 1589 | if (name_len != fs_path_len(tmp_name)) { | 1616 | if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) { |
| 1590 | ret = 0; | 1617 | ret = 0; |
| 1591 | goto out; | 1618 | goto out; |
| 1592 | } | 1619 | } |
| 1593 | 1620 | ||
| 1594 | ret = memcmp(tmp_name->start, name, name_len); | 1621 | ret = !memcmp(tmp_name->start, name, name_len); |
| 1595 | if (ret) | ||
| 1596 | ret = 0; | ||
| 1597 | else | ||
| 1598 | ret = 1; | ||
| 1599 | 1622 | ||
| 1600 | out: | 1623 | out: |
| 1601 | fs_path_free(sctx, tmp_name); | 1624 | fs_path_free(sctx, tmp_name); |
| 1602 | return ret; | 1625 | return ret; |
| 1603 | } | 1626 | } |
| 1604 | 1627 | ||
| 1628 | /* | ||
| 1629 | * Used by process_recorded_refs to determine if a new ref would overwrite an | ||
| 1630 | * already existing ref. In case it detects an overwrite, it returns the | ||
| 1631 | * inode/gen in who_ino/who_gen. | ||
| 1632 | * When an overwrite is detected, process_recorded_refs does proper orphanizing | ||
| 1633 | * to make sure later references to the overwritten inode are possible. | ||
| 1634 | * Orphanizing is however only required for the first ref of an inode. | ||
| 1635 | * process_recorded_refs does an additional is_first_ref check to see if | ||
| 1636 | * orphanizing is really required. | ||
| 1637 | */ | ||
| 1605 | static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | 1638 | static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, |
| 1606 | const char *name, int name_len, | 1639 | const char *name, int name_len, |
| 1607 | u64 *who_ino, u64 *who_gen) | 1640 | u64 *who_ino, u64 *who_gen) |
| @@ -1626,9 +1659,14 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | |||
| 1626 | goto out; | 1659 | goto out; |
| 1627 | } | 1660 | } |
| 1628 | 1661 | ||
| 1662 | /* | ||
| 1663 | * Check if the overwritten ref was already processed. If yes, the ref | ||
| 1664 | * was already unlinked/moved, so we can safely assume that we will not | ||
| 1665 | * overwrite anything at this point in time. | ||
| 1666 | */ | ||
| 1629 | if (other_inode > sctx->send_progress) { | 1667 | if (other_inode > sctx->send_progress) { |
| 1630 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, | 1668 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, |
| 1631 | who_gen, NULL, NULL, NULL); | 1669 | who_gen, NULL, NULL, NULL, NULL); |
| 1632 | if (ret < 0) | 1670 | if (ret < 0) |
| 1633 | goto out; | 1671 | goto out; |
| 1634 | 1672 | ||
| @@ -1642,6 +1680,13 @@ out: | |||
| 1642 | return ret; | 1680 | return ret; |
| 1643 | } | 1681 | } |
| 1644 | 1682 | ||
| 1683 | /* | ||
| 1684 | * Checks if the ref was overwritten by an already processed inode. This is | ||
| 1685 | * used by __get_cur_name_and_parent to find out if the ref was orphanized and | ||
| 1686 | * thus the orphan name needs to be used. | ||
| 1687 | * process_recorded_refs also uses it to avoid unlinking of refs that were | ||
| 1688 | * overwritten. | ||
| 1689 | */ | ||
| 1645 | static int did_overwrite_ref(struct send_ctx *sctx, | 1690 | static int did_overwrite_ref(struct send_ctx *sctx, |
| 1646 | u64 dir, u64 dir_gen, | 1691 | u64 dir, u64 dir_gen, |
| 1647 | u64 ino, u64 ino_gen, | 1692 | u64 ino, u64 ino_gen, |
| @@ -1671,7 +1716,7 @@ static int did_overwrite_ref(struct send_ctx *sctx, | |||
| 1671 | } | 1716 | } |
| 1672 | 1717 | ||
| 1673 | ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, | 1718 | ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, |
| 1674 | NULL); | 1719 | NULL, NULL); |
| 1675 | if (ret < 0) | 1720 | if (ret < 0) |
| 1676 | goto out; | 1721 | goto out; |
| 1677 | 1722 | ||
| @@ -1690,6 +1735,11 @@ out: | |||
| 1690 | return ret; | 1735 | return ret; |
| 1691 | } | 1736 | } |
| 1692 | 1737 | ||
| 1738 | /* | ||
| 1739 | * Same as did_overwrite_ref, but also checks if it is the first ref of an inode | ||
| 1740 | * that got overwritten. This is used by process_recorded_refs to determine | ||
| 1741 | * if it has to use the path as returned by get_cur_path or the orphan name. | ||
| 1742 | */ | ||
| 1693 | static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | 1743 | static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) |
| 1694 | { | 1744 | { |
| 1695 | int ret = 0; | 1745 | int ret = 0; |
| @@ -1710,39 +1760,40 @@ static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 1710 | 1760 | ||
| 1711 | ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, | 1761 | ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, |
| 1712 | name->start, fs_path_len(name)); | 1762 | name->start, fs_path_len(name)); |
| 1713 | if (ret < 0) | ||
| 1714 | goto out; | ||
| 1715 | 1763 | ||
| 1716 | out: | 1764 | out: |
| 1717 | fs_path_free(sctx, name); | 1765 | fs_path_free(sctx, name); |
| 1718 | return ret; | 1766 | return ret; |
| 1719 | } | 1767 | } |
| 1720 | 1768 | ||
| 1769 | /* | ||
| 1770 | * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit, | ||
| 1771 | * so we need to do some special handling in case we have clashes. This function | ||
| 1772 | * takes care of this with the help of name_cache_entry::radix_list. | ||
| 1773 | * In case of error, nce is kfreed. | ||
| 1774 | */ | ||
| 1721 | static int name_cache_insert(struct send_ctx *sctx, | 1775 | static int name_cache_insert(struct send_ctx *sctx, |
| 1722 | struct name_cache_entry *nce) | 1776 | struct name_cache_entry *nce) |
| 1723 | { | 1777 | { |
| 1724 | int ret = 0; | 1778 | int ret = 0; |
| 1725 | struct name_cache_entry **ncea; | 1779 | struct list_head *nce_head; |
| 1726 | 1780 | ||
| 1727 | ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); | 1781 | nce_head = radix_tree_lookup(&sctx->name_cache, |
| 1728 | if (ncea) { | 1782 | (unsigned long)nce->ino); |
| 1729 | if (!ncea[0]) | 1783 | if (!nce_head) { |
| 1730 | ncea[0] = nce; | 1784 | nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); |
| 1731 | else if (!ncea[1]) | 1785 | if (!nce_head) |
| 1732 | ncea[1] = nce; | ||
| 1733 | else | ||
| 1734 | BUG(); | ||
| 1735 | } else { | ||
| 1736 | ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS); | ||
| 1737 | if (!ncea) | ||
| 1738 | return -ENOMEM; | 1786 | return -ENOMEM; |
| 1787 | INIT_LIST_HEAD(nce_head); | ||
| 1739 | 1788 | ||
| 1740 | ncea[0] = nce; | 1789 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); |
| 1741 | ncea[1] = NULL; | 1790 | if (ret < 0) { |
| 1742 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea); | 1791 | kfree(nce_head); |
| 1743 | if (ret < 0) | 1792 | kfree(nce); |
| 1744 | return ret; | 1793 | return ret; |
| 1794 | } | ||
| 1745 | } | 1795 | } |
| 1796 | list_add_tail(&nce->radix_list, nce_head); | ||
| 1746 | list_add_tail(&nce->list, &sctx->name_cache_list); | 1797 | list_add_tail(&nce->list, &sctx->name_cache_list); |
| 1747 | sctx->name_cache_size++; | 1798 | sctx->name_cache_size++; |
| 1748 | 1799 | ||
| @@ -1752,50 +1803,52 @@ static int name_cache_insert(struct send_ctx *sctx, | |||
| 1752 | static void name_cache_delete(struct send_ctx *sctx, | 1803 | static void name_cache_delete(struct send_ctx *sctx, |
| 1753 | struct name_cache_entry *nce) | 1804 | struct name_cache_entry *nce) |
| 1754 | { | 1805 | { |
| 1755 | struct name_cache_entry **ncea; | 1806 | struct list_head *nce_head; |
| 1756 | |||
| 1757 | ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); | ||
| 1758 | BUG_ON(!ncea); | ||
| 1759 | |||
| 1760 | if (ncea[0] == nce) | ||
| 1761 | ncea[0] = NULL; | ||
| 1762 | else if (ncea[1] == nce) | ||
| 1763 | ncea[1] = NULL; | ||
| 1764 | else | ||
| 1765 | BUG(); | ||
| 1766 | 1807 | ||
| 1767 | if (!ncea[0] && !ncea[1]) { | 1808 | nce_head = radix_tree_lookup(&sctx->name_cache, |
| 1768 | radix_tree_delete(&sctx->name_cache, nce->ino); | 1809 | (unsigned long)nce->ino); |
| 1769 | kfree(ncea); | 1810 | BUG_ON(!nce_head); |
| 1770 | } | ||
| 1771 | 1811 | ||
| 1812 | list_del(&nce->radix_list); | ||
| 1772 | list_del(&nce->list); | 1813 | list_del(&nce->list); |
| 1773 | |||
| 1774 | sctx->name_cache_size--; | 1814 | sctx->name_cache_size--; |
| 1815 | |||
| 1816 | if (list_empty(nce_head)) { | ||
| 1817 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); | ||
| 1818 | kfree(nce_head); | ||
| 1819 | } | ||
| 1775 | } | 1820 | } |
| 1776 | 1821 | ||
| 1777 | static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, | 1822 | static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, |
| 1778 | u64 ino, u64 gen) | 1823 | u64 ino, u64 gen) |
| 1779 | { | 1824 | { |
| 1780 | struct name_cache_entry **ncea; | 1825 | struct list_head *nce_head; |
| 1826 | struct name_cache_entry *cur; | ||
| 1781 | 1827 | ||
| 1782 | ncea = radix_tree_lookup(&sctx->name_cache, ino); | 1828 | nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino); |
| 1783 | if (!ncea) | 1829 | if (!nce_head) |
| 1784 | return NULL; | 1830 | return NULL; |
| 1785 | 1831 | ||
| 1786 | if (ncea[0] && ncea[0]->gen == gen) | 1832 | list_for_each_entry(cur, nce_head, radix_list) { |
| 1787 | return ncea[0]; | 1833 | if (cur->ino == ino && cur->gen == gen) |
| 1788 | else if (ncea[1] && ncea[1]->gen == gen) | 1834 | return cur; |
| 1789 | return ncea[1]; | 1835 | } |
| 1790 | return NULL; | 1836 | return NULL; |
| 1791 | } | 1837 | } |
| 1792 | 1838 | ||
| 1839 | /* | ||
| 1840 | * Removes the entry from the list and adds it back to the end. This marks the | ||
| 1841 | * entry as recently used so that name_cache_clean_unused does not remove it. | ||
| 1842 | */ | ||
| 1793 | static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) | 1843 | static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) |
| 1794 | { | 1844 | { |
| 1795 | list_del(&nce->list); | 1845 | list_del(&nce->list); |
| 1796 | list_add_tail(&nce->list, &sctx->name_cache_list); | 1846 | list_add_tail(&nce->list, &sctx->name_cache_list); |
| 1797 | } | 1847 | } |
| 1798 | 1848 | ||
| 1849 | /* | ||
| 1850 | * Remove some entries from the beginning of name_cache_list. | ||
| 1851 | */ | ||
| 1799 | static void name_cache_clean_unused(struct send_ctx *sctx) | 1852 | static void name_cache_clean_unused(struct send_ctx *sctx) |
| 1800 | { | 1853 | { |
| 1801 | struct name_cache_entry *nce; | 1854 | struct name_cache_entry *nce; |
| @@ -1814,13 +1867,23 @@ static void name_cache_clean_unused(struct send_ctx *sctx) | |||
| 1814 | static void name_cache_free(struct send_ctx *sctx) | 1867 | static void name_cache_free(struct send_ctx *sctx) |
| 1815 | { | 1868 | { |
| 1816 | struct name_cache_entry *nce; | 1869 | struct name_cache_entry *nce; |
| 1817 | struct name_cache_entry *tmp; | ||
| 1818 | 1870 | ||
| 1819 | list_for_each_entry_safe(nce, tmp, &sctx->name_cache_list, list) { | 1871 | while (!list_empty(&sctx->name_cache_list)) { |
| 1872 | nce = list_entry(sctx->name_cache_list.next, | ||
| 1873 | struct name_cache_entry, list); | ||
| 1820 | name_cache_delete(sctx, nce); | 1874 | name_cache_delete(sctx, nce); |
| 1875 | kfree(nce); | ||
| 1821 | } | 1876 | } |
| 1822 | } | 1877 | } |
| 1823 | 1878 | ||
| 1879 | /* | ||
| 1880 | * Used by get_cur_path for each ref up to the root. | ||
| 1881 | * Returns 0 if it succeeded. | ||
| 1882 | * Returns 1 if the inode is not existent or got overwritten. In that case, the | ||
| 1883 | * name is an orphan name. This instructs get_cur_path to stop iterating. If 1 | ||
| 1884 | * is returned, parent_ino/parent_gen are not guaranteed to be valid. | ||
| 1885 | * Returns <0 in case of error. | ||
| 1886 | */ | ||
| 1824 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | 1887 | static int __get_cur_name_and_parent(struct send_ctx *sctx, |
| 1825 | u64 ino, u64 gen, | 1888 | u64 ino, u64 gen, |
| 1826 | u64 *parent_ino, | 1889 | u64 *parent_ino, |
| @@ -1832,6 +1895,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1832 | struct btrfs_path *path = NULL; | 1895 | struct btrfs_path *path = NULL; |
| 1833 | struct name_cache_entry *nce = NULL; | 1896 | struct name_cache_entry *nce = NULL; |
| 1834 | 1897 | ||
| 1898 | /* | ||
| 1899 | * First check if we already did a call to this function with the same | ||
| 1900 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes | ||
| 1901 | * return the cached result. | ||
| 1902 | */ | ||
| 1835 | nce = name_cache_search(sctx, ino, gen); | 1903 | nce = name_cache_search(sctx, ino, gen); |
| 1836 | if (nce) { | 1904 | if (nce) { |
| 1837 | if (ino < sctx->send_progress && nce->need_later_update) { | 1905 | if (ino < sctx->send_progress && nce->need_later_update) { |
| @@ -1854,6 +1922,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1854 | if (!path) | 1922 | if (!path) |
| 1855 | return -ENOMEM; | 1923 | return -ENOMEM; |
| 1856 | 1924 | ||
| 1925 | /* | ||
| 1926 | * If the inode is not existent yet, add the orphan name and return 1. | ||
| 1927 | * This should only happen for the parent dir that we determine in | ||
| 1928 | * __record_new_ref | ||
| 1929 | */ | ||
| 1857 | ret = is_inode_existent(sctx, ino, gen); | 1930 | ret = is_inode_existent(sctx, ino, gen); |
| 1858 | if (ret < 0) | 1931 | if (ret < 0) |
| 1859 | goto out; | 1932 | goto out; |
| @@ -1866,6 +1939,10 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1866 | goto out_cache; | 1939 | goto out_cache; |
| 1867 | } | 1940 | } |
| 1868 | 1941 | ||
| 1942 | /* | ||
| 1943 | * Depending on whether the inode was already processed or not, use | ||
| 1944 | * send_root or parent_root for ref lookup. | ||
| 1945 | */ | ||
| 1869 | if (ino < sctx->send_progress) | 1946 | if (ino < sctx->send_progress) |
| 1870 | ret = get_first_ref(sctx, sctx->send_root, ino, | 1947 | ret = get_first_ref(sctx, sctx->send_root, ino, |
| 1871 | parent_ino, parent_gen, dest); | 1948 | parent_ino, parent_gen, dest); |
| @@ -1875,6 +1952,10 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1875 | if (ret < 0) | 1952 | if (ret < 0) |
| 1876 | goto out; | 1953 | goto out; |
| 1877 | 1954 | ||
| 1955 | /* | ||
| 1956 | * Check if the ref was overwritten by an inode's ref that was processed | ||
| 1957 | * earlier. If yes, treat as orphan and return 1. | ||
| 1958 | */ | ||
| 1878 | ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, | 1959 | ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, |
| 1879 | dest->start, dest->end - dest->start); | 1960 | dest->start, dest->end - dest->start); |
| 1880 | if (ret < 0) | 1961 | if (ret < 0) |
| @@ -1888,6 +1969,9 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1888 | } | 1969 | } |
| 1889 | 1970 | ||
| 1890 | out_cache: | 1971 | out_cache: |
| 1972 | /* | ||
| 1973 | * Store the result of the lookup in the name cache. | ||
| 1974 | */ | ||
| 1891 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); | 1975 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); |
| 1892 | if (!nce) { | 1976 | if (!nce) { |
| 1893 | ret = -ENOMEM; | 1977 | ret = -ENOMEM; |
| @@ -1901,7 +1985,6 @@ out_cache: | |||
| 1901 | nce->name_len = fs_path_len(dest); | 1985 | nce->name_len = fs_path_len(dest); |
| 1902 | nce->ret = ret; | 1986 | nce->ret = ret; |
| 1903 | strcpy(nce->name, dest->start); | 1987 | strcpy(nce->name, dest->start); |
| 1904 | memset(&nce->use_list, 0, sizeof(nce->use_list)); | ||
| 1905 | 1988 | ||
| 1906 | if (ino < sctx->send_progress) | 1989 | if (ino < sctx->send_progress) |
| 1907 | nce->need_later_update = 0; | 1990 | nce->need_later_update = 0; |
| @@ -2107,9 +2190,6 @@ static int send_subvol_begin(struct send_ctx *sctx) | |||
| 2107 | read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); | 2190 | read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); |
| 2108 | btrfs_release_path(path); | 2191 | btrfs_release_path(path); |
| 2109 | 2192 | ||
| 2110 | if (ret < 0) | ||
| 2111 | goto out; | ||
| 2112 | |||
| 2113 | if (parent_root) { | 2193 | if (parent_root) { |
| 2114 | ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); | 2194 | ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); |
| 2115 | if (ret < 0) | 2195 | if (ret < 0) |
| @@ -2276,7 +2356,7 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); | |||
| 2276 | btrfs_inode_mtime(ii)); | 2356 | btrfs_inode_mtime(ii)); |
| 2277 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, | 2357 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, |
| 2278 | btrfs_inode_ctime(ii)); | 2358 | btrfs_inode_ctime(ii)); |
| 2279 | /* TODO otime? */ | 2359 | /* TODO Add otime support when the otime patches get into upstream */ |
| 2280 | 2360 | ||
| 2281 | ret = send_cmd(sctx); | 2361 | ret = send_cmd(sctx); |
| 2282 | 2362 | ||
| @@ -2292,39 +2372,39 @@ out: | |||
| 2292 | * a valid path yet because we did not process the refs yet. So, the inode | 2372 | * a valid path yet because we did not process the refs yet. So, the inode |
| 2293 | * is created as orphan. | 2373 | * is created as orphan. |
| 2294 | */ | 2374 | */ |
| 2295 | static int send_create_inode(struct send_ctx *sctx, struct btrfs_path *path, | 2375 | static int send_create_inode(struct send_ctx *sctx, u64 ino) |
| 2296 | struct btrfs_key *key) | ||
| 2297 | { | 2376 | { |
| 2298 | int ret = 0; | 2377 | int ret = 0; |
| 2299 | struct extent_buffer *eb = path->nodes[0]; | ||
| 2300 | struct btrfs_inode_item *ii; | ||
| 2301 | struct fs_path *p; | 2378 | struct fs_path *p; |
| 2302 | int slot = path->slots[0]; | ||
| 2303 | int cmd; | 2379 | int cmd; |
| 2380 | u64 gen; | ||
| 2304 | u64 mode; | 2381 | u64 mode; |
| 2382 | u64 rdev; | ||
| 2305 | 2383 | ||
| 2306 | verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); | 2384 | verbose_printk("btrfs: send_create_inode %llu\n", ino); |
| 2307 | 2385 | ||
| 2308 | p = fs_path_alloc(sctx); | 2386 | p = fs_path_alloc(sctx); |
| 2309 | if (!p) | 2387 | if (!p) |
| 2310 | return -ENOMEM; | 2388 | return -ENOMEM; |
| 2311 | 2389 | ||
| 2312 | ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); | 2390 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, |
| 2313 | mode = btrfs_inode_mode(eb, ii); | 2391 | NULL, &rdev); |
| 2392 | if (ret < 0) | ||
| 2393 | goto out; | ||
| 2314 | 2394 | ||
| 2315 | if (S_ISREG(mode)) | 2395 | if (S_ISREG(mode)) { |
| 2316 | cmd = BTRFS_SEND_C_MKFILE; | 2396 | cmd = BTRFS_SEND_C_MKFILE; |
| 2317 | else if (S_ISDIR(mode)) | 2397 | } else if (S_ISDIR(mode)) { |
| 2318 | cmd = BTRFS_SEND_C_MKDIR; | 2398 | cmd = BTRFS_SEND_C_MKDIR; |
| 2319 | else if (S_ISLNK(mode)) | 2399 | } else if (S_ISLNK(mode)) { |
| 2320 | cmd = BTRFS_SEND_C_SYMLINK; | 2400 | cmd = BTRFS_SEND_C_SYMLINK; |
| 2321 | else if (S_ISCHR(mode) || S_ISBLK(mode)) | 2401 | } else if (S_ISCHR(mode) || S_ISBLK(mode)) { |
| 2322 | cmd = BTRFS_SEND_C_MKNOD; | 2402 | cmd = BTRFS_SEND_C_MKNOD; |
| 2323 | else if (S_ISFIFO(mode)) | 2403 | } else if (S_ISFIFO(mode)) { |
| 2324 | cmd = BTRFS_SEND_C_MKFIFO; | 2404 | cmd = BTRFS_SEND_C_MKFIFO; |
| 2325 | else if (S_ISSOCK(mode)) | 2405 | } else if (S_ISSOCK(mode)) { |
| 2326 | cmd = BTRFS_SEND_C_MKSOCK; | 2406 | cmd = BTRFS_SEND_C_MKSOCK; |
| 2327 | else { | 2407 | } else { |
| 2328 | printk(KERN_WARNING "btrfs: unexpected inode type %o", | 2408 | printk(KERN_WARNING "btrfs: unexpected inode type %o", |
| 2329 | (int)(mode & S_IFMT)); | 2409 | (int)(mode & S_IFMT)); |
| 2330 | ret = -ENOTSUPP; | 2410 | ret = -ENOTSUPP; |
| @@ -2335,22 +2415,22 @@ verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); | |||
| 2335 | if (ret < 0) | 2415 | if (ret < 0) |
| 2336 | goto out; | 2416 | goto out; |
| 2337 | 2417 | ||
| 2338 | ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | 2418 | ret = gen_unique_name(sctx, ino, gen, p); |
| 2339 | if (ret < 0) | 2419 | if (ret < 0) |
| 2340 | goto out; | 2420 | goto out; |
| 2341 | 2421 | ||
| 2342 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 2422 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
| 2343 | TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, sctx->cur_ino); | 2423 | TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); |
| 2344 | 2424 | ||
| 2345 | if (S_ISLNK(mode)) { | 2425 | if (S_ISLNK(mode)) { |
| 2346 | fs_path_reset(p); | 2426 | fs_path_reset(p); |
| 2347 | ret = read_symlink(sctx, sctx->send_root, sctx->cur_ino, p); | 2427 | ret = read_symlink(sctx, sctx->send_root, ino, p); |
| 2348 | if (ret < 0) | 2428 | if (ret < 0) |
| 2349 | goto out; | 2429 | goto out; |
| 2350 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); | 2430 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); |
| 2351 | } else if (S_ISCHR(mode) || S_ISBLK(mode) || | 2431 | } else if (S_ISCHR(mode) || S_ISBLK(mode) || |
| 2352 | S_ISFIFO(mode) || S_ISSOCK(mode)) { | 2432 | S_ISFIFO(mode) || S_ISSOCK(mode)) { |
| 2353 | TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, btrfs_inode_rdev(eb, ii)); | 2433 | TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev); |
| 2354 | } | 2434 | } |
| 2355 | 2435 | ||
| 2356 | ret = send_cmd(sctx); | 2436 | ret = send_cmd(sctx); |
| @@ -2364,6 +2444,92 @@ out: | |||
| 2364 | return ret; | 2444 | return ret; |
| 2365 | } | 2445 | } |
| 2366 | 2446 | ||
| 2447 | /* | ||
| 2448 | * We need some special handling for inodes that get processed before the parent | ||
| 2449 | * directory got created. See process_recorded_refs for details. | ||
| 2450 | * This function does the check if we already created the dir out of order. | ||
| 2451 | */ | ||
| 2452 | static int did_create_dir(struct send_ctx *sctx, u64 dir) | ||
| 2453 | { | ||
| 2454 | int ret = 0; | ||
| 2455 | struct btrfs_path *path = NULL; | ||
| 2456 | struct btrfs_key key; | ||
| 2457 | struct btrfs_key found_key; | ||
| 2458 | struct btrfs_key di_key; | ||
| 2459 | struct extent_buffer *eb; | ||
| 2460 | struct btrfs_dir_item *di; | ||
| 2461 | int slot; | ||
| 2462 | |||
| 2463 | path = alloc_path_for_send(); | ||
| 2464 | if (!path) { | ||
| 2465 | ret = -ENOMEM; | ||
| 2466 | goto out; | ||
| 2467 | } | ||
| 2468 | |||
| 2469 | key.objectid = dir; | ||
| 2470 | key.type = BTRFS_DIR_INDEX_KEY; | ||
| 2471 | key.offset = 0; | ||
| 2472 | while (1) { | ||
| 2473 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | ||
| 2474 | 1, 0); | ||
| 2475 | if (ret < 0) | ||
| 2476 | goto out; | ||
| 2477 | if (!ret) { | ||
| 2478 | eb = path->nodes[0]; | ||
| 2479 | slot = path->slots[0]; | ||
| 2480 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 2481 | } | ||
| 2482 | if (ret || found_key.objectid != key.objectid || | ||
| 2483 | found_key.type != key.type) { | ||
| 2484 | ret = 0; | ||
| 2485 | goto out; | ||
| 2486 | } | ||
| 2487 | |||
| 2488 | di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); | ||
| 2489 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | ||
| 2490 | |||
| 2491 | if (di_key.objectid < sctx->send_progress) { | ||
| 2492 | ret = 1; | ||
| 2493 | goto out; | ||
| 2494 | } | ||
| 2495 | |||
| 2496 | key.offset = found_key.offset + 1; | ||
| 2497 | btrfs_release_path(path); | ||
| 2498 | } | ||
| 2499 | |||
| 2500 | out: | ||
| 2501 | btrfs_free_path(path); | ||
| 2502 | return ret; | ||
| 2503 | } | ||
| 2504 | |||
| 2505 | /* | ||
| 2506 | * Only creates the inode if it is: | ||
| 2507 | * 1. Not a directory | ||
| 2508 | * 2. Or a directory which was not created already due to out of order | ||
| 2509 | * directories. See did_create_dir and process_recorded_refs for details. | ||
| 2510 | */ | ||
| 2511 | static int send_create_inode_if_needed(struct send_ctx *sctx) | ||
| 2512 | { | ||
| 2513 | int ret; | ||
| 2514 | |||
| 2515 | if (S_ISDIR(sctx->cur_inode_mode)) { | ||
| 2516 | ret = did_create_dir(sctx, sctx->cur_ino); | ||
| 2517 | if (ret < 0) | ||
| 2518 | goto out; | ||
| 2519 | if (ret) { | ||
| 2520 | ret = 0; | ||
| 2521 | goto out; | ||
| 2522 | } | ||
| 2523 | } | ||
| 2524 | |||
| 2525 | ret = send_create_inode(sctx, sctx->cur_ino); | ||
| 2526 | if (ret < 0) | ||
| 2527 | goto out; | ||
| 2528 | |||
| 2529 | out: | ||
| 2530 | return ret; | ||
| 2531 | } | ||
| 2532 | |||
| 2367 | struct recorded_ref { | 2533 | struct recorded_ref { |
| 2368 | struct list_head list; | 2534 | struct list_head list; |
| 2369 | char *dir_path; | 2535 | char *dir_path; |
| @@ -2416,13 +2582,13 @@ static int record_ref(struct list_head *head, u64 dir, | |||
| 2416 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) | 2582 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) |
| 2417 | { | 2583 | { |
| 2418 | struct recorded_ref *cur; | 2584 | struct recorded_ref *cur; |
| 2419 | struct recorded_ref *tmp; | ||
| 2420 | 2585 | ||
| 2421 | list_for_each_entry_safe(cur, tmp, head, list) { | 2586 | while (!list_empty(head)) { |
| 2587 | cur = list_entry(head->next, struct recorded_ref, list); | ||
| 2422 | fs_path_free(sctx, cur->full_path); | 2588 | fs_path_free(sctx, cur->full_path); |
| 2589 | list_del(&cur->list); | ||
| 2423 | kfree(cur); | 2590 | kfree(cur); |
| 2424 | } | 2591 | } |
| 2425 | INIT_LIST_HEAD(head); | ||
| 2426 | } | 2592 | } |
| 2427 | 2593 | ||
| 2428 | static void free_recorded_refs(struct send_ctx *sctx) | 2594 | static void free_recorded_refs(struct send_ctx *sctx) |
| @@ -2432,7 +2598,7 @@ static void free_recorded_refs(struct send_ctx *sctx) | |||
| 2432 | } | 2598 | } |
| 2433 | 2599 | ||
| 2434 | /* | 2600 | /* |
| 2435 | * Renames/moves a file/dir to it's orphan name. Used when the first | 2601 | * Renames/moves a file/dir to its orphan name. Used when the first |
| 2436 | * ref of an unprocessed inode gets overwritten and for all non empty | 2602 | * ref of an unprocessed inode gets overwritten and for all non empty |
| 2437 | * directories. | 2603 | * directories. |
| 2438 | */ | 2604 | */ |
| @@ -2472,6 +2638,12 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | |||
| 2472 | struct btrfs_key loc; | 2638 | struct btrfs_key loc; |
| 2473 | struct btrfs_dir_item *di; | 2639 | struct btrfs_dir_item *di; |
| 2474 | 2640 | ||
| 2641 | /* | ||
| 2642 | * Don't try to rmdir the top/root subvolume dir. | ||
| 2643 | */ | ||
| 2644 | if (dir == BTRFS_FIRST_FREE_OBJECTID) | ||
| 2645 | return 0; | ||
| 2646 | |||
| 2475 | path = alloc_path_for_send(); | 2647 | path = alloc_path_for_send(); |
| 2476 | if (!path) | 2648 | if (!path) |
| 2477 | return -ENOMEM; | 2649 | return -ENOMEM; |
| @@ -2513,160 +2685,6 @@ out: | |||
| 2513 | return ret; | 2685 | return ret; |
| 2514 | } | 2686 | } |
| 2515 | 2687 | ||
| 2516 | struct finish_unordered_dir_ctx { | ||
| 2517 | struct send_ctx *sctx; | ||
| 2518 | struct fs_path *cur_path; | ||
| 2519 | struct fs_path *dir_path; | ||
| 2520 | u64 dir_ino; | ||
| 2521 | int need_delete; | ||
| 2522 | int delete_pass; | ||
| 2523 | }; | ||
| 2524 | |||
| 2525 | int __finish_unordered_dir(int num, struct btrfs_key *di_key, | ||
| 2526 | const char *name, int name_len, | ||
| 2527 | const char *data, int data_len, | ||
| 2528 | u8 type, void *ctx) | ||
| 2529 | { | ||
| 2530 | int ret = 0; | ||
| 2531 | struct finish_unordered_dir_ctx *fctx = ctx; | ||
| 2532 | struct send_ctx *sctx = fctx->sctx; | ||
| 2533 | u64 di_gen; | ||
| 2534 | u64 di_mode; | ||
| 2535 | int is_orphan = 0; | ||
| 2536 | |||
| 2537 | if (di_key->objectid >= fctx->dir_ino) | ||
| 2538 | goto out; | ||
| 2539 | |||
| 2540 | fs_path_reset(fctx->cur_path); | ||
| 2541 | |||
| 2542 | ret = get_inode_info(sctx->send_root, di_key->objectid, | ||
| 2543 | NULL, &di_gen, &di_mode, NULL, NULL); | ||
| 2544 | if (ret < 0) | ||
| 2545 | goto out; | ||
| 2546 | |||
| 2547 | ret = is_first_ref(sctx, sctx->send_root, di_key->objectid, | ||
| 2548 | fctx->dir_ino, name, name_len); | ||
| 2549 | if (ret < 0) | ||
| 2550 | goto out; | ||
| 2551 | if (ret) { | ||
| 2552 | is_orphan = 1; | ||
| 2553 | ret = gen_unique_name(sctx, di_key->objectid, di_gen, | ||
| 2554 | fctx->cur_path); | ||
| 2555 | } else { | ||
| 2556 | ret = get_cur_path(sctx, di_key->objectid, di_gen, | ||
| 2557 | fctx->cur_path); | ||
| 2558 | } | ||
| 2559 | if (ret < 0) | ||
| 2560 | goto out; | ||
| 2561 | |||
| 2562 | ret = fs_path_add(fctx->dir_path, name, name_len); | ||
| 2563 | if (ret < 0) | ||
| 2564 | goto out; | ||
| 2565 | |||
| 2566 | if (!fctx->delete_pass) { | ||
| 2567 | if (S_ISDIR(di_mode)) { | ||
| 2568 | ret = send_rename(sctx, fctx->cur_path, | ||
| 2569 | fctx->dir_path); | ||
| 2570 | } else { | ||
| 2571 | ret = send_link(sctx, fctx->dir_path, | ||
| 2572 | fctx->cur_path); | ||
| 2573 | if (is_orphan) | ||
| 2574 | fctx->need_delete = 1; | ||
| 2575 | } | ||
| 2576 | } else if (!S_ISDIR(di_mode)) { | ||
| 2577 | ret = send_unlink(sctx, fctx->cur_path); | ||
| 2578 | } else { | ||
| 2579 | ret = 0; | ||
| 2580 | } | ||
| 2581 | |||
| 2582 | fs_path_remove(fctx->dir_path); | ||
| 2583 | |||
| 2584 | out: | ||
| 2585 | return ret; | ||
| 2586 | } | ||
| 2587 | |||
| 2588 | /* | ||
| 2589 | * Go through all dir items and see if we find refs which could not be created | ||
| 2590 | * in the past because the dir did not exist at that time. | ||
| 2591 | */ | ||
| 2592 | static int finish_outoforder_dir(struct send_ctx *sctx, u64 dir, u64 dir_gen) | ||
| 2593 | { | ||
| 2594 | int ret = 0; | ||
| 2595 | struct btrfs_path *path = NULL; | ||
| 2596 | struct btrfs_key key; | ||
| 2597 | struct btrfs_key found_key; | ||
| 2598 | struct extent_buffer *eb; | ||
| 2599 | struct finish_unordered_dir_ctx fctx; | ||
| 2600 | int slot; | ||
| 2601 | |||
| 2602 | path = alloc_path_for_send(); | ||
| 2603 | if (!path) { | ||
| 2604 | ret = -ENOMEM; | ||
| 2605 | goto out; | ||
| 2606 | } | ||
| 2607 | |||
| 2608 | memset(&fctx, 0, sizeof(fctx)); | ||
| 2609 | fctx.sctx = sctx; | ||
| 2610 | fctx.cur_path = fs_path_alloc(sctx); | ||
| 2611 | fctx.dir_path = fs_path_alloc(sctx); | ||
| 2612 | if (!fctx.cur_path || !fctx.dir_path) { | ||
| 2613 | ret = -ENOMEM; | ||
| 2614 | goto out; | ||
| 2615 | } | ||
| 2616 | fctx.dir_ino = dir; | ||
| 2617 | |||
| 2618 | ret = get_cur_path(sctx, dir, dir_gen, fctx.dir_path); | ||
| 2619 | if (ret < 0) | ||
| 2620 | goto out; | ||
| 2621 | |||
| 2622 | /* | ||
| 2623 | * We do two passes. The first links in the new refs and the second | ||
| 2624 | * deletes orphans if required. Deletion of orphans is not required for | ||
| 2625 | * directory inodes, as we always have only one ref and use rename | ||
| 2626 | * instead of link for those. | ||
| 2627 | */ | ||
| 2628 | |||
| 2629 | again: | ||
| 2630 | key.objectid = dir; | ||
| 2631 | key.type = BTRFS_DIR_ITEM_KEY; | ||
| 2632 | key.offset = 0; | ||
| 2633 | while (1) { | ||
| 2634 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | ||
| 2635 | 1, 0); | ||
| 2636 | if (ret < 0) | ||
| 2637 | goto out; | ||
| 2638 | eb = path->nodes[0]; | ||
| 2639 | slot = path->slots[0]; | ||
| 2640 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 2641 | |||
| 2642 | if (found_key.objectid != key.objectid || | ||
| 2643 | found_key.type != key.type) { | ||
| 2644 | btrfs_release_path(path); | ||
| 2645 | break; | ||
| 2646 | } | ||
| 2647 | |||
| 2648 | ret = iterate_dir_item(sctx, sctx->send_root, path, | ||
| 2649 | &found_key, __finish_unordered_dir, | ||
| 2650 | &fctx); | ||
| 2651 | if (ret < 0) | ||
| 2652 | goto out; | ||
| 2653 | |||
| 2654 | key.offset = found_key.offset + 1; | ||
| 2655 | btrfs_release_path(path); | ||
| 2656 | } | ||
| 2657 | |||
| 2658 | if (!fctx.delete_pass && fctx.need_delete) { | ||
| 2659 | fctx.delete_pass = 1; | ||
| 2660 | goto again; | ||
| 2661 | } | ||
| 2662 | |||
| 2663 | out: | ||
| 2664 | btrfs_free_path(path); | ||
| 2665 | fs_path_free(sctx, fctx.cur_path); | ||
| 2666 | fs_path_free(sctx, fctx.dir_path); | ||
| 2667 | return ret; | ||
| 2668 | } | ||
| 2669 | |||
| 2670 | /* | 2688 | /* |
| 2671 | * This does all the move/link/unlink/rmdir magic. | 2689 | * This does all the move/link/unlink/rmdir magic. |
| 2672 | */ | 2690 | */ |
| @@ -2674,6 +2692,7 @@ static int process_recorded_refs(struct send_ctx *sctx) | |||
| 2674 | { | 2692 | { |
| 2675 | int ret = 0; | 2693 | int ret = 0; |
| 2676 | struct recorded_ref *cur; | 2694 | struct recorded_ref *cur; |
| 2695 | struct recorded_ref *cur2; | ||
| 2677 | struct ulist *check_dirs = NULL; | 2696 | struct ulist *check_dirs = NULL; |
| 2678 | struct ulist_iterator uit; | 2697 | struct ulist_iterator uit; |
| 2679 | struct ulist_node *un; | 2698 | struct ulist_node *un; |
| @@ -2685,6 +2704,12 @@ static int process_recorded_refs(struct send_ctx *sctx) | |||
| 2685 | 2704 | ||
| 2686 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 2705 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
| 2687 | 2706 | ||
| 2707 | /* | ||
| 2708 | * This should never happen as the root dir always has the same ref | ||
| 2709 | * which is always '..' | ||
| 2710 | */ | ||
| 2711 | BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); | ||
| 2712 | |||
| 2688 | valid_path = fs_path_alloc(sctx); | 2713 | valid_path = fs_path_alloc(sctx); |
| 2689 | if (!valid_path) { | 2714 | if (!valid_path) { |
| 2690 | ret = -ENOMEM; | 2715 | ret = -ENOMEM; |
| @@ -2731,6 +2756,46 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2731 | 2756 | ||
| 2732 | list_for_each_entry(cur, &sctx->new_refs, list) { | 2757 | list_for_each_entry(cur, &sctx->new_refs, list) { |
| 2733 | /* | 2758 | /* |
| 2759 | * We may have refs where the parent directory does not exist | ||
| 2760 | * yet. This happens if the parent directories inum is higher | ||
| 2761 | * the the current inum. To handle this case, we create the | ||
| 2762 | * parent directory out of order. But we need to check if this | ||
| 2763 | * did already happen before due to other refs in the same dir. | ||
| 2764 | */ | ||
| 2765 | ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); | ||
| 2766 | if (ret < 0) | ||
| 2767 | goto out; | ||
| 2768 | if (ret == inode_state_will_create) { | ||
| 2769 | ret = 0; | ||
| 2770 | /* | ||
| 2771 | * First check if any of the current inodes refs did | ||
| 2772 | * already create the dir. | ||
| 2773 | */ | ||
| 2774 | list_for_each_entry(cur2, &sctx->new_refs, list) { | ||
| 2775 | if (cur == cur2) | ||
| 2776 | break; | ||
| 2777 | if (cur2->dir == cur->dir) { | ||
| 2778 | ret = 1; | ||
| 2779 | break; | ||
| 2780 | } | ||
| 2781 | } | ||
| 2782 | |||
| 2783 | /* | ||
| 2784 | * If that did not happen, check if a previous inode | ||
| 2785 | * did already create the dir. | ||
| 2786 | */ | ||
| 2787 | if (!ret) | ||
| 2788 | ret = did_create_dir(sctx, cur->dir); | ||
| 2789 | if (ret < 0) | ||
| 2790 | goto out; | ||
| 2791 | if (!ret) { | ||
| 2792 | ret = send_create_inode(sctx, cur->dir); | ||
| 2793 | if (ret < 0) | ||
| 2794 | goto out; | ||
| 2795 | } | ||
| 2796 | } | ||
| 2797 | |||
| 2798 | /* | ||
| 2734 | * Check if this new ref would overwrite the first ref of | 2799 | * Check if this new ref would overwrite the first ref of |
| 2735 | * another unprocessed inode. If yes, orphanize the | 2800 | * another unprocessed inode. If yes, orphanize the |
| 2736 | * overwritten inode. If we find an overwritten ref that is | 2801 | * overwritten inode. If we find an overwritten ref that is |
| @@ -2764,7 +2829,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2764 | * inode, move it and update valid_path. If not, link or move | 2829 | * inode, move it and update valid_path. If not, link or move |
| 2765 | * it depending on the inode mode. | 2830 | * it depending on the inode mode. |
| 2766 | */ | 2831 | */ |
| 2767 | if (is_orphan && !sctx->cur_inode_first_ref_orphan) { | 2832 | if (is_orphan) { |
| 2768 | ret = send_rename(sctx, valid_path, cur->full_path); | 2833 | ret = send_rename(sctx, valid_path, cur->full_path); |
| 2769 | if (ret < 0) | 2834 | if (ret < 0) |
| 2770 | goto out; | 2835 | goto out; |
| @@ -2827,6 +2892,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2827 | if (ret < 0) | 2892 | if (ret < 0) |
| 2828 | goto out; | 2893 | goto out; |
| 2829 | } | 2894 | } |
| 2895 | } else if (S_ISDIR(sctx->cur_inode_mode) && | ||
| 2896 | !list_empty(&sctx->deleted_refs)) { | ||
| 2897 | /* | ||
| 2898 | * We have a moved dir. Add the old parent to check_dirs | ||
| 2899 | */ | ||
| 2900 | cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, | ||
| 2901 | list); | ||
| 2902 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, | ||
| 2903 | GFP_NOFS); | ||
| 2904 | if (ret < 0) | ||
| 2905 | goto out; | ||
| 2830 | } else if (!S_ISDIR(sctx->cur_inode_mode)) { | 2906 | } else if (!S_ISDIR(sctx->cur_inode_mode)) { |
| 2831 | /* | 2907 | /* |
| 2832 | * We have a non dir inode. Go through all deleted refs and | 2908 | * We have a non dir inode. Go through all deleted refs and |
| @@ -2840,35 +2916,9 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2840 | if (ret < 0) | 2916 | if (ret < 0) |
| 2841 | goto out; | 2917 | goto out; |
| 2842 | if (!ret) { | 2918 | if (!ret) { |
| 2843 | /* | 2919 | ret = send_unlink(sctx, cur->full_path); |
| 2844 | * In case the inode was moved to a directory | 2920 | if (ret < 0) |
| 2845 | * that was not created yet (see | 2921 | goto out; |
| 2846 | * __record_new_ref), we can not unlink the ref | ||
| 2847 | * as it will be needed later when the parent | ||
| 2848 | * directory is created, so that we can move in | ||
| 2849 | * the inode to the new dir. | ||
| 2850 | */ | ||
| 2851 | if (!is_orphan && | ||
| 2852 | sctx->cur_inode_first_ref_orphan) { | ||
| 2853 | ret = orphanize_inode(sctx, | ||
| 2854 | sctx->cur_ino, | ||
| 2855 | sctx->cur_inode_gen, | ||
| 2856 | cur->full_path); | ||
| 2857 | if (ret < 0) | ||
| 2858 | goto out; | ||
| 2859 | ret = gen_unique_name(sctx, | ||
| 2860 | sctx->cur_ino, | ||
| 2861 | sctx->cur_inode_gen, | ||
| 2862 | valid_path); | ||
| 2863 | if (ret < 0) | ||
| 2864 | goto out; | ||
| 2865 | is_orphan = 1; | ||
| 2866 | |||
| 2867 | } else { | ||
| 2868 | ret = send_unlink(sctx, cur->full_path); | ||
| 2869 | if (ret < 0) | ||
| 2870 | goto out; | ||
| 2871 | } | ||
| 2872 | } | 2922 | } |
| 2873 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, | 2923 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, |
| 2874 | GFP_NOFS); | 2924 | GFP_NOFS); |
| @@ -2880,12 +2930,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2880 | * If the inode is still orphan, unlink the orphan. This may | 2930 | * If the inode is still orphan, unlink the orphan. This may |
| 2881 | * happen when a previous inode did overwrite the first ref | 2931 | * happen when a previous inode did overwrite the first ref |
| 2882 | * of this inode and no new refs were added for the current | 2932 | * of this inode and no new refs were added for the current |
| 2883 | * inode. | 2933 | * inode. Unlinking does not mean that the inode is deleted in |
| 2884 | * We can however not delete the orphan in case the inode relies | 2934 | * all cases. There may still be links to this inode in other |
| 2885 | * in a directory that was not created yet (see | 2935 | * places. |
| 2886 | * __record_new_ref) | ||
| 2887 | */ | 2936 | */ |
| 2888 | if (is_orphan && !sctx->cur_inode_first_ref_orphan) { | 2937 | if (is_orphan) { |
| 2889 | ret = send_unlink(sctx, valid_path); | 2938 | ret = send_unlink(sctx, valid_path); |
| 2890 | if (ret < 0) | 2939 | if (ret < 0) |
| 2891 | goto out; | 2940 | goto out; |
| @@ -2900,6 +2949,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2900 | */ | 2949 | */ |
| 2901 | ULIST_ITER_INIT(&uit); | 2950 | ULIST_ITER_INIT(&uit); |
| 2902 | while ((un = ulist_next(check_dirs, &uit))) { | 2951 | while ((un = ulist_next(check_dirs, &uit))) { |
| 2952 | /* | ||
| 2953 | * In case we had refs into dirs that were not processed yet, | ||
| 2954 | * we don't need to do the utime and rmdir logic for these dirs. | ||
| 2955 | * The dir will be processed later. | ||
| 2956 | */ | ||
| 2903 | if (un->val > sctx->cur_ino) | 2957 | if (un->val > sctx->cur_ino) |
| 2904 | continue; | 2958 | continue; |
| 2905 | 2959 | ||
| @@ -2929,25 +2983,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2929 | } | 2983 | } |
| 2930 | } | 2984 | } |
| 2931 | 2985 | ||
| 2932 | /* | ||
| 2933 | * Current inode is now at it's new position, so we must increase | ||
| 2934 | * send_progress | ||
| 2935 | */ | ||
| 2936 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 2937 | |||
| 2938 | /* | ||
| 2939 | * We may have a directory here that has pending refs which could not | ||
| 2940 | * be created before (because the dir did not exist before, see | ||
| 2941 | * __record_new_ref). finish_outoforder_dir will link/move the pending | ||
| 2942 | * refs. | ||
| 2943 | */ | ||
| 2944 | if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_new) { | ||
| 2945 | ret = finish_outoforder_dir(sctx, sctx->cur_ino, | ||
| 2946 | sctx->cur_inode_gen); | ||
| 2947 | if (ret < 0) | ||
| 2948 | goto out; | ||
| 2949 | } | ||
| 2950 | |||
| 2951 | ret = 0; | 2986 | ret = 0; |
| 2952 | 2987 | ||
| 2953 | out: | 2988 | out: |
| @@ -2971,34 +3006,9 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 2971 | return -ENOMEM; | 3006 | return -ENOMEM; |
| 2972 | 3007 | ||
| 2973 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | 3008 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, |
| 2974 | NULL); | 3009 | NULL, NULL); |
| 2975 | if (ret < 0) | ||
| 2976 | goto out; | ||
| 2977 | |||
| 2978 | /* | ||
| 2979 | * The parent may be non-existent at this point in time. This happens | ||
| 2980 | * if the ino of the parent dir is higher then the current ino. In this | ||
| 2981 | * case, we can not process this ref until the parent dir is finally | ||
| 2982 | * created. If we reach the parent dir later, process_recorded_refs | ||
| 2983 | * will go through all dir items and process the refs that could not be | ||
| 2984 | * processed before. In case this is the first ref, we set | ||
| 2985 | * cur_inode_first_ref_orphan to 1 to inform process_recorded_refs to | ||
| 2986 | * keep an orphan of the inode so that it later can be used for | ||
| 2987 | * link/move | ||
| 2988 | */ | ||
| 2989 | ret = is_inode_existent(sctx, dir, gen); | ||
| 2990 | if (ret < 0) | 3010 | if (ret < 0) |
| 2991 | goto out; | 3011 | goto out; |
| 2992 | if (!ret) { | ||
| 2993 | ret = is_first_ref(sctx, sctx->send_root, sctx->cur_ino, dir, | ||
| 2994 | name->start, fs_path_len(name)); | ||
| 2995 | if (ret < 0) | ||
| 2996 | goto out; | ||
| 2997 | if (ret) | ||
| 2998 | sctx->cur_inode_first_ref_orphan = 1; | ||
| 2999 | ret = 0; | ||
| 3000 | goto out; | ||
| 3001 | } | ||
| 3002 | 3012 | ||
| 3003 | ret = get_cur_path(sctx, dir, gen, p); | 3013 | ret = get_cur_path(sctx, dir, gen, p); |
| 3004 | if (ret < 0) | 3014 | if (ret < 0) |
| @@ -3029,7 +3039,7 @@ static int __record_deleted_ref(int num, u64 dir, int index, | |||
| 3029 | return -ENOMEM; | 3039 | return -ENOMEM; |
| 3030 | 3040 | ||
| 3031 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | 3041 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, |
| 3032 | NULL); | 3042 | NULL, NULL); |
| 3033 | if (ret < 0) | 3043 | if (ret < 0) |
| 3034 | goto out; | 3044 | goto out; |
| 3035 | 3045 | ||
| @@ -3206,33 +3216,28 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3206 | key.offset = 0; | 3216 | key.offset = 0; |
| 3207 | while (1) { | 3217 | while (1) { |
| 3208 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 3218 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); |
| 3209 | if (ret < 0) { | 3219 | if (ret < 0) |
| 3210 | btrfs_release_path(path); | ||
| 3211 | goto out; | 3220 | goto out; |
| 3212 | } | 3221 | if (ret) |
| 3213 | if (ret) { | ||
| 3214 | btrfs_release_path(path); | ||
| 3215 | break; | 3222 | break; |
| 3216 | } | ||
| 3217 | 3223 | ||
| 3218 | eb = path->nodes[0]; | 3224 | eb = path->nodes[0]; |
| 3219 | slot = path->slots[0]; | 3225 | slot = path->slots[0]; |
| 3220 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 3226 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
| 3221 | 3227 | ||
| 3222 | if (found_key.objectid != key.objectid || | 3228 | if (found_key.objectid != key.objectid || |
| 3223 | found_key.type != key.type) { | 3229 | found_key.type != key.type) |
| 3224 | btrfs_release_path(path); | ||
| 3225 | break; | 3230 | break; |
| 3226 | } | ||
| 3227 | 3231 | ||
| 3228 | ret = iterate_inode_ref(sctx, sctx->parent_root, path, | 3232 | ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb, |
| 3229 | &found_key, 0, cb, sctx); | 3233 | sctx); |
| 3230 | btrfs_release_path(path); | 3234 | btrfs_release_path(path); |
| 3231 | if (ret < 0) | 3235 | if (ret < 0) |
| 3232 | goto out; | 3236 | goto out; |
| 3233 | 3237 | ||
| 3234 | key.offset = found_key.offset + 1; | 3238 | key.offset = found_key.offset + 1; |
| 3235 | } | 3239 | } |
| 3240 | btrfs_release_path(path); | ||
| 3236 | 3241 | ||
| 3237 | ret = process_recorded_refs(sctx); | 3242 | ret = process_recorded_refs(sctx); |
| 3238 | 3243 | ||
| @@ -3555,7 +3560,7 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) | |||
| 3555 | int ret = 0; | 3560 | int ret = 0; |
| 3556 | struct fs_path *p; | 3561 | struct fs_path *p; |
| 3557 | loff_t pos = offset; | 3562 | loff_t pos = offset; |
| 3558 | int readed = 0; | 3563 | int num_read = 0; |
| 3559 | mm_segment_t old_fs; | 3564 | mm_segment_t old_fs; |
| 3560 | 3565 | ||
| 3561 | p = fs_path_alloc(sctx); | 3566 | p = fs_path_alloc(sctx); |
| @@ -3580,8 +3585,8 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | |||
| 3580 | ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); | 3585 | ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); |
| 3581 | if (ret < 0) | 3586 | if (ret < 0) |
| 3582 | goto out; | 3587 | goto out; |
| 3583 | readed = ret; | 3588 | num_read = ret; |
| 3584 | if (!readed) | 3589 | if (!num_read) |
| 3585 | goto out; | 3590 | goto out; |
| 3586 | 3591 | ||
| 3587 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); | 3592 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); |
| @@ -3594,7 +3599,7 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | |||
| 3594 | 3599 | ||
| 3595 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 3600 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
| 3596 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); | 3601 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
| 3597 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed); | 3602 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); |
| 3598 | 3603 | ||
| 3599 | ret = send_cmd(sctx); | 3604 | ret = send_cmd(sctx); |
| 3600 | 3605 | ||
| @@ -3604,7 +3609,7 @@ out: | |||
| 3604 | set_fs(old_fs); | 3609 | set_fs(old_fs); |
| 3605 | if (ret < 0) | 3610 | if (ret < 0) |
| 3606 | return ret; | 3611 | return ret; |
| 3607 | return readed; | 3612 | return num_read; |
| 3608 | } | 3613 | } |
| 3609 | 3614 | ||
| 3610 | /* | 3615 | /* |
| @@ -3615,7 +3620,6 @@ static int send_clone(struct send_ctx *sctx, | |||
| 3615 | struct clone_root *clone_root) | 3620 | struct clone_root *clone_root) |
| 3616 | { | 3621 | { |
| 3617 | int ret = 0; | 3622 | int ret = 0; |
| 3618 | struct btrfs_root *clone_root2 = clone_root->root; | ||
| 3619 | struct fs_path *p; | 3623 | struct fs_path *p; |
| 3620 | u64 gen; | 3624 | u64 gen; |
| 3621 | 3625 | ||
| @@ -3640,22 +3644,23 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
| 3640 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); | 3644 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); |
| 3641 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 3645 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
| 3642 | 3646 | ||
| 3643 | if (clone_root2 == sctx->send_root) { | 3647 | if (clone_root->root == sctx->send_root) { |
| 3644 | ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, | 3648 | ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, |
| 3645 | &gen, NULL, NULL, NULL); | 3649 | &gen, NULL, NULL, NULL, NULL); |
| 3646 | if (ret < 0) | 3650 | if (ret < 0) |
| 3647 | goto out; | 3651 | goto out; |
| 3648 | ret = get_cur_path(sctx, clone_root->ino, gen, p); | 3652 | ret = get_cur_path(sctx, clone_root->ino, gen, p); |
| 3649 | } else { | 3653 | } else { |
| 3650 | ret = get_inode_path(sctx, clone_root2, clone_root->ino, p); | 3654 | ret = get_inode_path(sctx, clone_root->root, |
| 3655 | clone_root->ino, p); | ||
| 3651 | } | 3656 | } |
| 3652 | if (ret < 0) | 3657 | if (ret < 0) |
| 3653 | goto out; | 3658 | goto out; |
| 3654 | 3659 | ||
| 3655 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, | 3660 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, |
| 3656 | clone_root2->root_item.uuid); | 3661 | clone_root->root->root_item.uuid); |
| 3657 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, | 3662 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, |
| 3658 | clone_root2->root_item.ctransid); | 3663 | clone_root->root->root_item.ctransid); |
| 3659 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); | 3664 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); |
| 3660 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, | 3665 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, |
| 3661 | clone_root->offset); | 3666 | clone_root->offset); |
| @@ -3684,10 +3689,17 @@ static int send_write_or_clone(struct send_ctx *sctx, | |||
| 3684 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3689 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 3685 | struct btrfs_file_extent_item); | 3690 | struct btrfs_file_extent_item); |
| 3686 | type = btrfs_file_extent_type(path->nodes[0], ei); | 3691 | type = btrfs_file_extent_type(path->nodes[0], ei); |
| 3687 | if (type == BTRFS_FILE_EXTENT_INLINE) | 3692 | if (type == BTRFS_FILE_EXTENT_INLINE) { |
| 3688 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); | 3693 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); |
| 3689 | else | 3694 | /* |
| 3695 | * it is possible the inline item won't cover the whole page, | ||
| 3696 | * but there may be items after this page. Make | ||
| 3697 | * sure to send the whole thing | ||
| 3698 | */ | ||
| 3699 | len = PAGE_CACHE_ALIGN(len); | ||
| 3700 | } else { | ||
| 3690 | len = btrfs_file_extent_num_bytes(path->nodes[0], ei); | 3701 | len = btrfs_file_extent_num_bytes(path->nodes[0], ei); |
| 3702 | } | ||
| 3691 | 3703 | ||
| 3692 | if (offset + len > sctx->cur_inode_size) | 3704 | if (offset + len > sctx->cur_inode_size) |
| 3693 | len = sctx->cur_inode_size - offset; | 3705 | len = sctx->cur_inode_size - offset; |
| @@ -3735,6 +3747,8 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3735 | u64 left_offset_fixed; | 3747 | u64 left_offset_fixed; |
| 3736 | u64 left_len; | 3748 | u64 left_len; |
| 3737 | u64 right_len; | 3749 | u64 right_len; |
| 3750 | u64 left_gen; | ||
| 3751 | u64 right_gen; | ||
| 3738 | u8 left_type; | 3752 | u8 left_type; |
| 3739 | u8 right_type; | 3753 | u8 right_type; |
| 3740 | 3754 | ||
| @@ -3744,17 +3758,17 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3744 | 3758 | ||
| 3745 | eb = left_path->nodes[0]; | 3759 | eb = left_path->nodes[0]; |
| 3746 | slot = left_path->slots[0]; | 3760 | slot = left_path->slots[0]; |
| 3747 | |||
| 3748 | ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | 3761 | ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); |
| 3749 | left_type = btrfs_file_extent_type(eb, ei); | 3762 | left_type = btrfs_file_extent_type(eb, ei); |
| 3750 | left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | ||
| 3751 | left_len = btrfs_file_extent_num_bytes(eb, ei); | ||
| 3752 | left_offset = btrfs_file_extent_offset(eb, ei); | ||
| 3753 | 3763 | ||
| 3754 | if (left_type != BTRFS_FILE_EXTENT_REG) { | 3764 | if (left_type != BTRFS_FILE_EXTENT_REG) { |
| 3755 | ret = 0; | 3765 | ret = 0; |
| 3756 | goto out; | 3766 | goto out; |
| 3757 | } | 3767 | } |
| 3768 | left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | ||
| 3769 | left_len = btrfs_file_extent_num_bytes(eb, ei); | ||
| 3770 | left_offset = btrfs_file_extent_offset(eb, ei); | ||
| 3771 | left_gen = btrfs_file_extent_generation(eb, ei); | ||
| 3758 | 3772 | ||
| 3759 | /* | 3773 | /* |
| 3760 | * Following comments will refer to these graphics. L is the left | 3774 | * Following comments will refer to these graphics. L is the left |
| @@ -3810,6 +3824,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3810 | right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | 3824 | right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); |
| 3811 | right_len = btrfs_file_extent_num_bytes(eb, ei); | 3825 | right_len = btrfs_file_extent_num_bytes(eb, ei); |
| 3812 | right_offset = btrfs_file_extent_offset(eb, ei); | 3826 | right_offset = btrfs_file_extent_offset(eb, ei); |
| 3827 | right_gen = btrfs_file_extent_generation(eb, ei); | ||
| 3813 | 3828 | ||
| 3814 | if (right_type != BTRFS_FILE_EXTENT_REG) { | 3829 | if (right_type != BTRFS_FILE_EXTENT_REG) { |
| 3815 | ret = 0; | 3830 | ret = 0; |
| @@ -3820,7 +3835,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3820 | * Are we at extent 8? If yes, we know the extent is changed. | 3835 | * Are we at extent 8? If yes, we know the extent is changed. |
| 3821 | * This may only happen on the first iteration. | 3836 | * This may only happen on the first iteration. |
| 3822 | */ | 3837 | */ |
| 3823 | if (found_key.offset + right_len < ekey->offset) { | 3838 | if (found_key.offset + right_len <= ekey->offset) { |
| 3824 | ret = 0; | 3839 | ret = 0; |
| 3825 | goto out; | 3840 | goto out; |
| 3826 | } | 3841 | } |
| @@ -3837,8 +3852,9 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3837 | /* | 3852 | /* |
| 3838 | * Check if we have the same extent. | 3853 | * Check if we have the same extent. |
| 3839 | */ | 3854 | */ |
| 3840 | if (left_disknr + left_offset_fixed != | 3855 | if (left_disknr != right_disknr || |
| 3841 | right_disknr + right_offset) { | 3856 | left_offset_fixed != right_offset || |
| 3857 | left_gen != right_gen) { | ||
| 3842 | ret = 0; | 3858 | ret = 0; |
| 3843 | goto out; | 3859 | goto out; |
| 3844 | } | 3860 | } |
| @@ -3977,6 +3993,15 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) | |||
| 3977 | goto out; | 3993 | goto out; |
| 3978 | 3994 | ||
| 3979 | ret = process_recorded_refs(sctx); | 3995 | ret = process_recorded_refs(sctx); |
| 3996 | if (ret < 0) | ||
| 3997 | goto out; | ||
| 3998 | |||
| 3999 | /* | ||
| 4000 | * We have processed the refs and thus need to advance send_progress. | ||
| 4001 | * Now, calls to get_cur_xxx will take the updated refs of the current | ||
| 4002 | * inode into account. | ||
| 4003 | */ | ||
| 4004 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 3980 | 4005 | ||
| 3981 | out: | 4006 | out: |
| 3982 | return ret; | 4007 | return ret; |
| @@ -4004,7 +4029,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
| 4004 | goto out; | 4029 | goto out; |
| 4005 | 4030 | ||
| 4006 | ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, | 4031 | ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, |
| 4007 | &left_mode, &left_uid, &left_gid); | 4032 | &left_mode, &left_uid, &left_gid, NULL); |
| 4008 | if (ret < 0) | 4033 | if (ret < 0) |
| 4009 | goto out; | 4034 | goto out; |
| 4010 | 4035 | ||
| @@ -4015,7 +4040,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
| 4015 | } else { | 4040 | } else { |
| 4016 | ret = get_inode_info(sctx->parent_root, sctx->cur_ino, | 4041 | ret = get_inode_info(sctx->parent_root, sctx->cur_ino, |
| 4017 | NULL, NULL, &right_mode, &right_uid, | 4042 | NULL, NULL, &right_mode, &right_uid, |
| 4018 | &right_gid); | 4043 | &right_gid, NULL); |
| 4019 | if (ret < 0) | 4044 | if (ret < 0) |
| 4020 | goto out; | 4045 | goto out; |
| 4021 | 4046 | ||
| @@ -4074,7 +4099,12 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4074 | 4099 | ||
| 4075 | sctx->cur_ino = key->objectid; | 4100 | sctx->cur_ino = key->objectid; |
| 4076 | sctx->cur_inode_new_gen = 0; | 4101 | sctx->cur_inode_new_gen = 0; |
| 4077 | sctx->cur_inode_first_ref_orphan = 0; | 4102 | |
| 4103 | /* | ||
| 4104 | * Set send_progress to current inode. This will tell all get_cur_xxx | ||
| 4105 | * functions that the current inode's refs are not updated yet. Later, | ||
| 4106 | * when process_recorded_refs is finished, it is set to cur_ino + 1. | ||
| 4107 | */ | ||
| 4078 | sctx->send_progress = sctx->cur_ino; | 4108 | sctx->send_progress = sctx->cur_ino; |
| 4079 | 4109 | ||
| 4080 | if (result == BTRFS_COMPARE_TREE_NEW || | 4110 | if (result == BTRFS_COMPARE_TREE_NEW || |
| @@ -4098,7 +4128,14 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4098 | 4128 | ||
| 4099 | right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], | 4129 | right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], |
| 4100 | right_ii); | 4130 | right_ii); |
| 4101 | if (left_gen != right_gen) | 4131 | |
| 4132 | /* | ||
| 4133 | * The cur_ino = root dir case is special here. We can't treat | ||
| 4134 | * the inode as deleted+reused because it would generate a | ||
| 4135 | * stream that tries to delete/mkdir the root dir. | ||
| 4136 | */ | ||
| 4137 | if (left_gen != right_gen && | ||
| 4138 | sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
| 4102 | sctx->cur_inode_new_gen = 1; | 4139 | sctx->cur_inode_new_gen = 1; |
| 4103 | } | 4140 | } |
| 4104 | 4141 | ||
| @@ -4111,8 +4148,7 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4111 | sctx->cur_inode_mode = btrfs_inode_mode( | 4148 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4112 | sctx->left_path->nodes[0], left_ii); | 4149 | sctx->left_path->nodes[0], left_ii); |
| 4113 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | 4150 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) |
| 4114 | ret = send_create_inode(sctx, sctx->left_path, | 4151 | ret = send_create_inode_if_needed(sctx); |
| 4115 | sctx->cmp_key); | ||
| 4116 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | 4152 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { |
| 4117 | sctx->cur_inode_gen = right_gen; | 4153 | sctx->cur_inode_gen = right_gen; |
| 4118 | sctx->cur_inode_new = 0; | 4154 | sctx->cur_inode_new = 0; |
| @@ -4122,7 +4158,17 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4122 | sctx->cur_inode_mode = btrfs_inode_mode( | 4158 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4123 | sctx->right_path->nodes[0], right_ii); | 4159 | sctx->right_path->nodes[0], right_ii); |
| 4124 | } else if (result == BTRFS_COMPARE_TREE_CHANGED) { | 4160 | } else if (result == BTRFS_COMPARE_TREE_CHANGED) { |
| 4161 | /* | ||
| 4162 | * We need to do some special handling in case the inode was | ||
| 4163 | * reported as changed with a changed generation number. This | ||
| 4164 | * means that the original inode was deleted and new inode | ||
| 4165 | * reused the same inum. So we have to treat the old inode as | ||
| 4166 | * deleted and the new one as new. | ||
| 4167 | */ | ||
| 4125 | if (sctx->cur_inode_new_gen) { | 4168 | if (sctx->cur_inode_new_gen) { |
| 4169 | /* | ||
| 4170 | * First, process the inode as if it was deleted. | ||
| 4171 | */ | ||
| 4126 | sctx->cur_inode_gen = right_gen; | 4172 | sctx->cur_inode_gen = right_gen; |
| 4127 | sctx->cur_inode_new = 0; | 4173 | sctx->cur_inode_new = 0; |
| 4128 | sctx->cur_inode_deleted = 1; | 4174 | sctx->cur_inode_deleted = 1; |
| @@ -4135,6 +4181,9 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4135 | if (ret < 0) | 4181 | if (ret < 0) |
| 4136 | goto out; | 4182 | goto out; |
| 4137 | 4183 | ||
| 4184 | /* | ||
| 4185 | * Now process the inode as if it was new. | ||
| 4186 | */ | ||
| 4138 | sctx->cur_inode_gen = left_gen; | 4187 | sctx->cur_inode_gen = left_gen; |
| 4139 | sctx->cur_inode_new = 1; | 4188 | sctx->cur_inode_new = 1; |
| 4140 | sctx->cur_inode_deleted = 0; | 4189 | sctx->cur_inode_deleted = 0; |
| @@ -4142,14 +4191,23 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4142 | sctx->left_path->nodes[0], left_ii); | 4191 | sctx->left_path->nodes[0], left_ii); |
| 4143 | sctx->cur_inode_mode = btrfs_inode_mode( | 4192 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4144 | sctx->left_path->nodes[0], left_ii); | 4193 | sctx->left_path->nodes[0], left_ii); |
| 4145 | ret = send_create_inode(sctx, sctx->left_path, | 4194 | ret = send_create_inode_if_needed(sctx); |
| 4146 | sctx->cmp_key); | ||
| 4147 | if (ret < 0) | 4195 | if (ret < 0) |
| 4148 | goto out; | 4196 | goto out; |
| 4149 | 4197 | ||
| 4150 | ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); | 4198 | ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); |
| 4151 | if (ret < 0) | 4199 | if (ret < 0) |
| 4152 | goto out; | 4200 | goto out; |
| 4201 | /* | ||
| 4202 | * Advance send_progress now as we did not get into | ||
| 4203 | * process_recorded_refs_if_needed in the new_gen case. | ||
| 4204 | */ | ||
| 4205 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 4206 | |||
| 4207 | /* | ||
| 4208 | * Now process all extents and xattrs of the inode as if | ||
| 4209 | * they were all new. | ||
| 4210 | */ | ||
| 4153 | ret = process_all_extents(sctx); | 4211 | ret = process_all_extents(sctx); |
| 4154 | if (ret < 0) | 4212 | if (ret < 0) |
| 4155 | goto out; | 4213 | goto out; |
| @@ -4172,6 +4230,16 @@ out: | |||
| 4172 | return ret; | 4230 | return ret; |
| 4173 | } | 4231 | } |
| 4174 | 4232 | ||
| 4233 | /* | ||
| 4234 | * We have to process new refs before deleted refs, but compare_trees gives us | ||
| 4235 | * the new and deleted refs mixed. To fix this, we record the new/deleted refs | ||
| 4236 | * first and later process them in process_recorded_refs. | ||
| 4237 | * For the cur_inode_new_gen case, we skip recording completely because | ||
| 4238 | * changed_inode did already initiate processing of refs. The reason for this is | ||
| 4239 | * that in this case, compare_tree actually compares the refs of 2 different | ||
| 4240 | * inodes. To fix this, process_all_refs is used in changed_inode to handle all | ||
| 4241 | * refs of the right tree as deleted and all refs of the left tree as new. | ||
| 4242 | */ | ||
| 4175 | static int changed_ref(struct send_ctx *sctx, | 4243 | static int changed_ref(struct send_ctx *sctx, |
| 4176 | enum btrfs_compare_tree_result result) | 4244 | enum btrfs_compare_tree_result result) |
| 4177 | { | 4245 | { |
| @@ -4192,6 +4260,11 @@ static int changed_ref(struct send_ctx *sctx, | |||
| 4192 | return ret; | 4260 | return ret; |
| 4193 | } | 4261 | } |
| 4194 | 4262 | ||
| 4263 | /* | ||
| 4264 | * Process new/deleted/changed xattrs. We skip processing in the | ||
| 4265 | * cur_inode_new_gen case because changed_inode did already initiate processing | ||
| 4266 | * of xattrs. The reason is the same as in changed_ref | ||
| 4267 | */ | ||
| 4195 | static int changed_xattr(struct send_ctx *sctx, | 4268 | static int changed_xattr(struct send_ctx *sctx, |
| 4196 | enum btrfs_compare_tree_result result) | 4269 | enum btrfs_compare_tree_result result) |
| 4197 | { | 4270 | { |
| @@ -4211,6 +4284,11 @@ static int changed_xattr(struct send_ctx *sctx, | |||
| 4211 | return ret; | 4284 | return ret; |
| 4212 | } | 4285 | } |
| 4213 | 4286 | ||
| 4287 | /* | ||
| 4288 | * Process new/deleted/changed extents. We skip processing in the | ||
| 4289 | * cur_inode_new_gen case because changed_inode did already initiate processing | ||
| 4290 | * of extents. The reason is the same as in changed_ref | ||
| 4291 | */ | ||
| 4214 | static int changed_extent(struct send_ctx *sctx, | 4292 | static int changed_extent(struct send_ctx *sctx, |
| 4215 | enum btrfs_compare_tree_result result) | 4293 | enum btrfs_compare_tree_result result) |
| 4216 | { | 4294 | { |
| @@ -4227,7 +4305,10 @@ static int changed_extent(struct send_ctx *sctx, | |||
| 4227 | return ret; | 4305 | return ret; |
| 4228 | } | 4306 | } |
| 4229 | 4307 | ||
| 4230 | 4308 | /* | |
| 4309 | * Updates compare related fields in sctx and simply forwards to the actual | ||
| 4310 | * changed_xxx functions. | ||
| 4311 | */ | ||
| 4231 | static int changed_cb(struct btrfs_root *left_root, | 4312 | static int changed_cb(struct btrfs_root *left_root, |
| 4232 | struct btrfs_root *right_root, | 4313 | struct btrfs_root *right_root, |
| 4233 | struct btrfs_path *left_path, | 4314 | struct btrfs_path *left_path, |
| @@ -4247,6 +4328,11 @@ static int changed_cb(struct btrfs_root *left_root, | |||
| 4247 | if (ret < 0) | 4328 | if (ret < 0) |
| 4248 | goto out; | 4329 | goto out; |
| 4249 | 4330 | ||
| 4331 | /* Ignore non-FS objects */ | ||
| 4332 | if (key->objectid == BTRFS_FREE_INO_OBJECTID || | ||
| 4333 | key->objectid == BTRFS_FREE_SPACE_OBJECTID) | ||
| 4334 | goto out; | ||
| 4335 | |||
| 4250 | if (key->type == BTRFS_INODE_ITEM_KEY) | 4336 | if (key->type == BTRFS_INODE_ITEM_KEY) |
| 4251 | ret = changed_inode(sctx, result); | 4337 | ret = changed_inode(sctx, result); |
| 4252 | else if (key->type == BTRFS_INODE_REF_KEY) | 4338 | else if (key->type == BTRFS_INODE_REF_KEY) |
| @@ -4299,7 +4385,8 @@ join_trans: | |||
| 4299 | } | 4385 | } |
| 4300 | 4386 | ||
| 4301 | /* | 4387 | /* |
| 4302 | * Make sure the tree has not changed | 4388 | * Make sure the tree has not changed after re-joining. We detect this |
| 4389 | * by comparing start_ctransid and ctransid. They should always match. | ||
| 4303 | */ | 4390 | */ |
| 4304 | spin_lock(&send_root->root_times_lock); | 4391 | spin_lock(&send_root->root_times_lock); |
| 4305 | ctransid = btrfs_root_ctransid(&send_root->root_item); | 4392 | ctransid = btrfs_root_ctransid(&send_root->root_item); |
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 9934e948e57f..1bf4f32fd4ef 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h | |||
| @@ -130,4 +130,5 @@ enum { | |||
| 130 | 130 | ||
| 131 | #ifdef __KERNEL__ | 131 | #ifdef __KERNEL__ |
| 132 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); | 132 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); |
| 133 | int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off); | ||
| 133 | #endif | 134 | #endif |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 83d6f9f9c220..915ac14c2064 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -243,12 +243,18 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
| 243 | struct btrfs_root *root, const char *function, | 243 | struct btrfs_root *root, const char *function, |
| 244 | unsigned int line, int errno) | 244 | unsigned int line, int errno) |
| 245 | { | 245 | { |
| 246 | WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted"); | 246 | WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted\n"); |
| 247 | trans->aborted = errno; | 247 | trans->aborted = errno; |
| 248 | /* Nothing used. The other threads that have joined this | 248 | /* Nothing used. The other threads that have joined this |
| 249 | * transaction may be able to continue. */ | 249 | * transaction may be able to continue. */ |
| 250 | if (!trans->blocks_used) { | 250 | if (!trans->blocks_used) { |
| 251 | btrfs_printk(root->fs_info, "Aborting unused transaction.\n"); | 251 | char nbuf[16]; |
| 252 | const char *errstr; | ||
| 253 | |||
| 254 | errstr = btrfs_decode_error(root->fs_info, errno, nbuf); | ||
| 255 | btrfs_printk(root->fs_info, | ||
| 256 | "%s:%d: Aborting unused transaction(%s).\n", | ||
| 257 | function, line, errstr); | ||
| 252 | return; | 258 | return; |
| 253 | } | 259 | } |
| 254 | trans->transaction->aborted = errno; | 260 | trans->transaction->aborted = errno; |
| @@ -407,7 +413,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 407 | btrfs_set_opt(info->mount_opt, NODATASUM); | 413 | btrfs_set_opt(info->mount_opt, NODATASUM); |
| 408 | break; | 414 | break; |
| 409 | case Opt_nodatacow: | 415 | case Opt_nodatacow: |
| 410 | printk(KERN_INFO "btrfs: setting nodatacow\n"); | 416 | if (!btrfs_test_opt(root, COMPRESS) || |
| 417 | !btrfs_test_opt(root, FORCE_COMPRESS)) { | ||
| 418 | printk(KERN_INFO "btrfs: setting nodatacow, compression disabled\n"); | ||
| 419 | } else { | ||
| 420 | printk(KERN_INFO "btrfs: setting nodatacow\n"); | ||
| 421 | } | ||
| 422 | info->compress_type = BTRFS_COMPRESS_NONE; | ||
| 423 | btrfs_clear_opt(info->mount_opt, COMPRESS); | ||
| 424 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); | ||
| 411 | btrfs_set_opt(info->mount_opt, NODATACOW); | 425 | btrfs_set_opt(info->mount_opt, NODATACOW); |
| 412 | btrfs_set_opt(info->mount_opt, NODATASUM); | 426 | btrfs_set_opt(info->mount_opt, NODATASUM); |
| 413 | break; | 427 | break; |
| @@ -422,10 +436,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 422 | compress_type = "zlib"; | 436 | compress_type = "zlib"; |
| 423 | info->compress_type = BTRFS_COMPRESS_ZLIB; | 437 | info->compress_type = BTRFS_COMPRESS_ZLIB; |
| 424 | btrfs_set_opt(info->mount_opt, COMPRESS); | 438 | btrfs_set_opt(info->mount_opt, COMPRESS); |
| 439 | btrfs_clear_opt(info->mount_opt, NODATACOW); | ||
| 440 | btrfs_clear_opt(info->mount_opt, NODATASUM); | ||
| 425 | } else if (strcmp(args[0].from, "lzo") == 0) { | 441 | } else if (strcmp(args[0].from, "lzo") == 0) { |
| 426 | compress_type = "lzo"; | 442 | compress_type = "lzo"; |
| 427 | info->compress_type = BTRFS_COMPRESS_LZO; | 443 | info->compress_type = BTRFS_COMPRESS_LZO; |
| 428 | btrfs_set_opt(info->mount_opt, COMPRESS); | 444 | btrfs_set_opt(info->mount_opt, COMPRESS); |
| 445 | btrfs_clear_opt(info->mount_opt, NODATACOW); | ||
| 446 | btrfs_clear_opt(info->mount_opt, NODATASUM); | ||
| 429 | btrfs_set_fs_incompat(info, COMPRESS_LZO); | 447 | btrfs_set_fs_incompat(info, COMPRESS_LZO); |
| 430 | } else if (strncmp(args[0].from, "no", 2) == 0) { | 448 | } else if (strncmp(args[0].from, "no", 2) == 0) { |
| 431 | compress_type = "no"; | 449 | compress_type = "no"; |
| @@ -543,11 +561,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 543 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); | 561 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); |
| 544 | break; | 562 | break; |
| 545 | case Opt_defrag: | 563 | case Opt_defrag: |
| 546 | printk(KERN_INFO "btrfs: enabling auto defrag"); | 564 | printk(KERN_INFO "btrfs: enabling auto defrag\n"); |
| 547 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); | 565 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); |
| 548 | break; | 566 | break; |
| 549 | case Opt_recovery: | 567 | case Opt_recovery: |
| 550 | printk(KERN_INFO "btrfs: enabling auto recovery"); | 568 | printk(KERN_INFO "btrfs: enabling auto recovery\n"); |
| 551 | btrfs_set_opt(info->mount_opt, RECOVERY); | 569 | btrfs_set_opt(info->mount_opt, RECOVERY); |
| 552 | break; | 570 | break; |
| 553 | case Opt_skip_balance: | 571 | case Opt_skip_balance: |
| @@ -846,18 +864,15 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
| 846 | return 0; | 864 | return 0; |
| 847 | } | 865 | } |
| 848 | 866 | ||
| 849 | btrfs_wait_ordered_extents(root, 0, 0); | 867 | btrfs_wait_ordered_extents(root, 0); |
| 850 | |||
| 851 | spin_lock(&fs_info->trans_lock); | ||
| 852 | if (!fs_info->running_transaction) { | ||
| 853 | spin_unlock(&fs_info->trans_lock); | ||
| 854 | return 0; | ||
| 855 | } | ||
| 856 | spin_unlock(&fs_info->trans_lock); | ||
| 857 | 868 | ||
| 858 | trans = btrfs_join_transaction(root); | 869 | trans = btrfs_attach_transaction(root); |
| 859 | if (IS_ERR(trans)) | 870 | if (IS_ERR(trans)) { |
| 871 | /* no transaction, don't bother */ | ||
| 872 | if (PTR_ERR(trans) == -ENOENT) | ||
| 873 | return 0; | ||
| 860 | return PTR_ERR(trans); | 874 | return PTR_ERR(trans); |
| 875 | } | ||
| 861 | return btrfs_commit_transaction(trans, root); | 876 | return btrfs_commit_transaction(trans, root); |
| 862 | } | 877 | } |
| 863 | 878 | ||
| @@ -1508,17 +1523,21 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
| 1508 | 1523 | ||
| 1509 | static int btrfs_freeze(struct super_block *sb) | 1524 | static int btrfs_freeze(struct super_block *sb) |
| 1510 | { | 1525 | { |
| 1511 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 1526 | struct btrfs_trans_handle *trans; |
| 1512 | mutex_lock(&fs_info->transaction_kthread_mutex); | 1527 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; |
| 1513 | mutex_lock(&fs_info->cleaner_mutex); | 1528 | |
| 1514 | return 0; | 1529 | trans = btrfs_attach_transaction(root); |
| 1530 | if (IS_ERR(trans)) { | ||
| 1531 | /* no transaction, don't bother */ | ||
| 1532 | if (PTR_ERR(trans) == -ENOENT) | ||
| 1533 | return 0; | ||
| 1534 | return PTR_ERR(trans); | ||
| 1535 | } | ||
| 1536 | return btrfs_commit_transaction(trans, root); | ||
| 1515 | } | 1537 | } |
| 1516 | 1538 | ||
| 1517 | static int btrfs_unfreeze(struct super_block *sb) | 1539 | static int btrfs_unfreeze(struct super_block *sb) |
| 1518 | { | 1540 | { |
| 1519 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
| 1520 | mutex_unlock(&fs_info->cleaner_mutex); | ||
| 1521 | mutex_unlock(&fs_info->transaction_kthread_mutex); | ||
| 1522 | return 0; | 1541 | return 0; |
| 1523 | } | 1542 | } |
| 1524 | 1543 | ||
| @@ -1595,7 +1614,7 @@ static int btrfs_interface_init(void) | |||
| 1595 | static void btrfs_interface_exit(void) | 1614 | static void btrfs_interface_exit(void) |
| 1596 | { | 1615 | { |
| 1597 | if (misc_deregister(&btrfs_misc) < 0) | 1616 | if (misc_deregister(&btrfs_misc) < 0) |
| 1598 | printk(KERN_INFO "misc_deregister failed for control device"); | 1617 | printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); |
| 1599 | } | 1618 | } |
| 1600 | 1619 | ||
| 1601 | static int __init init_btrfs_fs(void) | 1620 | static int __init init_btrfs_fs(void) |
| @@ -1620,10 +1639,14 @@ static int __init init_btrfs_fs(void) | |||
| 1620 | if (err) | 1639 | if (err) |
| 1621 | goto free_extent_io; | 1640 | goto free_extent_io; |
| 1622 | 1641 | ||
| 1623 | err = btrfs_delayed_inode_init(); | 1642 | err = ordered_data_init(); |
| 1624 | if (err) | 1643 | if (err) |
| 1625 | goto free_extent_map; | 1644 | goto free_extent_map; |
| 1626 | 1645 | ||
| 1646 | err = btrfs_delayed_inode_init(); | ||
| 1647 | if (err) | ||
| 1648 | goto free_ordered_data; | ||
| 1649 | |||
| 1627 | err = btrfs_interface_init(); | 1650 | err = btrfs_interface_init(); |
| 1628 | if (err) | 1651 | if (err) |
| 1629 | goto free_delayed_inode; | 1652 | goto free_delayed_inode; |
| @@ -1641,6 +1664,8 @@ unregister_ioctl: | |||
| 1641 | btrfs_interface_exit(); | 1664 | btrfs_interface_exit(); |
| 1642 | free_delayed_inode: | 1665 | free_delayed_inode: |
| 1643 | btrfs_delayed_inode_exit(); | 1666 | btrfs_delayed_inode_exit(); |
| 1667 | free_ordered_data: | ||
| 1668 | ordered_data_exit(); | ||
| 1644 | free_extent_map: | 1669 | free_extent_map: |
| 1645 | extent_map_exit(); | 1670 | extent_map_exit(); |
| 1646 | free_extent_io: | 1671 | free_extent_io: |
| @@ -1657,6 +1682,7 @@ static void __exit exit_btrfs_fs(void) | |||
| 1657 | { | 1682 | { |
| 1658 | btrfs_destroy_cachep(); | 1683 | btrfs_destroy_cachep(); |
| 1659 | btrfs_delayed_inode_exit(); | 1684 | btrfs_delayed_inode_exit(); |
| 1685 | ordered_data_exit(); | ||
| 1660 | extent_map_exit(); | 1686 | extent_map_exit(); |
| 1661 | extent_io_exit(); | 1687 | extent_io_exit(); |
| 1662 | btrfs_interface_exit(); | 1688 | btrfs_interface_exit(); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 27c26004e050..77db875b5116 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -53,7 +53,7 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
| 53 | /* | 53 | /* |
| 54 | * either allocate a new transaction or hop into the existing one | 54 | * either allocate a new transaction or hop into the existing one |
| 55 | */ | 55 | */ |
| 56 | static noinline int join_transaction(struct btrfs_root *root, int nofail) | 56 | static noinline int join_transaction(struct btrfs_root *root, int type) |
| 57 | { | 57 | { |
| 58 | struct btrfs_transaction *cur_trans; | 58 | struct btrfs_transaction *cur_trans; |
| 59 | struct btrfs_fs_info *fs_info = root->fs_info; | 59 | struct btrfs_fs_info *fs_info = root->fs_info; |
| @@ -67,7 +67,13 @@ loop: | |||
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | if (fs_info->trans_no_join) { | 69 | if (fs_info->trans_no_join) { |
| 70 | if (!nofail) { | 70 | /* |
| 71 | * If we are JOIN_NOLOCK we're already committing a current | ||
| 72 | * transaction, we just need a handle to deal with something | ||
| 73 | * when committing the transaction, such as inode cache and | ||
| 74 | * space cache. It is a special case. | ||
| 75 | */ | ||
| 76 | if (type != TRANS_JOIN_NOLOCK) { | ||
| 71 | spin_unlock(&fs_info->trans_lock); | 77 | spin_unlock(&fs_info->trans_lock); |
| 72 | return -EBUSY; | 78 | return -EBUSY; |
| 73 | } | 79 | } |
| @@ -87,6 +93,13 @@ loop: | |||
| 87 | } | 93 | } |
| 88 | spin_unlock(&fs_info->trans_lock); | 94 | spin_unlock(&fs_info->trans_lock); |
| 89 | 95 | ||
| 96 | /* | ||
| 97 | * If we are ATTACH, we just want to catch the current transaction, | ||
| 98 | * and commit it. If there is no transaction, just return ENOENT. | ||
| 99 | */ | ||
| 100 | if (type == TRANS_ATTACH) | ||
| 101 | return -ENOENT; | ||
| 102 | |||
| 90 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 103 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
| 91 | if (!cur_trans) | 104 | if (!cur_trans) |
| 92 | return -ENOMEM; | 105 | return -ENOMEM; |
| @@ -267,13 +280,6 @@ static void wait_current_trans(struct btrfs_root *root) | |||
| 267 | } | 280 | } |
| 268 | } | 281 | } |
| 269 | 282 | ||
| 270 | enum btrfs_trans_type { | ||
| 271 | TRANS_START, | ||
| 272 | TRANS_JOIN, | ||
| 273 | TRANS_USERSPACE, | ||
| 274 | TRANS_JOIN_NOLOCK, | ||
| 275 | }; | ||
| 276 | |||
| 277 | static int may_wait_transaction(struct btrfs_root *root, int type) | 283 | static int may_wait_transaction(struct btrfs_root *root, int type) |
| 278 | { | 284 | { |
| 279 | if (root->fs_info->log_root_recovering) | 285 | if (root->fs_info->log_root_recovering) |
| @@ -290,7 +296,8 @@ static int may_wait_transaction(struct btrfs_root *root, int type) | |||
| 290 | } | 296 | } |
| 291 | 297 | ||
| 292 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 298 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
| 293 | u64 num_items, int type) | 299 | u64 num_items, int type, |
| 300 | int noflush) | ||
| 294 | { | 301 | { |
| 295 | struct btrfs_trans_handle *h; | 302 | struct btrfs_trans_handle *h; |
| 296 | struct btrfs_transaction *cur_trans; | 303 | struct btrfs_transaction *cur_trans; |
| @@ -324,9 +331,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 324 | } | 331 | } |
| 325 | 332 | ||
| 326 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | 333 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
| 327 | ret = btrfs_block_rsv_add(root, | 334 | if (noflush) |
| 328 | &root->fs_info->trans_block_rsv, | 335 | ret = btrfs_block_rsv_add_noflush(root, |
| 329 | num_bytes); | 336 | &root->fs_info->trans_block_rsv, |
| 337 | num_bytes); | ||
| 338 | else | ||
| 339 | ret = btrfs_block_rsv_add(root, | ||
| 340 | &root->fs_info->trans_block_rsv, | ||
| 341 | num_bytes); | ||
| 330 | if (ret) | 342 | if (ret) |
| 331 | return ERR_PTR(ret); | 343 | return ERR_PTR(ret); |
| 332 | } | 344 | } |
| @@ -335,19 +347,34 @@ again: | |||
| 335 | if (!h) | 347 | if (!h) |
| 336 | return ERR_PTR(-ENOMEM); | 348 | return ERR_PTR(-ENOMEM); |
| 337 | 349 | ||
| 338 | sb_start_intwrite(root->fs_info->sb); | 350 | /* |
| 351 | * If we are JOIN_NOLOCK we're already committing a transaction and | ||
| 352 | * waiting on this guy, so we don't need to do the sb_start_intwrite | ||
| 353 | * because we're already holding a ref. We need this because we could | ||
| 354 | * have raced in and did an fsync() on a file which can kick a commit | ||
| 355 | * and then we deadlock with somebody doing a freeze. | ||
| 356 | * | ||
| 357 | * If we are ATTACH, it means we just want to catch the current | ||
| 358 | * transaction and commit it, so we needn't do sb_start_intwrite(). | ||
| 359 | */ | ||
| 360 | if (type < TRANS_JOIN_NOLOCK) | ||
| 361 | sb_start_intwrite(root->fs_info->sb); | ||
| 339 | 362 | ||
| 340 | if (may_wait_transaction(root, type)) | 363 | if (may_wait_transaction(root, type)) |
| 341 | wait_current_trans(root); | 364 | wait_current_trans(root); |
| 342 | 365 | ||
| 343 | do { | 366 | do { |
| 344 | ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); | 367 | ret = join_transaction(root, type); |
| 345 | if (ret == -EBUSY) | 368 | if (ret == -EBUSY) |
| 346 | wait_current_trans(root); | 369 | wait_current_trans(root); |
| 347 | } while (ret == -EBUSY); | 370 | } while (ret == -EBUSY); |
| 348 | 371 | ||
| 349 | if (ret < 0) { | 372 | if (ret < 0) { |
| 350 | sb_end_intwrite(root->fs_info->sb); | 373 | /* We must get the transaction if we are JOIN_NOLOCK. */ |
| 374 | BUG_ON(type == TRANS_JOIN_NOLOCK); | ||
| 375 | |||
| 376 | if (type < TRANS_JOIN_NOLOCK) | ||
| 377 | sb_end_intwrite(root->fs_info->sb); | ||
| 351 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 378 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
| 352 | return ERR_PTR(ret); | 379 | return ERR_PTR(ret); |
| 353 | } | 380 | } |
| @@ -367,7 +394,9 @@ again: | |||
| 367 | h->aborted = 0; | 394 | h->aborted = 0; |
| 368 | h->qgroup_reserved = qgroup_reserved; | 395 | h->qgroup_reserved = qgroup_reserved; |
| 369 | h->delayed_ref_elem.seq = 0; | 396 | h->delayed_ref_elem.seq = 0; |
| 397 | h->type = type; | ||
| 370 | INIT_LIST_HEAD(&h->qgroup_ref_list); | 398 | INIT_LIST_HEAD(&h->qgroup_ref_list); |
| 399 | INIT_LIST_HEAD(&h->new_bgs); | ||
| 371 | 400 | ||
| 372 | smp_mb(); | 401 | smp_mb(); |
| 373 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 402 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
| @@ -393,21 +422,33 @@ got_it: | |||
| 393 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 422 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 394 | int num_items) | 423 | int num_items) |
| 395 | { | 424 | { |
| 396 | return start_transaction(root, num_items, TRANS_START); | 425 | return start_transaction(root, num_items, TRANS_START, 0); |
| 426 | } | ||
| 427 | |||
| 428 | struct btrfs_trans_handle *btrfs_start_transaction_noflush( | ||
| 429 | struct btrfs_root *root, int num_items) | ||
| 430 | { | ||
| 431 | return start_transaction(root, num_items, TRANS_START, 1); | ||
| 397 | } | 432 | } |
| 433 | |||
| 398 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) | 434 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) |
| 399 | { | 435 | { |
| 400 | return start_transaction(root, 0, TRANS_JOIN); | 436 | return start_transaction(root, 0, TRANS_JOIN, 0); |
| 401 | } | 437 | } |
| 402 | 438 | ||
| 403 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) | 439 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) |
| 404 | { | 440 | { |
| 405 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); | 441 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0); |
| 406 | } | 442 | } |
| 407 | 443 | ||
| 408 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) | 444 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) |
| 409 | { | 445 | { |
| 410 | return start_transaction(root, 0, TRANS_USERSPACE); | 446 | return start_transaction(root, 0, TRANS_USERSPACE, 0); |
| 447 | } | ||
| 448 | |||
| 449 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) | ||
| 450 | { | ||
| 451 | return start_transaction(root, 0, TRANS_ATTACH, 0); | ||
| 411 | } | 452 | } |
| 412 | 453 | ||
| 413 | /* wait for a transaction commit to be fully complete */ | 454 | /* wait for a transaction commit to be fully complete */ |
| @@ -506,11 +547,12 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
| 506 | } | 547 | } |
| 507 | 548 | ||
| 508 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 549 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| 509 | struct btrfs_root *root, int throttle, int lock) | 550 | struct btrfs_root *root, int throttle) |
| 510 | { | 551 | { |
| 511 | struct btrfs_transaction *cur_trans = trans->transaction; | 552 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 512 | struct btrfs_fs_info *info = root->fs_info; | 553 | struct btrfs_fs_info *info = root->fs_info; |
| 513 | int count = 0; | 554 | int count = 0; |
| 555 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | ||
| 514 | int err = 0; | 556 | int err = 0; |
| 515 | 557 | ||
| 516 | if (--trans->use_count) { | 558 | if (--trans->use_count) { |
| @@ -536,6 +578,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 536 | trans->qgroup_reserved = 0; | 578 | trans->qgroup_reserved = 0; |
| 537 | } | 579 | } |
| 538 | 580 | ||
| 581 | if (!list_empty(&trans->new_bgs)) | ||
| 582 | btrfs_create_pending_block_groups(trans, root); | ||
| 583 | |||
| 539 | while (count < 2) { | 584 | while (count < 2) { |
| 540 | unsigned long cur = trans->delayed_ref_updates; | 585 | unsigned long cur = trans->delayed_ref_updates; |
| 541 | trans->delayed_ref_updates = 0; | 586 | trans->delayed_ref_updates = 0; |
| @@ -551,7 +596,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 551 | btrfs_trans_release_metadata(trans, root); | 596 | btrfs_trans_release_metadata(trans, root); |
| 552 | trans->block_rsv = NULL; | 597 | trans->block_rsv = NULL; |
| 553 | 598 | ||
| 554 | sb_end_intwrite(root->fs_info->sb); | 599 | if (!list_empty(&trans->new_bgs)) |
| 600 | btrfs_create_pending_block_groups(trans, root); | ||
| 555 | 601 | ||
| 556 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 602 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
| 557 | should_end_transaction(trans, root)) { | 603 | should_end_transaction(trans, root)) { |
| @@ -573,6 +619,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 573 | } | 619 | } |
| 574 | } | 620 | } |
| 575 | 621 | ||
| 622 | if (trans->type < TRANS_JOIN_NOLOCK) | ||
| 623 | sb_end_intwrite(root->fs_info->sb); | ||
| 624 | |||
| 576 | WARN_ON(cur_trans != info->running_transaction); | 625 | WARN_ON(cur_trans != info->running_transaction); |
| 577 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); | 626 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); |
| 578 | atomic_dec(&cur_trans->num_writers); | 627 | atomic_dec(&cur_trans->num_writers); |
| @@ -604,7 +653,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 604 | { | 653 | { |
| 605 | int ret; | 654 | int ret; |
| 606 | 655 | ||
| 607 | ret = __btrfs_end_transaction(trans, root, 0, 1); | 656 | ret = __btrfs_end_transaction(trans, root, 0); |
| 608 | if (ret) | 657 | if (ret) |
| 609 | return ret; | 658 | return ret; |
| 610 | return 0; | 659 | return 0; |
| @@ -615,18 +664,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | |||
| 615 | { | 664 | { |
| 616 | int ret; | 665 | int ret; |
| 617 | 666 | ||
| 618 | ret = __btrfs_end_transaction(trans, root, 1, 1); | 667 | ret = __btrfs_end_transaction(trans, root, 1); |
| 619 | if (ret) | ||
| 620 | return ret; | ||
| 621 | return 0; | ||
| 622 | } | ||
| 623 | |||
| 624 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
| 625 | struct btrfs_root *root) | ||
| 626 | { | ||
| 627 | int ret; | ||
| 628 | |||
| 629 | ret = __btrfs_end_transaction(trans, root, 0, 0); | ||
| 630 | if (ret) | 668 | if (ret) |
| 631 | return ret; | 669 | return ret; |
| 632 | return 0; | 670 | return 0; |
| @@ -635,7 +673,7 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | |||
| 635 | int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, | 673 | int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, |
| 636 | struct btrfs_root *root) | 674 | struct btrfs_root *root) |
| 637 | { | 675 | { |
| 638 | return __btrfs_end_transaction(trans, root, 1, 1); | 676 | return __btrfs_end_transaction(trans, root, 1); |
| 639 | } | 677 | } |
| 640 | 678 | ||
| 641 | /* | 679 | /* |
| @@ -649,13 +687,15 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
| 649 | int err = 0; | 687 | int err = 0; |
| 650 | int werr = 0; | 688 | int werr = 0; |
| 651 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; | 689 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
| 690 | struct extent_state *cached_state = NULL; | ||
| 652 | u64 start = 0; | 691 | u64 start = 0; |
| 653 | u64 end; | 692 | u64 end; |
| 654 | 693 | ||
| 655 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 694 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
| 656 | mark)) { | 695 | mark, &cached_state)) { |
| 657 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark, | 696 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, |
| 658 | GFP_NOFS); | 697 | mark, &cached_state, GFP_NOFS); |
| 698 | cached_state = NULL; | ||
| 659 | err = filemap_fdatawrite_range(mapping, start, end); | 699 | err = filemap_fdatawrite_range(mapping, start, end); |
| 660 | if (err) | 700 | if (err) |
| 661 | werr = err; | 701 | werr = err; |
| @@ -679,12 +719,14 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
| 679 | int err = 0; | 719 | int err = 0; |
| 680 | int werr = 0; | 720 | int werr = 0; |
| 681 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; | 721 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
| 722 | struct extent_state *cached_state = NULL; | ||
| 682 | u64 start = 0; | 723 | u64 start = 0; |
| 683 | u64 end; | 724 | u64 end; |
| 684 | 725 | ||
| 685 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 726 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
| 686 | EXTENT_NEED_WAIT)) { | 727 | EXTENT_NEED_WAIT, &cached_state)) { |
| 687 | clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS); | 728 | clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, |
| 729 | 0, 0, &cached_state, GFP_NOFS); | ||
| 688 | err = filemap_fdatawait_range(mapping, start, end); | 730 | err = filemap_fdatawait_range(mapping, start, end); |
| 689 | if (err) | 731 | if (err) |
| 690 | werr = err; | 732 | werr = err; |
| @@ -955,6 +997,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 955 | struct btrfs_root *parent_root; | 997 | struct btrfs_root *parent_root; |
| 956 | struct btrfs_block_rsv *rsv; | 998 | struct btrfs_block_rsv *rsv; |
| 957 | struct inode *parent_inode; | 999 | struct inode *parent_inode; |
| 1000 | struct btrfs_path *path; | ||
| 1001 | struct btrfs_dir_item *dir_item; | ||
| 958 | struct dentry *parent; | 1002 | struct dentry *parent; |
| 959 | struct dentry *dentry; | 1003 | struct dentry *dentry; |
| 960 | struct extent_buffer *tmp; | 1004 | struct extent_buffer *tmp; |
| @@ -967,18 +1011,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 967 | u64 root_flags; | 1011 | u64 root_flags; |
| 968 | uuid_le new_uuid; | 1012 | uuid_le new_uuid; |
| 969 | 1013 | ||
| 970 | rsv = trans->block_rsv; | 1014 | path = btrfs_alloc_path(); |
| 1015 | if (!path) { | ||
| 1016 | ret = pending->error = -ENOMEM; | ||
| 1017 | goto path_alloc_fail; | ||
| 1018 | } | ||
| 971 | 1019 | ||
| 972 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 1020 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
| 973 | if (!new_root_item) { | 1021 | if (!new_root_item) { |
| 974 | ret = pending->error = -ENOMEM; | 1022 | ret = pending->error = -ENOMEM; |
| 975 | goto fail; | 1023 | goto root_item_alloc_fail; |
| 976 | } | 1024 | } |
| 977 | 1025 | ||
| 978 | ret = btrfs_find_free_objectid(tree_root, &objectid); | 1026 | ret = btrfs_find_free_objectid(tree_root, &objectid); |
| 979 | if (ret) { | 1027 | if (ret) { |
| 980 | pending->error = ret; | 1028 | pending->error = ret; |
| 981 | goto fail; | 1029 | goto no_free_objectid; |
| 982 | } | 1030 | } |
| 983 | 1031 | ||
| 984 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | 1032 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); |
| @@ -988,22 +1036,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 988 | to_reserve); | 1036 | to_reserve); |
| 989 | if (ret) { | 1037 | if (ret) { |
| 990 | pending->error = ret; | 1038 | pending->error = ret; |
| 991 | goto fail; | 1039 | goto no_free_objectid; |
| 992 | } | 1040 | } |
| 993 | } | 1041 | } |
| 994 | 1042 | ||
| 995 | ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, | 1043 | ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, |
| 996 | objectid, pending->inherit); | 1044 | objectid, pending->inherit); |
| 997 | kfree(pending->inherit); | ||
| 998 | if (ret) { | 1045 | if (ret) { |
| 999 | pending->error = ret; | 1046 | pending->error = ret; |
| 1000 | goto fail; | 1047 | goto no_free_objectid; |
| 1001 | } | 1048 | } |
| 1002 | 1049 | ||
| 1003 | key.objectid = objectid; | 1050 | key.objectid = objectid; |
| 1004 | key.offset = (u64)-1; | 1051 | key.offset = (u64)-1; |
| 1005 | key.type = BTRFS_ROOT_ITEM_KEY; | 1052 | key.type = BTRFS_ROOT_ITEM_KEY; |
| 1006 | 1053 | ||
| 1054 | rsv = trans->block_rsv; | ||
| 1007 | trans->block_rsv = &pending->block_rsv; | 1055 | trans->block_rsv = &pending->block_rsv; |
| 1008 | 1056 | ||
| 1009 | dentry = pending->dentry; | 1057 | dentry = pending->dentry; |
| @@ -1017,24 +1065,21 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1017 | */ | 1065 | */ |
| 1018 | ret = btrfs_set_inode_index(parent_inode, &index); | 1066 | ret = btrfs_set_inode_index(parent_inode, &index); |
| 1019 | BUG_ON(ret); /* -ENOMEM */ | 1067 | BUG_ON(ret); /* -ENOMEM */ |
| 1020 | ret = btrfs_insert_dir_item(trans, parent_root, | 1068 | |
| 1021 | dentry->d_name.name, dentry->d_name.len, | 1069 | /* check if there is a file/dir which has the same name. */ |
| 1022 | parent_inode, &key, | 1070 | dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, |
| 1023 | BTRFS_FT_DIR, index); | 1071 | btrfs_ino(parent_inode), |
| 1024 | if (ret == -EEXIST) { | 1072 | dentry->d_name.name, |
| 1073 | dentry->d_name.len, 0); | ||
| 1074 | if (dir_item != NULL && !IS_ERR(dir_item)) { | ||
| 1025 | pending->error = -EEXIST; | 1075 | pending->error = -EEXIST; |
| 1026 | dput(parent); | ||
| 1027 | goto fail; | 1076 | goto fail; |
| 1028 | } else if (ret) { | 1077 | } else if (IS_ERR(dir_item)) { |
| 1029 | goto abort_trans_dput; | 1078 | ret = PTR_ERR(dir_item); |
| 1079 | btrfs_abort_transaction(trans, root, ret); | ||
| 1080 | goto fail; | ||
| 1030 | } | 1081 | } |
| 1031 | 1082 | btrfs_release_path(path); | |
| 1032 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
| 1033 | dentry->d_name.len * 2); | ||
| 1034 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | ||
| 1035 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
| 1036 | if (ret) | ||
| 1037 | goto abort_trans_dput; | ||
| 1038 | 1083 | ||
| 1039 | /* | 1084 | /* |
| 1040 | * pull in the delayed directory update | 1085 | * pull in the delayed directory update |
| @@ -1043,8 +1088,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1043 | * snapshot | 1088 | * snapshot |
| 1044 | */ | 1089 | */ |
| 1045 | ret = btrfs_run_delayed_items(trans, root); | 1090 | ret = btrfs_run_delayed_items(trans, root); |
| 1046 | if (ret) { /* Transaction aborted */ | 1091 | if (ret) { /* Transaction aborted */ |
| 1047 | dput(parent); | 1092 | btrfs_abort_transaction(trans, root, ret); |
| 1048 | goto fail; | 1093 | goto fail; |
| 1049 | } | 1094 | } |
| 1050 | 1095 | ||
| @@ -1079,7 +1124,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1079 | if (ret) { | 1124 | if (ret) { |
| 1080 | btrfs_tree_unlock(old); | 1125 | btrfs_tree_unlock(old); |
| 1081 | free_extent_buffer(old); | 1126 | free_extent_buffer(old); |
| 1082 | goto abort_trans_dput; | 1127 | btrfs_abort_transaction(trans, root, ret); |
| 1128 | goto fail; | ||
| 1083 | } | 1129 | } |
| 1084 | 1130 | ||
| 1085 | btrfs_set_lock_blocking(old); | 1131 | btrfs_set_lock_blocking(old); |
| @@ -1088,8 +1134,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1088 | /* clean up in any case */ | 1134 | /* clean up in any case */ |
| 1089 | btrfs_tree_unlock(old); | 1135 | btrfs_tree_unlock(old); |
| 1090 | free_extent_buffer(old); | 1136 | free_extent_buffer(old); |
| 1091 | if (ret) | 1137 | if (ret) { |
| 1092 | goto abort_trans_dput; | 1138 | btrfs_abort_transaction(trans, root, ret); |
| 1139 | goto fail; | ||
| 1140 | } | ||
| 1093 | 1141 | ||
| 1094 | /* see comments in should_cow_block() */ | 1142 | /* see comments in should_cow_block() */ |
| 1095 | root->force_cow = 1; | 1143 | root->force_cow = 1; |
| @@ -1101,8 +1149,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1101 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); | 1149 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); |
| 1102 | btrfs_tree_unlock(tmp); | 1150 | btrfs_tree_unlock(tmp); |
| 1103 | free_extent_buffer(tmp); | 1151 | free_extent_buffer(tmp); |
| 1104 | if (ret) | 1152 | if (ret) { |
| 1105 | goto abort_trans_dput; | 1153 | btrfs_abort_transaction(trans, root, ret); |
| 1154 | goto fail; | ||
| 1155 | } | ||
| 1106 | 1156 | ||
| 1107 | /* | 1157 | /* |
| 1108 | * insert root back/forward references | 1158 | * insert root back/forward references |
| @@ -1111,32 +1161,58 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1111 | parent_root->root_key.objectid, | 1161 | parent_root->root_key.objectid, |
| 1112 | btrfs_ino(parent_inode), index, | 1162 | btrfs_ino(parent_inode), index, |
| 1113 | dentry->d_name.name, dentry->d_name.len); | 1163 | dentry->d_name.name, dentry->d_name.len); |
| 1114 | dput(parent); | 1164 | if (ret) { |
| 1115 | if (ret) | 1165 | btrfs_abort_transaction(trans, root, ret); |
| 1116 | goto fail; | 1166 | goto fail; |
| 1167 | } | ||
| 1117 | 1168 | ||
| 1118 | key.offset = (u64)-1; | 1169 | key.offset = (u64)-1; |
| 1119 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | 1170 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); |
| 1120 | if (IS_ERR(pending->snap)) { | 1171 | if (IS_ERR(pending->snap)) { |
| 1121 | ret = PTR_ERR(pending->snap); | 1172 | ret = PTR_ERR(pending->snap); |
| 1122 | goto abort_trans; | 1173 | btrfs_abort_transaction(trans, root, ret); |
| 1174 | goto fail; | ||
| 1123 | } | 1175 | } |
| 1124 | 1176 | ||
| 1125 | ret = btrfs_reloc_post_snapshot(trans, pending); | 1177 | ret = btrfs_reloc_post_snapshot(trans, pending); |
| 1178 | if (ret) { | ||
| 1179 | btrfs_abort_transaction(trans, root, ret); | ||
| 1180 | goto fail; | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
| 1184 | if (ret) { | ||
| 1185 | btrfs_abort_transaction(trans, root, ret); | ||
| 1186 | goto fail; | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | ret = btrfs_insert_dir_item(trans, parent_root, | ||
| 1190 | dentry->d_name.name, dentry->d_name.len, | ||
| 1191 | parent_inode, &key, | ||
| 1192 | BTRFS_FT_DIR, index); | ||
| | | 1193 | /* We have checked the name at the beginning, so it is impossible. */ |
| 1194 | BUG_ON(ret == -EEXIST); | ||
| 1195 | if (ret) { | ||
| 1196 | btrfs_abort_transaction(trans, root, ret); | ||
| 1197 | goto fail; | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
| 1201 | dentry->d_name.len * 2); | ||
| 1202 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | ||
| 1203 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
| 1126 | if (ret) | 1204 | if (ret) |
| 1127 | goto abort_trans; | 1205 | btrfs_abort_transaction(trans, root, ret); |
| 1128 | ret = 0; | ||
| 1129 | fail: | 1206 | fail: |
| 1130 | kfree(new_root_item); | 1207 | dput(parent); |
| 1131 | trans->block_rsv = rsv; | 1208 | trans->block_rsv = rsv; |
| 1209 | no_free_objectid: | ||
| 1210 | kfree(new_root_item); | ||
| 1211 | root_item_alloc_fail: | ||
| 1212 | btrfs_free_path(path); | ||
| 1213 | path_alloc_fail: | ||
| 1132 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); | 1214 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
| 1133 | return ret; | 1215 | return ret; |
| 1134 | |||
| 1135 | abort_trans_dput: | ||
| 1136 | dput(parent); | ||
| 1137 | abort_trans: | ||
| 1138 | btrfs_abort_transaction(trans, root, ret); | ||
| 1139 | goto fail; | ||
| 1140 | } | 1216 | } |
| 1141 | 1217 | ||
| 1142 | /* | 1218 | /* |
| @@ -1229,6 +1305,16 @@ static void do_async_commit(struct work_struct *work) | |||
| 1229 | struct btrfs_async_commit *ac = | 1305 | struct btrfs_async_commit *ac = |
| 1230 | container_of(work, struct btrfs_async_commit, work.work); | 1306 | container_of(work, struct btrfs_async_commit, work.work); |
| 1231 | 1307 | ||
| 1308 | /* | ||
| 1309 | * We've got freeze protection passed with the transaction. | ||
| 1310 | * Tell lockdep about it. | ||
| 1311 | */ | ||
| 1312 | rwsem_acquire_read( | ||
| 1313 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
| 1314 | 0, 1, _THIS_IP_); | ||
| 1315 | |||
| 1316 | current->journal_info = ac->newtrans; | ||
| 1317 | |||
| 1232 | btrfs_commit_transaction(ac->newtrans, ac->root); | 1318 | btrfs_commit_transaction(ac->newtrans, ac->root); |
| 1233 | kfree(ac); | 1319 | kfree(ac); |
| 1234 | } | 1320 | } |
| @@ -1258,6 +1344,14 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
| 1258 | atomic_inc(&cur_trans->use_count); | 1344 | atomic_inc(&cur_trans->use_count); |
| 1259 | 1345 | ||
| 1260 | btrfs_end_transaction(trans, root); | 1346 | btrfs_end_transaction(trans, root); |
| 1347 | |||
| 1348 | /* | ||
| 1349 | * Tell lockdep we've released the freeze rwsem, since the | ||
| 1350 | * async commit thread will be the one to unlock it. | ||
| 1351 | */ | ||
| 1352 | rwsem_release(&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
| 1353 | 1, _THIS_IP_); | ||
| 1354 | |||
| 1261 | schedule_delayed_work(&ac->work, 0); | 1355 | schedule_delayed_work(&ac->work, 0); |
| 1262 | 1356 | ||
| 1263 | /* wait for transaction to start and unblock */ | 1357 | /* wait for transaction to start and unblock */ |
| @@ -1348,6 +1442,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1348 | */ | 1442 | */ |
| 1349 | cur_trans->delayed_refs.flushing = 1; | 1443 | cur_trans->delayed_refs.flushing = 1; |
| 1350 | 1444 | ||
| 1445 | if (!list_empty(&trans->new_bgs)) | ||
| 1446 | btrfs_create_pending_block_groups(trans, root); | ||
| 1447 | |||
| 1351 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1448 | ret = btrfs_run_delayed_refs(trans, root, 0); |
| 1352 | if (ret) | 1449 | if (ret) |
| 1353 | goto cleanup_transaction; | 1450 | goto cleanup_transaction; |
| @@ -1403,7 +1500,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1403 | 1500 | ||
| 1404 | if (flush_on_commit || snap_pending) { | 1501 | if (flush_on_commit || snap_pending) { |
| 1405 | btrfs_start_delalloc_inodes(root, 1); | 1502 | btrfs_start_delalloc_inodes(root, 1); |
| 1406 | btrfs_wait_ordered_extents(root, 0, 1); | 1503 | btrfs_wait_ordered_extents(root, 1); |
| 1407 | } | 1504 | } |
| 1408 | 1505 | ||
| 1409 | ret = btrfs_run_delayed_items(trans, root); | 1506 | ret = btrfs_run_delayed_items(trans, root); |
| @@ -1456,13 +1553,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1456 | */ | 1553 | */ |
| 1457 | mutex_lock(&root->fs_info->reloc_mutex); | 1554 | mutex_lock(&root->fs_info->reloc_mutex); |
| 1458 | 1555 | ||
| 1459 | ret = btrfs_run_delayed_items(trans, root); | 1556 | /* |
| 1557 | * We needn't worry about the delayed items because we will | ||
| 1558 | * deal with them in create_pending_snapshot(), which is the | ||
| 1559 | * core function of the snapshot creation. | ||
| 1560 | */ | ||
| 1561 | ret = create_pending_snapshots(trans, root->fs_info); | ||
| 1460 | if (ret) { | 1562 | if (ret) { |
| 1461 | mutex_unlock(&root->fs_info->reloc_mutex); | 1563 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1462 | goto cleanup_transaction; | 1564 | goto cleanup_transaction; |
| 1463 | } | 1565 | } |
| 1464 | 1566 | ||
| 1465 | ret = create_pending_snapshots(trans, root->fs_info); | 1567 | /* |
| 1568 | * We insert the dir indexes of the snapshots and update the inode | ||
| 1569 | * of the snapshots' parents after the snapshot creation, so there | ||
| 1570 | * are some delayed items which are not dealt with. Now deal with | ||
| 1571 | * them. | ||
| 1572 | * | ||
| 1573 | * We needn't worry that this operation will corrupt the snapshots, | ||
| | | 1574 | * because all the trees which are snapshotted will be forced to COW |
| 1575 | * the nodes and leaves. | ||
| 1576 | */ | ||
| 1577 | ret = btrfs_run_delayed_items(trans, root); | ||
| 1466 | if (ret) { | 1578 | if (ret) { |
| 1467 | mutex_unlock(&root->fs_info->reloc_mutex); | 1579 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1468 | goto cleanup_transaction; | 1580 | goto cleanup_transaction; |
| @@ -1584,7 +1696,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1584 | put_transaction(cur_trans); | 1696 | put_transaction(cur_trans); |
| 1585 | put_transaction(cur_trans); | 1697 | put_transaction(cur_trans); |
| 1586 | 1698 | ||
| 1587 | sb_end_intwrite(root->fs_info->sb); | 1699 | if (trans->type < TRANS_JOIN_NOLOCK) |
| 1700 | sb_end_intwrite(root->fs_info->sb); | ||
| 1588 | 1701 | ||
| 1589 | trace_btrfs_transaction_commit(root); | 1702 | trace_btrfs_transaction_commit(root); |
| 1590 | 1703 | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e8b8416c688b..80961947a6b2 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -47,6 +47,14 @@ struct btrfs_transaction { | |||
| 47 | int aborted; | 47 | int aborted; |
| 48 | }; | 48 | }; |
| 49 | 49 | ||
| 50 | enum btrfs_trans_type { | ||
| 51 | TRANS_START, | ||
| 52 | TRANS_JOIN, | ||
| 53 | TRANS_USERSPACE, | ||
| 54 | TRANS_JOIN_NOLOCK, | ||
| 55 | TRANS_ATTACH, | ||
| 56 | }; | ||
| 57 | |||
| 50 | struct btrfs_trans_handle { | 58 | struct btrfs_trans_handle { |
| 51 | u64 transid; | 59 | u64 transid; |
| 52 | u64 bytes_reserved; | 60 | u64 bytes_reserved; |
| @@ -58,8 +66,9 @@ struct btrfs_trans_handle { | |||
| 58 | struct btrfs_transaction *transaction; | 66 | struct btrfs_transaction *transaction; |
| 59 | struct btrfs_block_rsv *block_rsv; | 67 | struct btrfs_block_rsv *block_rsv; |
| 60 | struct btrfs_block_rsv *orig_rsv; | 68 | struct btrfs_block_rsv *orig_rsv; |
| 61 | int aborted; | 69 | short aborted; |
| 62 | int adding_csums; | 70 | short adding_csums; |
| 71 | enum btrfs_trans_type type; | ||
| 63 | /* | 72 | /* |
| 64 | * this root is only needed to validate that the root passed to | 73 | * this root is only needed to validate that the root passed to |
| 65 | * start_transaction is the same as the one passed to end_transaction. | 74 | * start_transaction is the same as the one passed to end_transaction. |
| @@ -68,6 +77,7 @@ struct btrfs_trans_handle { | |||
| 68 | struct btrfs_root *root; | 77 | struct btrfs_root *root; |
| 69 | struct seq_list delayed_ref_elem; | 78 | struct seq_list delayed_ref_elem; |
| 70 | struct list_head qgroup_ref_list; | 79 | struct list_head qgroup_ref_list; |
| 80 | struct list_head new_bgs; | ||
| 71 | }; | 81 | }; |
| 72 | 82 | ||
| 73 | struct btrfs_pending_snapshot { | 83 | struct btrfs_pending_snapshot { |
| @@ -88,16 +98,18 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
| 88 | { | 98 | { |
| 89 | BTRFS_I(inode)->last_trans = trans->transaction->transid; | 99 | BTRFS_I(inode)->last_trans = trans->transaction->transid; |
| 90 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | 100 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
| 101 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; | ||
| 91 | } | 102 | } |
| 92 | 103 | ||
| 93 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 104 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| 94 | struct btrfs_root *root); | 105 | struct btrfs_root *root); |
| 95 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
| 96 | struct btrfs_root *root); | ||
| 97 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 106 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 98 | int num_items); | 107 | int num_items); |
| 108 | struct btrfs_trans_handle *btrfs_start_transaction_noflush( | ||
| 109 | struct btrfs_root *root, int num_items); | ||
| 99 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); | 110 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); |
| 100 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); | 111 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); |
| 112 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); | ||
| 101 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); | 113 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); |
| 102 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | 114 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); |
| 103 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 115 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c86670f4f285..e9ebb472b28b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -18,13 +18,16 @@ | |||
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
| 21 | #include <linux/list_sort.h> | ||
| 21 | #include "ctree.h" | 22 | #include "ctree.h" |
| 22 | #include "transaction.h" | 23 | #include "transaction.h" |
| 23 | #include "disk-io.h" | 24 | #include "disk-io.h" |
| 24 | #include "locking.h" | 25 | #include "locking.h" |
| 25 | #include "print-tree.h" | 26 | #include "print-tree.h" |
| 27 | #include "backref.h" | ||
| 26 | #include "compat.h" | 28 | #include "compat.h" |
| 27 | #include "tree-log.h" | 29 | #include "tree-log.h" |
| 30 | #include "hash.h" | ||
| 28 | 31 | ||
| 29 | /* magic values for the inode_only field in btrfs_log_inode: | 32 | /* magic values for the inode_only field in btrfs_log_inode: |
| 30 | * | 33 | * |
| @@ -146,7 +149,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 146 | root->log_multiple_pids = true; | 149 | root->log_multiple_pids = true; |
| 147 | } | 150 | } |
| 148 | 151 | ||
| 149 | root->log_batch++; | 152 | atomic_inc(&root->log_batch); |
| 150 | atomic_inc(&root->log_writers); | 153 | atomic_inc(&root->log_writers); |
| 151 | mutex_unlock(&root->log_mutex); | 154 | mutex_unlock(&root->log_mutex); |
| 152 | return 0; | 155 | return 0; |
| @@ -165,7 +168,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 165 | err = ret; | 168 | err = ret; |
| 166 | } | 169 | } |
| 167 | mutex_unlock(&root->fs_info->tree_log_mutex); | 170 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 168 | root->log_batch++; | 171 | atomic_inc(&root->log_batch); |
| 169 | atomic_inc(&root->log_writers); | 172 | atomic_inc(&root->log_writers); |
| 170 | mutex_unlock(&root->log_mutex); | 173 | mutex_unlock(&root->log_mutex); |
| 171 | return err; | 174 | return err; |
| @@ -484,7 +487,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 484 | int found_type; | 487 | int found_type; |
| 485 | u64 mask = root->sectorsize - 1; | 488 | u64 mask = root->sectorsize - 1; |
| 486 | u64 extent_end; | 489 | u64 extent_end; |
| 487 | u64 alloc_hint; | ||
| 488 | u64 start = key->offset; | 490 | u64 start = key->offset; |
| 489 | u64 saved_nbytes; | 491 | u64 saved_nbytes; |
| 490 | struct btrfs_file_extent_item *item; | 492 | struct btrfs_file_extent_item *item; |
| @@ -550,8 +552,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 550 | 552 | ||
| 551 | saved_nbytes = inode_get_bytes(inode); | 553 | saved_nbytes = inode_get_bytes(inode); |
| 552 | /* drop any overlapping extents */ | 554 | /* drop any overlapping extents */ |
| 553 | ret = btrfs_drop_extents(trans, inode, start, extent_end, | 555 | ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); |
| 554 | &alloc_hint, 1); | ||
| 555 | BUG_ON(ret); | 556 | BUG_ON(ret); |
| 556 | 557 | ||
| 557 | if (found_type == BTRFS_FILE_EXTENT_REG || | 558 | if (found_type == BTRFS_FILE_EXTENT_REG || |
| @@ -744,6 +745,7 @@ out: | |||
| 744 | */ | 745 | */ |
| 745 | static noinline int backref_in_log(struct btrfs_root *log, | 746 | static noinline int backref_in_log(struct btrfs_root *log, |
| 746 | struct btrfs_key *key, | 747 | struct btrfs_key *key, |
| 748 | u64 ref_objectid, | ||
| 747 | char *name, int namelen) | 749 | char *name, int namelen) |
| 748 | { | 750 | { |
| 749 | struct btrfs_path *path; | 751 | struct btrfs_path *path; |
| @@ -764,8 +766,17 @@ static noinline int backref_in_log(struct btrfs_root *log, | |||
| 764 | if (ret != 0) | 766 | if (ret != 0) |
| 765 | goto out; | 767 | goto out; |
| 766 | 768 | ||
| 767 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
| 768 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | 769 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); |
| 770 | |||
| 771 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
| 772 | if (btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
| 773 | name, namelen, NULL)) | ||
| 774 | match = 1; | ||
| 775 | |||
| 776 | goto out; | ||
| 777 | } | ||
| 778 | |||
| 779 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
| 769 | ptr_end = ptr + item_size; | 780 | ptr_end = ptr + item_size; |
| 770 | while (ptr < ptr_end) { | 781 | while (ptr < ptr_end) { |
| 771 | ref = (struct btrfs_inode_ref *)ptr; | 782 | ref = (struct btrfs_inode_ref *)ptr; |
| @@ -786,91 +797,42 @@ out: | |||
| 786 | return match; | 797 | return match; |
| 787 | } | 798 | } |
| 788 | 799 | ||
| 789 | 800 | static inline int __add_inode_ref(struct btrfs_trans_handle *trans, | |
| 790 | /* | ||
| 791 | * replay one inode back reference item found in the log tree. | ||
| 792 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
| 793 | * root is the destination we are replaying into, and path is for temp | ||
| 794 | * use by this function. (it should be released on return). | ||
| 795 | */ | ||
| 796 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
| 797 | struct btrfs_root *root, | 801 | struct btrfs_root *root, |
| 798 | struct btrfs_root *log, | ||
| 799 | struct btrfs_path *path, | 802 | struct btrfs_path *path, |
| 800 | struct extent_buffer *eb, int slot, | 803 | struct btrfs_root *log_root, |
| 801 | struct btrfs_key *key) | 804 | struct inode *dir, struct inode *inode, |
| 805 | struct extent_buffer *eb, | ||
| 806 | u64 inode_objectid, u64 parent_objectid, | ||
| 807 | u64 ref_index, char *name, int namelen, | ||
| 808 | int *search_done) | ||
| 802 | { | 809 | { |
| 803 | struct btrfs_inode_ref *ref; | ||
| 804 | struct btrfs_dir_item *di; | ||
| 805 | struct inode *dir; | ||
| 806 | struct inode *inode; | ||
| 807 | unsigned long ref_ptr; | ||
| 808 | unsigned long ref_end; | ||
| 809 | char *name; | ||
| 810 | int namelen; | ||
| 811 | int ret; | 810 | int ret; |
| 812 | int search_done = 0; | 811 | char *victim_name; |
| 813 | 812 | int victim_name_len; | |
| 814 | /* | 813 | struct extent_buffer *leaf; |
| 815 | * it is possible that we didn't log all the parent directories | 814 | struct btrfs_dir_item *di; |
| 816 | * for a given inode. If we don't find the dir, just don't | 815 | struct btrfs_key search_key; |
| 817 | * copy the back ref in. The link count fixup code will take | 816 | struct btrfs_inode_extref *extref; |
| 818 | * care of the rest | ||
| 819 | */ | ||
| 820 | dir = read_one_inode(root, key->offset); | ||
| 821 | if (!dir) | ||
| 822 | return -ENOENT; | ||
| 823 | |||
| 824 | inode = read_one_inode(root, key->objectid); | ||
| 825 | if (!inode) { | ||
| 826 | iput(dir); | ||
| 827 | return -EIO; | ||
| 828 | } | ||
| 829 | |||
| 830 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 831 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
| 832 | 817 | ||
| 833 | again: | 818 | again: |
| 834 | ref = (struct btrfs_inode_ref *)ref_ptr; | 819 | /* Search old style refs */ |
| 835 | 820 | search_key.objectid = inode_objectid; | |
| 836 | namelen = btrfs_inode_ref_name_len(eb, ref); | 821 | search_key.type = BTRFS_INODE_REF_KEY; |
| 837 | name = kmalloc(namelen, GFP_NOFS); | 822 | search_key.offset = parent_objectid; |
| 838 | BUG_ON(!name); | 823 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
| 839 | |||
| 840 | read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); | ||
| 841 | |||
| 842 | /* if we already have a perfect match, we're done */ | ||
| 843 | if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
| 844 | btrfs_inode_ref_index(eb, ref), | ||
| 845 | name, namelen)) { | ||
| 846 | goto out; | ||
| 847 | } | ||
| 848 | |||
| 849 | /* | ||
| 850 | * look for a conflicting back reference in the metadata. | ||
| 851 | * if we find one we have to unlink that name of the file | ||
| 852 | * before we add our new link. Later on, we overwrite any | ||
| 853 | * existing back reference, and we don't want to create | ||
| 854 | * dangling pointers in the directory. | ||
| 855 | */ | ||
| 856 | |||
| 857 | if (search_done) | ||
| 858 | goto insert; | ||
| 859 | |||
| 860 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | ||
| 861 | if (ret == 0) { | 824 | if (ret == 0) { |
| 862 | char *victim_name; | ||
| 863 | int victim_name_len; | ||
| 864 | struct btrfs_inode_ref *victim_ref; | 825 | struct btrfs_inode_ref *victim_ref; |
| 865 | unsigned long ptr; | 826 | unsigned long ptr; |
| 866 | unsigned long ptr_end; | 827 | unsigned long ptr_end; |
| 867 | struct extent_buffer *leaf = path->nodes[0]; | 828 | |
| 829 | leaf = path->nodes[0]; | ||
| 868 | 830 | ||
| 869 | /* are we trying to overwrite a back ref for the root directory | 831 | /* are we trying to overwrite a back ref for the root directory |
| 870 | * if so, just jump out, we're done | 832 | * if so, just jump out, we're done |
| 871 | */ | 833 | */ |
| 872 | if (key->objectid == key->offset) | 834 | if (search_key.objectid == search_key.offset) |
| 873 | goto out_nowrite; | 835 | return 1; |
| 874 | 836 | ||
| 875 | /* check all the names in this back reference to see | 837 | /* check all the names in this back reference to see |
| 876 | * if they are in the log. if so, we allow them to stay | 838 | * if they are in the log. if so, we allow them to stay |
| @@ -889,7 +851,9 @@ again: | |||
| 889 | (unsigned long)(victim_ref + 1), | 851 | (unsigned long)(victim_ref + 1), |
| 890 | victim_name_len); | 852 | victim_name_len); |
| 891 | 853 | ||
| 892 | if (!backref_in_log(log, key, victim_name, | 854 | if (!backref_in_log(log_root, &search_key, |
| 855 | parent_objectid, | ||
| 856 | victim_name, | ||
| 893 | victim_name_len)) { | 857 | victim_name_len)) { |
| 894 | btrfs_inc_nlink(inode); | 858 | btrfs_inc_nlink(inode); |
| 895 | btrfs_release_path(path); | 859 | btrfs_release_path(path); |
| @@ -897,9 +861,14 @@ again: | |||
| 897 | ret = btrfs_unlink_inode(trans, root, dir, | 861 | ret = btrfs_unlink_inode(trans, root, dir, |
| 898 | inode, victim_name, | 862 | inode, victim_name, |
| 899 | victim_name_len); | 863 | victim_name_len); |
| 864 | BUG_ON(ret); | ||
| 900 | btrfs_run_delayed_items(trans, root); | 865 | btrfs_run_delayed_items(trans, root); |
| 866 | kfree(victim_name); | ||
| 867 | *search_done = 1; | ||
| 868 | goto again; | ||
| 901 | } | 869 | } |
| 902 | kfree(victim_name); | 870 | kfree(victim_name); |
| 871 | |||
| 903 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 872 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
| 904 | } | 873 | } |
| 905 | BUG_ON(ret); | 874 | BUG_ON(ret); |
| @@ -908,14 +877,78 @@ again: | |||
| 908 | * NOTE: we have searched root tree and checked the | 877 | * NOTE: we have searched root tree and checked the |
| 909 | * coresponding ref, it does not need to check again. | 878 | * coresponding ref, it does not need to check again. |
| 910 | */ | 879 | */ |
| 911 | search_done = 1; | 880 | *search_done = 1; |
| 881 | } | ||
| 882 | btrfs_release_path(path); | ||
| 883 | |||
| 884 | /* Same search but for extended refs */ | ||
| 885 | extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, | ||
| 886 | inode_objectid, parent_objectid, 0, | ||
| 887 | 0); | ||
| 888 | if (!IS_ERR_OR_NULL(extref)) { | ||
| 889 | u32 item_size; | ||
| 890 | u32 cur_offset = 0; | ||
| 891 | unsigned long base; | ||
| 892 | struct inode *victim_parent; | ||
| 893 | |||
| 894 | leaf = path->nodes[0]; | ||
| 895 | |||
| 896 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 897 | base = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 898 | |||
| 899 | while (cur_offset < item_size) { | ||
| 900 | extref = (struct btrfs_inode_extref *)base + cur_offset; | ||
| 901 | |||
| 902 | victim_name_len = btrfs_inode_extref_name_len(leaf, extref); | ||
| 903 | |||
| 904 | if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) | ||
| 905 | goto next; | ||
| 906 | |||
| 907 | victim_name = kmalloc(victim_name_len, GFP_NOFS); | ||
| 908 | read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, | ||
| 909 | victim_name_len); | ||
| 910 | |||
| 911 | search_key.objectid = inode_objectid; | ||
| 912 | search_key.type = BTRFS_INODE_EXTREF_KEY; | ||
| 913 | search_key.offset = btrfs_extref_hash(parent_objectid, | ||
| 914 | victim_name, | ||
| 915 | victim_name_len); | ||
| 916 | ret = 0; | ||
| 917 | if (!backref_in_log(log_root, &search_key, | ||
| 918 | parent_objectid, victim_name, | ||
| 919 | victim_name_len)) { | ||
| 920 | ret = -ENOENT; | ||
| 921 | victim_parent = read_one_inode(root, | ||
| 922 | parent_objectid); | ||
| 923 | if (victim_parent) { | ||
| 924 | btrfs_inc_nlink(inode); | ||
| 925 | btrfs_release_path(path); | ||
| 926 | |||
| 927 | ret = btrfs_unlink_inode(trans, root, | ||
| 928 | victim_parent, | ||
| 929 | inode, | ||
| 930 | victim_name, | ||
| 931 | victim_name_len); | ||
| 932 | btrfs_run_delayed_items(trans, root); | ||
| 933 | } | ||
| 934 | BUG_ON(ret); | ||
| 935 | iput(victim_parent); | ||
| 936 | kfree(victim_name); | ||
| 937 | *search_done = 1; | ||
| 938 | goto again; | ||
| 939 | } | ||
| 940 | kfree(victim_name); | ||
| 941 | BUG_ON(ret); | ||
| 942 | next: | ||
| 943 | cur_offset += victim_name_len + sizeof(*extref); | ||
| 944 | } | ||
| 945 | *search_done = 1; | ||
| 912 | } | 946 | } |
| 913 | btrfs_release_path(path); | 947 | btrfs_release_path(path); |
| 914 | 948 | ||
| 915 | /* look for a conflicting sequence number */ | 949 | /* look for a conflicting sequence number */ |
| 916 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | 950 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), |
| 917 | btrfs_inode_ref_index(eb, ref), | 951 | ref_index, name, namelen, 0); |
| 918 | name, namelen, 0); | ||
| 919 | if (di && !IS_ERR(di)) { | 952 | if (di && !IS_ERR(di)) { |
| 920 | ret = drop_one_dir_item(trans, root, path, dir, di); | 953 | ret = drop_one_dir_item(trans, root, path, dir, di); |
| 921 | BUG_ON(ret); | 954 | BUG_ON(ret); |
| @@ -931,25 +964,173 @@ again: | |||
| 931 | } | 964 | } |
| 932 | btrfs_release_path(path); | 965 | btrfs_release_path(path); |
| 933 | 966 | ||
| 934 | insert: | 967 | return 0; |
| 935 | /* insert our name */ | 968 | } |
| 936 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | ||
| 937 | btrfs_inode_ref_index(eb, ref)); | ||
| 938 | BUG_ON(ret); | ||
| 939 | 969 | ||
| 940 | btrfs_update_inode(trans, root, inode); | 970 | static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, |
| 971 | u32 *namelen, char **name, u64 *index, | ||
| 972 | u64 *parent_objectid) | ||
| 973 | { | ||
| 974 | struct btrfs_inode_extref *extref; | ||
| 941 | 975 | ||
| 942 | out: | 976 | extref = (struct btrfs_inode_extref *)ref_ptr; |
| 943 | ref_ptr = (unsigned long)(ref + 1) + namelen; | 977 | |
| 944 | kfree(name); | 978 | *namelen = btrfs_inode_extref_name_len(eb, extref); |
| 945 | if (ref_ptr < ref_end) | 979 | *name = kmalloc(*namelen, GFP_NOFS); |
| 946 | goto again; | 980 | if (*name == NULL) |
| 981 | return -ENOMEM; | ||
| 982 | |||
| 983 | read_extent_buffer(eb, *name, (unsigned long)&extref->name, | ||
| 984 | *namelen); | ||
| 985 | |||
| 986 | *index = btrfs_inode_extref_index(eb, extref); | ||
| 987 | if (parent_objectid) | ||
| 988 | *parent_objectid = btrfs_inode_extref_parent(eb, extref); | ||
| 989 | |||
| 990 | return 0; | ||
| 991 | } | ||
| 992 | |||
| 993 | static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, | ||
| 994 | u32 *namelen, char **name, u64 *index) | ||
| 995 | { | ||
| 996 | struct btrfs_inode_ref *ref; | ||
| 997 | |||
| 998 | ref = (struct btrfs_inode_ref *)ref_ptr; | ||
| 999 | |||
| 1000 | *namelen = btrfs_inode_ref_name_len(eb, ref); | ||
| 1001 | *name = kmalloc(*namelen, GFP_NOFS); | ||
| 1002 | if (*name == NULL) | ||
| 1003 | return -ENOMEM; | ||
| 1004 | |||
| 1005 | read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); | ||
| 1006 | |||
| 1007 | *index = btrfs_inode_ref_index(eb, ref); | ||
| 1008 | |||
| 1009 | return 0; | ||
| 1010 | } | ||
| 1011 | |||
| 1012 | /* | ||
| 1013 | * replay one inode back reference item found in the log tree. | ||
| 1014 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
| 1015 | * root is the destination we are replaying into, and path is for temp | ||
| 1016 | * use by this function. (it should be released on return). | ||
| 1017 | */ | ||
| 1018 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
| 1019 | struct btrfs_root *root, | ||
| 1020 | struct btrfs_root *log, | ||
| 1021 | struct btrfs_path *path, | ||
| 1022 | struct extent_buffer *eb, int slot, | ||
| 1023 | struct btrfs_key *key) | ||
| 1024 | { | ||
| 1025 | struct inode *dir; | ||
| 1026 | struct inode *inode; | ||
| 1027 | unsigned long ref_ptr; | ||
| 1028 | unsigned long ref_end; | ||
| 1029 | char *name; | ||
| 1030 | int namelen; | ||
| 1031 | int ret; | ||
| 1032 | int search_done = 0; | ||
| 1033 | int log_ref_ver = 0; | ||
| 1034 | u64 parent_objectid; | ||
| 1035 | u64 inode_objectid; | ||
| 1036 | u64 ref_index = 0; | ||
| 1037 | int ref_struct_size; | ||
| 1038 | |||
| 1039 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 1040 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
| 1041 | |||
| 1042 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
| 1043 | struct btrfs_inode_extref *r; | ||
| 1044 | |||
| 1045 | ref_struct_size = sizeof(struct btrfs_inode_extref); | ||
| 1046 | log_ref_ver = 1; | ||
| 1047 | r = (struct btrfs_inode_extref *)ref_ptr; | ||
| 1048 | parent_objectid = btrfs_inode_extref_parent(eb, r); | ||
| 1049 | } else { | ||
| 1050 | ref_struct_size = sizeof(struct btrfs_inode_ref); | ||
| 1051 | parent_objectid = key->offset; | ||
| 1052 | } | ||
| 1053 | inode_objectid = key->objectid; | ||
| 1054 | |||
| 1055 | /* | ||
| 1056 | * it is possible that we didn't log all the parent directories | ||
| 1057 | * for a given inode. If we don't find the dir, just don't | ||
| 1058 | * copy the back ref in. The link count fixup code will take | ||
| 1059 | * care of the rest | ||
| 1060 | */ | ||
| 1061 | dir = read_one_inode(root, parent_objectid); | ||
| 1062 | if (!dir) | ||
| 1063 | return -ENOENT; | ||
| 1064 | |||
| 1065 | inode = read_one_inode(root, inode_objectid); | ||
| 1066 | if (!inode) { | ||
| 1067 | iput(dir); | ||
| 1068 | return -EIO; | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | while (ref_ptr < ref_end) { | ||
| 1072 | if (log_ref_ver) { | ||
| 1073 | ret = extref_get_fields(eb, ref_ptr, &namelen, &name, | ||
| 1074 | &ref_index, &parent_objectid); | ||
| 1075 | /* | ||
| 1076 | * parent object can change from one array | ||
| 1077 | * item to another. | ||
| 1078 | */ | ||
| 1079 | if (!dir) | ||
| 1080 | dir = read_one_inode(root, parent_objectid); | ||
| 1081 | if (!dir) | ||
| 1082 | return -ENOENT; | ||
| 1083 | } else { | ||
| 1084 | ret = ref_get_fields(eb, ref_ptr, &namelen, &name, | ||
| 1085 | &ref_index); | ||
| 1086 | } | ||
| 1087 | if (ret) | ||
| 1088 | return ret; | ||
| 1089 | |||
| 1090 | /* if we already have a perfect match, we're done */ | ||
| 1091 | if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
| 1092 | ref_index, name, namelen)) { | ||
| 1093 | /* | ||
| 1094 | * look for a conflicting back reference in the | ||
| 1095 | * metadata. if we find one we have to unlink that name | ||
| 1096 | * of the file before we add our new link. Later on, we | ||
| 1097 | * overwrite any existing back reference, and we don't | ||
| 1098 | * want to create dangling pointers in the directory. | ||
| 1099 | */ | ||
| 1100 | |||
| 1101 | if (!search_done) { | ||
| 1102 | ret = __add_inode_ref(trans, root, path, log, | ||
| 1103 | dir, inode, eb, | ||
| 1104 | inode_objectid, | ||
| 1105 | parent_objectid, | ||
| 1106 | ref_index, name, namelen, | ||
| 1107 | &search_done); | ||
| 1108 | if (ret == 1) | ||
| 1109 | goto out; | ||
| 1110 | BUG_ON(ret); | ||
| 1111 | } | ||
| 1112 | |||
| 1113 | /* insert our name */ | ||
| 1114 | ret = btrfs_add_link(trans, dir, inode, name, namelen, | ||
| 1115 | 0, ref_index); | ||
| 1116 | BUG_ON(ret); | ||
| 1117 | |||
| 1118 | btrfs_update_inode(trans, root, inode); | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; | ||
| 1122 | kfree(name); | ||
| 1123 | if (log_ref_ver) { | ||
| 1124 | iput(dir); | ||
| 1125 | dir = NULL; | ||
| 1126 | } | ||
| 1127 | } | ||
| 947 | 1128 | ||
| 948 | /* finally write the back reference in the inode */ | 1129 | /* finally write the back reference in the inode */ |
| 949 | ret = overwrite_item(trans, root, path, eb, slot, key); | 1130 | ret = overwrite_item(trans, root, path, eb, slot, key); |
| 950 | BUG_ON(ret); | 1131 | BUG_ON(ret); |
| 951 | 1132 | ||
| 952 | out_nowrite: | 1133 | out: |
| 953 | btrfs_release_path(path); | 1134 | btrfs_release_path(path); |
| 954 | iput(dir); | 1135 | iput(dir); |
| 955 | iput(inode); | 1136 | iput(inode); |
| @@ -966,25 +1147,55 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans, | |||
| 966 | return ret; | 1147 | return ret; |
| 967 | } | 1148 | } |
| 968 | 1149 | ||
| 1150 | static int count_inode_extrefs(struct btrfs_root *root, | ||
| 1151 | struct inode *inode, struct btrfs_path *path) | ||
| 1152 | { | ||
| 1153 | int ret = 0; | ||
| 1154 | int name_len; | ||
| 1155 | unsigned int nlink = 0; | ||
| 1156 | u32 item_size; | ||
| 1157 | u32 cur_offset = 0; | ||
| 1158 | u64 inode_objectid = btrfs_ino(inode); | ||
| 1159 | u64 offset = 0; | ||
| 1160 | unsigned long ptr; | ||
| 1161 | struct btrfs_inode_extref *extref; | ||
| 1162 | struct extent_buffer *leaf; | ||
| 969 | 1163 | ||
| 970 | /* | 1164 | while (1) { |
| 971 | * There are a few corners where the link count of the file can't | 1165 | ret = btrfs_find_one_extref(root, inode_objectid, offset, path, |
| 972 | * be properly maintained during replay. So, instead of adding | 1166 | &extref, &offset); |
| 973 | * lots of complexity to the log code, we just scan the backrefs | 1167 | if (ret) |
| 974 | * for any file that has been through replay. | 1168 | break; |
| 975 | * | 1169 | |
| 976 | * The scan will update the link count on the inode to reflect the | 1170 | leaf = path->nodes[0]; |
| 977 | * number of back refs found. If it goes down to zero, the iput | 1171 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
| 978 | * will free the inode. | 1172 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); |
| 979 | */ | 1173 | |
| 980 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | 1174 | while (cur_offset < item_size) { |
| 981 | struct btrfs_root *root, | 1175 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); |
| 982 | struct inode *inode) | 1176 | name_len = btrfs_inode_extref_name_len(leaf, extref); |
| 1177 | |||
| 1178 | nlink++; | ||
| 1179 | |||
| 1180 | cur_offset += name_len + sizeof(*extref); | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | offset++; | ||
| 1184 | btrfs_release_path(path); | ||
| 1185 | } | ||
| 1186 | btrfs_release_path(path); | ||
| 1187 | |||
| 1188 | if (ret < 0) | ||
| 1189 | return ret; | ||
| 1190 | return nlink; | ||
| 1191 | } | ||
| 1192 | |||
| 1193 | static int count_inode_refs(struct btrfs_root *root, | ||
| 1194 | struct inode *inode, struct btrfs_path *path) | ||
| 983 | { | 1195 | { |
| 984 | struct btrfs_path *path; | ||
| 985 | int ret; | 1196 | int ret; |
| 986 | struct btrfs_key key; | 1197 | struct btrfs_key key; |
| 987 | u64 nlink = 0; | 1198 | unsigned int nlink = 0; |
| 988 | unsigned long ptr; | 1199 | unsigned long ptr; |
| 989 | unsigned long ptr_end; | 1200 | unsigned long ptr_end; |
| 990 | int name_len; | 1201 | int name_len; |
| @@ -994,10 +1205,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 994 | key.type = BTRFS_INODE_REF_KEY; | 1205 | key.type = BTRFS_INODE_REF_KEY; |
| 995 | key.offset = (u64)-1; | 1206 | key.offset = (u64)-1; |
| 996 | 1207 | ||
| 997 | path = btrfs_alloc_path(); | ||
| 998 | if (!path) | ||
| 999 | return -ENOMEM; | ||
| 1000 | |||
| 1001 | while (1) { | 1208 | while (1) { |
| 1002 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 1209 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 1003 | if (ret < 0) | 1210 | if (ret < 0) |
| @@ -1031,6 +1238,50 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 1031 | btrfs_release_path(path); | 1238 | btrfs_release_path(path); |
| 1032 | } | 1239 | } |
| 1033 | btrfs_release_path(path); | 1240 | btrfs_release_path(path); |
| 1241 | |||
| 1242 | return nlink; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | /* | ||
| 1246 | * There are a few corners where the link count of the file can't | ||
| 1247 | * be properly maintained during replay. So, instead of adding | ||
| 1248 | * lots of complexity to the log code, we just scan the backrefs | ||
| 1249 | * for any file that has been through replay. | ||
| 1250 | * | ||
| 1251 | * The scan will update the link count on the inode to reflect the | ||
| 1252 | * number of back refs found. If it goes down to zero, the iput | ||
| 1253 | * will free the inode. | ||
| 1254 | */ | ||
| 1255 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | ||
| 1256 | struct btrfs_root *root, | ||
| 1257 | struct inode *inode) | ||
| 1258 | { | ||
| 1259 | struct btrfs_path *path; | ||
| 1260 | int ret; | ||
| 1261 | u64 nlink = 0; | ||
| 1262 | u64 ino = btrfs_ino(inode); | ||
| 1263 | |||
| 1264 | path = btrfs_alloc_path(); | ||
| 1265 | if (!path) | ||
| 1266 | return -ENOMEM; | ||
| 1267 | |||
| 1268 | ret = count_inode_refs(root, inode, path); | ||
| 1269 | if (ret < 0) | ||
| 1270 | goto out; | ||
| 1271 | |||
| 1272 | nlink = ret; | ||
| 1273 | |||
| 1274 | ret = count_inode_extrefs(root, inode, path); | ||
| 1275 | if (ret == -ENOENT) | ||
| 1276 | ret = 0; | ||
| 1277 | |||
| 1278 | if (ret < 0) | ||
| 1279 | goto out; | ||
| 1280 | |||
| 1281 | nlink += ret; | ||
| 1282 | |||
| 1283 | ret = 0; | ||
| 1284 | |||
| 1034 | if (nlink != inode->i_nlink) { | 1285 | if (nlink != inode->i_nlink) { |
| 1035 | set_nlink(inode, nlink); | 1286 | set_nlink(inode, nlink); |
| 1036 | btrfs_update_inode(trans, root, inode); | 1287 | btrfs_update_inode(trans, root, inode); |
| @@ -1046,9 +1297,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 1046 | ret = insert_orphan_item(trans, root, ino); | 1297 | ret = insert_orphan_item(trans, root, ino); |
| 1047 | BUG_ON(ret); | 1298 | BUG_ON(ret); |
| 1048 | } | 1299 | } |
| 1049 | btrfs_free_path(path); | ||
| 1050 | 1300 | ||
| 1051 | return 0; | 1301 | out: |
| 1302 | btrfs_free_path(path); | ||
| 1303 | return ret; | ||
| 1052 | } | 1304 | } |
| 1053 | 1305 | ||
| 1054 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | 1306 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, |
| @@ -1695,6 +1947,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
| 1695 | ret = add_inode_ref(wc->trans, root, log, path, | 1947 | ret = add_inode_ref(wc->trans, root, log, path, |
| 1696 | eb, i, &key); | 1948 | eb, i, &key); |
| 1697 | BUG_ON(ret && ret != -ENOENT); | 1949 | BUG_ON(ret && ret != -ENOENT); |
| 1950 | } else if (key.type == BTRFS_INODE_EXTREF_KEY) { | ||
| 1951 | ret = add_inode_ref(wc->trans, root, log, path, | ||
| 1952 | eb, i, &key); | ||
| 1953 | BUG_ON(ret && ret != -ENOENT); | ||
| 1698 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { | 1954 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { |
| 1699 | ret = replay_one_extent(wc->trans, root, path, | 1955 | ret = replay_one_extent(wc->trans, root, path, |
| 1700 | eb, i, &key); | 1956 | eb, i, &key); |
| @@ -2037,7 +2293,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2037 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2293 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
| 2038 | wait_log_commit(trans, root, root->log_transid - 1); | 2294 | wait_log_commit(trans, root, root->log_transid - 1); |
| 2039 | while (1) { | 2295 | while (1) { |
| 2040 | unsigned long batch = root->log_batch; | 2296 | int batch = atomic_read(&root->log_batch); |
| 2041 | /* when we're on an ssd, just kick the log commit out */ | 2297 | /* when we're on an ssd, just kick the log commit out */ |
| 2042 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | 2298 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { |
| 2043 | mutex_unlock(&root->log_mutex); | 2299 | mutex_unlock(&root->log_mutex); |
| @@ -2045,7 +2301,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2045 | mutex_lock(&root->log_mutex); | 2301 | mutex_lock(&root->log_mutex); |
| 2046 | } | 2302 | } |
| 2047 | wait_for_writer(trans, root); | 2303 | wait_for_writer(trans, root); |
| 2048 | if (batch == root->log_batch) | 2304 | if (batch == atomic_read(&root->log_batch)) |
| 2049 | break; | 2305 | break; |
| 2050 | } | 2306 | } |
| 2051 | 2307 | ||
| @@ -2074,7 +2330,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2074 | 2330 | ||
| 2075 | btrfs_set_root_node(&log->root_item, log->node); | 2331 | btrfs_set_root_node(&log->root_item, log->node); |
| 2076 | 2332 | ||
| 2077 | root->log_batch = 0; | ||
| 2078 | root->log_transid++; | 2333 | root->log_transid++; |
| 2079 | log->log_transid = root->log_transid; | 2334 | log->log_transid = root->log_transid; |
| 2080 | root->log_start_pid = 0; | 2335 | root->log_start_pid = 0; |
| @@ -2087,7 +2342,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2087 | mutex_unlock(&root->log_mutex); | 2342 | mutex_unlock(&root->log_mutex); |
| 2088 | 2343 | ||
| 2089 | mutex_lock(&log_root_tree->log_mutex); | 2344 | mutex_lock(&log_root_tree->log_mutex); |
| 2090 | log_root_tree->log_batch++; | 2345 | atomic_inc(&log_root_tree->log_batch); |
| 2091 | atomic_inc(&log_root_tree->log_writers); | 2346 | atomic_inc(&log_root_tree->log_writers); |
| 2092 | mutex_unlock(&log_root_tree->log_mutex); | 2347 | mutex_unlock(&log_root_tree->log_mutex); |
| 2093 | 2348 | ||
| @@ -2157,7 +2412,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2157 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, | 2412 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, |
| 2158 | btrfs_header_level(log_root_tree->node)); | 2413 | btrfs_header_level(log_root_tree->node)); |
| 2159 | 2414 | ||
| 2160 | log_root_tree->log_batch = 0; | ||
| 2161 | log_root_tree->log_transid++; | 2415 | log_root_tree->log_transid++; |
| 2162 | smp_mb(); | 2416 | smp_mb(); |
| 2163 | 2417 | ||
| @@ -2171,9 +2425,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2171 | * in and cause problems either. | 2425 | * in and cause problems either. |
| 2172 | */ | 2426 | */ |
| 2173 | btrfs_scrub_pause_super(root); | 2427 | btrfs_scrub_pause_super(root); |
| 2174 | write_ctree_super(trans, root->fs_info->tree_root, 1); | 2428 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
| 2175 | btrfs_scrub_continue_super(root); | 2429 | btrfs_scrub_continue_super(root); |
| 2176 | ret = 0; | 2430 | if (ret) { |
| 2431 | btrfs_abort_transaction(trans, root, ret); | ||
| 2432 | goto out_wake_log_root; | ||
| 2433 | } | ||
| 2177 | 2434 | ||
| 2178 | mutex_lock(&root->log_mutex); | 2435 | mutex_lock(&root->log_mutex); |
| 2179 | if (root->last_log_commit < log_transid) | 2436 | if (root->last_log_commit < log_transid) |
| @@ -2209,7 +2466,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans, | |||
| 2209 | 2466 | ||
| 2210 | while (1) { | 2467 | while (1) { |
| 2211 | ret = find_first_extent_bit(&log->dirty_log_pages, | 2468 | ret = find_first_extent_bit(&log->dirty_log_pages, |
| 2212 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | 2469 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, |
| 2470 | NULL); | ||
| 2213 | if (ret) | 2471 | if (ret) |
| 2214 | break; | 2472 | break; |
| 2215 | 2473 | ||
| @@ -2646,6 +2904,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2646 | int ret; | 2904 | int ret; |
| 2647 | struct btrfs_key key; | 2905 | struct btrfs_key key; |
| 2648 | struct btrfs_key found_key; | 2906 | struct btrfs_key found_key; |
| 2907 | int start_slot; | ||
| 2649 | 2908 | ||
| 2650 | key.objectid = objectid; | 2909 | key.objectid = objectid; |
| 2651 | key.type = max_key_type; | 2910 | key.type = max_key_type; |
| @@ -2667,8 +2926,18 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2667 | if (found_key.objectid != objectid) | 2926 | if (found_key.objectid != objectid) |
| 2668 | break; | 2927 | break; |
| 2669 | 2928 | ||
| 2670 | ret = btrfs_del_item(trans, log, path); | 2929 | found_key.offset = 0; |
| 2671 | if (ret) | 2930 | found_key.type = 0; |
| 2931 | ret = btrfs_bin_search(path->nodes[0], &found_key, 0, | ||
| 2932 | &start_slot); | ||
| 2933 | |||
| 2934 | ret = btrfs_del_items(trans, log, path, start_slot, | ||
| 2935 | path->slots[0] - start_slot + 1); | ||
| 2936 | /* | ||
| 2937 | * If start slot isn't 0 then we don't need to re-search, we've | ||
| 2938 | * found the last guy with the objectid in this tree. | ||
| 2939 | */ | ||
| 2940 | if (ret || start_slot != 0) | ||
| 2672 | break; | 2941 | break; |
| 2673 | btrfs_release_path(path); | 2942 | btrfs_release_path(path); |
| 2674 | } | 2943 | } |
| @@ -2678,14 +2947,64 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2678 | return ret; | 2947 | return ret; |
| 2679 | } | 2948 | } |
| 2680 | 2949 | ||
| 2950 | static void fill_inode_item(struct btrfs_trans_handle *trans, | ||
| 2951 | struct extent_buffer *leaf, | ||
| 2952 | struct btrfs_inode_item *item, | ||
| 2953 | struct inode *inode, int log_inode_only) | ||
| 2954 | { | ||
| 2955 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | ||
| 2956 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | ||
| 2957 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | ||
| 2958 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | ||
| 2959 | |||
| 2960 | btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), | ||
| 2961 | inode->i_atime.tv_sec); | ||
| 2962 | btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), | ||
| 2963 | inode->i_atime.tv_nsec); | ||
| 2964 | |||
| 2965 | btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), | ||
| 2966 | inode->i_mtime.tv_sec); | ||
| 2967 | btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), | ||
| 2968 | inode->i_mtime.tv_nsec); | ||
| 2969 | |||
| 2970 | btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), | ||
| 2971 | inode->i_ctime.tv_sec); | ||
| 2972 | btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), | ||
| 2973 | inode->i_ctime.tv_nsec); | ||
| 2974 | |||
| 2975 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | ||
| 2976 | |||
| 2977 | btrfs_set_inode_sequence(leaf, item, inode->i_version); | ||
| 2978 | btrfs_set_inode_transid(leaf, item, trans->transid); | ||
| 2979 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | ||
| 2980 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | ||
| 2981 | btrfs_set_inode_block_group(leaf, item, 0); | ||
| 2982 | |||
| 2983 | if (log_inode_only) { | ||
| 2984 | /* set the generation to zero so the recover code | ||
| 2985 | * can tell the difference between an logging | ||
| 2986 | * just to say 'this inode exists' and a logging | ||
| 2987 | * to say 'update this inode with these values' | ||
| 2988 | */ | ||
| 2989 | btrfs_set_inode_generation(leaf, item, 0); | ||
| 2990 | btrfs_set_inode_size(leaf, item, 0); | ||
| 2991 | } else { | ||
| 2992 | btrfs_set_inode_generation(leaf, item, | ||
| 2993 | BTRFS_I(inode)->generation); | ||
| 2994 | btrfs_set_inode_size(leaf, item, inode->i_size); | ||
| 2995 | } | ||
| 2996 | |||
| 2997 | } | ||
| 2998 | |||
| 2681 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2999 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
| 2682 | struct btrfs_root *log, | 3000 | struct inode *inode, |
| 2683 | struct btrfs_path *dst_path, | 3001 | struct btrfs_path *dst_path, |
| 2684 | struct extent_buffer *src, | 3002 | struct extent_buffer *src, |
| 2685 | int start_slot, int nr, int inode_only) | 3003 | int start_slot, int nr, int inode_only) |
| 2686 | { | 3004 | { |
| 2687 | unsigned long src_offset; | 3005 | unsigned long src_offset; |
| 2688 | unsigned long dst_offset; | 3006 | unsigned long dst_offset; |
| 3007 | struct btrfs_root *log = BTRFS_I(inode)->root->log_root; | ||
| 2689 | struct btrfs_file_extent_item *extent; | 3008 | struct btrfs_file_extent_item *extent; |
| 2690 | struct btrfs_inode_item *inode_item; | 3009 | struct btrfs_inode_item *inode_item; |
| 2691 | int ret; | 3010 | int ret; |
| @@ -2694,6 +3013,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2694 | char *ins_data; | 3013 | char *ins_data; |
| 2695 | int i; | 3014 | int i; |
| 2696 | struct list_head ordered_sums; | 3015 | struct list_head ordered_sums; |
| 3016 | int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
| 2697 | 3017 | ||
| 2698 | INIT_LIST_HEAD(&ordered_sums); | 3018 | INIT_LIST_HEAD(&ordered_sums); |
| 2699 | 3019 | ||
| @@ -2722,29 +3042,23 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2722 | 3042 | ||
| 2723 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); | 3043 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); |
| 2724 | 3044 | ||
| 2725 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, | 3045 | if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { |
| 2726 | src_offset, ins_sizes[i]); | ||
| 2727 | |||
| 2728 | if (inode_only == LOG_INODE_EXISTS && | ||
| 2729 | ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { | ||
| 2730 | inode_item = btrfs_item_ptr(dst_path->nodes[0], | 3046 | inode_item = btrfs_item_ptr(dst_path->nodes[0], |
| 2731 | dst_path->slots[0], | 3047 | dst_path->slots[0], |
| 2732 | struct btrfs_inode_item); | 3048 | struct btrfs_inode_item); |
| 2733 | btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); | 3049 | fill_inode_item(trans, dst_path->nodes[0], inode_item, |
| 2734 | 3050 | inode, inode_only == LOG_INODE_EXISTS); | |
| 2735 | /* set the generation to zero so the recover code | 3051 | } else { |
| 2736 | * can tell the difference between an logging | 3052 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, |
| 2737 | * just to say 'this inode exists' and a logging | 3053 | src_offset, ins_sizes[i]); |
| 2738 | * to say 'update this inode with these values' | ||
| 2739 | */ | ||
| 2740 | btrfs_set_inode_generation(dst_path->nodes[0], | ||
| 2741 | inode_item, 0); | ||
| 2742 | } | 3054 | } |
| 3055 | |||
| 2743 | /* take a reference on file data extents so that truncates | 3056 | /* take a reference on file data extents so that truncates |
| 2744 | * or deletes of this inode don't have to relog the inode | 3057 | * or deletes of this inode don't have to relog the inode |
| 2745 | * again | 3058 | * again |
| 2746 | */ | 3059 | */ |
| 2747 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) { | 3060 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && |
| 3061 | !skip_csum) { | ||
| 2748 | int found_type; | 3062 | int found_type; |
| 2749 | extent = btrfs_item_ptr(src, start_slot + i, | 3063 | extent = btrfs_item_ptr(src, start_slot + i, |
| 2750 | struct btrfs_file_extent_item); | 3064 | struct btrfs_file_extent_item); |
| @@ -2753,8 +3067,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2753 | continue; | 3067 | continue; |
| 2754 | 3068 | ||
| 2755 | found_type = btrfs_file_extent_type(src, extent); | 3069 | found_type = btrfs_file_extent_type(src, extent); |
| 2756 | if (found_type == BTRFS_FILE_EXTENT_REG || | 3070 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
| 2757 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 2758 | u64 ds, dl, cs, cl; | 3071 | u64 ds, dl, cs, cl; |
| 2759 | ds = btrfs_file_extent_disk_bytenr(src, | 3072 | ds = btrfs_file_extent_disk_bytenr(src, |
| 2760 | extent); | 3073 | extent); |
| @@ -2803,6 +3116,239 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2803 | return ret; | 3116 | return ret; |
| 2804 | } | 3117 | } |
| 2805 | 3118 | ||
| 3119 | static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
| 3120 | { | ||
| 3121 | struct extent_map *em1, *em2; | ||
| 3122 | |||
| 3123 | em1 = list_entry(a, struct extent_map, list); | ||
| 3124 | em2 = list_entry(b, struct extent_map, list); | ||
| 3125 | |||
| 3126 | if (em1->start < em2->start) | ||
| 3127 | return -1; | ||
| 3128 | else if (em1->start > em2->start) | ||
| 3129 | return 1; | ||
| 3130 | return 0; | ||
| 3131 | } | ||
| 3132 | |||
| 3133 | struct log_args { | ||
| 3134 | struct extent_buffer *src; | ||
| 3135 | u64 next_offset; | ||
| 3136 | int start_slot; | ||
| 3137 | int nr; | ||
| 3138 | }; | ||
| 3139 | |||
| 3140 | static int log_one_extent(struct btrfs_trans_handle *trans, | ||
| 3141 | struct inode *inode, struct btrfs_root *root, | ||
| 3142 | struct extent_map *em, struct btrfs_path *path, | ||
| 3143 | struct btrfs_path *dst_path, struct log_args *args) | ||
| 3144 | { | ||
| 3145 | struct btrfs_root *log = root->log_root; | ||
| 3146 | struct btrfs_file_extent_item *fi; | ||
| 3147 | struct btrfs_key key; | ||
| 3148 | u64 start = em->mod_start; | ||
| 3149 | u64 search_start = start; | ||
| 3150 | u64 len = em->mod_len; | ||
| 3151 | u64 num_bytes; | ||
| 3152 | int nritems; | ||
| 3153 | int ret; | ||
| 3154 | |||
| 3155 | if (BTRFS_I(inode)->logged_trans == trans->transid) { | ||
| 3156 | ret = __btrfs_drop_extents(trans, log, inode, dst_path, start, | ||
| 3157 | start + len, NULL, 0); | ||
| 3158 | if (ret) | ||
| 3159 | return ret; | ||
| 3160 | } | ||
| 3161 | |||
| 3162 | while (len) { | ||
| 3163 | if (args->nr) | ||
| 3164 | goto next_slot; | ||
| 3165 | again: | ||
| 3166 | key.objectid = btrfs_ino(inode); | ||
| 3167 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
| 3168 | key.offset = search_start; | ||
| 3169 | |||
| 3170 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 3171 | if (ret < 0) | ||
| 3172 | return ret; | ||
| 3173 | |||
| 3174 | if (ret) { | ||
| 3175 | /* | ||
| 3176 | * A rare case where we can have an em for a section of a | ||
| 3177 | * larger extent so we need to make sure that this em | ||
| 3178 | * falls within the extent we've found. If not we just | ||
| 3179 | * bail and go back to ye-olde way of doing things but | ||
| 3180 | * it happens often enough in testing that we need to do | ||
| 3181 | * this dance to make sure. | ||
| 3182 | */ | ||
| 3183 | do { | ||
| 3184 | if (path->slots[0] == 0) { | ||
| 3185 | btrfs_release_path(path); | ||
| 3186 | if (search_start == 0) | ||
| 3187 | return -ENOENT; | ||
| 3188 | search_start--; | ||
| 3189 | goto again; | ||
| 3190 | } | ||
| 3191 | |||
| 3192 | path->slots[0]--; | ||
| 3193 | btrfs_item_key_to_cpu(path->nodes[0], &key, | ||
| 3194 | path->slots[0]); | ||
| 3195 | if (key.objectid != btrfs_ino(inode) || | ||
| 3196 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
| 3197 | btrfs_release_path(path); | ||
| 3198 | return -ENOENT; | ||
| 3199 | } | ||
| 3200 | } while (key.offset > start); | ||
| 3201 | |||
| 3202 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 3203 | struct btrfs_file_extent_item); | ||
| 3204 | num_bytes = btrfs_file_extent_num_bytes(path->nodes[0], | ||
| 3205 | fi); | ||
| 3206 | if (key.offset + num_bytes <= start) { | ||
| 3207 | btrfs_release_path(path); | ||
| 3208 | return -ENOENT; | ||
| 3209 | } | ||
| 3210 | } | ||
| 3211 | args->src = path->nodes[0]; | ||
| 3212 | next_slot: | ||
| 3213 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 3214 | fi = btrfs_item_ptr(args->src, path->slots[0], | ||
| 3215 | struct btrfs_file_extent_item); | ||
| 3216 | if (args->nr && | ||
| 3217 | args->start_slot + args->nr == path->slots[0]) { | ||
| 3218 | args->nr++; | ||
| 3219 | } else if (args->nr) { | ||
| 3220 | ret = copy_items(trans, inode, dst_path, args->src, | ||
| 3221 | args->start_slot, args->nr, | ||
| 3222 | LOG_INODE_ALL); | ||
| 3223 | if (ret) | ||
| 3224 | return ret; | ||
| 3225 | args->nr = 1; | ||
| 3226 | args->start_slot = path->slots[0]; | ||
| 3227 | } else if (!args->nr) { | ||
| 3228 | args->nr = 1; | ||
| 3229 | args->start_slot = path->slots[0]; | ||
| 3230 | } | ||
| 3231 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 3232 | path->slots[0]++; | ||
| 3233 | num_bytes = btrfs_file_extent_num_bytes(args->src, fi); | ||
| 3234 | if (len < num_bytes) { | ||
| 3235 | /* I _think_ this is ok, envision we write to a | ||
| 3236 | * preallocated space that is adjacent to a previously | ||
| 3237 | * written preallocated space that gets merged when we | ||
| 3238 | * mark this preallocated space written. If we do not | ||
| 3239 | * have the adjacent extent in cache then when we copy | ||
| 3240 | * this extent it could end up being larger than our EM | ||
| 3241 | * thinks it is, which is a-ok, so just set len to 0. | ||
| 3242 | */ | ||
| 3243 | len = 0; | ||
| 3244 | } else { | ||
| 3245 | len -= num_bytes; | ||
| 3246 | } | ||
| 3247 | start = key.offset + num_bytes; | ||
| 3248 | args->next_offset = start; | ||
| 3249 | search_start = start; | ||
| 3250 | |||
| 3251 | if (path->slots[0] < nritems) { | ||
| 3252 | if (len) | ||
| 3253 | goto next_slot; | ||
| 3254 | break; | ||
| 3255 | } | ||
| 3256 | |||
| 3257 | if (args->nr) { | ||
| 3258 | ret = copy_items(trans, inode, dst_path, args->src, | ||
| 3259 | args->start_slot, args->nr, | ||
| 3260 | LOG_INODE_ALL); | ||
| 3261 | if (ret) | ||
| 3262 | return ret; | ||
| 3263 | args->nr = 0; | ||
| 3264 | btrfs_release_path(path); | ||
| 3265 | } | ||
| 3266 | } | ||
| 3267 | |||
| 3268 | return 0; | ||
| 3269 | } | ||
| 3270 | |||
| 3271 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | ||
| 3272 | struct btrfs_root *root, | ||
| 3273 | struct inode *inode, | ||
| 3274 | struct btrfs_path *path, | ||
| 3275 | struct btrfs_path *dst_path) | ||
| 3276 | { | ||
| 3277 | struct log_args args; | ||
| 3278 | struct extent_map *em, *n; | ||
| 3279 | struct list_head extents; | ||
| 3280 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
| 3281 | u64 test_gen; | ||
| 3282 | int ret = 0; | ||
| 3283 | |||
| 3284 | INIT_LIST_HEAD(&extents); | ||
| 3285 | |||
| 3286 | memset(&args, 0, sizeof(args)); | ||
| 3287 | |||
| 3288 | write_lock(&tree->lock); | ||
| 3289 | test_gen = root->fs_info->last_trans_committed; | ||
| 3290 | |||
| 3291 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { | ||
| 3292 | list_del_init(&em->list); | ||
| 3293 | if (em->generation <= test_gen) | ||
| 3294 | continue; | ||
| 3295 | /* Need a ref to keep it from getting evicted from cache */ | ||
| 3296 | atomic_inc(&em->refs); | ||
| 3297 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
| 3298 | list_add_tail(&em->list, &extents); | ||
| 3299 | } | ||
| 3300 | |||
| 3301 | list_sort(NULL, &extents, extent_cmp); | ||
| 3302 | |||
| 3303 | while (!list_empty(&extents)) { | ||
| 3304 | em = list_entry(extents.next, struct extent_map, list); | ||
| 3305 | |||
| 3306 | list_del_init(&em->list); | ||
| 3307 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
| 3308 | |||
| 3309 | /* | ||
| 3310 | * If we had an error we just need to delete everybody from our | ||
| 3311 | * private list. | ||
| 3312 | */ | ||
| 3313 | if (ret) { | ||
| 3314 | free_extent_map(em); | ||
| 3315 | continue; | ||
| 3316 | } | ||
| 3317 | |||
| 3318 | write_unlock(&tree->lock); | ||
| 3319 | |||
| 3320 | /* | ||
| 3321 | * If the previous EM and the last extent we left off on aren't | ||
| 3322 | * sequential then we need to copy the items we have and redo | ||
| 3323 | * our search | ||
| 3324 | */ | ||
| 3325 | if (args.nr && em->mod_start != args.next_offset) { | ||
| 3326 | ret = copy_items(trans, inode, dst_path, args.src, | ||
| 3327 | args.start_slot, args.nr, | ||
| 3328 | LOG_INODE_ALL); | ||
| 3329 | if (ret) { | ||
| 3330 | free_extent_map(em); | ||
| 3331 | write_lock(&tree->lock); | ||
| 3332 | continue; | ||
| 3333 | } | ||
| 3334 | btrfs_release_path(path); | ||
| 3335 | args.nr = 0; | ||
| 3336 | } | ||
| 3337 | |||
| 3338 | ret = log_one_extent(trans, inode, root, em, path, dst_path, &args); | ||
| 3339 | free_extent_map(em); | ||
| 3340 | write_lock(&tree->lock); | ||
| 3341 | } | ||
| 3342 | WARN_ON(!list_empty(&extents)); | ||
| 3343 | write_unlock(&tree->lock); | ||
| 3344 | |||
| 3345 | if (!ret && args.nr) | ||
| 3346 | ret = copy_items(trans, inode, dst_path, args.src, | ||
| 3347 | args.start_slot, args.nr, LOG_INODE_ALL); | ||
| 3348 | btrfs_release_path(path); | ||
| 3349 | return ret; | ||
| 3350 | } | ||
| 3351 | |||
| 2806 | /* log a single inode in the tree log. | 3352 | /* log a single inode in the tree log. |
| 2807 | * At least one parent directory for this inode must exist in the tree | 3353 | * At least one parent directory for this inode must exist in the tree |
| 2808 | * or be logged already. | 3354 | * or be logged already. |
| @@ -2832,6 +3378,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2832 | int nritems; | 3378 | int nritems; |
| 2833 | int ins_start_slot = 0; | 3379 | int ins_start_slot = 0; |
| 2834 | int ins_nr; | 3380 | int ins_nr; |
| 3381 | bool fast_search = false; | ||
| 2835 | u64 ino = btrfs_ino(inode); | 3382 | u64 ino = btrfs_ino(inode); |
| 2836 | 3383 | ||
| 2837 | log = root->log_root; | 3384 | log = root->log_root; |
| @@ -2851,21 +3398,23 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2851 | 3398 | ||
| 2852 | max_key.objectid = ino; | 3399 | max_key.objectid = ino; |
| 2853 | 3400 | ||
| 2854 | /* today the code can only do partial logging of directories */ | ||
| 2855 | if (!S_ISDIR(inode->i_mode)) | ||
| 2856 | inode_only = LOG_INODE_ALL; | ||
| 2857 | 3401 | ||
| 3402 | /* today the code can only do partial logging of directories */ | ||
| 2858 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 3403 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) |
| 2859 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3404 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
| 2860 | else | 3405 | else |
| 2861 | max_key.type = (u8)-1; | 3406 | max_key.type = (u8)-1; |
| 2862 | max_key.offset = (u64)-1; | 3407 | max_key.offset = (u64)-1; |
| 2863 | 3408 | ||
| 2864 | ret = btrfs_commit_inode_delayed_items(trans, inode); | 3409 | /* Only run delayed items if we are a dir or a new file */ |
| 2865 | if (ret) { | 3410 | if (S_ISDIR(inode->i_mode) || |
| 2866 | btrfs_free_path(path); | 3411 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { |
| 2867 | btrfs_free_path(dst_path); | 3412 | ret = btrfs_commit_inode_delayed_items(trans, inode); |
| 2868 | return ret; | 3413 | if (ret) { |
| 3414 | btrfs_free_path(path); | ||
| 3415 | btrfs_free_path(dst_path); | ||
| 3416 | return ret; | ||
| 3417 | } | ||
| 2869 | } | 3418 | } |
| 2870 | 3419 | ||
| 2871 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3420 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
| @@ -2881,7 +3430,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2881 | max_key_type = BTRFS_XATTR_ITEM_KEY; | 3430 | max_key_type = BTRFS_XATTR_ITEM_KEY; |
| 2882 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 3431 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
| 2883 | } else { | 3432 | } else { |
| 2884 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 3433 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
| 3434 | &BTRFS_I(inode)->runtime_flags)) { | ||
| 3435 | ret = btrfs_truncate_inode_items(trans, log, | ||
| 3436 | inode, 0, 0); | ||
| 3437 | } else { | ||
| 3438 | fast_search = true; | ||
| 3439 | max_key.type = BTRFS_XATTR_ITEM_KEY; | ||
| 3440 | ret = drop_objectid_items(trans, log, path, ino, | ||
| 3441 | BTRFS_XATTR_ITEM_KEY); | ||
| 3442 | } | ||
| 2885 | } | 3443 | } |
| 2886 | if (ret) { | 3444 | if (ret) { |
| 2887 | err = ret; | 3445 | err = ret; |
| @@ -2912,7 +3470,7 @@ again: | |||
| 2912 | goto next_slot; | 3470 | goto next_slot; |
| 2913 | } | 3471 | } |
| 2914 | 3472 | ||
| 2915 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 3473 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
| 2916 | ins_nr, inode_only); | 3474 | ins_nr, inode_only); |
| 2917 | if (ret) { | 3475 | if (ret) { |
| 2918 | err = ret; | 3476 | err = ret; |
| @@ -2930,7 +3488,7 @@ next_slot: | |||
| 2930 | goto again; | 3488 | goto again; |
| 2931 | } | 3489 | } |
| 2932 | if (ins_nr) { | 3490 | if (ins_nr) { |
| 2933 | ret = copy_items(trans, log, dst_path, src, | 3491 | ret = copy_items(trans, inode, dst_path, src, |
| 2934 | ins_start_slot, | 3492 | ins_start_slot, |
| 2935 | ins_nr, inode_only); | 3493 | ins_nr, inode_only); |
| 2936 | if (ret) { | 3494 | if (ret) { |
| @@ -2951,8 +3509,7 @@ next_slot: | |||
| 2951 | break; | 3509 | break; |
| 2952 | } | 3510 | } |
| 2953 | if (ins_nr) { | 3511 | if (ins_nr) { |
| 2954 | ret = copy_items(trans, log, dst_path, src, | 3512 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
| 2955 | ins_start_slot, | ||
| 2956 | ins_nr, inode_only); | 3513 | ins_nr, inode_only); |
| 2957 | if (ret) { | 3514 | if (ret) { |
| 2958 | err = ret; | 3515 | err = ret; |
| @@ -2960,7 +3517,24 @@ next_slot: | |||
| 2960 | } | 3517 | } |
| 2961 | ins_nr = 0; | 3518 | ins_nr = 0; |
| 2962 | } | 3519 | } |
| 2963 | WARN_ON(ins_nr); | 3520 | |
| 3521 | if (fast_search) { | ||
| 3522 | btrfs_release_path(path); | ||
| 3523 | btrfs_release_path(dst_path); | ||
| 3524 | ret = btrfs_log_changed_extents(trans, root, inode, path, | ||
| 3525 | dst_path); | ||
| 3526 | if (ret) { | ||
| 3527 | err = ret; | ||
| 3528 | goto out_unlock; | ||
| 3529 | } | ||
| 3530 | } else { | ||
| 3531 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
| 3532 | struct extent_map *em, *n; | ||
| 3533 | |||
| 3534 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) | ||
| 3535 | list_del_init(&em->list); | ||
| 3536 | } | ||
| 3537 | |||
| 2964 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 3538 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
| 2965 | btrfs_release_path(path); | 3539 | btrfs_release_path(path); |
| 2966 | btrfs_release_path(dst_path); | 3540 | btrfs_release_path(dst_path); |
| @@ -2971,6 +3545,7 @@ next_slot: | |||
| 2971 | } | 3545 | } |
| 2972 | } | 3546 | } |
| 2973 | BTRFS_I(inode)->logged_trans = trans->transid; | 3547 | BTRFS_I(inode)->logged_trans = trans->transid; |
| 3548 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | ||
| 2974 | out_unlock: | 3549 | out_unlock: |
| 2975 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 3550 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 2976 | 3551 | ||
| @@ -3138,7 +3713,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 3138 | end_trans: | 3713 | end_trans: |
| 3139 | dput(old_parent); | 3714 | dput(old_parent); |
| 3140 | if (ret < 0) { | 3715 | if (ret < 0) { |
| 3141 | BUG_ON(ret != -ENOSPC); | 3716 | WARN_ON(ret != -ENOSPC); |
| 3142 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3717 | root->fs_info->last_trans_log_full_commit = trans->transid; |
| 3143 | ret = 1; | 3718 | ret = 1; |
| 3144 | } | 3719 | } |
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index ab942f46b3dd..99be4c138db6 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
| @@ -143,14 +143,13 @@ EXPORT_SYMBOL(ulist_free); | |||
| 143 | * In case of allocation failure -ENOMEM is returned and the ulist stays | 143 | * In case of allocation failure -ENOMEM is returned and the ulist stays |
| 144 | * unaltered. | 144 | * unaltered. |
| 145 | */ | 145 | */ |
| 146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 146 | int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask) |
| 147 | gfp_t gfp_mask) | ||
| 148 | { | 147 | { |
| 149 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); | 148 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); |
| 150 | } | 149 | } |
| 151 | 150 | ||
| 152 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | 151 | int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, |
| 153 | unsigned long *old_aux, gfp_t gfp_mask) | 152 | u64 *old_aux, gfp_t gfp_mask) |
| 154 | { | 153 | { |
| 155 | int i; | 154 | int i; |
| 156 | 155 | ||
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 21bdc8ec8130..21a1963439c3 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h | |||
| @@ -33,7 +33,7 @@ struct ulist_iterator { | |||
| 33 | */ | 33 | */ |
| 34 | struct ulist_node { | 34 | struct ulist_node { |
| 35 | u64 val; /* value to store */ | 35 | u64 val; /* value to store */ |
| 36 | unsigned long aux; /* auxiliary value saved along with the val */ | 36 | u64 aux; /* auxiliary value saved along with the val */ |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | struct ulist { | 39 | struct ulist { |
| @@ -65,10 +65,9 @@ void ulist_fini(struct ulist *ulist); | |||
| 65 | void ulist_reinit(struct ulist *ulist); | 65 | void ulist_reinit(struct ulist *ulist); |
| 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); | 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); |
| 67 | void ulist_free(struct ulist *ulist); | 67 | void ulist_free(struct ulist *ulist); |
| 68 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 68 | int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); |
| 69 | gfp_t gfp_mask); | 69 | int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, |
| 70 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | 70 | u64 *old_aux, gfp_t gfp_mask); |
| 71 | unsigned long *old_aux, gfp_t gfp_mask); | ||
| 72 | struct ulist_node *ulist_next(struct ulist *ulist, | 71 | struct ulist_node *ulist_next(struct ulist *ulist, |
| 73 | struct ulist_iterator *uiter); | 72 | struct ulist_iterator *uiter); |
| 74 | 73 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 88b969aeeb71..029b903a4ae3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -639,7 +639,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 639 | 639 | ||
| 640 | bdev = blkdev_get_by_path(device->name->str, flags, holder); | 640 | bdev = blkdev_get_by_path(device->name->str, flags, holder); |
| 641 | if (IS_ERR(bdev)) { | 641 | if (IS_ERR(bdev)) { |
| 642 | printk(KERN_INFO "open %s failed\n", device->name->str); | 642 | printk(KERN_INFO "btrfs: open %s failed\n", device->name->str); |
| 643 | goto error; | 643 | goto error; |
| 644 | } | 644 | } |
| 645 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | 645 | filemap_write_and_wait(bdev->bd_inode->i_mapping); |
| @@ -1475,6 +1475,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1475 | free_fs_devices(cur_devices); | 1475 | free_fs_devices(cur_devices); |
| 1476 | } | 1476 | } |
| 1477 | 1477 | ||
| 1478 | root->fs_info->num_tolerated_disk_barrier_failures = | ||
| 1479 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); | ||
| 1480 | |||
| 1478 | /* | 1481 | /* |
| 1479 | * at this point, the device is zero sized. We want to | 1482 | * at this point, the device is zero sized. We want to |
| 1480 | * remove it from the devices list and zero out the old super | 1483 | * remove it from the devices list and zero out the old super |
| @@ -1775,15 +1778,21 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1775 | 1778 | ||
| 1776 | if (seeding_dev) { | 1779 | if (seeding_dev) { |
| 1777 | ret = init_first_rw_device(trans, root, device); | 1780 | ret = init_first_rw_device(trans, root, device); |
| 1778 | if (ret) | 1781 | if (ret) { |
| 1782 | btrfs_abort_transaction(trans, root, ret); | ||
| 1779 | goto error_trans; | 1783 | goto error_trans; |
| 1784 | } | ||
| 1780 | ret = btrfs_finish_sprout(trans, root); | 1785 | ret = btrfs_finish_sprout(trans, root); |
| 1781 | if (ret) | 1786 | if (ret) { |
| 1787 | btrfs_abort_transaction(trans, root, ret); | ||
| 1782 | goto error_trans; | 1788 | goto error_trans; |
| 1789 | } | ||
| 1783 | } else { | 1790 | } else { |
| 1784 | ret = btrfs_add_device(trans, root, device); | 1791 | ret = btrfs_add_device(trans, root, device); |
| 1785 | if (ret) | 1792 | if (ret) { |
| 1793 | btrfs_abort_transaction(trans, root, ret); | ||
| 1786 | goto error_trans; | 1794 | goto error_trans; |
| 1795 | } | ||
| 1787 | } | 1796 | } |
| 1788 | 1797 | ||
| 1789 | /* | 1798 | /* |
| @@ -1793,6 +1802,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1793 | btrfs_clear_space_info_full(root->fs_info); | 1802 | btrfs_clear_space_info_full(root->fs_info); |
| 1794 | 1803 | ||
| 1795 | unlock_chunks(root); | 1804 | unlock_chunks(root); |
| 1805 | root->fs_info->num_tolerated_disk_barrier_failures = | ||
| 1806 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); | ||
| 1796 | ret = btrfs_commit_transaction(trans, root); | 1807 | ret = btrfs_commit_transaction(trans, root); |
| 1797 | 1808 | ||
| 1798 | if (seeding_dev) { | 1809 | if (seeding_dev) { |
| @@ -1814,7 +1825,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1814 | 1825 | ||
| 1815 | error_trans: | 1826 | error_trans: |
| 1816 | unlock_chunks(root); | 1827 | unlock_chunks(root); |
| 1817 | btrfs_abort_transaction(trans, root, ret); | ||
| 1818 | btrfs_end_transaction(trans, root); | 1828 | btrfs_end_transaction(trans, root); |
| 1819 | rcu_string_free(device->name); | 1829 | rcu_string_free(device->name); |
| 1820 | kfree(device); | 1830 | kfree(device); |
| @@ -2804,6 +2814,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2804 | } | 2814 | } |
| 2805 | } | 2815 | } |
| 2806 | 2816 | ||
| 2817 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
| 2818 | int num_tolerated_disk_barrier_failures; | ||
| 2819 | u64 target = bctl->sys.target; | ||
| 2820 | |||
| 2821 | num_tolerated_disk_barrier_failures = | ||
| 2822 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
| 2823 | if (num_tolerated_disk_barrier_failures > 0 && | ||
| 2824 | (target & | ||
| 2825 | (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 | | ||
| 2826 | BTRFS_AVAIL_ALLOC_BIT_SINGLE))) | ||
| 2827 | num_tolerated_disk_barrier_failures = 0; | ||
| 2828 | else if (num_tolerated_disk_barrier_failures > 1 && | ||
| 2829 | (target & | ||
| 2830 | (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))) | ||
| 2831 | num_tolerated_disk_barrier_failures = 1; | ||
| 2832 | |||
| 2833 | fs_info->num_tolerated_disk_barrier_failures = | ||
| 2834 | num_tolerated_disk_barrier_failures; | ||
| 2835 | } | ||
| 2836 | |||
| 2807 | ret = insert_balance_item(fs_info->tree_root, bctl); | 2837 | ret = insert_balance_item(fs_info->tree_root, bctl); |
| 2808 | if (ret && ret != -EEXIST) | 2838 | if (ret && ret != -EEXIST) |
| 2809 | goto out; | 2839 | goto out; |
| @@ -2836,6 +2866,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2836 | __cancel_balance(fs_info); | 2866 | __cancel_balance(fs_info); |
| 2837 | } | 2867 | } |
| 2838 | 2868 | ||
| 2869 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
| 2870 | fs_info->num_tolerated_disk_barrier_failures = | ||
| 2871 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
| 2872 | } | ||
| 2873 | |||
| 2839 | wake_up(&fs_info->balance_wait_q); | 2874 | wake_up(&fs_info->balance_wait_q); |
| 2840 | 2875 | ||
| 2841 | return ret; | 2876 | return ret; |
| @@ -3608,12 +3643,16 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
| 3608 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, | 3643 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, |
| 3609 | &sys_chunk_size, &sys_stripe_size, | 3644 | &sys_chunk_size, &sys_stripe_size, |
| 3610 | sys_chunk_offset, alloc_profile); | 3645 | sys_chunk_offset, alloc_profile); |
| 3611 | if (ret) | 3646 | if (ret) { |
| 3612 | goto abort; | 3647 | btrfs_abort_transaction(trans, root, ret); |
| 3648 | goto out; | ||
| 3649 | } | ||
| 3613 | 3650 | ||
| 3614 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); | 3651 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); |
| 3615 | if (ret) | 3652 | if (ret) { |
| 3616 | goto abort; | 3653 | btrfs_abort_transaction(trans, root, ret); |
| 3654 | goto out; | ||
| 3655 | } | ||
| 3617 | 3656 | ||
| 3618 | /* | 3657 | /* |
| 3619 | * Modifying chunk tree needs allocating new blocks from both | 3658 | * Modifying chunk tree needs allocating new blocks from both |
| @@ -3623,19 +3662,19 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
| 3623 | */ | 3662 | */ |
| 3624 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, | 3663 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, |
| 3625 | chunk_size, stripe_size); | 3664 | chunk_size, stripe_size); |
| 3626 | if (ret) | 3665 | if (ret) { |
| 3627 | goto abort; | 3666 | btrfs_abort_transaction(trans, root, ret); |
| 3667 | goto out; | ||
| 3668 | } | ||
| 3628 | 3669 | ||
| 3629 | ret = __finish_chunk_alloc(trans, extent_root, sys_map, | 3670 | ret = __finish_chunk_alloc(trans, extent_root, sys_map, |
| 3630 | sys_chunk_offset, sys_chunk_size, | 3671 | sys_chunk_offset, sys_chunk_size, |
| 3631 | sys_stripe_size); | 3672 | sys_stripe_size); |
| 3632 | if (ret) | 3673 | if (ret) |
| 3633 | goto abort; | 3674 | btrfs_abort_transaction(trans, root, ret); |
| 3634 | 3675 | ||
| 3635 | return 0; | 3676 | out: |
| 3636 | 3677 | ||
| 3637 | abort: | ||
| 3638 | btrfs_abort_transaction(trans, root, ret); | ||
| 3639 | return ret; | 3678 | return ret; |
| 3640 | } | 3679 | } |
| 3641 | 3680 | ||
| @@ -3760,7 +3799,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3760 | read_unlock(&em_tree->lock); | 3799 | read_unlock(&em_tree->lock); |
| 3761 | 3800 | ||
| 3762 | if (!em) { | 3801 | if (!em) { |
| 3763 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", | 3802 | printk(KERN_CRIT "btrfs: unable to find logical %llu len %llu\n", |
| 3764 | (unsigned long long)logical, | 3803 | (unsigned long long)logical, |
| 3765 | (unsigned long long)*length); | 3804 | (unsigned long long)*length); |
| 3766 | BUG(); | 3805 | BUG(); |
| @@ -4217,7 +4256,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 4217 | 4256 | ||
| 4218 | total_devs = bbio->num_stripes; | 4257 | total_devs = bbio->num_stripes; |
| 4219 | if (map_length < length) { | 4258 | if (map_length < length) { |
| 4220 | printk(KERN_CRIT "mapping failed logical %llu bio len %llu " | 4259 | printk(KERN_CRIT "btrfs: mapping failed logical %llu bio len %llu " |
| 4221 | "len %llu\n", (unsigned long long)logical, | 4260 | "len %llu\n", (unsigned long long)logical, |
| 4222 | (unsigned long long)length, | 4261 | (unsigned long long)length, |
| 4223 | (unsigned long long)map_length); | 4262 | (unsigned long long)map_length); |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 92c20654cc55..9acb846c3e7f 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
| @@ -97,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 97 | *total_in = 0; | 97 | *total_in = 0; |
| 98 | 98 | ||
| 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
| 100 | printk(KERN_WARNING "deflateInit failed\n"); | 100 | printk(KERN_WARNING "btrfs: deflateInit failed\n"); |
| 101 | ret = -1; | 101 | ret = -1; |
| 102 | goto out; | 102 | goto out; |
| 103 | } | 103 | } |
| @@ -125,7 +125,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 125 | while (workspace->def_strm.total_in < len) { | 125 | while (workspace->def_strm.total_in < len) { |
| 126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | 126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); |
| 127 | if (ret != Z_OK) { | 127 | if (ret != Z_OK) { |
| 128 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | 128 | printk(KERN_DEBUG "btrfs: deflate in loop returned %d\n", |
| 129 | ret); | 129 | ret); |
| 130 | zlib_deflateEnd(&workspace->def_strm); | 130 | zlib_deflateEnd(&workspace->def_strm); |
| 131 | ret = -1; | 131 | ret = -1; |
| @@ -252,7 +252,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
| 252 | } | 252 | } |
| 253 | 253 | ||
| 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| 255 | printk(KERN_WARNING "inflateInit failed\n"); | 255 | printk(KERN_WARNING "btrfs: inflateInit failed\n"); |
| 256 | return -1; | 256 | return -1; |
| 257 | } | 257 | } |
| 258 | while (workspace->inf_strm.total_in < srclen) { | 258 | while (workspace->inf_strm.total_in < srclen) { |
| @@ -336,7 +336,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
| 336 | } | 336 | } |
| 337 | 337 | ||
| 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| 339 | printk(KERN_WARNING "inflateInit failed\n"); | 339 | printk(KERN_WARNING "btrfs: inflateInit failed\n"); |
| 340 | return -1; | 340 | return -1; |
| 341 | } | 341 | } |
| 342 | 342 | ||
