diff options
Diffstat (limited to 'fs/btrfs')
50 files changed, 8749 insertions, 3198 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d7fcdba141a2..7df3e0f0ee51 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
| @@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
| 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
| 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ | 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ |
| 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ | 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ |
| 11 | reada.o backref.o ulist.o qgroup.o send.o | 11 | reada.o backref.o ulist.o qgroup.o send.o dev-replace.o |
| 12 | 12 | ||
| 13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o | 13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o |
| 14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o | 14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 0c16e3dbfd56..e15d2b0d8d3b 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
| @@ -121,6 +121,8 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, | |||
| 121 | ret = posix_acl_equiv_mode(acl, &inode->i_mode); | 121 | ret = posix_acl_equiv_mode(acl, &inode->i_mode); |
| 122 | if (ret < 0) | 122 | if (ret < 0) |
| 123 | return ret; | 123 | return ret; |
| 124 | if (ret == 0) | ||
| 125 | acl = NULL; | ||
| 124 | } | 126 | } |
| 125 | ret = 0; | 127 | ret = 0; |
| 126 | break; | 128 | break; |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ff6475f409d6..04edf69be875 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/vmalloc.h> | ||
| 19 | #include "ctree.h" | 20 | #include "ctree.h" |
| 20 | #include "disk-io.h" | 21 | #include "disk-io.h" |
| 21 | #include "backref.h" | 22 | #include "backref.h" |
| @@ -231,7 +232,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
| 231 | } | 232 | } |
| 232 | if (!ret) { | 233 | if (!ret) { |
| 233 | ret = ulist_add(parents, eb->start, | 234 | ret = ulist_add(parents, eb->start, |
| 234 | (unsigned long)eie, GFP_NOFS); | 235 | (uintptr_t)eie, GFP_NOFS); |
| 235 | if (ret < 0) | 236 | if (ret < 0) |
| 236 | break; | 237 | break; |
| 237 | if (!extent_item_pos) { | 238 | if (!extent_item_pos) { |
| @@ -282,9 +283,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
| 282 | goto out; | 283 | goto out; |
| 283 | } | 284 | } |
| 284 | 285 | ||
| 285 | rcu_read_lock(); | 286 | root_level = btrfs_old_root_level(root, time_seq); |
| 286 | root_level = btrfs_header_level(root->node); | ||
| 287 | rcu_read_unlock(); | ||
| 288 | 287 | ||
| 289 | if (root_level + 1 == level) | 288 | if (root_level + 1 == level) |
| 290 | goto out; | 289 | goto out; |
| @@ -363,8 +362,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 363 | ULIST_ITER_INIT(&uiter); | 362 | ULIST_ITER_INIT(&uiter); |
| 364 | node = ulist_next(parents, &uiter); | 363 | node = ulist_next(parents, &uiter); |
| 365 | ref->parent = node ? node->val : 0; | 364 | ref->parent = node ? node->val : 0; |
| 366 | ref->inode_list = | 365 | ref->inode_list = node ? |
| 367 | node ? (struct extent_inode_elem *)node->aux : 0; | 366 | (struct extent_inode_elem *)(uintptr_t)node->aux : 0; |
| 368 | 367 | ||
| 369 | /* additional parents require new refs being added here */ | 368 | /* additional parents require new refs being added here */ |
| 370 | while ((node = ulist_next(parents, &uiter))) { | 369 | while ((node = ulist_next(parents, &uiter))) { |
| @@ -375,8 +374,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 375 | } | 374 | } |
| 376 | memcpy(new_ref, ref, sizeof(*ref)); | 375 | memcpy(new_ref, ref, sizeof(*ref)); |
| 377 | new_ref->parent = node->val; | 376 | new_ref->parent = node->val; |
| 378 | new_ref->inode_list = | 377 | new_ref->inode_list = (struct extent_inode_elem *) |
| 379 | (struct extent_inode_elem *)node->aux; | 378 | (uintptr_t)node->aux; |
| 380 | list_add(&new_ref->list, &ref->list); | 379 | list_add(&new_ref->list, &ref->list); |
| 381 | } | 380 | } |
| 382 | ulist_reinit(parents); | 381 | ulist_reinit(parents); |
| @@ -462,6 +461,7 @@ static int __merge_refs(struct list_head *head, int mode) | |||
| 462 | pos2 = n2, n2 = pos2->next) { | 461 | pos2 = n2, n2 = pos2->next) { |
| 463 | struct __prelim_ref *ref2; | 462 | struct __prelim_ref *ref2; |
| 464 | struct __prelim_ref *xchg; | 463 | struct __prelim_ref *xchg; |
| 464 | struct extent_inode_elem *eie; | ||
| 465 | 465 | ||
| 466 | ref2 = list_entry(pos2, struct __prelim_ref, list); | 466 | ref2 = list_entry(pos2, struct __prelim_ref, list); |
| 467 | 467 | ||
| @@ -473,12 +473,20 @@ static int __merge_refs(struct list_head *head, int mode) | |||
| 473 | ref1 = ref2; | 473 | ref1 = ref2; |
| 474 | ref2 = xchg; | 474 | ref2 = xchg; |
| 475 | } | 475 | } |
| 476 | ref1->count += ref2->count; | ||
| 477 | } else { | 476 | } else { |
| 478 | if (ref1->parent != ref2->parent) | 477 | if (ref1->parent != ref2->parent) |
| 479 | continue; | 478 | continue; |
| 480 | ref1->count += ref2->count; | ||
| 481 | } | 479 | } |
| 480 | |||
| 481 | eie = ref1->inode_list; | ||
| 482 | while (eie && eie->next) | ||
| 483 | eie = eie->next; | ||
| 484 | if (eie) | ||
| 485 | eie->next = ref2->inode_list; | ||
| 486 | else | ||
| 487 | ref1->inode_list = ref2->inode_list; | ||
| 488 | ref1->count += ref2->count; | ||
| 489 | |||
| 482 | list_del(&ref2->list); | 490 | list_del(&ref2->list); |
| 483 | kfree(ref2); | 491 | kfree(ref2); |
| 484 | } | 492 | } |
| @@ -891,8 +899,7 @@ again: | |||
| 891 | while (!list_empty(&prefs)) { | 899 | while (!list_empty(&prefs)) { |
| 892 | ref = list_first_entry(&prefs, struct __prelim_ref, list); | 900 | ref = list_first_entry(&prefs, struct __prelim_ref, list); |
| 893 | list_del(&ref->list); | 901 | list_del(&ref->list); |
| 894 | if (ref->count < 0) | 902 | WARN_ON(ref->count < 0); |
| 895 | WARN_ON(1); | ||
| 896 | if (ref->count && ref->root_id && ref->parent == 0) { | 903 | if (ref->count && ref->root_id && ref->parent == 0) { |
| 897 | /* no parent == root of tree */ | 904 | /* no parent == root of tree */ |
| 898 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); | 905 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); |
| @@ -914,8 +921,8 @@ again: | |||
| 914 | free_extent_buffer(eb); | 921 | free_extent_buffer(eb); |
| 915 | } | 922 | } |
| 916 | ret = ulist_add_merge(refs, ref->parent, | 923 | ret = ulist_add_merge(refs, ref->parent, |
| 917 | (unsigned long)ref->inode_list, | 924 | (uintptr_t)ref->inode_list, |
| 918 | (unsigned long *)&eie, GFP_NOFS); | 925 | (u64 *)&eie, GFP_NOFS); |
| 919 | if (!ret && extent_item_pos) { | 926 | if (!ret && extent_item_pos) { |
| 920 | /* | 927 | /* |
| 921 | * we've recorded that parent, so we must extend | 928 | * we've recorded that parent, so we must extend |
| @@ -959,7 +966,7 @@ static void free_leaf_list(struct ulist *blocks) | |||
| 959 | while ((node = ulist_next(blocks, &uiter))) { | 966 | while ((node = ulist_next(blocks, &uiter))) { |
| 960 | if (!node->aux) | 967 | if (!node->aux) |
| 961 | continue; | 968 | continue; |
| 962 | eie = (struct extent_inode_elem *)node->aux; | 969 | eie = (struct extent_inode_elem *)(uintptr_t)node->aux; |
| 963 | for (; eie; eie = eie_next) { | 970 | for (; eie; eie = eie_next) { |
| 964 | eie_next = eie->next; | 971 | eie_next = eie->next; |
| 965 | kfree(eie); | 972 | kfree(eie); |
| @@ -1108,44 +1115,97 @@ static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | |||
| 1108 | found_key); | 1115 | found_key); |
| 1109 | } | 1116 | } |
| 1110 | 1117 | ||
| 1111 | /* | 1118 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, |
| 1112 | * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements | 1119 | u64 start_off, struct btrfs_path *path, |
| 1113 | * of the path are separated by '/' and the path is guaranteed to be | 1120 | struct btrfs_inode_extref **ret_extref, |
| 1114 | * 0-terminated. the path is only given within the current file system. | 1121 | u64 *found_off) |
| 1115 | * Therefore, it never starts with a '/'. the caller is responsible to provide | 1122 | { |
| 1116 | * "size" bytes in "dest". the dest buffer will be filled backwards. finally, | 1123 | int ret, slot; |
| 1117 | * the start point of the resulting string is returned. this pointer is within | 1124 | struct btrfs_key key; |
| 1118 | * dest, normally. | 1125 | struct btrfs_key found_key; |
| 1119 | * in case the path buffer would overflow, the pointer is decremented further | 1126 | struct btrfs_inode_extref *extref; |
| 1120 | * as if output was written to the buffer, though no more output is actually | 1127 | struct extent_buffer *leaf; |
| 1121 | * generated. that way, the caller can determine how much space would be | 1128 | unsigned long ptr; |
| 1122 | * required for the path to fit into the buffer. in that case, the returned | 1129 | |
| 1123 | * value will be smaller than dest. callers must check this! | 1130 | key.objectid = inode_objectid; |
| 1124 | */ | 1131 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); |
| 1125 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | 1132 | key.offset = start_off; |
| 1126 | struct btrfs_inode_ref *iref, | 1133 | |
| 1127 | struct extent_buffer *eb_in, u64 parent, | 1134 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 1128 | char *dest, u32 size) | 1135 | if (ret < 0) |
| 1136 | return ret; | ||
| 1137 | |||
| 1138 | while (1) { | ||
| 1139 | leaf = path->nodes[0]; | ||
| 1140 | slot = path->slots[0]; | ||
| 1141 | if (slot >= btrfs_header_nritems(leaf)) { | ||
| 1142 | /* | ||
| 1143 | * If the item at offset is not found, | ||
| 1144 | * btrfs_search_slot will point us to the slot | ||
| 1145 | * where it should be inserted. In our case | ||
| 1146 | * that will be the slot directly before the | ||
| 1147 | * next BTRFS_INODE_EXTREF_KEY item. In the case | ||
| 1148 | * that we're pointing to the last slot in a | ||
| 1149 | * leaf, we must move one leaf over. | ||
| 1150 | */ | ||
| 1151 | ret = btrfs_next_leaf(root, path); | ||
| 1152 | if (ret) { | ||
| 1153 | if (ret >= 1) | ||
| 1154 | ret = -ENOENT; | ||
| 1155 | break; | ||
| 1156 | } | ||
| 1157 | continue; | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 1161 | |||
| 1162 | /* | ||
| 1163 | * Check that we're still looking at an extended ref key for | ||
| 1164 | * this particular objectid. If we have different | ||
| 1165 | * objectid or type then there are no more to be found | ||
| 1166 | * in the tree and we can exit. | ||
| 1167 | */ | ||
| 1168 | ret = -ENOENT; | ||
| 1169 | if (found_key.objectid != inode_objectid) | ||
| 1170 | break; | ||
| 1171 | if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY) | ||
| 1172 | break; | ||
| 1173 | |||
| 1174 | ret = 0; | ||
| 1175 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 1176 | extref = (struct btrfs_inode_extref *)ptr; | ||
| 1177 | *ret_extref = extref; | ||
| 1178 | if (found_off) | ||
| 1179 | *found_off = found_key.offset; | ||
| 1180 | break; | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | return ret; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | ||
| 1187 | u32 name_len, unsigned long name_off, | ||
| 1188 | struct extent_buffer *eb_in, u64 parent, | ||
| 1189 | char *dest, u32 size) | ||
| 1129 | { | 1190 | { |
| 1130 | u32 len; | ||
| 1131 | int slot; | 1191 | int slot; |
| 1132 | u64 next_inum; | 1192 | u64 next_inum; |
| 1133 | int ret; | 1193 | int ret; |
| 1134 | s64 bytes_left = size - 1; | 1194 | s64 bytes_left = ((s64)size) - 1; |
| 1135 | struct extent_buffer *eb = eb_in; | 1195 | struct extent_buffer *eb = eb_in; |
| 1136 | struct btrfs_key found_key; | 1196 | struct btrfs_key found_key; |
| 1137 | int leave_spinning = path->leave_spinning; | 1197 | int leave_spinning = path->leave_spinning; |
| 1198 | struct btrfs_inode_ref *iref; | ||
| 1138 | 1199 | ||
| 1139 | if (bytes_left >= 0) | 1200 | if (bytes_left >= 0) |
| 1140 | dest[bytes_left] = '\0'; | 1201 | dest[bytes_left] = '\0'; |
| 1141 | 1202 | ||
| 1142 | path->leave_spinning = 1; | 1203 | path->leave_spinning = 1; |
| 1143 | while (1) { | 1204 | while (1) { |
| 1144 | len = btrfs_inode_ref_name_len(eb, iref); | 1205 | bytes_left -= name_len; |
| 1145 | bytes_left -= len; | ||
| 1146 | if (bytes_left >= 0) | 1206 | if (bytes_left >= 0) |
| 1147 | read_extent_buffer(eb, dest + bytes_left, | 1207 | read_extent_buffer(eb, dest + bytes_left, |
| 1148 | (unsigned long)(iref + 1), len); | 1208 | name_off, name_len); |
| 1149 | if (eb != eb_in) { | 1209 | if (eb != eb_in) { |
| 1150 | btrfs_tree_read_unlock_blocking(eb); | 1210 | btrfs_tree_read_unlock_blocking(eb); |
| 1151 | free_extent_buffer(eb); | 1211 | free_extent_buffer(eb); |
| @@ -1155,6 +1215,7 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
| 1155 | ret = -ENOENT; | 1215 | ret = -ENOENT; |
| 1156 | if (ret) | 1216 | if (ret) |
| 1157 | break; | 1217 | break; |
| 1218 | |||
| 1158 | next_inum = found_key.offset; | 1219 | next_inum = found_key.offset; |
| 1159 | 1220 | ||
| 1160 | /* regular exit ahead */ | 1221 | /* regular exit ahead */ |
| @@ -1170,8 +1231,11 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
| 1170 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | 1231 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); |
| 1171 | } | 1232 | } |
| 1172 | btrfs_release_path(path); | 1233 | btrfs_release_path(path); |
| 1173 | |||
| 1174 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); | 1234 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); |
| 1235 | |||
| 1236 | name_len = btrfs_inode_ref_name_len(eb, iref); | ||
| 1237 | name_off = (unsigned long)(iref + 1); | ||
| 1238 | |||
| 1175 | parent = next_inum; | 1239 | parent = next_inum; |
| 1176 | --bytes_left; | 1240 | --bytes_left; |
| 1177 | if (bytes_left >= 0) | 1241 | if (bytes_left >= 0) |
| @@ -1188,12 +1252,39 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
| 1188 | } | 1252 | } |
| 1189 | 1253 | ||
| 1190 | /* | 1254 | /* |
| 1255 | * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements | ||
| 1256 | * of the path are separated by '/' and the path is guaranteed to be | ||
| 1257 | * 0-terminated. the path is only given within the current file system. | ||
| 1258 | * Therefore, it never starts with a '/'. the caller is responsible to provide | ||
| 1259 | * "size" bytes in "dest". the dest buffer will be filled backwards. finally, | ||
| 1260 | * the start point of the resulting string is returned. this pointer is within | ||
| 1261 | * dest, normally. | ||
| 1262 | * in case the path buffer would overflow, the pointer is decremented further | ||
| 1263 | * as if output was written to the buffer, though no more output is actually | ||
| 1264 | * generated. that way, the caller can determine how much space would be | ||
| 1265 | * required for the path to fit into the buffer. in that case, the returned | ||
| 1266 | * value will be smaller than dest. callers must check this! | ||
| 1267 | */ | ||
| 1268 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, | ||
| 1269 | struct btrfs_path *path, | ||
| 1270 | struct btrfs_inode_ref *iref, | ||
| 1271 | struct extent_buffer *eb_in, u64 parent, | ||
| 1272 | char *dest, u32 size) | ||
| 1273 | { | ||
| 1274 | return btrfs_ref_to_path(fs_root, path, | ||
| 1275 | btrfs_inode_ref_name_len(eb_in, iref), | ||
| 1276 | (unsigned long)(iref + 1), | ||
| 1277 | eb_in, parent, dest, size); | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | /* | ||
| 1191 | * this makes the path point to (logical EXTENT_ITEM *) | 1281 | * this makes the path point to (logical EXTENT_ITEM *) |
| 1192 | * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for | 1282 | * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for |
| 1193 | * tree blocks and <0 on error. | 1283 | * tree blocks and <0 on error. |
| 1194 | */ | 1284 | */ |
| 1195 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | 1285 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, |
| 1196 | struct btrfs_path *path, struct btrfs_key *found_key) | 1286 | struct btrfs_path *path, struct btrfs_key *found_key, |
| 1287 | u64 *flags_ret) | ||
| 1197 | { | 1288 | { |
| 1198 | int ret; | 1289 | int ret; |
| 1199 | u64 flags; | 1290 | u64 flags; |
| @@ -1237,10 +1328,17 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
| 1237 | (unsigned long long)found_key->objectid, | 1328 | (unsigned long long)found_key->objectid, |
| 1238 | (unsigned long long)found_key->offset, | 1329 | (unsigned long long)found_key->offset, |
| 1239 | (unsigned long long)flags, item_size); | 1330 | (unsigned long long)flags, item_size); |
| 1240 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 1331 | |
| 1241 | return BTRFS_EXTENT_FLAG_TREE_BLOCK; | 1332 | WARN_ON(!flags_ret); |
| 1242 | if (flags & BTRFS_EXTENT_FLAG_DATA) | 1333 | if (flags_ret) { |
| 1243 | return BTRFS_EXTENT_FLAG_DATA; | 1334 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
| 1335 | *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK; | ||
| 1336 | else if (flags & BTRFS_EXTENT_FLAG_DATA) | ||
| 1337 | *flags_ret = BTRFS_EXTENT_FLAG_DATA; | ||
| 1338 | else | ||
| 1339 | BUG_ON(1); | ||
| 1340 | return 0; | ||
| 1341 | } | ||
| 1244 | 1342 | ||
| 1245 | return -EIO; | 1343 | return -EIO; |
| 1246 | } | 1344 | } |
| @@ -1404,12 +1502,13 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
| 1404 | ULIST_ITER_INIT(&root_uiter); | 1502 | ULIST_ITER_INIT(&root_uiter); |
| 1405 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { | 1503 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { |
| 1406 | pr_debug("root %llu references leaf %llu, data list " | 1504 | pr_debug("root %llu references leaf %llu, data list " |
| 1407 | "%#lx\n", root_node->val, ref_node->val, | 1505 | "%#llx\n", root_node->val, ref_node->val, |
| 1408 | ref_node->aux); | 1506 | (long long)ref_node->aux); |
| 1409 | ret = iterate_leaf_refs( | 1507 | ret = iterate_leaf_refs((struct extent_inode_elem *) |
| 1410 | (struct extent_inode_elem *)ref_node->aux, | 1508 | (uintptr_t)ref_node->aux, |
| 1411 | root_node->val, extent_item_objectid, | 1509 | root_node->val, |
| 1412 | iterate, ctx); | 1510 | extent_item_objectid, |
| 1511 | iterate, ctx); | ||
| 1413 | } | 1512 | } |
| 1414 | ulist_free(roots); | 1513 | ulist_free(roots); |
| 1415 | roots = NULL; | 1514 | roots = NULL; |
| @@ -1432,15 +1531,15 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
| 1432 | { | 1531 | { |
| 1433 | int ret; | 1532 | int ret; |
| 1434 | u64 extent_item_pos; | 1533 | u64 extent_item_pos; |
| 1534 | u64 flags = 0; | ||
| 1435 | struct btrfs_key found_key; | 1535 | struct btrfs_key found_key; |
| 1436 | int search_commit_root = path->search_commit_root; | 1536 | int search_commit_root = path->search_commit_root; |
| 1437 | 1537 | ||
| 1438 | ret = extent_from_logical(fs_info, logical, path, | 1538 | ret = extent_from_logical(fs_info, logical, path, &found_key, &flags); |
| 1439 | &found_key); | ||
| 1440 | btrfs_release_path(path); | 1539 | btrfs_release_path(path); |
| 1441 | if (ret < 0) | 1540 | if (ret < 0) |
| 1442 | return ret; | 1541 | return ret; |
| 1443 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | 1542 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) |
| 1444 | return -EINVAL; | 1543 | return -EINVAL; |
| 1445 | 1544 | ||
| 1446 | extent_item_pos = logical - found_key.objectid; | 1545 | extent_item_pos = logical - found_key.objectid; |
| @@ -1451,9 +1550,12 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |||
| 1451 | return ret; | 1550 | return ret; |
| 1452 | } | 1551 | } |
| 1453 | 1552 | ||
| 1454 | static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | 1553 | typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off, |
| 1455 | struct btrfs_path *path, | 1554 | struct extent_buffer *eb, void *ctx); |
| 1456 | iterate_irefs_t *iterate, void *ctx) | 1555 | |
| 1556 | static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, | ||
| 1557 | struct btrfs_path *path, | ||
| 1558 | iterate_irefs_t *iterate, void *ctx) | ||
| 1457 | { | 1559 | { |
| 1458 | int ret = 0; | 1560 | int ret = 0; |
| 1459 | int slot; | 1561 | int slot; |
| @@ -1470,7 +1572,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
| 1470 | while (!ret) { | 1572 | while (!ret) { |
| 1471 | path->leave_spinning = 1; | 1573 | path->leave_spinning = 1; |
| 1472 | ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, | 1574 | ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, |
| 1473 | &found_key); | 1575 | &found_key); |
| 1474 | if (ret < 0) | 1576 | if (ret < 0) |
| 1475 | break; | 1577 | break; |
| 1476 | if (ret) { | 1578 | if (ret) { |
| @@ -1498,7 +1600,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
| 1498 | "tree %llu\n", cur, | 1600 | "tree %llu\n", cur, |
| 1499 | (unsigned long long)found_key.objectid, | 1601 | (unsigned long long)found_key.objectid, |
| 1500 | (unsigned long long)fs_root->objectid); | 1602 | (unsigned long long)fs_root->objectid); |
| 1501 | ret = iterate(parent, iref, eb, ctx); | 1603 | ret = iterate(parent, name_len, |
| 1604 | (unsigned long)(iref + 1), eb, ctx); | ||
| 1502 | if (ret) | 1605 | if (ret) |
| 1503 | break; | 1606 | break; |
| 1504 | len = sizeof(*iref) + name_len; | 1607 | len = sizeof(*iref) + name_len; |
| @@ -1513,12 +1616,98 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | |||
| 1513 | return ret; | 1616 | return ret; |
| 1514 | } | 1617 | } |
| 1515 | 1618 | ||
| 1619 | static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, | ||
| 1620 | struct btrfs_path *path, | ||
| 1621 | iterate_irefs_t *iterate, void *ctx) | ||
| 1622 | { | ||
| 1623 | int ret; | ||
| 1624 | int slot; | ||
| 1625 | u64 offset = 0; | ||
| 1626 | u64 parent; | ||
| 1627 | int found = 0; | ||
| 1628 | struct extent_buffer *eb; | ||
| 1629 | struct btrfs_inode_extref *extref; | ||
| 1630 | struct extent_buffer *leaf; | ||
| 1631 | u32 item_size; | ||
| 1632 | u32 cur_offset; | ||
| 1633 | unsigned long ptr; | ||
| 1634 | |||
| 1635 | while (1) { | ||
| 1636 | ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref, | ||
| 1637 | &offset); | ||
| 1638 | if (ret < 0) | ||
| 1639 | break; | ||
| 1640 | if (ret) { | ||
| 1641 | ret = found ? 0 : -ENOENT; | ||
| 1642 | break; | ||
| 1643 | } | ||
| 1644 | ++found; | ||
| 1645 | |||
| 1646 | slot = path->slots[0]; | ||
| 1647 | eb = path->nodes[0]; | ||
| 1648 | /* make sure we can use eb after releasing the path */ | ||
| 1649 | atomic_inc(&eb->refs); | ||
| 1650 | |||
| 1651 | btrfs_tree_read_lock(eb); | ||
| 1652 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | ||
| 1653 | btrfs_release_path(path); | ||
| 1654 | |||
| 1655 | leaf = path->nodes[0]; | ||
| 1656 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 1657 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 1658 | cur_offset = 0; | ||
| 1659 | |||
| 1660 | while (cur_offset < item_size) { | ||
| 1661 | u32 name_len; | ||
| 1662 | |||
| 1663 | extref = (struct btrfs_inode_extref *)(ptr + cur_offset); | ||
| 1664 | parent = btrfs_inode_extref_parent(eb, extref); | ||
| 1665 | name_len = btrfs_inode_extref_name_len(eb, extref); | ||
| 1666 | ret = iterate(parent, name_len, | ||
| 1667 | (unsigned long)&extref->name, eb, ctx); | ||
| 1668 | if (ret) | ||
| 1669 | break; | ||
| 1670 | |||
| 1671 | cur_offset += btrfs_inode_extref_name_len(leaf, extref); | ||
| 1672 | cur_offset += sizeof(*extref); | ||
| 1673 | } | ||
| 1674 | btrfs_tree_read_unlock_blocking(eb); | ||
| 1675 | free_extent_buffer(eb); | ||
| 1676 | |||
| 1677 | offset++; | ||
| 1678 | } | ||
| 1679 | |||
| 1680 | btrfs_release_path(path); | ||
| 1681 | |||
| 1682 | return ret; | ||
| 1683 | } | ||
| 1684 | |||
| 1685 | static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, | ||
| 1686 | struct btrfs_path *path, iterate_irefs_t *iterate, | ||
| 1687 | void *ctx) | ||
| 1688 | { | ||
| 1689 | int ret; | ||
| 1690 | int found_refs = 0; | ||
| 1691 | |||
| 1692 | ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx); | ||
| 1693 | if (!ret) | ||
| 1694 | ++found_refs; | ||
| 1695 | else if (ret != -ENOENT) | ||
| 1696 | return ret; | ||
| 1697 | |||
| 1698 | ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx); | ||
| 1699 | if (ret == -ENOENT && found_refs) | ||
| 1700 | return 0; | ||
| 1701 | |||
| 1702 | return ret; | ||
| 1703 | } | ||
| 1704 | |||
| 1516 | /* | 1705 | /* |
| 1517 | * returns 0 if the path could be dumped (probably truncated) | 1706 | * returns 0 if the path could be dumped (probably truncated) |
| 1518 | * returns <0 in case of an error | 1707 | * returns <0 in case of an error |
| 1519 | */ | 1708 | */ |
| 1520 | static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | 1709 | static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, |
| 1521 | struct extent_buffer *eb, void *ctx) | 1710 | struct extent_buffer *eb, void *ctx) |
| 1522 | { | 1711 | { |
| 1523 | struct inode_fs_paths *ipath = ctx; | 1712 | struct inode_fs_paths *ipath = ctx; |
| 1524 | char *fspath; | 1713 | char *fspath; |
| @@ -1531,20 +1720,16 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | |||
| 1531 | ipath->fspath->bytes_left - s_ptr : 0; | 1720 | ipath->fspath->bytes_left - s_ptr : 0; |
| 1532 | 1721 | ||
| 1533 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; | 1722 | fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; |
| 1534 | fspath = btrfs_iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb, | 1723 | fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len, |
| 1535 | inum, fspath_min, bytes_left); | 1724 | name_off, eb, inum, fspath_min, bytes_left); |
| 1536 | if (IS_ERR(fspath)) | 1725 | if (IS_ERR(fspath)) |
| 1537 | return PTR_ERR(fspath); | 1726 | return PTR_ERR(fspath); |
| 1538 | 1727 | ||
| 1539 | if (fspath > fspath_min) { | 1728 | if (fspath > fspath_min) { |
| 1540 | pr_debug("path resolved: %s\n", fspath); | ||
| 1541 | ipath->fspath->val[i] = (u64)(unsigned long)fspath; | 1729 | ipath->fspath->val[i] = (u64)(unsigned long)fspath; |
| 1542 | ++ipath->fspath->elem_cnt; | 1730 | ++ipath->fspath->elem_cnt; |
| 1543 | ipath->fspath->bytes_left = fspath - fspath_min; | 1731 | ipath->fspath->bytes_left = fspath - fspath_min; |
| 1544 | } else { | 1732 | } else { |
| 1545 | pr_debug("missed path, not enough space. missing bytes: %lu, " | ||
| 1546 | "constructed so far: %s\n", | ||
| 1547 | (unsigned long)(fspath_min - fspath), fspath_min); | ||
| 1548 | ++ipath->fspath->elem_missed; | 1733 | ++ipath->fspath->elem_missed; |
| 1549 | ipath->fspath->bytes_missing += fspath_min - fspath; | 1734 | ipath->fspath->bytes_missing += fspath_min - fspath; |
| 1550 | ipath->fspath->bytes_left = 0; | 1735 | ipath->fspath->bytes_left = 0; |
| @@ -1566,7 +1751,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref, | |||
| 1566 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) | 1751 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) |
| 1567 | { | 1752 | { |
| 1568 | return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path, | 1753 | return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path, |
| 1569 | inode_to_path, ipath); | 1754 | inode_to_path, ipath); |
| 1570 | } | 1755 | } |
| 1571 | 1756 | ||
| 1572 | struct btrfs_data_container *init_data_container(u32 total_bytes) | 1757 | struct btrfs_data_container *init_data_container(u32 total_bytes) |
| @@ -1575,7 +1760,7 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) | |||
| 1575 | size_t alloc_bytes; | 1760 | size_t alloc_bytes; |
| 1576 | 1761 | ||
| 1577 | alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); | 1762 | alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); |
| 1578 | data = kmalloc(alloc_bytes, GFP_NOFS); | 1763 | data = vmalloc(alloc_bytes); |
| 1579 | if (!data) | 1764 | if (!data) |
| 1580 | return ERR_PTR(-ENOMEM); | 1765 | return ERR_PTR(-ENOMEM); |
| 1581 | 1766 | ||
| @@ -1626,6 +1811,6 @@ void free_ipath(struct inode_fs_paths *ipath) | |||
| 1626 | { | 1811 | { |
| 1627 | if (!ipath) | 1812 | if (!ipath) |
| 1628 | return; | 1813 | return; |
| 1629 | kfree(ipath->fspath); | 1814 | vfree(ipath->fspath); |
| 1630 | kfree(ipath); | 1815 | kfree(ipath); |
| 1631 | } | 1816 | } |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 032f4dc7eab8..d61feca79455 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
| @@ -33,14 +33,13 @@ struct inode_fs_paths { | |||
| 33 | 33 | ||
| 34 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, | 34 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, |
| 35 | void *ctx); | 35 | void *ctx); |
| 36 | typedef int (iterate_irefs_t)(u64 parent, struct btrfs_inode_ref *iref, | ||
| 37 | struct extent_buffer *eb, void *ctx); | ||
| 38 | 36 | ||
| 39 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, | 37 | int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, |
| 40 | struct btrfs_path *path); | 38 | struct btrfs_path *path); |
| 41 | 39 | ||
| 42 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | 40 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, |
| 43 | struct btrfs_path *path, struct btrfs_key *found_key); | 41 | struct btrfs_path *path, struct btrfs_key *found_key, |
| 42 | u64 *flags); | ||
| 44 | 43 | ||
| 45 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | 44 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, |
| 46 | struct btrfs_extent_item *ei, u32 item_size, | 45 | struct btrfs_extent_item *ei, u32 item_size, |
| @@ -63,10 +62,19 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
| 63 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | 62 | char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, |
| 64 | struct btrfs_inode_ref *iref, struct extent_buffer *eb, | 63 | struct btrfs_inode_ref *iref, struct extent_buffer *eb, |
| 65 | u64 parent, char *dest, u32 size); | 64 | u64 parent, char *dest, u32 size); |
| 65 | char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | ||
| 66 | u32 name_len, unsigned long name_off, | ||
| 67 | struct extent_buffer *eb_in, u64 parent, | ||
| 68 | char *dest, u32 size); | ||
| 66 | 69 | ||
| 67 | struct btrfs_data_container *init_data_container(u32 total_bytes); | 70 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
| 68 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | 71 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, |
| 69 | struct btrfs_path *path); | 72 | struct btrfs_path *path); |
| 70 | void free_ipath(struct inode_fs_paths *ipath); | 73 | void free_ipath(struct inode_fs_paths *ipath); |
| 71 | 74 | ||
| 75 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, | ||
| 76 | u64 start_off, struct btrfs_path *path, | ||
| 77 | struct btrfs_inode_extref **ret_extref, | ||
| 78 | u64 *found_off); | ||
| 79 | |||
| 72 | #endif | 80 | #endif |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 5b2ad6bc4fe7..2a8c242bc4f5 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -38,6 +38,8 @@ | |||
| 38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 | 38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 |
| 39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 | 39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 |
| 40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 | 40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 |
| 41 | #define BTRFS_INODE_NEEDS_FULL_SYNC 7 | ||
| 42 | #define BTRFS_INODE_COPY_EVERYTHING 8 | ||
| 41 | 43 | ||
| 42 | /* in memory btrfs inode */ | 44 | /* in memory btrfs inode */ |
| 43 | struct btrfs_inode { | 45 | struct btrfs_inode { |
| @@ -89,6 +91,9 @@ struct btrfs_inode { | |||
| 89 | 91 | ||
| 90 | unsigned long runtime_flags; | 92 | unsigned long runtime_flags; |
| 91 | 93 | ||
| 94 | /* Keep track of who's O_SYNC/fsycing currently */ | ||
| 95 | atomic_t sync_writers; | ||
| 96 | |||
| 92 | /* full 64 bit generation number, struct vfs_inode doesn't have a big | 97 | /* full 64 bit generation number, struct vfs_inode doesn't have a big |
| 93 | * enough field for this. | 98 | * enough field for this. |
| 94 | */ | 99 | */ |
| @@ -143,6 +148,9 @@ struct btrfs_inode { | |||
| 143 | /* flags field from the on disk inode */ | 148 | /* flags field from the on disk inode */ |
| 144 | u32 flags; | 149 | u32 flags; |
| 145 | 150 | ||
| 151 | /* a local copy of root's last_log_commit */ | ||
| 152 | unsigned long last_log_commit; | ||
| 153 | |||
| 146 | /* | 154 | /* |
| 147 | * Counters to keep track of the number of extent item's we may use due | 155 | * Counters to keep track of the number of extent item's we may use due |
| 148 | * to delalloc and such. outstanding_extents is the number of extent | 156 | * to delalloc and such. outstanding_extents is the number of extent |
| @@ -202,15 +210,10 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode) | |||
| 202 | 210 | ||
| 203 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) | 211 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) |
| 204 | { | 212 | { |
| 205 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 206 | int ret = 0; | ||
| 207 | |||
| 208 | mutex_lock(&root->log_mutex); | ||
| 209 | if (BTRFS_I(inode)->logged_trans == generation && | 213 | if (BTRFS_I(inode)->logged_trans == generation && |
| 210 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | 214 | BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit) |
| 211 | ret = 1; | 215 | return 1; |
| 212 | mutex_unlock(&root->log_mutex); | 216 | return 0; |
| 213 | return ret; | ||
| 214 | } | 217 | } |
| 215 | 218 | ||
| 216 | #endif | 219 | #endif |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 9197e2e33407..11d47bfb62b4 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
| @@ -37,8 +37,9 @@ | |||
| 37 | * the file system was mounted, (i.e., they have been | 37 | * the file system was mounted, (i.e., they have been |
| 38 | * referenced by the super block) or they have been | 38 | * referenced by the super block) or they have been |
| 39 | * written since then and the write completion callback | 39 | * written since then and the write completion callback |
| 40 | * was called and a FLUSH request to the device where | 40 | * was called and no write error was indicated and a |
| 41 | * these blocks are located was received and completed. | 41 | * FLUSH request to the device where these blocks are |
| 42 | * located was received and completed. | ||
| 42 | * 2b. All referenced blocks need to have a generation | 43 | * 2b. All referenced blocks need to have a generation |
| 43 | * number which is equal to the parent's number. | 44 | * number which is equal to the parent's number. |
| 44 | * | 45 | * |
| @@ -136,7 +137,7 @@ struct btrfsic_block { | |||
| 136 | unsigned int never_written:1; /* block was added because it was | 137 | unsigned int never_written:1; /* block was added because it was |
| 137 | * referenced, not because it was | 138 | * referenced, not because it was |
| 138 | * written */ | 139 | * written */ |
| 139 | unsigned int mirror_num:2; /* large enough to hold | 140 | unsigned int mirror_num; /* large enough to hold |
| 140 | * BTRFS_SUPER_MIRROR_MAX */ | 141 | * BTRFS_SUPER_MIRROR_MAX */ |
| 141 | struct btrfsic_dev_state *dev_state; | 142 | struct btrfsic_dev_state *dev_state; |
| 142 | u64 dev_bytenr; /* key, physical byte num on disk */ | 143 | u64 dev_bytenr; /* key, physical byte num on disk */ |
| @@ -722,7 +723,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
| 722 | } | 723 | } |
| 723 | 724 | ||
| 724 | num_copies = | 725 | num_copies = |
| 725 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 726 | btrfs_num_copies(state->root->fs_info, |
| 726 | next_bytenr, state->metablock_size); | 727 | next_bytenr, state->metablock_size); |
| 727 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 728 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 728 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 729 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -902,7 +903,7 @@ static int btrfsic_process_superblock_dev_mirror( | |||
| 902 | } | 903 | } |
| 903 | 904 | ||
| 904 | num_copies = | 905 | num_copies = |
| 905 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 906 | btrfs_num_copies(state->root->fs_info, |
| 906 | next_bytenr, state->metablock_size); | 907 | next_bytenr, state->metablock_size); |
| 907 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 908 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 908 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 909 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -1286,7 +1287,7 @@ static int btrfsic_create_link_to_next_block( | |||
| 1286 | *next_blockp = NULL; | 1287 | *next_blockp = NULL; |
| 1287 | if (0 == *num_copiesp) { | 1288 | if (0 == *num_copiesp) { |
| 1288 | *num_copiesp = | 1289 | *num_copiesp = |
| 1289 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1290 | btrfs_num_copies(state->root->fs_info, |
| 1290 | next_bytenr, state->metablock_size); | 1291 | next_bytenr, state->metablock_size); |
| 1291 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1292 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 1292 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1293 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -1488,7 +1489,7 @@ static int btrfsic_handle_extent_data( | |||
| 1488 | chunk_len = num_bytes; | 1489 | chunk_len = num_bytes; |
| 1489 | 1490 | ||
| 1490 | num_copies = | 1491 | num_copies = |
| 1491 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1492 | btrfs_num_copies(state->root->fs_info, |
| 1492 | next_bytenr, state->datablock_size); | 1493 | next_bytenr, state->datablock_size); |
| 1493 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1494 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 1494 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1495 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -1581,9 +1582,21 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | |||
| 1581 | struct btrfs_device *device; | 1582 | struct btrfs_device *device; |
| 1582 | 1583 | ||
| 1583 | length = len; | 1584 | length = len; |
| 1584 | ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ, | 1585 | ret = btrfs_map_block(state->root->fs_info, READ, |
| 1585 | bytenr, &length, &multi, mirror_num); | 1586 | bytenr, &length, &multi, mirror_num); |
| 1586 | 1587 | ||
| 1588 | if (ret) { | ||
| 1589 | block_ctx_out->start = 0; | ||
| 1590 | block_ctx_out->dev_bytenr = 0; | ||
| 1591 | block_ctx_out->len = 0; | ||
| 1592 | block_ctx_out->dev = NULL; | ||
| 1593 | block_ctx_out->datav = NULL; | ||
| 1594 | block_ctx_out->pagev = NULL; | ||
| 1595 | block_ctx_out->mem_to_free = NULL; | ||
| 1596 | |||
| 1597 | return ret; | ||
| 1598 | } | ||
| 1599 | |||
| 1587 | device = multi->stripes[0].dev; | 1600 | device = multi->stripes[0].dev; |
| 1588 | block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); | 1601 | block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); |
| 1589 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; | 1602 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; |
| @@ -1593,8 +1606,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | |||
| 1593 | block_ctx_out->pagev = NULL; | 1606 | block_ctx_out->pagev = NULL; |
| 1594 | block_ctx_out->mem_to_free = NULL; | 1607 | block_ctx_out->mem_to_free = NULL; |
| 1595 | 1608 | ||
| 1596 | if (0 == ret) | 1609 | kfree(multi); |
| 1597 | kfree(multi); | ||
| 1598 | if (NULL == block_ctx_out->dev) { | 1610 | if (NULL == block_ctx_out->dev) { |
| 1599 | ret = -ENXIO; | 1611 | ret = -ENXIO; |
| 1600 | printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); | 1612 | printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); |
| @@ -2462,7 +2474,7 @@ static int btrfsic_process_written_superblock( | |||
| 2462 | } | 2474 | } |
| 2463 | 2475 | ||
| 2464 | num_copies = | 2476 | num_copies = |
| 2465 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2477 | btrfs_num_copies(state->root->fs_info, |
| 2466 | next_bytenr, BTRFS_SUPER_INFO_SIZE); | 2478 | next_bytenr, BTRFS_SUPER_INFO_SIZE); |
| 2467 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 2479 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 2468 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 2480 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -2601,6 +2613,17 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, | |||
| 2601 | (unsigned long long)l->block_ref_to->dev_bytenr, | 2613 | (unsigned long long)l->block_ref_to->dev_bytenr, |
| 2602 | l->block_ref_to->mirror_num); | 2614 | l->block_ref_to->mirror_num); |
| 2603 | ret = -1; | 2615 | ret = -1; |
| 2616 | } else if (l->block_ref_to->iodone_w_error) { | ||
| 2617 | printk(KERN_INFO "btrfs: attempt to write superblock" | ||
| 2618 | " which references block %c @%llu (%s/%llu/%d)" | ||
| 2619 | " which has write error!\n", | ||
| 2620 | btrfsic_get_block_type(state, l->block_ref_to), | ||
| 2621 | (unsigned long long) | ||
| 2622 | l->block_ref_to->logical_bytenr, | ||
| 2623 | l->block_ref_to->dev_state->name, | ||
| 2624 | (unsigned long long)l->block_ref_to->dev_bytenr, | ||
| 2625 | l->block_ref_to->mirror_num); | ||
| 2626 | ret = -1; | ||
| 2604 | } else if (l->parent_generation != | 2627 | } else if (l->parent_generation != |
| 2605 | l->block_ref_to->generation && | 2628 | l->block_ref_to->generation && |
| 2606 | BTRFSIC_GENERATION_UNKNOWN != | 2629 | BTRFSIC_GENERATION_UNKNOWN != |
| @@ -2948,7 +2971,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | |||
| 2948 | struct btrfsic_block_data_ctx block_ctx; | 2971 | struct btrfsic_block_data_ctx block_ctx; |
| 2949 | int match = 0; | 2972 | int match = 0; |
| 2950 | 2973 | ||
| 2951 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2974 | num_copies = btrfs_num_copies(state->root->fs_info, |
| 2952 | bytenr, state->metablock_size); | 2975 | bytenr, state->metablock_size); |
| 2953 | 2976 | ||
| 2954 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | 2977 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 43d1c5a3a030..94ab2f80e7e3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -577,6 +577,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 577 | u64 em_start; | 577 | u64 em_start; |
| 578 | struct extent_map *em; | 578 | struct extent_map *em; |
| 579 | int ret = -ENOMEM; | 579 | int ret = -ENOMEM; |
| 580 | int faili = 0; | ||
| 580 | u32 *sums; | 581 | u32 *sums; |
| 581 | 582 | ||
| 582 | tree = &BTRFS_I(inode)->io_tree; | 583 | tree = &BTRFS_I(inode)->io_tree; |
| @@ -626,9 +627,13 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 626 | for (pg_index = 0; pg_index < nr_pages; pg_index++) { | 627 | for (pg_index = 0; pg_index < nr_pages; pg_index++) { |
| 627 | cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS | | 628 | cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS | |
| 628 | __GFP_HIGHMEM); | 629 | __GFP_HIGHMEM); |
| 629 | if (!cb->compressed_pages[pg_index]) | 630 | if (!cb->compressed_pages[pg_index]) { |
| 631 | faili = pg_index - 1; | ||
| 632 | ret = -ENOMEM; | ||
| 630 | goto fail2; | 633 | goto fail2; |
| 634 | } | ||
| 631 | } | 635 | } |
| 636 | faili = nr_pages - 1; | ||
| 632 | cb->nr_pages = nr_pages; | 637 | cb->nr_pages = nr_pages; |
| 633 | 638 | ||
| 634 | add_ra_bio_pages(inode, em_start + em_len, cb); | 639 | add_ra_bio_pages(inode, em_start + em_len, cb); |
| @@ -682,7 +687,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 682 | 687 | ||
| 683 | ret = btrfs_map_bio(root, READ, comp_bio, | 688 | ret = btrfs_map_bio(root, READ, comp_bio, |
| 684 | mirror_num, 0); | 689 | mirror_num, 0); |
| 685 | BUG_ON(ret); /* -ENOMEM */ | 690 | if (ret) |
| 691 | bio_endio(comp_bio, ret); | ||
| 686 | 692 | ||
| 687 | bio_put(comp_bio); | 693 | bio_put(comp_bio); |
| 688 | 694 | ||
| @@ -707,14 +713,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 707 | } | 713 | } |
| 708 | 714 | ||
| 709 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); | 715 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); |
| 710 | BUG_ON(ret); /* -ENOMEM */ | 716 | if (ret) |
| 717 | bio_endio(comp_bio, ret); | ||
| 711 | 718 | ||
| 712 | bio_put(comp_bio); | 719 | bio_put(comp_bio); |
| 713 | return 0; | 720 | return 0; |
| 714 | 721 | ||
| 715 | fail2: | 722 | fail2: |
| 716 | for (pg_index = 0; pg_index < nr_pages; pg_index++) | 723 | while (faili >= 0) { |
| 717 | free_page((unsigned long)cb->compressed_pages[pg_index]); | 724 | __free_page(cb->compressed_pages[faili]); |
| 725 | faili--; | ||
| 726 | } | ||
| 718 | 727 | ||
| 719 | kfree(cb->compressed_pages); | 728 | kfree(cb->compressed_pages); |
| 720 | fail1: | 729 | fail1: |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6d183f60d63a..c7b67cf24bba 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -38,8 +38,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
| 38 | struct extent_buffer *dst_buf, | 38 | struct extent_buffer *dst_buf, |
| 39 | struct extent_buffer *src_buf); | 39 | struct extent_buffer *src_buf); |
| 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 41 | struct btrfs_path *path, int level, int slot, | 41 | struct btrfs_path *path, int level, int slot); |
| 42 | int tree_mod_log); | ||
| 43 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | 42 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, |
| 44 | struct extent_buffer *eb); | 43 | struct extent_buffer *eb); |
| 45 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, | 44 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, |
| @@ -596,6 +595,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | |||
| 596 | if (tree_mod_dont_log(fs_info, eb)) | 595 | if (tree_mod_dont_log(fs_info, eb)) |
| 597 | return 0; | 596 | return 0; |
| 598 | 597 | ||
| 598 | /* | ||
| 599 | * When we override something during the move, we log these removals. | ||
| 600 | * This can only happen when we move towards the beginning of the | ||
| 601 | * buffer, i.e. dst_slot < src_slot. | ||
| 602 | */ | ||
| 599 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | 603 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { |
| 600 | ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, | 604 | ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, |
| 601 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | 605 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); |
| @@ -647,8 +651,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | |||
| 647 | if (tree_mod_dont_log(fs_info, NULL)) | 651 | if (tree_mod_dont_log(fs_info, NULL)) |
| 648 | return 0; | 652 | return 0; |
| 649 | 653 | ||
| 650 | __tree_mod_log_free_eb(fs_info, old_root); | ||
| 651 | |||
| 652 | ret = tree_mod_alloc(fs_info, flags, &tm); | 654 | ret = tree_mod_alloc(fs_info, flags, &tm); |
| 653 | if (ret < 0) | 655 | if (ret < 0) |
| 654 | goto out; | 656 | goto out; |
| @@ -773,8 +775,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | |||
| 773 | 775 | ||
| 774 | static noinline void | 776 | static noinline void |
| 775 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | 777 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, |
| 776 | struct extent_buffer *eb, | 778 | struct extent_buffer *eb, int slot, int atomic) |
| 777 | struct btrfs_disk_key *disk_key, int slot, int atomic) | ||
| 778 | { | 779 | { |
| 779 | int ret; | 780 | int ret; |
| 780 | 781 | ||
| @@ -926,12 +927,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
| 926 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); | 927 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); |
| 927 | BUG_ON(ret); /* -ENOMEM */ | 928 | BUG_ON(ret); /* -ENOMEM */ |
| 928 | } | 929 | } |
| 929 | /* | 930 | tree_mod_log_free_eb(root->fs_info, buf); |
| 930 | * don't log freeing in case we're freeing the root node, this | ||
| 931 | * is done by tree_mod_log_set_root_pointer later | ||
| 932 | */ | ||
| 933 | if (buf != root->node && btrfs_header_level(buf) != 0) | ||
| 934 | tree_mod_log_free_eb(root->fs_info, buf); | ||
| 935 | clean_tree_block(trans, root, buf); | 931 | clean_tree_block(trans, root, buf); |
| 936 | *last_ref = 1; | 932 | *last_ref = 1; |
| 937 | } | 933 | } |
| @@ -1142,13 +1138,13 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | |||
| 1142 | switch (tm->op) { | 1138 | switch (tm->op) { |
| 1143 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: | 1139 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: |
| 1144 | BUG_ON(tm->slot < n); | 1140 | BUG_ON(tm->slot < n); |
| 1145 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: | ||
| 1146 | case MOD_LOG_KEY_REMOVE: | 1141 | case MOD_LOG_KEY_REMOVE: |
| 1142 | n++; | ||
| 1143 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: | ||
| 1147 | btrfs_set_node_key(eb, &tm->key, tm->slot); | 1144 | btrfs_set_node_key(eb, &tm->key, tm->slot); |
| 1148 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | 1145 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); |
| 1149 | btrfs_set_node_ptr_generation(eb, tm->slot, | 1146 | btrfs_set_node_ptr_generation(eb, tm->slot, |
| 1150 | tm->generation); | 1147 | tm->generation); |
| 1151 | n++; | ||
| 1152 | break; | 1148 | break; |
| 1153 | case MOD_LOG_KEY_REPLACE: | 1149 | case MOD_LOG_KEY_REPLACE: |
| 1154 | BUG_ON(tm->slot >= n); | 1150 | BUG_ON(tm->slot >= n); |
| @@ -1225,6 +1221,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | |||
| 1225 | free_extent_buffer(eb); | 1221 | free_extent_buffer(eb); |
| 1226 | 1222 | ||
| 1227 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); | 1223 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); |
| 1224 | WARN_ON(btrfs_header_nritems(eb_rewin) > | ||
| 1225 | BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root)); | ||
| 1228 | 1226 | ||
| 1229 | return eb_rewin; | 1227 | return eb_rewin; |
| 1230 | } | 1228 | } |
| @@ -1241,9 +1239,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
| 1241 | { | 1239 | { |
| 1242 | struct tree_mod_elem *tm; | 1240 | struct tree_mod_elem *tm; |
| 1243 | struct extent_buffer *eb; | 1241 | struct extent_buffer *eb; |
| 1242 | struct extent_buffer *old; | ||
| 1244 | struct tree_mod_root *old_root = NULL; | 1243 | struct tree_mod_root *old_root = NULL; |
| 1245 | u64 old_generation = 0; | 1244 | u64 old_generation = 0; |
| 1246 | u64 logical; | 1245 | u64 logical; |
| 1246 | u32 blocksize; | ||
| 1247 | 1247 | ||
| 1248 | eb = btrfs_read_lock_root_node(root); | 1248 | eb = btrfs_read_lock_root_node(root); |
| 1249 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); | 1249 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); |
| @@ -1259,14 +1259,32 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
| 1259 | } | 1259 | } |
| 1260 | 1260 | ||
| 1261 | tm = tree_mod_log_search(root->fs_info, logical, time_seq); | 1261 | tm = tree_mod_log_search(root->fs_info, logical, time_seq); |
| 1262 | if (old_root) | 1262 | if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { |
| 1263 | btrfs_tree_read_unlock(root->node); | ||
| 1264 | free_extent_buffer(root->node); | ||
| 1265 | blocksize = btrfs_level_size(root, old_root->level); | ||
| 1266 | old = read_tree_block(root, logical, blocksize, 0); | ||
| 1267 | if (!old) { | ||
| 1268 | pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", | ||
| 1269 | logical); | ||
| 1270 | WARN_ON(1); | ||
| 1271 | } else { | ||
| 1272 | eb = btrfs_clone_extent_buffer(old); | ||
| 1273 | free_extent_buffer(old); | ||
| 1274 | } | ||
| 1275 | } else if (old_root) { | ||
| 1276 | btrfs_tree_read_unlock(root->node); | ||
| 1277 | free_extent_buffer(root->node); | ||
| 1263 | eb = alloc_dummy_extent_buffer(logical, root->nodesize); | 1278 | eb = alloc_dummy_extent_buffer(logical, root->nodesize); |
| 1264 | else | 1279 | } else { |
| 1265 | eb = btrfs_clone_extent_buffer(root->node); | 1280 | eb = btrfs_clone_extent_buffer(root->node); |
| 1266 | btrfs_tree_read_unlock(root->node); | 1281 | btrfs_tree_read_unlock(root->node); |
| 1267 | free_extent_buffer(root->node); | 1282 | free_extent_buffer(root->node); |
| 1283 | } | ||
| 1284 | |||
| 1268 | if (!eb) | 1285 | if (!eb) |
| 1269 | return NULL; | 1286 | return NULL; |
| 1287 | extent_buffer_get(eb); | ||
| 1270 | btrfs_tree_read_lock(eb); | 1288 | btrfs_tree_read_lock(eb); |
| 1271 | if (old_root) { | 1289 | if (old_root) { |
| 1272 | btrfs_set_header_bytenr(eb, eb->start); | 1290 | btrfs_set_header_bytenr(eb, eb->start); |
| @@ -1279,11 +1297,28 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
| 1279 | __tree_mod_log_rewind(eb, time_seq, tm); | 1297 | __tree_mod_log_rewind(eb, time_seq, tm); |
| 1280 | else | 1298 | else |
| 1281 | WARN_ON(btrfs_header_level(eb) != 0); | 1299 | WARN_ON(btrfs_header_level(eb) != 0); |
| 1282 | extent_buffer_get(eb); | 1300 | WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root)); |
| 1283 | 1301 | ||
| 1284 | return eb; | 1302 | return eb; |
| 1285 | } | 1303 | } |
| 1286 | 1304 | ||
| 1305 | int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq) | ||
| 1306 | { | ||
| 1307 | struct tree_mod_elem *tm; | ||
| 1308 | int level; | ||
| 1309 | |||
| 1310 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); | ||
| 1311 | if (tm && tm->op == MOD_LOG_ROOT_REPLACE) { | ||
| 1312 | level = tm->old_root.level; | ||
| 1313 | } else { | ||
| 1314 | rcu_read_lock(); | ||
| 1315 | level = btrfs_header_level(root->node); | ||
| 1316 | rcu_read_unlock(); | ||
| 1317 | } | ||
| 1318 | |||
| 1319 | return level; | ||
| 1320 | } | ||
| 1321 | |||
| 1287 | static inline int should_cow_block(struct btrfs_trans_handle *trans, | 1322 | static inline int should_cow_block(struct btrfs_trans_handle *trans, |
| 1288 | struct btrfs_root *root, | 1323 | struct btrfs_root *root, |
| 1289 | struct extent_buffer *buf) | 1324 | struct extent_buffer *buf) |
| @@ -1324,19 +1359,16 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 1324 | u64 search_start; | 1359 | u64 search_start; |
| 1325 | int ret; | 1360 | int ret; |
| 1326 | 1361 | ||
| 1327 | if (trans->transaction != root->fs_info->running_transaction) { | 1362 | if (trans->transaction != root->fs_info->running_transaction) |
| 1328 | printk(KERN_CRIT "trans %llu running %llu\n", | 1363 | WARN(1, KERN_CRIT "trans %llu running %llu\n", |
| 1329 | (unsigned long long)trans->transid, | 1364 | (unsigned long long)trans->transid, |
| 1330 | (unsigned long long) | 1365 | (unsigned long long) |
| 1331 | root->fs_info->running_transaction->transid); | 1366 | root->fs_info->running_transaction->transid); |
| 1332 | WARN_ON(1); | 1367 | |
| 1333 | } | 1368 | if (trans->transid != root->fs_info->generation) |
| 1334 | if (trans->transid != root->fs_info->generation) { | 1369 | WARN(1, KERN_CRIT "trans %llu running %llu\n", |
| 1335 | printk(KERN_CRIT "trans %llu running %llu\n", | ||
| 1336 | (unsigned long long)trans->transid, | 1370 | (unsigned long long)trans->transid, |
| 1337 | (unsigned long long)root->fs_info->generation); | 1371 | (unsigned long long)root->fs_info->generation); |
| 1338 | WARN_ON(1); | ||
| 1339 | } | ||
| 1340 | 1372 | ||
| 1341 | if (!should_cow_block(trans, root, buf)) { | 1373 | if (!should_cow_block(trans, root, buf)) { |
| 1342 | *cow_ret = buf; | 1374 | *cow_ret = buf; |
| @@ -1432,10 +1464,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
| 1432 | if (cache_only && parent_level != 1) | 1464 | if (cache_only && parent_level != 1) |
| 1433 | return 0; | 1465 | return 0; |
| 1434 | 1466 | ||
| 1435 | if (trans->transaction != root->fs_info->running_transaction) | 1467 | WARN_ON(trans->transaction != root->fs_info->running_transaction); |
| 1436 | WARN_ON(1); | 1468 | WARN_ON(trans->transid != root->fs_info->generation); |
| 1437 | if (trans->transid != root->fs_info->generation) | ||
| 1438 | WARN_ON(1); | ||
| 1439 | 1469 | ||
| 1440 | parent_nritems = btrfs_header_nritems(parent); | 1470 | parent_nritems = btrfs_header_nritems(parent); |
| 1441 | blocksize = btrfs_level_size(root, parent_level - 1); | 1471 | blocksize = btrfs_level_size(root, parent_level - 1); |
| @@ -1725,6 +1755,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1725 | goto enospc; | 1755 | goto enospc; |
| 1726 | } | 1756 | } |
| 1727 | 1757 | ||
| 1758 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
| 1728 | tree_mod_log_set_root_pointer(root, child); | 1759 | tree_mod_log_set_root_pointer(root, child); |
| 1729 | rcu_assign_pointer(root->node, child); | 1760 | rcu_assign_pointer(root->node, child); |
| 1730 | 1761 | ||
| @@ -1789,7 +1820,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1789 | if (btrfs_header_nritems(right) == 0) { | 1820 | if (btrfs_header_nritems(right) == 0) { |
| 1790 | clean_tree_block(trans, root, right); | 1821 | clean_tree_block(trans, root, right); |
| 1791 | btrfs_tree_unlock(right); | 1822 | btrfs_tree_unlock(right); |
| 1792 | del_ptr(trans, root, path, level + 1, pslot + 1, 1); | 1823 | del_ptr(trans, root, path, level + 1, pslot + 1); |
| 1793 | root_sub_used(root, right->len); | 1824 | root_sub_used(root, right->len); |
| 1794 | btrfs_free_tree_block(trans, root, right, 0, 1); | 1825 | btrfs_free_tree_block(trans, root, right, 0, 1); |
| 1795 | free_extent_buffer_stale(right); | 1826 | free_extent_buffer_stale(right); |
| @@ -1798,7 +1829,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1798 | struct btrfs_disk_key right_key; | 1829 | struct btrfs_disk_key right_key; |
| 1799 | btrfs_node_key(right, &right_key, 0); | 1830 | btrfs_node_key(right, &right_key, 0); |
| 1800 | tree_mod_log_set_node_key(root->fs_info, parent, | 1831 | tree_mod_log_set_node_key(root->fs_info, parent, |
| 1801 | &right_key, pslot + 1, 0); | 1832 | pslot + 1, 0); |
| 1802 | btrfs_set_node_key(parent, &right_key, pslot + 1); | 1833 | btrfs_set_node_key(parent, &right_key, pslot + 1); |
| 1803 | btrfs_mark_buffer_dirty(parent); | 1834 | btrfs_mark_buffer_dirty(parent); |
| 1804 | } | 1835 | } |
| @@ -1833,7 +1864,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1833 | if (btrfs_header_nritems(mid) == 0) { | 1864 | if (btrfs_header_nritems(mid) == 0) { |
| 1834 | clean_tree_block(trans, root, mid); | 1865 | clean_tree_block(trans, root, mid); |
| 1835 | btrfs_tree_unlock(mid); | 1866 | btrfs_tree_unlock(mid); |
| 1836 | del_ptr(trans, root, path, level + 1, pslot, 1); | 1867 | del_ptr(trans, root, path, level + 1, pslot); |
| 1837 | root_sub_used(root, mid->len); | 1868 | root_sub_used(root, mid->len); |
| 1838 | btrfs_free_tree_block(trans, root, mid, 0, 1); | 1869 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
| 1839 | free_extent_buffer_stale(mid); | 1870 | free_extent_buffer_stale(mid); |
| @@ -1842,7 +1873,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1842 | /* update the parent key to reflect our changes */ | 1873 | /* update the parent key to reflect our changes */ |
| 1843 | struct btrfs_disk_key mid_key; | 1874 | struct btrfs_disk_key mid_key; |
| 1844 | btrfs_node_key(mid, &mid_key, 0); | 1875 | btrfs_node_key(mid, &mid_key, 0); |
| 1845 | tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, | 1876 | tree_mod_log_set_node_key(root->fs_info, parent, |
| 1846 | pslot, 0); | 1877 | pslot, 0); |
| 1847 | btrfs_set_node_key(parent, &mid_key, pslot); | 1878 | btrfs_set_node_key(parent, &mid_key, pslot); |
| 1848 | btrfs_mark_buffer_dirty(parent); | 1879 | btrfs_mark_buffer_dirty(parent); |
| @@ -1942,7 +1973,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 1942 | orig_slot += left_nr; | 1973 | orig_slot += left_nr; |
| 1943 | btrfs_node_key(mid, &disk_key, 0); | 1974 | btrfs_node_key(mid, &disk_key, 0); |
| 1944 | tree_mod_log_set_node_key(root->fs_info, parent, | 1975 | tree_mod_log_set_node_key(root->fs_info, parent, |
| 1945 | &disk_key, pslot, 0); | 1976 | pslot, 0); |
| 1946 | btrfs_set_node_key(parent, &disk_key, pslot); | 1977 | btrfs_set_node_key(parent, &disk_key, pslot); |
| 1947 | btrfs_mark_buffer_dirty(parent); | 1978 | btrfs_mark_buffer_dirty(parent); |
| 1948 | if (btrfs_header_nritems(left) > orig_slot) { | 1979 | if (btrfs_header_nritems(left) > orig_slot) { |
| @@ -1995,7 +2026,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 1995 | 2026 | ||
| 1996 | btrfs_node_key(right, &disk_key, 0); | 2027 | btrfs_node_key(right, &disk_key, 0); |
| 1997 | tree_mod_log_set_node_key(root->fs_info, parent, | 2028 | tree_mod_log_set_node_key(root->fs_info, parent, |
| 1998 | &disk_key, pslot + 1, 0); | 2029 | pslot + 1, 0); |
| 1999 | btrfs_set_node_key(parent, &disk_key, pslot + 1); | 2030 | btrfs_set_node_key(parent, &disk_key, pslot + 1); |
| 2000 | btrfs_mark_buffer_dirty(parent); | 2031 | btrfs_mark_buffer_dirty(parent); |
| 2001 | 2032 | ||
| @@ -2181,6 +2212,9 @@ static noinline void unlock_up(struct btrfs_path *path, int level, | |||
| 2181 | int no_skips = 0; | 2212 | int no_skips = 0; |
| 2182 | struct extent_buffer *t; | 2213 | struct extent_buffer *t; |
| 2183 | 2214 | ||
| 2215 | if (path->really_keep_locks) | ||
| 2216 | return; | ||
| 2217 | |||
| 2184 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | 2218 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { |
| 2185 | if (!path->nodes[i]) | 2219 | if (!path->nodes[i]) |
| 2186 | break; | 2220 | break; |
| @@ -2228,7 +2262,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | |||
| 2228 | { | 2262 | { |
| 2229 | int i; | 2263 | int i; |
| 2230 | 2264 | ||
| 2231 | if (path->keep_locks) | 2265 | if (path->keep_locks || path->really_keep_locks) |
| 2232 | return; | 2266 | return; |
| 2233 | 2267 | ||
| 2234 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | 2268 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { |
| @@ -2461,7 +2495,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 2461 | if (!cow) | 2495 | if (!cow) |
| 2462 | write_lock_level = -1; | 2496 | write_lock_level = -1; |
| 2463 | 2497 | ||
| 2464 | if (cow && (p->keep_locks || p->lowest_level)) | 2498 | if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level)) |
| 2465 | write_lock_level = BTRFS_MAX_LEVEL; | 2499 | write_lock_level = BTRFS_MAX_LEVEL; |
| 2466 | 2500 | ||
| 2467 | min_write_lock_level = write_lock_level; | 2501 | min_write_lock_level = write_lock_level; |
| @@ -2530,7 +2564,10 @@ again: | |||
| 2530 | * must have write locks on this node and the | 2564 | * must have write locks on this node and the |
| 2531 | * parent | 2565 | * parent |
| 2532 | */ | 2566 | */ |
| 2533 | if (level + 1 > write_lock_level) { | 2567 | if (level > write_lock_level || |
| 2568 | (level + 1 > write_lock_level && | ||
| 2569 | level + 1 < BTRFS_MAX_LEVEL && | ||
| 2570 | p->nodes[level + 1])) { | ||
| 2534 | write_lock_level = level + 1; | 2571 | write_lock_level = level + 1; |
| 2535 | btrfs_release_path(p); | 2572 | btrfs_release_path(p); |
| 2536 | goto again; | 2573 | goto again; |
| @@ -2879,7 +2916,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, | |||
| 2879 | if (!path->nodes[i]) | 2916 | if (!path->nodes[i]) |
| 2880 | break; | 2917 | break; |
| 2881 | t = path->nodes[i]; | 2918 | t = path->nodes[i]; |
| 2882 | tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); | 2919 | tree_mod_log_set_node_key(root->fs_info, t, tslot, 1); |
| 2883 | btrfs_set_node_key(t, key, tslot); | 2920 | btrfs_set_node_key(t, key, tslot); |
| 2884 | btrfs_mark_buffer_dirty(path->nodes[i]); | 2921 | btrfs_mark_buffer_dirty(path->nodes[i]); |
| 2885 | if (tslot != 0) | 2922 | if (tslot != 0) |
| @@ -2970,8 +3007,10 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
| 2970 | push_items * sizeof(struct btrfs_key_ptr)); | 3007 | push_items * sizeof(struct btrfs_key_ptr)); |
| 2971 | 3008 | ||
| 2972 | if (push_items < src_nritems) { | 3009 | if (push_items < src_nritems) { |
| 2973 | tree_mod_log_eb_move(root->fs_info, src, 0, push_items, | 3010 | /* |
| 2974 | src_nritems - push_items); | 3011 | * don't call tree_mod_log_eb_move here, key removal was already |
| 3012 | * fully logged by tree_mod_log_eb_copy above. | ||
| 3013 | */ | ||
| 2975 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), | 3014 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), |
| 2976 | btrfs_node_key_ptr_offset(push_items), | 3015 | btrfs_node_key_ptr_offset(push_items), |
| 2977 | (src_nritems - push_items) * | 3016 | (src_nritems - push_items) * |
| @@ -3262,14 +3301,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 3262 | */ | 3301 | */ |
| 3263 | static int leaf_space_used(struct extent_buffer *l, int start, int nr) | 3302 | static int leaf_space_used(struct extent_buffer *l, int start, int nr) |
| 3264 | { | 3303 | { |
| 3304 | struct btrfs_item *start_item; | ||
| 3305 | struct btrfs_item *end_item; | ||
| 3306 | struct btrfs_map_token token; | ||
| 3265 | int data_len; | 3307 | int data_len; |
| 3266 | int nritems = btrfs_header_nritems(l); | 3308 | int nritems = btrfs_header_nritems(l); |
| 3267 | int end = min(nritems, start + nr) - 1; | 3309 | int end = min(nritems, start + nr) - 1; |
| 3268 | 3310 | ||
| 3269 | if (!nr) | 3311 | if (!nr) |
| 3270 | return 0; | 3312 | return 0; |
| 3271 | data_len = btrfs_item_end_nr(l, start); | 3313 | btrfs_init_map_token(&token); |
| 3272 | data_len = data_len - btrfs_item_offset_nr(l, end); | 3314 | start_item = btrfs_item_nr(l, start); |
| 3315 | end_item = btrfs_item_nr(l, end); | ||
| 3316 | data_len = btrfs_token_item_offset(l, start_item, &token) + | ||
| 3317 | btrfs_token_item_size(l, start_item, &token); | ||
| 3318 | data_len = data_len - btrfs_token_item_offset(l, end_item, &token); | ||
| 3273 | data_len += sizeof(struct btrfs_item) * nr; | 3319 | data_len += sizeof(struct btrfs_item) * nr; |
| 3274 | WARN_ON(data_len < 0); | 3320 | WARN_ON(data_len < 0); |
| 3275 | return data_len; | 3321 | return data_len; |
| @@ -3363,8 +3409,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
| 3363 | if (push_items == 0) | 3409 | if (push_items == 0) |
| 3364 | goto out_unlock; | 3410 | goto out_unlock; |
| 3365 | 3411 | ||
| 3366 | if (!empty && push_items == left_nritems) | 3412 | WARN_ON(!empty && push_items == left_nritems); |
| 3367 | WARN_ON(1); | ||
| 3368 | 3413 | ||
| 3369 | /* push left to right */ | 3414 | /* push left to right */ |
| 3370 | right_nritems = btrfs_header_nritems(right); | 3415 | right_nritems = btrfs_header_nritems(right); |
| @@ -3602,11 +3647,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
| 3602 | btrfs_set_header_nritems(left, old_left_nritems + push_items); | 3647 | btrfs_set_header_nritems(left, old_left_nritems + push_items); |
| 3603 | 3648 | ||
| 3604 | /* fixup right node */ | 3649 | /* fixup right node */ |
| 3605 | if (push_items > right_nritems) { | 3650 | if (push_items > right_nritems) |
| 3606 | printk(KERN_CRIT "push items %d nr %u\n", push_items, | 3651 | WARN(1, KERN_CRIT "push items %d nr %u\n", push_items, |
| 3607 | right_nritems); | 3652 | right_nritems); |
| 3608 | WARN_ON(1); | ||
| 3609 | } | ||
| 3610 | 3653 | ||
| 3611 | if (push_items < right_nritems) { | 3654 | if (push_items < right_nritems) { |
| 3612 | push_space = btrfs_item_offset_nr(right, push_items - 1) - | 3655 | push_space = btrfs_item_offset_nr(right, push_items - 1) - |
| @@ -4402,149 +4445,6 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
| 4402 | } | 4445 | } |
| 4403 | 4446 | ||
| 4404 | /* | 4447 | /* |
| 4405 | * Given a key and some data, insert items into the tree. | ||
| 4406 | * This does all the path init required, making room in the tree if needed. | ||
| 4407 | * Returns the number of keys that were inserted. | ||
| 4408 | */ | ||
| 4409 | int btrfs_insert_some_items(struct btrfs_trans_handle *trans, | ||
| 4410 | struct btrfs_root *root, | ||
| 4411 | struct btrfs_path *path, | ||
| 4412 | struct btrfs_key *cpu_key, u32 *data_size, | ||
| 4413 | int nr) | ||
| 4414 | { | ||
| 4415 | struct extent_buffer *leaf; | ||
| 4416 | struct btrfs_item *item; | ||
| 4417 | int ret = 0; | ||
| 4418 | int slot; | ||
| 4419 | int i; | ||
| 4420 | u32 nritems; | ||
| 4421 | u32 total_data = 0; | ||
| 4422 | u32 total_size = 0; | ||
| 4423 | unsigned int data_end; | ||
| 4424 | struct btrfs_disk_key disk_key; | ||
| 4425 | struct btrfs_key found_key; | ||
| 4426 | struct btrfs_map_token token; | ||
| 4427 | |||
| 4428 | btrfs_init_map_token(&token); | ||
| 4429 | |||
| 4430 | for (i = 0; i < nr; i++) { | ||
| 4431 | if (total_size + data_size[i] + sizeof(struct btrfs_item) > | ||
| 4432 | BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 4433 | break; | ||
| 4434 | nr = i; | ||
| 4435 | } | ||
| 4436 | total_data += data_size[i]; | ||
| 4437 | total_size += data_size[i] + sizeof(struct btrfs_item); | ||
| 4438 | } | ||
| 4439 | BUG_ON(nr == 0); | ||
| 4440 | |||
| 4441 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
| 4442 | if (ret == 0) | ||
| 4443 | return -EEXIST; | ||
| 4444 | if (ret < 0) | ||
| 4445 | goto out; | ||
| 4446 | |||
| 4447 | leaf = path->nodes[0]; | ||
| 4448 | |||
| 4449 | nritems = btrfs_header_nritems(leaf); | ||
| 4450 | data_end = leaf_data_end(root, leaf); | ||
| 4451 | |||
| 4452 | if (btrfs_leaf_free_space(root, leaf) < total_size) { | ||
| 4453 | for (i = nr; i >= 0; i--) { | ||
| 4454 | total_data -= data_size[i]; | ||
| 4455 | total_size -= data_size[i] + sizeof(struct btrfs_item); | ||
| 4456 | if (total_size < btrfs_leaf_free_space(root, leaf)) | ||
| 4457 | break; | ||
| 4458 | } | ||
| 4459 | nr = i; | ||
| 4460 | } | ||
| 4461 | |||
| 4462 | slot = path->slots[0]; | ||
| 4463 | BUG_ON(slot < 0); | ||
| 4464 | |||
| 4465 | if (slot != nritems) { | ||
| 4466 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); | ||
| 4467 | |||
| 4468 | item = btrfs_item_nr(leaf, slot); | ||
| 4469 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
| 4470 | |||
| 4471 | /* figure out how many keys we can insert in here */ | ||
| 4472 | total_data = data_size[0]; | ||
| 4473 | for (i = 1; i < nr; i++) { | ||
| 4474 | if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0) | ||
| 4475 | break; | ||
| 4476 | total_data += data_size[i]; | ||
| 4477 | } | ||
| 4478 | nr = i; | ||
| 4479 | |||
| 4480 | if (old_data < data_end) { | ||
| 4481 | btrfs_print_leaf(root, leaf); | ||
| 4482 | printk(KERN_CRIT "slot %d old_data %d data_end %d\n", | ||
| 4483 | slot, old_data, data_end); | ||
| 4484 | BUG_ON(1); | ||
| 4485 | } | ||
| 4486 | /* | ||
| 4487 | * item0..itemN ... dataN.offset..dataN.size .. data0.size | ||
| 4488 | */ | ||
| 4489 | /* first correct the data pointers */ | ||
| 4490 | for (i = slot; i < nritems; i++) { | ||
| 4491 | u32 ioff; | ||
| 4492 | |||
| 4493 | item = btrfs_item_nr(leaf, i); | ||
| 4494 | ioff = btrfs_token_item_offset(leaf, item, &token); | ||
| 4495 | btrfs_set_token_item_offset(leaf, item, | ||
| 4496 | ioff - total_data, &token); | ||
| 4497 | } | ||
| 4498 | /* shift the items */ | ||
| 4499 | memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), | ||
| 4500 | btrfs_item_nr_offset(slot), | ||
| 4501 | (nritems - slot) * sizeof(struct btrfs_item)); | ||
| 4502 | |||
| 4503 | /* shift the data */ | ||
| 4504 | memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + | ||
| 4505 | data_end - total_data, btrfs_leaf_data(leaf) + | ||
| 4506 | data_end, old_data - data_end); | ||
| 4507 | data_end = old_data; | ||
| 4508 | } else { | ||
| 4509 | /* | ||
| 4510 | * this sucks but it has to be done, if we are inserting at | ||
| 4511 | * the end of the leaf only insert 1 of the items, since we | ||
| 4512 | * have no way of knowing whats on the next leaf and we'd have | ||
| 4513 | * to drop our current locks to figure it out | ||
| 4514 | */ | ||
| 4515 | nr = 1; | ||
| 4516 | } | ||
| 4517 | |||
| 4518 | /* setup the item for the new data */ | ||
| 4519 | for (i = 0; i < nr; i++) { | ||
| 4520 | btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); | ||
| 4521 | btrfs_set_item_key(leaf, &disk_key, slot + i); | ||
| 4522 | item = btrfs_item_nr(leaf, slot + i); | ||
| 4523 | btrfs_set_token_item_offset(leaf, item, | ||
| 4524 | data_end - data_size[i], &token); | ||
| 4525 | data_end -= data_size[i]; | ||
| 4526 | btrfs_set_token_item_size(leaf, item, data_size[i], &token); | ||
| 4527 | } | ||
| 4528 | btrfs_set_header_nritems(leaf, nritems + nr); | ||
| 4529 | btrfs_mark_buffer_dirty(leaf); | ||
| 4530 | |||
| 4531 | ret = 0; | ||
| 4532 | if (slot == 0) { | ||
| 4533 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
| 4534 | fixup_low_keys(trans, root, path, &disk_key, 1); | ||
| 4535 | } | ||
| 4536 | |||
| 4537 | if (btrfs_leaf_free_space(root, leaf) < 0) { | ||
| 4538 | btrfs_print_leaf(root, leaf); | ||
| 4539 | BUG(); | ||
| 4540 | } | ||
| 4541 | out: | ||
| 4542 | if (!ret) | ||
| 4543 | ret = nr; | ||
| 4544 | return ret; | ||
| 4545 | } | ||
| 4546 | |||
| 4547 | /* | ||
| 4548 | * this is a helper for btrfs_insert_empty_items, the main goal here is | 4448 | * this is a helper for btrfs_insert_empty_items, the main goal here is |
| 4549 | * to save stack depth by doing the bulk of the work in a function | 4449 | * to save stack depth by doing the bulk of the work in a function |
| 4550 | * that doesn't call btrfs_search_slot | 4450 | * that doesn't call btrfs_search_slot |
| @@ -4705,16 +4605,21 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 4705 | * empty a node. | 4605 | * empty a node. |
| 4706 | */ | 4606 | */ |
| 4707 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 4607 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 4708 | struct btrfs_path *path, int level, int slot, | 4608 | struct btrfs_path *path, int level, int slot) |
| 4709 | int tree_mod_log) | ||
| 4710 | { | 4609 | { |
| 4711 | struct extent_buffer *parent = path->nodes[level]; | 4610 | struct extent_buffer *parent = path->nodes[level]; |
| 4712 | u32 nritems; | 4611 | u32 nritems; |
| 4713 | int ret; | 4612 | int ret; |
| 4714 | 4613 | ||
| 4614 | if (level) { | ||
| 4615 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, | ||
| 4616 | MOD_LOG_KEY_REMOVE); | ||
| 4617 | BUG_ON(ret < 0); | ||
| 4618 | } | ||
| 4619 | |||
| 4715 | nritems = btrfs_header_nritems(parent); | 4620 | nritems = btrfs_header_nritems(parent); |
| 4716 | if (slot != nritems - 1) { | 4621 | if (slot != nritems - 1) { |
| 4717 | if (tree_mod_log && level) | 4622 | if (level) |
| 4718 | tree_mod_log_eb_move(root->fs_info, parent, slot, | 4623 | tree_mod_log_eb_move(root->fs_info, parent, slot, |
| 4719 | slot + 1, nritems - slot - 1); | 4624 | slot + 1, nritems - slot - 1); |
| 4720 | memmove_extent_buffer(parent, | 4625 | memmove_extent_buffer(parent, |
| @@ -4722,10 +4627,6 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 4722 | btrfs_node_key_ptr_offset(slot + 1), | 4627 | btrfs_node_key_ptr_offset(slot + 1), |
| 4723 | sizeof(struct btrfs_key_ptr) * | 4628 | sizeof(struct btrfs_key_ptr) * |
| 4724 | (nritems - slot - 1)); | 4629 | (nritems - slot - 1)); |
| 4725 | } else if (tree_mod_log && level) { | ||
| 4726 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, | ||
| 4727 | MOD_LOG_KEY_REMOVE); | ||
| 4728 | BUG_ON(ret < 0); | ||
| 4729 | } | 4630 | } |
| 4730 | 4631 | ||
| 4731 | nritems--; | 4632 | nritems--; |
| @@ -4759,7 +4660,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
| 4759 | struct extent_buffer *leaf) | 4660 | struct extent_buffer *leaf) |
| 4760 | { | 4661 | { |
| 4761 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); | 4662 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); |
| 4762 | del_ptr(trans, root, path, 1, path->slots[1], 1); | 4663 | del_ptr(trans, root, path, 1, path->slots[1]); |
| 4763 | 4664 | ||
| 4764 | /* | 4665 | /* |
| 4765 | * btrfs_free_extent is expensive, we want to make sure we | 4666 | * btrfs_free_extent is expensive, we want to make sure we |
| @@ -5073,6 +4974,7 @@ static void tree_move_down(struct btrfs_root *root, | |||
| 5073 | struct btrfs_path *path, | 4974 | struct btrfs_path *path, |
| 5074 | int *level, int root_level) | 4975 | int *level, int root_level) |
| 5075 | { | 4976 | { |
| 4977 | BUG_ON(*level == 0); | ||
| 5076 | path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], | 4978 | path->nodes[*level - 1] = read_node_slot(root, path->nodes[*level], |
| 5077 | path->slots[*level]); | 4979 | path->slots[*level]); |
| 5078 | path->slots[*level - 1] = 0; | 4980 | path->slots[*level - 1] = 0; |
| @@ -5089,7 +4991,7 @@ static int tree_move_next_or_upnext(struct btrfs_root *root, | |||
| 5089 | 4991 | ||
| 5090 | path->slots[*level]++; | 4992 | path->slots[*level]++; |
| 5091 | 4993 | ||
| 5092 | while (path->slots[*level] == nritems) { | 4994 | while (path->slots[*level] >= nritems) { |
| 5093 | if (*level == root_level) | 4995 | if (*level == root_level) |
| 5094 | return -1; | 4996 | return -1; |
| 5095 | 4997 | ||
| @@ -5225,13 +5127,13 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5225 | right_path->search_commit_root = 1; | 5127 | right_path->search_commit_root = 1; |
| 5226 | right_path->skip_locking = 1; | 5128 | right_path->skip_locking = 1; |
| 5227 | 5129 | ||
| 5228 | spin_lock(&left_root->root_times_lock); | 5130 | spin_lock(&left_root->root_item_lock); |
| 5229 | left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); | 5131 | left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); |
| 5230 | spin_unlock(&left_root->root_times_lock); | 5132 | spin_unlock(&left_root->root_item_lock); |
| 5231 | 5133 | ||
| 5232 | spin_lock(&right_root->root_times_lock); | 5134 | spin_lock(&right_root->root_item_lock); |
| 5233 | right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); | 5135 | right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); |
| 5234 | spin_unlock(&right_root->root_times_lock); | 5136 | spin_unlock(&right_root->root_item_lock); |
| 5235 | 5137 | ||
| 5236 | trans = btrfs_join_transaction(left_root); | 5138 | trans = btrfs_join_transaction(left_root); |
| 5237 | if (IS_ERR(trans)) { | 5139 | if (IS_ERR(trans)) { |
| @@ -5326,15 +5228,15 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5326 | goto out; | 5228 | goto out; |
| 5327 | } | 5229 | } |
| 5328 | 5230 | ||
| 5329 | spin_lock(&left_root->root_times_lock); | 5231 | spin_lock(&left_root->root_item_lock); |
| 5330 | ctransid = btrfs_root_ctransid(&left_root->root_item); | 5232 | ctransid = btrfs_root_ctransid(&left_root->root_item); |
| 5331 | spin_unlock(&left_root->root_times_lock); | 5233 | spin_unlock(&left_root->root_item_lock); |
| 5332 | if (ctransid != left_start_ctransid) | 5234 | if (ctransid != left_start_ctransid) |
| 5333 | left_start_ctransid = 0; | 5235 | left_start_ctransid = 0; |
| 5334 | 5236 | ||
| 5335 | spin_lock(&right_root->root_times_lock); | 5237 | spin_lock(&right_root->root_item_lock); |
| 5336 | ctransid = btrfs_root_ctransid(&right_root->root_item); | 5238 | ctransid = btrfs_root_ctransid(&right_root->root_item); |
| 5337 | spin_unlock(&right_root->root_times_lock); | 5239 | spin_unlock(&right_root->root_item_lock); |
| 5338 | if (ctransid != right_start_ctransid) | 5240 | if (ctransid != right_start_ctransid) |
| 5339 | right_start_ctransid = 0; | 5241 | right_start_ctransid = 0; |
| 5340 | 5242 | ||
| @@ -5433,9 +5335,11 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5433 | goto out; | 5335 | goto out; |
| 5434 | advance_right = ADVANCE; | 5336 | advance_right = ADVANCE; |
| 5435 | } else { | 5337 | } else { |
| 5338 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); | ||
| 5436 | ret = tree_compare_item(left_root, left_path, | 5339 | ret = tree_compare_item(left_root, left_path, |
| 5437 | right_path, tmp_buf); | 5340 | right_path, tmp_buf); |
| 5438 | if (ret) { | 5341 | if (ret) { |
| 5342 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); | ||
| 5439 | ret = changed_cb(left_root, right_root, | 5343 | ret = changed_cb(left_root, right_root, |
| 5440 | left_path, right_path, | 5344 | left_path, right_path, |
| 5441 | &left_key, | 5345 | &left_key, |
| @@ -5596,6 +5500,139 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
| 5596 | return btrfs_next_old_leaf(root, path, 0); | 5500 | return btrfs_next_old_leaf(root, path, 0); |
| 5597 | } | 5501 | } |
| 5598 | 5502 | ||
| 5503 | /* Release the path up to but not including the given level */ | ||
| 5504 | static void btrfs_release_level(struct btrfs_path *path, int level) | ||
| 5505 | { | ||
| 5506 | int i; | ||
| 5507 | |||
| 5508 | for (i = 0; i < level; i++) { | ||
| 5509 | path->slots[i] = 0; | ||
| 5510 | if (!path->nodes[i]) | ||
| 5511 | continue; | ||
| 5512 | if (path->locks[i]) { | ||
| 5513 | btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); | ||
| 5514 | path->locks[i] = 0; | ||
| 5515 | } | ||
| 5516 | free_extent_buffer(path->nodes[i]); | ||
| 5517 | path->nodes[i] = NULL; | ||
| 5518 | } | ||
| 5519 | } | ||
| 5520 | |||
| 5521 | /* | ||
| 5522 | * This function assumes 2 things | ||
| 5523 | * | ||
| 5524 | * 1) You are using path->keep_locks | ||
| 5525 | * 2) You are not inserting items. | ||
| 5526 | * | ||
| 5527 | * If either of these are not true do not use this function. If you need a next | ||
| 5528 | * leaf with either of these not being true then this function can be easily | ||
| 5529 | * adapted to do that, but at the moment these are the limitations. | ||
| 5530 | */ | ||
| 5531 | int btrfs_next_leaf_write(struct btrfs_trans_handle *trans, | ||
| 5532 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 5533 | int del) | ||
| 5534 | { | ||
| 5535 | struct extent_buffer *b; | ||
| 5536 | struct btrfs_key key; | ||
| 5537 | u32 nritems; | ||
| 5538 | int level = 1; | ||
| 5539 | int slot; | ||
| 5540 | int ret = 1; | ||
| 5541 | int write_lock_level = BTRFS_MAX_LEVEL; | ||
| 5542 | int ins_len = del ? -1 : 0; | ||
| 5543 | |||
| 5544 | WARN_ON(!(path->keep_locks || path->really_keep_locks)); | ||
| 5545 | |||
| 5546 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 5547 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | ||
| 5548 | |||
| 5549 | while (path->nodes[level]) { | ||
| 5550 | nritems = btrfs_header_nritems(path->nodes[level]); | ||
| 5551 | if (!(path->locks[level] & BTRFS_WRITE_LOCK)) { | ||
| 5552 | search: | ||
| 5553 | btrfs_release_path(path); | ||
| 5554 | ret = btrfs_search_slot(trans, root, &key, path, | ||
| 5555 | ins_len, 1); | ||
| 5556 | if (ret < 0) | ||
| 5557 | goto out; | ||
| 5558 | level = 1; | ||
| 5559 | continue; | ||
| 5560 | } | ||
| 5561 | |||
| 5562 | if (path->slots[level] >= nritems - 1) { | ||
| 5563 | level++; | ||
| 5564 | continue; | ||
| 5565 | } | ||
| 5566 | |||
| 5567 | btrfs_release_level(path, level); | ||
| 5568 | break; | ||
| 5569 | } | ||
| 5570 | |||
| 5571 | if (!path->nodes[level]) { | ||
| 5572 | ret = 1; | ||
| 5573 | goto out; | ||
| 5574 | } | ||
| 5575 | |||
| 5576 | path->slots[level]++; | ||
| 5577 | b = path->nodes[level]; | ||
| 5578 | |||
| 5579 | while (b) { | ||
| 5580 | level = btrfs_header_level(b); | ||
| 5581 | |||
| 5582 | if (!should_cow_block(trans, root, b)) | ||
| 5583 | goto cow_done; | ||
| 5584 | |||
| 5585 | btrfs_set_path_blocking(path); | ||
| 5586 | ret = btrfs_cow_block(trans, root, b, | ||
| 5587 | path->nodes[level + 1], | ||
| 5588 | path->slots[level + 1], &b); | ||
| 5589 | if (ret) | ||
| 5590 | goto out; | ||
| 5591 | cow_done: | ||
| 5592 | path->nodes[level] = b; | ||
| 5593 | btrfs_clear_path_blocking(path, NULL, 0); | ||
| 5594 | if (level != 0) { | ||
| 5595 | ret = setup_nodes_for_search(trans, root, path, b, | ||
| 5596 | level, ins_len, | ||
| 5597 | &write_lock_level); | ||
| 5598 | if (ret == -EAGAIN) | ||
| 5599 | goto search; | ||
| 5600 | if (ret) | ||
| 5601 | goto out; | ||
| 5602 | |||
| 5603 | b = path->nodes[level]; | ||
| 5604 | slot = path->slots[level]; | ||
| 5605 | |||
| 5606 | ret = read_block_for_search(trans, root, path, | ||
| 5607 | &b, level, slot, &key, 0); | ||
| 5608 | if (ret == -EAGAIN) | ||
| 5609 | goto search; | ||
| 5610 | if (ret) | ||
| 5611 | goto out; | ||
| 5612 | level = btrfs_header_level(b); | ||
| 5613 | if (!btrfs_try_tree_write_lock(b)) { | ||
| 5614 | btrfs_set_path_blocking(path); | ||
| 5615 | btrfs_tree_lock(b); | ||
| 5616 | btrfs_clear_path_blocking(path, b, | ||
| 5617 | BTRFS_WRITE_LOCK); | ||
| 5618 | } | ||
| 5619 | path->locks[level] = BTRFS_WRITE_LOCK; | ||
| 5620 | path->nodes[level] = b; | ||
| 5621 | path->slots[level] = 0; | ||
| 5622 | } else { | ||
| 5623 | path->slots[level] = 0; | ||
| 5624 | ret = 0; | ||
| 5625 | break; | ||
| 5626 | } | ||
| 5627 | } | ||
| 5628 | |||
| 5629 | out: | ||
| 5630 | if (ret) | ||
| 5631 | btrfs_release_path(path); | ||
| 5632 | |||
| 5633 | return ret; | ||
| 5634 | } | ||
| 5635 | |||
| 5599 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, | 5636 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, |
| 5600 | u64 time_seq) | 5637 | u64 time_seq) |
| 5601 | { | 5638 | { |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9821b672f5a2..547b7b05727f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -48,7 +48,7 @@ struct btrfs_ordered_sum; | |||
| 48 | 48 | ||
| 49 | #define BTRFS_MAGIC "_BHRfS_M" | 49 | #define BTRFS_MAGIC "_BHRfS_M" |
| 50 | 50 | ||
| 51 | #define BTRFS_MAX_MIRRORS 2 | 51 | #define BTRFS_MAX_MIRRORS 3 |
| 52 | 52 | ||
| 53 | #define BTRFS_MAX_LEVEL 8 | 53 | #define BTRFS_MAX_LEVEL 8 |
| 54 | 54 | ||
| @@ -142,6 +142,8 @@ struct btrfs_ordered_sum; | |||
| 142 | 142 | ||
| 143 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 | 143 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 |
| 144 | 144 | ||
| 145 | #define BTRFS_DEV_REPLACE_DEVID 0 | ||
| 146 | |||
| 145 | /* | 147 | /* |
| 146 | * the max metadata block size. This limit is somewhat artificial, | 148 | * the max metadata block size. This limit is somewhat artificial, |
| 147 | * but the memmove costs go through the roof for larger blocks. | 149 | * but the memmove costs go through the roof for larger blocks. |
| @@ -154,6 +156,13 @@ struct btrfs_ordered_sum; | |||
| 154 | */ | 156 | */ |
| 155 | #define BTRFS_NAME_LEN 255 | 157 | #define BTRFS_NAME_LEN 255 |
| 156 | 158 | ||
| 159 | /* | ||
| 160 | * Theoretical limit is larger, but we keep this down to a sane | ||
| 161 | * value. That should limit greatly the possibility of collisions on | ||
| 162 | * inode ref items. | ||
| 163 | */ | ||
| 164 | #define BTRFS_LINK_MAX 65535U | ||
| 165 | |||
| 157 | /* 32 bytes in various csum fields */ | 166 | /* 32 bytes in various csum fields */ |
| 158 | #define BTRFS_CSUM_SIZE 32 | 167 | #define BTRFS_CSUM_SIZE 32 |
| 159 | 168 | ||
| @@ -165,6 +174,9 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
| 165 | /* four bytes for CRC32 */ | 174 | /* four bytes for CRC32 */ |
| 166 | #define BTRFS_EMPTY_DIR_SIZE 0 | 175 | #define BTRFS_EMPTY_DIR_SIZE 0 |
| 167 | 176 | ||
| 177 | /* spefic to btrfs_map_block(), therefore not in include/linux/blk_types.h */ | ||
| 178 | #define REQ_GET_READ_MIRRORS (1 << 30) | ||
| 179 | |||
| 168 | #define BTRFS_FT_UNKNOWN 0 | 180 | #define BTRFS_FT_UNKNOWN 0 |
| 169 | #define BTRFS_FT_REG_FILE 1 | 181 | #define BTRFS_FT_REG_FILE 1 |
| 170 | #define BTRFS_FT_DIR 2 | 182 | #define BTRFS_FT_DIR 2 |
| @@ -406,7 +418,7 @@ struct btrfs_root_backup { | |||
| 406 | __le64 bytes_used; | 418 | __le64 bytes_used; |
| 407 | __le64 num_devices; | 419 | __le64 num_devices; |
| 408 | /* future */ | 420 | /* future */ |
| 409 | __le64 unsed_64[4]; | 421 | __le64 unused_64[4]; |
| 410 | 422 | ||
| 411 | u8 tree_root_level; | 423 | u8 tree_root_level; |
| 412 | u8 chunk_root_level; | 424 | u8 chunk_root_level; |
| @@ -489,6 +501,8 @@ struct btrfs_super_block { | |||
| 489 | */ | 501 | */ |
| 490 | #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) | 502 | #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) |
| 491 | 503 | ||
| 504 | #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) | ||
| 505 | |||
| 492 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 506 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
| 493 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 507 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
| 494 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 508 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
| @@ -496,7 +510,8 @@ struct btrfs_super_block { | |||
| 496 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | 510 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
| 497 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ | 511 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
| 498 | BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ | 512 | BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ |
| 499 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | 513 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ |
| 514 | BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) | ||
| 500 | 515 | ||
| 501 | /* | 516 | /* |
| 502 | * A leaf is full of items. offset and size tell us where to find | 517 | * A leaf is full of items. offset and size tell us where to find |
| @@ -561,6 +576,7 @@ struct btrfs_path { | |||
| 561 | unsigned int skip_locking:1; | 576 | unsigned int skip_locking:1; |
| 562 | unsigned int leave_spinning:1; | 577 | unsigned int leave_spinning:1; |
| 563 | unsigned int search_commit_root:1; | 578 | unsigned int search_commit_root:1; |
| 579 | unsigned int really_keep_locks:1; | ||
| 564 | }; | 580 | }; |
| 565 | 581 | ||
| 566 | /* | 582 | /* |
| @@ -643,6 +659,14 @@ struct btrfs_inode_ref { | |||
| 643 | /* name goes here */ | 659 | /* name goes here */ |
| 644 | } __attribute__ ((__packed__)); | 660 | } __attribute__ ((__packed__)); |
| 645 | 661 | ||
| 662 | struct btrfs_inode_extref { | ||
| 663 | __le64 parent_objectid; | ||
| 664 | __le64 index; | ||
| 665 | __le16 name_len; | ||
| 666 | __u8 name[0]; | ||
| 667 | /* name goes here */ | ||
| 668 | } __attribute__ ((__packed__)); | ||
| 669 | |||
| 646 | struct btrfs_timespec { | 670 | struct btrfs_timespec { |
| 647 | __le64 sec; | 671 | __le64 sec; |
| 648 | __le32 nsec; | 672 | __le32 nsec; |
| @@ -867,6 +891,59 @@ struct btrfs_dev_stats_item { | |||
| 867 | __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; | 891 | __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; |
| 868 | } __attribute__ ((__packed__)); | 892 | } __attribute__ ((__packed__)); |
| 869 | 893 | ||
| 894 | #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 | ||
| 895 | #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 | ||
| 896 | #define BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED 0 | ||
| 897 | #define BTRFS_DEV_REPLACE_ITEM_STATE_STARTED 1 | ||
| 898 | #define BTRFS_DEV_REPLACE_ITEM_STATE_SUSPENDED 2 | ||
| 899 | #define BTRFS_DEV_REPLACE_ITEM_STATE_FINISHED 3 | ||
| 900 | #define BTRFS_DEV_REPLACE_ITEM_STATE_CANCELED 4 | ||
| 901 | |||
| 902 | struct btrfs_dev_replace { | ||
| 903 | u64 replace_state; /* see #define above */ | ||
| 904 | u64 time_started; /* seconds since 1-Jan-1970 */ | ||
| 905 | u64 time_stopped; /* seconds since 1-Jan-1970 */ | ||
| 906 | atomic64_t num_write_errors; | ||
| 907 | atomic64_t num_uncorrectable_read_errors; | ||
| 908 | |||
| 909 | u64 cursor_left; | ||
| 910 | u64 committed_cursor_left; | ||
| 911 | u64 cursor_left_last_write_of_item; | ||
| 912 | u64 cursor_right; | ||
| 913 | |||
| 914 | u64 cont_reading_from_srcdev_mode; /* see #define above */ | ||
| 915 | |||
| 916 | int is_valid; | ||
| 917 | int item_needs_writeback; | ||
| 918 | struct btrfs_device *srcdev; | ||
| 919 | struct btrfs_device *tgtdev; | ||
| 920 | |||
| 921 | pid_t lock_owner; | ||
| 922 | atomic_t nesting_level; | ||
| 923 | struct mutex lock_finishing_cancel_unmount; | ||
| 924 | struct mutex lock_management_lock; | ||
| 925 | struct mutex lock; | ||
| 926 | |||
| 927 | struct btrfs_scrub_progress scrub_progress; | ||
| 928 | }; | ||
| 929 | |||
| 930 | struct btrfs_dev_replace_item { | ||
| 931 | /* | ||
| 932 | * grow this item struct at the end for future enhancements and keep | ||
| 933 | * the existing values unchanged | ||
| 934 | */ | ||
| 935 | __le64 src_devid; | ||
| 936 | __le64 cursor_left; | ||
| 937 | __le64 cursor_right; | ||
| 938 | __le64 cont_reading_from_srcdev_mode; | ||
| 939 | |||
| 940 | __le64 replace_state; | ||
| 941 | __le64 time_started; | ||
| 942 | __le64 time_stopped; | ||
| 943 | __le64 num_write_errors; | ||
| 944 | __le64 num_uncorrectable_read_errors; | ||
| 945 | } __attribute__ ((__packed__)); | ||
| 946 | |||
| 870 | /* different types of block groups (and chunks) */ | 947 | /* different types of block groups (and chunks) */ |
| 871 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) | 948 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) |
| 872 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) | 949 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) |
| @@ -1028,12 +1105,22 @@ struct btrfs_space_info { | |||
| 1028 | wait_queue_head_t wait; | 1105 | wait_queue_head_t wait; |
| 1029 | }; | 1106 | }; |
| 1030 | 1107 | ||
| 1108 | #define BTRFS_BLOCK_RSV_GLOBAL 1 | ||
| 1109 | #define BTRFS_BLOCK_RSV_DELALLOC 2 | ||
| 1110 | #define BTRFS_BLOCK_RSV_TRANS 3 | ||
| 1111 | #define BTRFS_BLOCK_RSV_CHUNK 4 | ||
| 1112 | #define BTRFS_BLOCK_RSV_DELOPS 5 | ||
| 1113 | #define BTRFS_BLOCK_RSV_EMPTY 6 | ||
| 1114 | #define BTRFS_BLOCK_RSV_TEMP 7 | ||
| 1115 | |||
| 1031 | struct btrfs_block_rsv { | 1116 | struct btrfs_block_rsv { |
| 1032 | u64 size; | 1117 | u64 size; |
| 1033 | u64 reserved; | 1118 | u64 reserved; |
| 1034 | struct btrfs_space_info *space_info; | 1119 | struct btrfs_space_info *space_info; |
| 1035 | spinlock_t lock; | 1120 | spinlock_t lock; |
| 1036 | unsigned int full; | 1121 | unsigned short full; |
| 1122 | unsigned short type; | ||
| 1123 | unsigned short failfast; | ||
| 1037 | }; | 1124 | }; |
| 1038 | 1125 | ||
| 1039 | /* | 1126 | /* |
| @@ -1127,6 +1214,9 @@ struct btrfs_block_group_cache { | |||
| 1127 | * Today it will only have one thing on it, but that may change | 1214 | * Today it will only have one thing on it, but that may change |
| 1128 | */ | 1215 | */ |
| 1129 | struct list_head cluster_list; | 1216 | struct list_head cluster_list; |
| 1217 | |||
| 1218 | /* For delayed block group creation */ | ||
| 1219 | struct list_head new_bg_list; | ||
| 1130 | }; | 1220 | }; |
| 1131 | 1221 | ||
| 1132 | /* delayed seq elem */ | 1222 | /* delayed seq elem */ |
| @@ -1240,7 +1330,6 @@ struct btrfs_fs_info { | |||
| 1240 | struct mutex reloc_mutex; | 1330 | struct mutex reloc_mutex; |
| 1241 | 1331 | ||
| 1242 | struct list_head trans_list; | 1332 | struct list_head trans_list; |
| 1243 | struct list_head hashers; | ||
| 1244 | struct list_head dead_roots; | 1333 | struct list_head dead_roots; |
| 1245 | struct list_head caching_block_groups; | 1334 | struct list_head caching_block_groups; |
| 1246 | 1335 | ||
| @@ -1303,6 +1392,7 @@ struct btrfs_fs_info { | |||
| 1303 | struct btrfs_workers generic_worker; | 1392 | struct btrfs_workers generic_worker; |
| 1304 | struct btrfs_workers workers; | 1393 | struct btrfs_workers workers; |
| 1305 | struct btrfs_workers delalloc_workers; | 1394 | struct btrfs_workers delalloc_workers; |
| 1395 | struct btrfs_workers flush_workers; | ||
| 1306 | struct btrfs_workers endio_workers; | 1396 | struct btrfs_workers endio_workers; |
| 1307 | struct btrfs_workers endio_meta_workers; | 1397 | struct btrfs_workers endio_meta_workers; |
| 1308 | struct btrfs_workers endio_meta_write_workers; | 1398 | struct btrfs_workers endio_meta_write_workers; |
| @@ -1366,9 +1456,6 @@ struct btrfs_fs_info { | |||
| 1366 | struct rb_root defrag_inodes; | 1456 | struct rb_root defrag_inodes; |
| 1367 | atomic_t defrag_running; | 1457 | atomic_t defrag_running; |
| 1368 | 1458 | ||
| 1369 | spinlock_t ref_cache_lock; | ||
| 1370 | u64 total_ref_cache_size; | ||
| 1371 | |||
| 1372 | /* | 1459 | /* |
| 1373 | * these three are in extended format (availability of single | 1460 | * these three are in extended format (availability of single |
| 1374 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other | 1461 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other |
| @@ -1402,6 +1489,8 @@ struct btrfs_fs_info { | |||
| 1402 | struct rw_semaphore scrub_super_lock; | 1489 | struct rw_semaphore scrub_super_lock; |
| 1403 | int scrub_workers_refcnt; | 1490 | int scrub_workers_refcnt; |
| 1404 | struct btrfs_workers scrub_workers; | 1491 | struct btrfs_workers scrub_workers; |
| 1492 | struct btrfs_workers scrub_wr_completion_workers; | ||
| 1493 | struct btrfs_workers scrub_nocow_workers; | ||
| 1405 | 1494 | ||
| 1406 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1495 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 1407 | u32 check_integrity_print_mask; | 1496 | u32 check_integrity_print_mask; |
| @@ -1441,6 +1530,13 @@ struct btrfs_fs_info { | |||
| 1441 | 1530 | ||
| 1442 | /* next backup root to be overwritten */ | 1531 | /* next backup root to be overwritten */ |
| 1443 | int backup_root_index; | 1532 | int backup_root_index; |
| 1533 | |||
| 1534 | int num_tolerated_disk_barrier_failures; | ||
| 1535 | |||
| 1536 | /* device replace state */ | ||
| 1537 | struct btrfs_dev_replace dev_replace; | ||
| 1538 | |||
| 1539 | atomic_t mutually_exclusive_operation_running; | ||
| 1444 | }; | 1540 | }; |
| 1445 | 1541 | ||
| 1446 | /* | 1542 | /* |
| @@ -1481,9 +1577,9 @@ struct btrfs_root { | |||
| 1481 | wait_queue_head_t log_commit_wait[2]; | 1577 | wait_queue_head_t log_commit_wait[2]; |
| 1482 | atomic_t log_writers; | 1578 | atomic_t log_writers; |
| 1483 | atomic_t log_commit[2]; | 1579 | atomic_t log_commit[2]; |
| 1580 | atomic_t log_batch; | ||
| 1484 | unsigned long log_transid; | 1581 | unsigned long log_transid; |
| 1485 | unsigned long last_log_commit; | 1582 | unsigned long last_log_commit; |
| 1486 | unsigned long log_batch; | ||
| 1487 | pid_t log_start_pid; | 1583 | pid_t log_start_pid; |
| 1488 | bool log_multiple_pids; | 1584 | bool log_multiple_pids; |
| 1489 | 1585 | ||
| @@ -1550,7 +1646,7 @@ struct btrfs_root { | |||
| 1550 | 1646 | ||
| 1551 | int force_cow; | 1647 | int force_cow; |
| 1552 | 1648 | ||
| 1553 | spinlock_t root_times_lock; | 1649 | spinlock_t root_item_lock; |
| 1554 | }; | 1650 | }; |
| 1555 | 1651 | ||
| 1556 | struct btrfs_ioctl_defrag_range_args { | 1652 | struct btrfs_ioctl_defrag_range_args { |
| @@ -1592,6 +1688,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
| 1592 | */ | 1688 | */ |
| 1593 | #define BTRFS_INODE_ITEM_KEY 1 | 1689 | #define BTRFS_INODE_ITEM_KEY 1 |
| 1594 | #define BTRFS_INODE_REF_KEY 12 | 1690 | #define BTRFS_INODE_REF_KEY 12 |
| 1691 | #define BTRFS_INODE_EXTREF_KEY 13 | ||
| 1595 | #define BTRFS_XATTR_ITEM_KEY 24 | 1692 | #define BTRFS_XATTR_ITEM_KEY 24 |
| 1596 | #define BTRFS_ORPHAN_ITEM_KEY 48 | 1693 | #define BTRFS_ORPHAN_ITEM_KEY 48 |
| 1597 | /* reserve 2-15 close to the inode for later flexibility */ | 1694 | /* reserve 2-15 close to the inode for later flexibility */ |
| @@ -1693,6 +1790,12 @@ struct btrfs_ioctl_defrag_range_args { | |||
| 1693 | #define BTRFS_DEV_STATS_KEY 249 | 1790 | #define BTRFS_DEV_STATS_KEY 249 |
| 1694 | 1791 | ||
| 1695 | /* | 1792 | /* |
| 1793 | * Persistently stores the device replace state in the device tree. | ||
| 1794 | * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). | ||
| 1795 | */ | ||
| 1796 | #define BTRFS_DEV_REPLACE_KEY 250 | ||
| 1797 | |||
| 1798 | /* | ||
| 1696 | * string items are for debugging. They just store a short string of | 1799 | * string items are for debugging. They just store a short string of |
| 1697 | * data in the FS | 1800 | * data in the FS |
| 1698 | */ | 1801 | */ |
| @@ -1757,7 +1860,7 @@ struct btrfs_map_token { | |||
| 1757 | 1860 | ||
| 1758 | static inline void btrfs_init_map_token (struct btrfs_map_token *token) | 1861 | static inline void btrfs_init_map_token (struct btrfs_map_token *token) |
| 1759 | { | 1862 | { |
| 1760 | memset(token, 0, sizeof(*token)); | 1863 | token->kaddr = NULL; |
| 1761 | } | 1864 | } |
| 1762 | 1865 | ||
| 1763 | /* some macros to generate set/get funcs for the struct fields. This | 1866 | /* some macros to generate set/get funcs for the struct fields. This |
| @@ -1978,6 +2081,13 @@ BTRFS_SETGET_STACK_FUNCS(block_group_flags, | |||
| 1978 | BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); | 2081 | BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); |
| 1979 | BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); | 2082 | BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); |
| 1980 | 2083 | ||
| 2084 | /* struct btrfs_inode_extref */ | ||
| 2085 | BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, | ||
| 2086 | parent_objectid, 64); | ||
| 2087 | BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref, | ||
| 2088 | name_len, 16); | ||
| 2089 | BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); | ||
| 2090 | |||
| 1981 | /* struct btrfs_inode_item */ | 2091 | /* struct btrfs_inode_item */ |
| 1982 | BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); | 2092 | BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); |
| 1983 | BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); | 2093 | BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); |
| @@ -2718,6 +2828,49 @@ BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, | |||
| 2718 | BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, | 2828 | BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, |
| 2719 | rsv_excl, 64); | 2829 | rsv_excl, 64); |
| 2720 | 2830 | ||
| 2831 | /* btrfs_dev_replace_item */ | ||
| 2832 | BTRFS_SETGET_FUNCS(dev_replace_src_devid, | ||
| 2833 | struct btrfs_dev_replace_item, src_devid, 64); | ||
| 2834 | BTRFS_SETGET_FUNCS(dev_replace_cont_reading_from_srcdev_mode, | ||
| 2835 | struct btrfs_dev_replace_item, cont_reading_from_srcdev_mode, | ||
| 2836 | 64); | ||
| 2837 | BTRFS_SETGET_FUNCS(dev_replace_replace_state, struct btrfs_dev_replace_item, | ||
| 2838 | replace_state, 64); | ||
| 2839 | BTRFS_SETGET_FUNCS(dev_replace_time_started, struct btrfs_dev_replace_item, | ||
| 2840 | time_started, 64); | ||
| 2841 | BTRFS_SETGET_FUNCS(dev_replace_time_stopped, struct btrfs_dev_replace_item, | ||
| 2842 | time_stopped, 64); | ||
| 2843 | BTRFS_SETGET_FUNCS(dev_replace_num_write_errors, struct btrfs_dev_replace_item, | ||
| 2844 | num_write_errors, 64); | ||
| 2845 | BTRFS_SETGET_FUNCS(dev_replace_num_uncorrectable_read_errors, | ||
| 2846 | struct btrfs_dev_replace_item, num_uncorrectable_read_errors, | ||
| 2847 | 64); | ||
| 2848 | BTRFS_SETGET_FUNCS(dev_replace_cursor_left, struct btrfs_dev_replace_item, | ||
| 2849 | cursor_left, 64); | ||
| 2850 | BTRFS_SETGET_FUNCS(dev_replace_cursor_right, struct btrfs_dev_replace_item, | ||
| 2851 | cursor_right, 64); | ||
| 2852 | |||
| 2853 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_src_devid, | ||
| 2854 | struct btrfs_dev_replace_item, src_devid, 64); | ||
| 2855 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cont_reading_from_srcdev_mode, | ||
| 2856 | struct btrfs_dev_replace_item, | ||
| 2857 | cont_reading_from_srcdev_mode, 64); | ||
| 2858 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_replace_state, | ||
| 2859 | struct btrfs_dev_replace_item, replace_state, 64); | ||
| 2860 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_started, | ||
| 2861 | struct btrfs_dev_replace_item, time_started, 64); | ||
| 2862 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_stopped, | ||
| 2863 | struct btrfs_dev_replace_item, time_stopped, 64); | ||
| 2864 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_write_errors, | ||
| 2865 | struct btrfs_dev_replace_item, num_write_errors, 64); | ||
| 2866 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_uncorrectable_read_errors, | ||
| 2867 | struct btrfs_dev_replace_item, | ||
| 2868 | num_uncorrectable_read_errors, 64); | ||
| 2869 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_left, | ||
| 2870 | struct btrfs_dev_replace_item, cursor_left, 64); | ||
| 2871 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right, | ||
| 2872 | struct btrfs_dev_replace_item, cursor_right, 64); | ||
| 2873 | |||
| 2721 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | 2874 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
| 2722 | { | 2875 | { |
| 2723 | return sb->s_fs_info; | 2876 | return sb->s_fs_info; |
| @@ -2858,9 +3011,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 2858 | u64 size); | 3011 | u64 size); |
| 2859 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 3012 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
| 2860 | struct btrfs_root *root, u64 group_start); | 3013 | struct btrfs_root *root, u64 group_start); |
| 3014 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | ||
| 3015 | struct btrfs_root *root); | ||
| 2861 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 3016 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
| 2862 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | 3017 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); |
| 2863 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 3018 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 3019 | |||
| 3020 | enum btrfs_reserve_flush_enum { | ||
| 3021 | /* If we are in the transaction, we can't flush anything.*/ | ||
| 3022 | BTRFS_RESERVE_NO_FLUSH, | ||
| 3023 | /* | ||
| 3024 | * Flushing delalloc may cause deadlock somewhere, in this | ||
| 3025 | * case, use FLUSH LIMIT | ||
| 3026 | */ | ||
| 3027 | BTRFS_RESERVE_FLUSH_LIMIT, | ||
| 3028 | BTRFS_RESERVE_FLUSH_ALL, | ||
| 3029 | }; | ||
| 3030 | |||
| 2864 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 3031 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
| 2865 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); | 3032 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
| 2866 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 3033 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
| @@ -2874,24 +3041,19 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); | |||
| 2874 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); | 3041 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); |
| 2875 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); | 3042 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); |
| 2876 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | 3043 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); |
| 2877 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | 3044 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); |
| 2878 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | 3045 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root, |
| 3046 | unsigned short type); | ||
| 2879 | void btrfs_free_block_rsv(struct btrfs_root *root, | 3047 | void btrfs_free_block_rsv(struct btrfs_root *root, |
| 2880 | struct btrfs_block_rsv *rsv); | 3048 | struct btrfs_block_rsv *rsv); |
| 2881 | int btrfs_block_rsv_add(struct btrfs_root *root, | 3049 | int btrfs_block_rsv_add(struct btrfs_root *root, |
| 2882 | struct btrfs_block_rsv *block_rsv, | 3050 | struct btrfs_block_rsv *block_rsv, u64 num_bytes, |
| 2883 | u64 num_bytes); | 3051 | enum btrfs_reserve_flush_enum flush); |
| 2884 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, | ||
| 2885 | struct btrfs_block_rsv *block_rsv, | ||
| 2886 | u64 num_bytes); | ||
| 2887 | int btrfs_block_rsv_check(struct btrfs_root *root, | 3052 | int btrfs_block_rsv_check(struct btrfs_root *root, |
| 2888 | struct btrfs_block_rsv *block_rsv, int min_factor); | 3053 | struct btrfs_block_rsv *block_rsv, int min_factor); |
| 2889 | int btrfs_block_rsv_refill(struct btrfs_root *root, | 3054 | int btrfs_block_rsv_refill(struct btrfs_root *root, |
| 2890 | struct btrfs_block_rsv *block_rsv, | 3055 | struct btrfs_block_rsv *block_rsv, u64 min_reserved, |
| 2891 | u64 min_reserved); | 3056 | enum btrfs_reserve_flush_enum flush); |
| 2892 | int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, | ||
| 2893 | struct btrfs_block_rsv *block_rsv, | ||
| 2894 | u64 min_reserved); | ||
| 2895 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 3057 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
| 2896 | struct btrfs_block_rsv *dst_rsv, | 3058 | struct btrfs_block_rsv *dst_rsv, |
| 2897 | u64 num_bytes); | 3059 | u64 num_bytes); |
| @@ -2915,6 +3077,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); | |||
| 2915 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | 3077 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); |
| 2916 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3078 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
| 2917 | struct btrfs_fs_info *fs_info); | 3079 | struct btrfs_fs_info *fs_info); |
| 3080 | int __get_raid_index(u64 flags); | ||
| 2918 | /* ctree.c */ | 3081 | /* ctree.c */ |
| 2919 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 3082 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 2920 | int level, int *slot); | 3083 | int level, int *slot); |
| @@ -3025,6 +3188,9 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
| 3025 | } | 3188 | } |
| 3026 | 3189 | ||
| 3027 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 3190 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
| 3191 | int btrfs_next_leaf_write(struct btrfs_trans_handle *trans, | ||
| 3192 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 3193 | int del); | ||
| 3028 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, | 3194 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, |
| 3029 | u64 time_seq); | 3195 | u64 time_seq); |
| 3030 | static inline int btrfs_next_old_item(struct btrfs_root *root, | 3196 | static inline int btrfs_next_old_item(struct btrfs_root *root, |
| @@ -3080,6 +3246,7 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) | |||
| 3080 | { | 3246 | { |
| 3081 | return atomic_inc_return(&fs_info->tree_mod_seq); | 3247 | return atomic_inc_return(&fs_info->tree_mod_seq); |
| 3082 | } | 3248 | } |
| 3249 | int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); | ||
| 3083 | 3250 | ||
| 3084 | /* root-item.c */ | 3251 | /* root-item.c */ |
| 3085 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 3252 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
| @@ -3116,6 +3283,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, | |||
| 3116 | struct btrfs_root *root); | 3283 | struct btrfs_root *root); |
| 3117 | 3284 | ||
| 3118 | /* dir-item.c */ | 3285 | /* dir-item.c */ |
| 3286 | int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | ||
| 3287 | const char *name, int name_len); | ||
| 3119 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, | 3288 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, |
| 3120 | struct btrfs_root *root, const char *name, | 3289 | struct btrfs_root *root, const char *name, |
| 3121 | int name_len, struct inode *dir, | 3290 | int name_len, struct inode *dir, |
| @@ -3172,12 +3341,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 3172 | struct btrfs_root *root, | 3341 | struct btrfs_root *root, |
| 3173 | const char *name, int name_len, | 3342 | const char *name, int name_len, |
| 3174 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 3343 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
| 3175 | struct btrfs_inode_ref * | 3344 | int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans, |
| 3176 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | 3345 | struct btrfs_root *root, |
| 3177 | struct btrfs_root *root, | 3346 | struct btrfs_path *path, |
| 3178 | struct btrfs_path *path, | 3347 | const char *name, int name_len, |
| 3179 | const char *name, int name_len, | 3348 | u64 inode_objectid, u64 ref_objectid, int mod, |
| 3180 | u64 inode_objectid, u64 ref_objectid, int mod); | 3349 | u64 *ret_index); |
| 3181 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 3350 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
| 3182 | struct btrfs_root *root, | 3351 | struct btrfs_root *root, |
| 3183 | struct btrfs_path *path, u64 objectid); | 3352 | struct btrfs_path *path, u64 objectid); |
| @@ -3185,6 +3354,19 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 3185 | *root, struct btrfs_path *path, | 3354 | *root, struct btrfs_path *path, |
| 3186 | struct btrfs_key *location, int mod); | 3355 | struct btrfs_key *location, int mod); |
| 3187 | 3356 | ||
| 3357 | struct btrfs_inode_extref * | ||
| 3358 | btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, | ||
| 3359 | struct btrfs_root *root, | ||
| 3360 | struct btrfs_path *path, | ||
| 3361 | const char *name, int name_len, | ||
| 3362 | u64 inode_objectid, u64 ref_objectid, int ins_len, | ||
| 3363 | int cow); | ||
| 3364 | |||
| 3365 | int btrfs_find_name_in_ext_backref(struct btrfs_path *path, | ||
| 3366 | u64 ref_objectid, const char *name, | ||
| 3367 | int name_len, | ||
| 3368 | struct btrfs_inode_extref **extref_ret); | ||
| 3369 | |||
| 3188 | /* file-item.c */ | 3370 | /* file-item.c */ |
| 3189 | int btrfs_del_csums(struct btrfs_trans_handle *trans, | 3371 | int btrfs_del_csums(struct btrfs_trans_handle *trans, |
| 3190 | struct btrfs_root *root, u64 bytenr, u64 len); | 3372 | struct btrfs_root *root, u64 bytenr, u64 len); |
| @@ -3202,6 +3384,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
| 3202 | struct btrfs_root *root, | 3384 | struct btrfs_root *root, |
| 3203 | struct btrfs_path *path, u64 objectid, | 3385 | struct btrfs_path *path, u64 objectid, |
| 3204 | u64 bytenr, int mod); | 3386 | u64 bytenr, int mod); |
| 3387 | u64 btrfs_file_extent_length(struct btrfs_path *path); | ||
| 3205 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | 3388 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, |
| 3206 | struct btrfs_root *root, | 3389 | struct btrfs_root *root, |
| 3207 | struct btrfs_ordered_sum *sums); | 3390 | struct btrfs_ordered_sum *sums); |
| @@ -3217,6 +3400,19 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | |||
| 3217 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 3400 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
| 3218 | struct list_head *list, int search_commit); | 3401 | struct list_head *list, int search_commit); |
| 3219 | /* inode.c */ | 3402 | /* inode.c */ |
| 3403 | struct btrfs_delalloc_work { | ||
| 3404 | struct inode *inode; | ||
| 3405 | int wait; | ||
| 3406 | int delay_iput; | ||
| 3407 | struct completion completion; | ||
| 3408 | struct list_head list; | ||
| 3409 | struct btrfs_work work; | ||
| 3410 | }; | ||
| 3411 | |||
| 3412 | struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | ||
| 3413 | int wait, int delay_iput); | ||
| 3414 | void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); | ||
| 3415 | |||
| 3220 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, | 3416 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, |
| 3221 | size_t pg_offset, u64 start, u64 len, | 3417 | size_t pg_offset, u64 start, u64 len, |
| 3222 | int create); | 3418 | int create); |
| @@ -3249,6 +3445,8 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
| 3249 | struct btrfs_root *root, | 3445 | struct btrfs_root *root, |
| 3250 | struct inode *dir, u64 objectid, | 3446 | struct inode *dir, u64 objectid, |
| 3251 | const char *name, int name_len); | 3447 | const char *name, int name_len); |
| 3448 | int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, | ||
| 3449 | int front); | ||
| 3252 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | 3450 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, |
| 3253 | struct btrfs_root *root, | 3451 | struct btrfs_root *root, |
| 3254 | struct inode *inode, u64 new_size, | 3452 | struct inode *inode, u64 new_size, |
| @@ -3283,6 +3481,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
| 3283 | int btrfs_update_inode(struct btrfs_trans_handle *trans, | 3481 | int btrfs_update_inode(struct btrfs_trans_handle *trans, |
| 3284 | struct btrfs_root *root, | 3482 | struct btrfs_root *root, |
| 3285 | struct inode *inode); | 3483 | struct inode *inode); |
| 3484 | int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, | ||
| 3485 | struct btrfs_root *root, struct inode *inode); | ||
| 3286 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 3486 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
| 3287 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 3487 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
| 3288 | int btrfs_orphan_cleanup(struct btrfs_root *root); | 3488 | int btrfs_orphan_cleanup(struct btrfs_root *root); |
| @@ -3308,16 +3508,30 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | |||
| 3308 | int btrfs_defrag_file(struct inode *inode, struct file *file, | 3508 | int btrfs_defrag_file(struct inode *inode, struct file *file, |
| 3309 | struct btrfs_ioctl_defrag_range_args *range, | 3509 | struct btrfs_ioctl_defrag_range_args *range, |
| 3310 | u64 newer_than, unsigned long max_pages); | 3510 | u64 newer_than, unsigned long max_pages); |
| 3511 | void btrfs_get_block_group_info(struct list_head *groups_list, | ||
| 3512 | struct btrfs_ioctl_space_info *space); | ||
| 3513 | |||
| 3311 | /* file.c */ | 3514 | /* file.c */ |
| 3515 | int btrfs_auto_defrag_init(void); | ||
| 3516 | void btrfs_auto_defrag_exit(void); | ||
| 3312 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | 3517 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, |
| 3313 | struct inode *inode); | 3518 | struct inode *inode); |
| 3314 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | 3519 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); |
| 3520 | void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); | ||
| 3315 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); | 3521 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
| 3316 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 3522 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 3317 | int skip_pinned); | 3523 | int skip_pinned); |
| 3524 | int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, | ||
| 3525 | u64 start, u64 end, int skip_pinned, | ||
| 3526 | int modified); | ||
| 3318 | extern const struct file_operations btrfs_file_operations; | 3527 | extern const struct file_operations btrfs_file_operations; |
| 3319 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | 3528 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 3320 | u64 start, u64 end, u64 *hint_byte, int drop_cache); | 3529 | struct btrfs_root *root, struct inode *inode, |
| 3530 | struct btrfs_path *path, u64 start, u64 end, | ||
| 3531 | u64 *drop_end, int drop_cache); | ||
| 3532 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||
| 3533 | struct btrfs_root *root, struct inode *inode, u64 start, | ||
| 3534 | u64 end, int drop_cache); | ||
| 3321 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 3535 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
| 3322 | struct inode *inode, u64 start, u64 end); | 3536 | struct inode *inode, u64 start, u64 end); |
| 3323 | int btrfs_release_file(struct inode *inode, struct file *file); | 3537 | int btrfs_release_file(struct inode *inode, struct file *file); |
| @@ -3378,6 +3592,11 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, | |||
| 3378 | } | 3592 | } |
| 3379 | } | 3593 | } |
| 3380 | 3594 | ||
| 3595 | /* | ||
| 3596 | * Call btrfs_abort_transaction as early as possible when an error condition is | ||
| 3597 | * detected, that way the exact line number is reported. | ||
| 3598 | */ | ||
| 3599 | |||
| 3381 | #define btrfs_abort_transaction(trans, root, errno) \ | 3600 | #define btrfs_abort_transaction(trans, root, errno) \ |
| 3382 | do { \ | 3601 | do { \ |
| 3383 | __btrfs_abort_transaction(trans, root, __func__, \ | 3602 | __btrfs_abort_transaction(trans, root, __func__, \ |
| @@ -3445,15 +3664,16 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | |||
| 3445 | struct btrfs_pending_snapshot *pending); | 3664 | struct btrfs_pending_snapshot *pending); |
| 3446 | 3665 | ||
| 3447 | /* scrub.c */ | 3666 | /* scrub.c */ |
| 3448 | int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | 3667 | int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, |
| 3449 | struct btrfs_scrub_progress *progress, int readonly); | 3668 | u64 end, struct btrfs_scrub_progress *progress, |
| 3669 | int readonly, int is_dev_replace); | ||
| 3450 | void btrfs_scrub_pause(struct btrfs_root *root); | 3670 | void btrfs_scrub_pause(struct btrfs_root *root); |
| 3451 | void btrfs_scrub_pause_super(struct btrfs_root *root); | 3671 | void btrfs_scrub_pause_super(struct btrfs_root *root); |
| 3452 | void btrfs_scrub_continue(struct btrfs_root *root); | 3672 | void btrfs_scrub_continue(struct btrfs_root *root); |
| 3453 | void btrfs_scrub_continue_super(struct btrfs_root *root); | 3673 | void btrfs_scrub_continue_super(struct btrfs_root *root); |
| 3454 | int __btrfs_scrub_cancel(struct btrfs_fs_info *info); | 3674 | int btrfs_scrub_cancel(struct btrfs_fs_info *info); |
| 3455 | int btrfs_scrub_cancel(struct btrfs_root *root); | 3675 | int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, |
| 3456 | int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev); | 3676 | struct btrfs_device *dev); |
| 3457 | int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); | 3677 | int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); |
| 3458 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | 3678 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, |
| 3459 | struct btrfs_scrub_progress *progress); | 3679 | struct btrfs_scrub_progress *progress); |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 52c85e2b95d0..34836036f01b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -29,7 +29,7 @@ static struct kmem_cache *delayed_node_cache; | |||
| 29 | 29 | ||
| 30 | int __init btrfs_delayed_inode_init(void) | 30 | int __init btrfs_delayed_inode_init(void) |
| 31 | { | 31 | { |
| 32 | delayed_node_cache = kmem_cache_create("delayed_node", | 32 | delayed_node_cache = kmem_cache_create("btrfs_delayed_node", |
| 33 | sizeof(struct btrfs_delayed_node), | 33 | sizeof(struct btrfs_delayed_node), |
| 34 | 0, | 34 | 0, |
| 35 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | 35 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, |
| @@ -650,8 +650,9 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 650 | * we're accounted for. | 650 | * we're accounted for. |
| 651 | */ | 651 | */ |
| 652 | if (!src_rsv || (!trans->bytes_reserved && | 652 | if (!src_rsv || (!trans->bytes_reserved && |
| 653 | src_rsv != &root->fs_info->delalloc_block_rsv)) { | 653 | src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { |
| 654 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); | 654 | ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, |
| 655 | BTRFS_RESERVE_NO_FLUSH); | ||
| 655 | /* | 656 | /* |
| 656 | * Since we're under a transaction reserve_metadata_bytes could | 657 | * Since we're under a transaction reserve_metadata_bytes could |
| 657 | * try to commit the transaction which will make it return | 658 | * try to commit the transaction which will make it return |
| @@ -668,7 +669,7 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 668 | num_bytes, 1); | 669 | num_bytes, 1); |
| 669 | } | 670 | } |
| 670 | return ret; | 671 | return ret; |
| 671 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { | 672 | } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { |
| 672 | spin_lock(&BTRFS_I(inode)->lock); | 673 | spin_lock(&BTRFS_I(inode)->lock); |
| 673 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | 674 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
| 674 | &BTRFS_I(inode)->runtime_flags)) { | 675 | &BTRFS_I(inode)->runtime_flags)) { |
| @@ -686,7 +687,8 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 686 | * reserve something strictly for us. If not be a pain and try | 687 | * reserve something strictly for us. If not be a pain and try |
| 687 | * to steal from the delalloc block rsv. | 688 | * to steal from the delalloc block rsv. |
| 688 | */ | 689 | */ |
| 689 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); | 690 | ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, |
| 691 | BTRFS_RESERVE_NO_FLUSH); | ||
| 690 | if (!ret) | 692 | if (!ret) |
| 691 | goto out; | 693 | goto out; |
| 692 | 694 | ||
| @@ -1255,7 +1257,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
| 1255 | struct btrfs_delayed_node *delayed_node = NULL; | 1257 | struct btrfs_delayed_node *delayed_node = NULL; |
| 1256 | struct btrfs_root *root; | 1258 | struct btrfs_root *root; |
| 1257 | struct btrfs_block_rsv *block_rsv; | 1259 | struct btrfs_block_rsv *block_rsv; |
| 1258 | unsigned long nr = 0; | ||
| 1259 | int need_requeue = 0; | 1260 | int need_requeue = 0; |
| 1260 | int ret; | 1261 | int ret; |
| 1261 | 1262 | ||
| @@ -1316,11 +1317,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
| 1316 | delayed_node); | 1317 | delayed_node); |
| 1317 | mutex_unlock(&delayed_node->mutex); | 1318 | mutex_unlock(&delayed_node->mutex); |
| 1318 | 1319 | ||
| 1319 | nr = trans->blocks_used; | ||
| 1320 | |||
| 1321 | trans->block_rsv = block_rsv; | 1320 | trans->block_rsv = block_rsv; |
| 1322 | btrfs_end_transaction_dmeta(trans, root); | 1321 | btrfs_end_transaction_dmeta(trans, root); |
| 1323 | __btrfs_btree_balance_dirty(root, nr); | 1322 | btrfs_btree_balance_dirty_nodelay(root); |
| 1324 | free_path: | 1323 | free_path: |
| 1325 | btrfs_free_path(path); | 1324 | btrfs_free_path(path); |
| 1326 | out: | 1325 | out: |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c new file mode 100644 index 000000000000..66dbc8dbddf7 --- /dev/null +++ b/fs/btrfs/dev-replace.c | |||
| @@ -0,0 +1,856 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) STRATO AG 2012. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | #include <linux/sched.h> | ||
| 19 | #include <linux/bio.h> | ||
| 20 | #include <linux/slab.h> | ||
| 21 | #include <linux/buffer_head.h> | ||
| 22 | #include <linux/blkdev.h> | ||
| 23 | #include <linux/random.h> | ||
| 24 | #include <linux/iocontext.h> | ||
| 25 | #include <linux/capability.h> | ||
| 26 | #include <linux/kthread.h> | ||
| 27 | #include <linux/math64.h> | ||
| 28 | #include <asm/div64.h> | ||
| 29 | #include "compat.h" | ||
| 30 | #include "ctree.h" | ||
| 31 | #include "extent_map.h" | ||
| 32 | #include "disk-io.h" | ||
| 33 | #include "transaction.h" | ||
| 34 | #include "print-tree.h" | ||
| 35 | #include "volumes.h" | ||
| 36 | #include "async-thread.h" | ||
| 37 | #include "check-integrity.h" | ||
| 38 | #include "rcu-string.h" | ||
| 39 | #include "dev-replace.h" | ||
| 40 | |||
| 41 | static u64 btrfs_get_seconds_since_1970(void); | ||
| 42 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | ||
| 43 | int scrub_ret); | ||
| 44 | static void btrfs_dev_replace_update_device_in_mapping_tree( | ||
| 45 | struct btrfs_fs_info *fs_info, | ||
| 46 | struct btrfs_device *srcdev, | ||
| 47 | struct btrfs_device *tgtdev); | ||
| 48 | static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid, | ||
| 49 | char *srcdev_name, | ||
| 50 | struct btrfs_device **device); | ||
| 51 | static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info); | ||
| 52 | static int btrfs_dev_replace_kthread(void *data); | ||
| 53 | static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info); | ||
| 54 | |||
| 55 | |||
| 56 | int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) | ||
| 57 | { | ||
| 58 | struct btrfs_key key; | ||
| 59 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
| 60 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 61 | struct extent_buffer *eb; | ||
| 62 | int slot; | ||
| 63 | int ret = 0; | ||
| 64 | struct btrfs_path *path = NULL; | ||
| 65 | int item_size; | ||
| 66 | struct btrfs_dev_replace_item *ptr; | ||
| 67 | u64 src_devid; | ||
| 68 | |||
| 69 | path = btrfs_alloc_path(); | ||
| 70 | if (!path) { | ||
| 71 | ret = -ENOMEM; | ||
| 72 | goto out; | ||
| 73 | } | ||
| 74 | |||
| 75 | key.objectid = 0; | ||
| 76 | key.type = BTRFS_DEV_REPLACE_KEY; | ||
| 77 | key.offset = 0; | ||
| 78 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); | ||
| 79 | if (ret) { | ||
| 80 | no_valid_dev_replace_entry_found: | ||
| 81 | ret = 0; | ||
| 82 | dev_replace->replace_state = | ||
| 83 | BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED; | ||
| 84 | dev_replace->cont_reading_from_srcdev_mode = | ||
| 85 | BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS; | ||
| 86 | dev_replace->replace_state = 0; | ||
| 87 | dev_replace->time_started = 0; | ||
| 88 | dev_replace->time_stopped = 0; | ||
| 89 | atomic64_set(&dev_replace->num_write_errors, 0); | ||
| 90 | atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); | ||
| 91 | dev_replace->cursor_left = 0; | ||
| 92 | dev_replace->committed_cursor_left = 0; | ||
| 93 | dev_replace->cursor_left_last_write_of_item = 0; | ||
| 94 | dev_replace->cursor_right = 0; | ||
| 95 | dev_replace->srcdev = NULL; | ||
| 96 | dev_replace->tgtdev = NULL; | ||
| 97 | dev_replace->is_valid = 0; | ||
| 98 | dev_replace->item_needs_writeback = 0; | ||
| 99 | goto out; | ||
| 100 | } | ||
| 101 | slot = path->slots[0]; | ||
| 102 | eb = path->nodes[0]; | ||
| 103 | item_size = btrfs_item_size_nr(eb, slot); | ||
| 104 | ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item); | ||
| 105 | |||
| 106 | if (item_size != sizeof(struct btrfs_dev_replace_item)) { | ||
| 107 | pr_warn("btrfs: dev_replace entry found has unexpected size, ignore entry\n"); | ||
| 108 | goto no_valid_dev_replace_entry_found; | ||
| 109 | } | ||
| 110 | |||
| 111 | src_devid = btrfs_dev_replace_src_devid(eb, ptr); | ||
| 112 | dev_replace->cont_reading_from_srcdev_mode = | ||
| 113 | btrfs_dev_replace_cont_reading_from_srcdev_mode(eb, ptr); | ||
| 114 | dev_replace->replace_state = btrfs_dev_replace_replace_state(eb, ptr); | ||
| 115 | dev_replace->time_started = btrfs_dev_replace_time_started(eb, ptr); | ||
| 116 | dev_replace->time_stopped = | ||
| 117 | btrfs_dev_replace_time_stopped(eb, ptr); | ||
| 118 | atomic64_set(&dev_replace->num_write_errors, | ||
| 119 | btrfs_dev_replace_num_write_errors(eb, ptr)); | ||
| 120 | atomic64_set(&dev_replace->num_uncorrectable_read_errors, | ||
| 121 | btrfs_dev_replace_num_uncorrectable_read_errors(eb, ptr)); | ||
| 122 | dev_replace->cursor_left = btrfs_dev_replace_cursor_left(eb, ptr); | ||
| 123 | dev_replace->committed_cursor_left = dev_replace->cursor_left; | ||
| 124 | dev_replace->cursor_left_last_write_of_item = dev_replace->cursor_left; | ||
| 125 | dev_replace->cursor_right = btrfs_dev_replace_cursor_right(eb, ptr); | ||
| 126 | dev_replace->is_valid = 1; | ||
| 127 | |||
| 128 | dev_replace->item_needs_writeback = 0; | ||
| 129 | switch (dev_replace->replace_state) { | ||
| 130 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 131 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 132 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 133 | dev_replace->srcdev = NULL; | ||
| 134 | dev_replace->tgtdev = NULL; | ||
| 135 | break; | ||
| 136 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 137 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 138 | dev_replace->srcdev = btrfs_find_device(fs_info, src_devid, | ||
| 139 | NULL, NULL); | ||
| 140 | dev_replace->tgtdev = btrfs_find_device(fs_info, | ||
| 141 | BTRFS_DEV_REPLACE_DEVID, | ||
| 142 | NULL, NULL); | ||
| 143 | /* | ||
| 144 | * allow 'btrfs dev replace_cancel' if src/tgt device is | ||
| 145 | * missing | ||
| 146 | */ | ||
| 147 | if (!dev_replace->srcdev && | ||
| 148 | !btrfs_test_opt(dev_root, DEGRADED)) { | ||
| 149 | ret = -EIO; | ||
| 150 | pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", | ||
| 151 | (unsigned long long)src_devid); | ||
| 152 | } | ||
| 153 | if (!dev_replace->tgtdev && | ||
| 154 | !btrfs_test_opt(dev_root, DEGRADED)) { | ||
| 155 | ret = -EIO; | ||
| 156 | pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", | ||
| 157 | (unsigned long long)BTRFS_DEV_REPLACE_DEVID); | ||
| 158 | } | ||
| 159 | if (dev_replace->tgtdev) { | ||
| 160 | if (dev_replace->srcdev) { | ||
| 161 | dev_replace->tgtdev->total_bytes = | ||
| 162 | dev_replace->srcdev->total_bytes; | ||
| 163 | dev_replace->tgtdev->disk_total_bytes = | ||
| 164 | dev_replace->srcdev->disk_total_bytes; | ||
| 165 | dev_replace->tgtdev->bytes_used = | ||
| 166 | dev_replace->srcdev->bytes_used; | ||
| 167 | } | ||
| 168 | dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; | ||
| 169 | btrfs_init_dev_replace_tgtdev_for_resume(fs_info, | ||
| 170 | dev_replace->tgtdev); | ||
| 171 | } | ||
| 172 | break; | ||
| 173 | } | ||
| 174 | |||
| 175 | out: | ||
| 176 | if (path) | ||
| 177 | btrfs_free_path(path); | ||
| 178 | return ret; | ||
| 179 | } | ||
| 180 | |||
| 181 | /* | ||
| 182 | * called from commit_transaction. Writes changed device replace state to | ||
| 183 | * disk. | ||
| 184 | */ | ||
| 185 | int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, | ||
| 186 | struct btrfs_fs_info *fs_info) | ||
| 187 | { | ||
| 188 | int ret; | ||
| 189 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
| 190 | struct btrfs_path *path; | ||
| 191 | struct btrfs_key key; | ||
| 192 | struct extent_buffer *eb; | ||
| 193 | struct btrfs_dev_replace_item *ptr; | ||
| 194 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 195 | |||
| 196 | btrfs_dev_replace_lock(dev_replace); | ||
| 197 | if (!dev_replace->is_valid || | ||
| 198 | !dev_replace->item_needs_writeback) { | ||
| 199 | btrfs_dev_replace_unlock(dev_replace); | ||
| 200 | return 0; | ||
| 201 | } | ||
| 202 | btrfs_dev_replace_unlock(dev_replace); | ||
| 203 | |||
| 204 | key.objectid = 0; | ||
| 205 | key.type = BTRFS_DEV_REPLACE_KEY; | ||
| 206 | key.offset = 0; | ||
| 207 | |||
| 208 | path = btrfs_alloc_path(); | ||
| 209 | if (!path) { | ||
| 210 | ret = -ENOMEM; | ||
| 211 | goto out; | ||
| 212 | } | ||
| 213 | ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); | ||
| 214 | if (ret < 0) { | ||
| 215 | pr_warn("btrfs: error %d while searching for dev_replace item!\n", | ||
| 216 | ret); | ||
| 217 | goto out; | ||
| 218 | } | ||
| 219 | |||
| 220 | if (ret == 0 && | ||
| 221 | btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) { | ||
| 222 | /* | ||
| 223 | * need to delete old one and insert a new one. | ||
| 224 | * Since no attempt is made to recover any old state, if the | ||
| 225 | * dev_replace state is 'running', the data on the target | ||
| 226 | * drive is lost. | ||
| 227 | * It would be possible to recover the state: just make sure | ||
| 228 | * that the beginning of the item is never changed and always | ||
| 229 | * contains all the essential information. Then read this | ||
| 230 | * minimal set of information and use it as a base for the | ||
| 231 | * new state. | ||
| 232 | */ | ||
| 233 | ret = btrfs_del_item(trans, dev_root, path); | ||
| 234 | if (ret != 0) { | ||
| 235 | pr_warn("btrfs: delete too small dev_replace item failed %d!\n", | ||
| 236 | ret); | ||
| 237 | goto out; | ||
| 238 | } | ||
| 239 | ret = 1; | ||
| 240 | } | ||
| 241 | |||
| 242 | if (ret == 1) { | ||
| 243 | /* need to insert a new item */ | ||
| 244 | btrfs_release_path(path); | ||
| 245 | ret = btrfs_insert_empty_item(trans, dev_root, path, | ||
| 246 | &key, sizeof(*ptr)); | ||
| 247 | if (ret < 0) { | ||
| 248 | pr_warn("btrfs: insert dev_replace item failed %d!\n", | ||
| 249 | ret); | ||
| 250 | goto out; | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | eb = path->nodes[0]; | ||
| 255 | ptr = btrfs_item_ptr(eb, path->slots[0], | ||
| 256 | struct btrfs_dev_replace_item); | ||
| 257 | |||
| 258 | btrfs_dev_replace_lock(dev_replace); | ||
| 259 | if (dev_replace->srcdev) | ||
| 260 | btrfs_set_dev_replace_src_devid(eb, ptr, | ||
| 261 | dev_replace->srcdev->devid); | ||
| 262 | else | ||
| 263 | btrfs_set_dev_replace_src_devid(eb, ptr, (u64)-1); | ||
| 264 | btrfs_set_dev_replace_cont_reading_from_srcdev_mode(eb, ptr, | ||
| 265 | dev_replace->cont_reading_from_srcdev_mode); | ||
| 266 | btrfs_set_dev_replace_replace_state(eb, ptr, | ||
| 267 | dev_replace->replace_state); | ||
| 268 | btrfs_set_dev_replace_time_started(eb, ptr, dev_replace->time_started); | ||
| 269 | btrfs_set_dev_replace_time_stopped(eb, ptr, dev_replace->time_stopped); | ||
| 270 | btrfs_set_dev_replace_num_write_errors(eb, ptr, | ||
| 271 | atomic64_read(&dev_replace->num_write_errors)); | ||
| 272 | btrfs_set_dev_replace_num_uncorrectable_read_errors(eb, ptr, | ||
| 273 | atomic64_read(&dev_replace->num_uncorrectable_read_errors)); | ||
| 274 | dev_replace->cursor_left_last_write_of_item = | ||
| 275 | dev_replace->cursor_left; | ||
| 276 | btrfs_set_dev_replace_cursor_left(eb, ptr, | ||
| 277 | dev_replace->cursor_left_last_write_of_item); | ||
| 278 | btrfs_set_dev_replace_cursor_right(eb, ptr, | ||
| 279 | dev_replace->cursor_right); | ||
| 280 | dev_replace->item_needs_writeback = 0; | ||
| 281 | btrfs_dev_replace_unlock(dev_replace); | ||
| 282 | |||
| 283 | btrfs_mark_buffer_dirty(eb); | ||
| 284 | |||
| 285 | out: | ||
| 286 | btrfs_free_path(path); | ||
| 287 | |||
| 288 | return ret; | ||
| 289 | } | ||
| 290 | |||
| 291 | void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) | ||
| 292 | { | ||
| 293 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 294 | |||
| 295 | dev_replace->committed_cursor_left = | ||
| 296 | dev_replace->cursor_left_last_write_of_item; | ||
| 297 | } | ||
| 298 | |||
| 299 | static u64 btrfs_get_seconds_since_1970(void) | ||
| 300 | { | ||
| 301 | struct timespec t = CURRENT_TIME_SEC; | ||
| 302 | |||
| 303 | return t.tv_sec; | ||
| 304 | } | ||
| 305 | |||
| 306 | int btrfs_dev_replace_start(struct btrfs_root *root, | ||
| 307 | struct btrfs_ioctl_dev_replace_args *args) | ||
| 308 | { | ||
| 309 | struct btrfs_trans_handle *trans; | ||
| 310 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 311 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 312 | int ret; | ||
| 313 | struct btrfs_device *tgt_device = NULL; | ||
| 314 | struct btrfs_device *src_device = NULL; | ||
| 315 | |||
| 316 | switch (args->start.cont_reading_from_srcdev_mode) { | ||
| 317 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: | ||
| 318 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: | ||
| 319 | break; | ||
| 320 | default: | ||
| 321 | return -EINVAL; | ||
| 322 | } | ||
| 323 | |||
| 324 | if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || | ||
| 325 | args->start.tgtdev_name[0] == '\0') | ||
| 326 | return -EINVAL; | ||
| 327 | |||
| 328 | mutex_lock(&fs_info->volume_mutex); | ||
| 329 | ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, | ||
| 330 | &tgt_device); | ||
| 331 | if (ret) { | ||
| 332 | pr_err("btrfs: target device %s is invalid!\n", | ||
| 333 | args->start.tgtdev_name); | ||
| 334 | mutex_unlock(&fs_info->volume_mutex); | ||
| 335 | return -EINVAL; | ||
| 336 | } | ||
| 337 | |||
| 338 | ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, | ||
| 339 | args->start.srcdev_name, | ||
| 340 | &src_device); | ||
| 341 | mutex_unlock(&fs_info->volume_mutex); | ||
| 342 | if (ret) { | ||
| 343 | ret = -EINVAL; | ||
| 344 | goto leave_no_lock; | ||
| 345 | } | ||
| 346 | |||
| 347 | if (tgt_device->total_bytes < src_device->total_bytes) { | ||
| 348 | pr_err("btrfs: target device is smaller than source device!\n"); | ||
| 349 | ret = -EINVAL; | ||
| 350 | goto leave_no_lock; | ||
| 351 | } | ||
| 352 | |||
| 353 | btrfs_dev_replace_lock(dev_replace); | ||
| 354 | switch (dev_replace->replace_state) { | ||
| 355 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 356 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 357 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 358 | break; | ||
| 359 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 360 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 361 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; | ||
| 362 | goto leave; | ||
| 363 | } | ||
| 364 | |||
| 365 | dev_replace->cont_reading_from_srcdev_mode = | ||
| 366 | args->start.cont_reading_from_srcdev_mode; | ||
| 367 | WARN_ON(!src_device); | ||
| 368 | dev_replace->srcdev = src_device; | ||
| 369 | WARN_ON(!tgt_device); | ||
| 370 | dev_replace->tgtdev = tgt_device; | ||
| 371 | |||
| 372 | printk_in_rcu(KERN_INFO | ||
| 373 | "btrfs: dev_replace from %s (devid %llu) to %s) started\n", | ||
| 374 | src_device->missing ? "<missing disk>" : | ||
| 375 | rcu_str_deref(src_device->name), | ||
| 376 | src_device->devid, | ||
| 377 | rcu_str_deref(tgt_device->name)); | ||
| 378 | |||
| 379 | tgt_device->total_bytes = src_device->total_bytes; | ||
| 380 | tgt_device->disk_total_bytes = src_device->disk_total_bytes; | ||
| 381 | tgt_device->bytes_used = src_device->bytes_used; | ||
| 382 | |||
| 383 | /* | ||
| 384 | * from now on, the writes to the srcdev are all duplicated to | ||
| 385 | * go to the tgtdev as well (refer to btrfs_map_block()). | ||
| 386 | */ | ||
| 387 | dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; | ||
| 388 | dev_replace->time_started = btrfs_get_seconds_since_1970(); | ||
| 389 | dev_replace->cursor_left = 0; | ||
| 390 | dev_replace->committed_cursor_left = 0; | ||
| 391 | dev_replace->cursor_left_last_write_of_item = 0; | ||
| 392 | dev_replace->cursor_right = 0; | ||
| 393 | dev_replace->is_valid = 1; | ||
| 394 | dev_replace->item_needs_writeback = 1; | ||
| 395 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | ||
| 396 | btrfs_dev_replace_unlock(dev_replace); | ||
| 397 | |||
| 398 | btrfs_wait_ordered_extents(root, 0); | ||
| 399 | |||
| 400 | /* force writing the updated state information to disk */ | ||
| 401 | trans = btrfs_start_transaction(root, 0); | ||
| 402 | if (IS_ERR(trans)) { | ||
| 403 | ret = PTR_ERR(trans); | ||
| 404 | btrfs_dev_replace_lock(dev_replace); | ||
| 405 | goto leave; | ||
| 406 | } | ||
| 407 | |||
| 408 | ret = btrfs_commit_transaction(trans, root); | ||
| 409 | WARN_ON(ret); | ||
| 410 | |||
| 411 | /* the disk copy procedure reuses the scrub code */ | ||
| 412 | ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, | ||
| 413 | src_device->total_bytes, | ||
| 414 | &dev_replace->scrub_progress, 0, 1); | ||
| 415 | |||
| 416 | ret = btrfs_dev_replace_finishing(root->fs_info, ret); | ||
| 417 | WARN_ON(ret); | ||
| 418 | |||
| 419 | return 0; | ||
| 420 | |||
| 421 | leave: | ||
| 422 | dev_replace->srcdev = NULL; | ||
| 423 | dev_replace->tgtdev = NULL; | ||
| 424 | btrfs_dev_replace_unlock(dev_replace); | ||
| 425 | leave_no_lock: | ||
| 426 | if (tgt_device) | ||
| 427 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | ||
| 428 | return ret; | ||
| 429 | } | ||
| 430 | |||
| 431 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | ||
| 432 | int scrub_ret) | ||
| 433 | { | ||
| 434 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 435 | struct btrfs_device *tgt_device; | ||
| 436 | struct btrfs_device *src_device; | ||
| 437 | struct btrfs_root *root = fs_info->tree_root; | ||
| 438 | u8 uuid_tmp[BTRFS_UUID_SIZE]; | ||
| 439 | struct btrfs_trans_handle *trans; | ||
| 440 | int ret = 0; | ||
| 441 | |||
| 442 | /* don't allow cancel or unmount to disturb the finishing procedure */ | ||
| 443 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 444 | |||
| 445 | btrfs_dev_replace_lock(dev_replace); | ||
| 446 | /* was the operation canceled, or is it finished? */ | ||
| 447 | if (dev_replace->replace_state != | ||
| 448 | BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) { | ||
| 449 | btrfs_dev_replace_unlock(dev_replace); | ||
| 450 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 451 | return 0; | ||
| 452 | } | ||
| 453 | |||
| 454 | tgt_device = dev_replace->tgtdev; | ||
| 455 | src_device = dev_replace->srcdev; | ||
| 456 | btrfs_dev_replace_unlock(dev_replace); | ||
| 457 | |||
| 458 | /* replace old device with new one in mapping tree */ | ||
| 459 | if (!scrub_ret) | ||
| 460 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
| 461 | src_device, | ||
| 462 | tgt_device); | ||
| 463 | |||
| 464 | /* | ||
| 465 | * flush all outstanding I/O and inode extent mappings before the | ||
| 466 | * copy operation is declared as being finished | ||
| 467 | */ | ||
| 468 | btrfs_start_delalloc_inodes(root, 0); | ||
| 469 | btrfs_wait_ordered_extents(root, 0); | ||
| 470 | |||
| 471 | trans = btrfs_start_transaction(root, 0); | ||
| 472 | if (IS_ERR(trans)) { | ||
| 473 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 474 | return PTR_ERR(trans); | ||
| 475 | } | ||
| 476 | ret = btrfs_commit_transaction(trans, root); | ||
| 477 | WARN_ON(ret); | ||
| 478 | |||
| 479 | /* keep away write_all_supers() during the finishing procedure */ | ||
| 480 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 481 | btrfs_dev_replace_lock(dev_replace); | ||
| 482 | dev_replace->replace_state = | ||
| 483 | scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED | ||
| 484 | : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; | ||
| 485 | dev_replace->tgtdev = NULL; | ||
| 486 | dev_replace->srcdev = NULL; | ||
| 487 | dev_replace->time_stopped = btrfs_get_seconds_since_1970(); | ||
| 488 | dev_replace->item_needs_writeback = 1; | ||
| 489 | |||
| 490 | if (scrub_ret) { | ||
| 491 | printk_in_rcu(KERN_ERR | ||
| 492 | "btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", | ||
| 493 | src_device->missing ? "<missing disk>" : | ||
| 494 | rcu_str_deref(src_device->name), | ||
| 495 | src_device->devid, | ||
| 496 | rcu_str_deref(tgt_device->name), scrub_ret); | ||
| 497 | btrfs_dev_replace_unlock(dev_replace); | ||
| 498 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 499 | if (tgt_device) | ||
| 500 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | ||
| 501 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 502 | |||
| 503 | return 0; | ||
| 504 | } | ||
| 505 | |||
| 506 | printk_in_rcu(KERN_INFO | ||
| 507 | "btrfs: dev_replace from %s (devid %llu) to %s) finished\n", | ||
| 508 | src_device->missing ? "<missing disk>" : | ||
| 509 | rcu_str_deref(src_device->name), | ||
| 510 | src_device->devid, | ||
| 511 | rcu_str_deref(tgt_device->name)); | ||
| 512 | tgt_device->is_tgtdev_for_dev_replace = 0; | ||
| 513 | tgt_device->devid = src_device->devid; | ||
| 514 | src_device->devid = BTRFS_DEV_REPLACE_DEVID; | ||
| 515 | tgt_device->bytes_used = src_device->bytes_used; | ||
| 516 | memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); | ||
| 517 | memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid)); | ||
| 518 | memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid)); | ||
| 519 | tgt_device->total_bytes = src_device->total_bytes; | ||
| 520 | tgt_device->disk_total_bytes = src_device->disk_total_bytes; | ||
| 521 | tgt_device->bytes_used = src_device->bytes_used; | ||
| 522 | if (fs_info->sb->s_bdev == src_device->bdev) | ||
| 523 | fs_info->sb->s_bdev = tgt_device->bdev; | ||
| 524 | if (fs_info->fs_devices->latest_bdev == src_device->bdev) | ||
| 525 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | ||
| 526 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | ||
| 527 | |||
| 528 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | ||
| 529 | if (src_device->bdev) { | ||
| 530 | /* zero out the old super */ | ||
| 531 | btrfs_scratch_superblock(src_device); | ||
| 532 | } | ||
| 533 | /* | ||
| 534 | * this is again a consistent state where no dev_replace procedure | ||
| 535 | * is running, the target device is part of the filesystem, the | ||
| 536 | * source device is not part of the filesystem anymore and its 1st | ||
| 537 | * superblock is scratched out so that it is no longer marked to | ||
| 538 | * belong to this filesystem. | ||
| 539 | */ | ||
| 540 | btrfs_dev_replace_unlock(dev_replace); | ||
| 541 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 542 | |||
| 543 | /* write back the superblocks */ | ||
| 544 | trans = btrfs_start_transaction(root, 0); | ||
| 545 | if (!IS_ERR(trans)) | ||
| 546 | btrfs_commit_transaction(trans, root); | ||
| 547 | |||
| 548 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 549 | |||
| 550 | return 0; | ||
| 551 | } | ||
| 552 | |||
| 553 | static void btrfs_dev_replace_update_device_in_mapping_tree( | ||
| 554 | struct btrfs_fs_info *fs_info, | ||
| 555 | struct btrfs_device *srcdev, | ||
| 556 | struct btrfs_device *tgtdev) | ||
| 557 | { | ||
| 558 | struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; | ||
| 559 | struct extent_map *em; | ||
| 560 | struct map_lookup *map; | ||
| 561 | u64 start = 0; | ||
| 562 | int i; | ||
| 563 | |||
| 564 | write_lock(&em_tree->lock); | ||
| 565 | do { | ||
| 566 | em = lookup_extent_mapping(em_tree, start, (u64)-1); | ||
| 567 | if (!em) | ||
| 568 | break; | ||
| 569 | map = (struct map_lookup *)em->bdev; | ||
| 570 | for (i = 0; i < map->num_stripes; i++) | ||
| 571 | if (srcdev == map->stripes[i].dev) | ||
| 572 | map->stripes[i].dev = tgtdev; | ||
| 573 | start = em->start + em->len; | ||
| 574 | free_extent_map(em); | ||
| 575 | } while (start); | ||
| 576 | write_unlock(&em_tree->lock); | ||
| 577 | } | ||
| 578 | |||
| 579 | static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid, | ||
| 580 | char *srcdev_name, | ||
| 581 | struct btrfs_device **device) | ||
| 582 | { | ||
| 583 | int ret; | ||
| 584 | |||
| 585 | if (srcdevid) { | ||
| 586 | ret = 0; | ||
| 587 | *device = btrfs_find_device(root->fs_info, srcdevid, NULL, | ||
| 588 | NULL); | ||
| 589 | if (!*device) | ||
| 590 | ret = -ENOENT; | ||
| 591 | } else { | ||
| 592 | ret = btrfs_find_device_missing_or_by_path(root, srcdev_name, | ||
| 593 | device); | ||
| 594 | } | ||
| 595 | return ret; | ||
| 596 | } | ||
| 597 | |||
| 598 | void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | ||
| 599 | struct btrfs_ioctl_dev_replace_args *args) | ||
| 600 | { | ||
| 601 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 602 | |||
| 603 | btrfs_dev_replace_lock(dev_replace); | ||
| 604 | /* even if !dev_replace_is_valid, the values are good enough for | ||
| 605 | * the replace_status ioctl */ | ||
| 606 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | ||
| 607 | args->status.replace_state = dev_replace->replace_state; | ||
| 608 | args->status.time_started = dev_replace->time_started; | ||
| 609 | args->status.time_stopped = dev_replace->time_stopped; | ||
| 610 | args->status.num_write_errors = | ||
| 611 | atomic64_read(&dev_replace->num_write_errors); | ||
| 612 | args->status.num_uncorrectable_read_errors = | ||
| 613 | atomic64_read(&dev_replace->num_uncorrectable_read_errors); | ||
| 614 | switch (dev_replace->replace_state) { | ||
| 615 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 616 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 617 | args->status.progress_1000 = 0; | ||
| 618 | break; | ||
| 619 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 620 | args->status.progress_1000 = 1000; | ||
| 621 | break; | ||
| 622 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 623 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 624 | args->status.progress_1000 = div64_u64(dev_replace->cursor_left, | ||
| 625 | div64_u64(dev_replace->srcdev->total_bytes, 1000)); | ||
| 626 | break; | ||
| 627 | } | ||
| 628 | btrfs_dev_replace_unlock(dev_replace); | ||
| 629 | } | ||
| 630 | |||
| 631 | int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, | ||
| 632 | struct btrfs_ioctl_dev_replace_args *args) | ||
| 633 | { | ||
| 634 | args->result = __btrfs_dev_replace_cancel(fs_info); | ||
| 635 | return 0; | ||
| 636 | } | ||
| 637 | |||
| 638 | static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) | ||
| 639 | { | ||
| 640 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 641 | struct btrfs_device *tgt_device = NULL; | ||
| 642 | struct btrfs_trans_handle *trans; | ||
| 643 | struct btrfs_root *root = fs_info->tree_root; | ||
| 644 | u64 result; | ||
| 645 | int ret; | ||
| 646 | |||
| 647 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 648 | btrfs_dev_replace_lock(dev_replace); | ||
| 649 | switch (dev_replace->replace_state) { | ||
| 650 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 651 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 652 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 653 | result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; | ||
| 654 | btrfs_dev_replace_unlock(dev_replace); | ||
| 655 | goto leave; | ||
| 656 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 657 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 658 | result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | ||
| 659 | tgt_device = dev_replace->tgtdev; | ||
| 660 | dev_replace->tgtdev = NULL; | ||
| 661 | dev_replace->srcdev = NULL; | ||
| 662 | break; | ||
| 663 | } | ||
| 664 | dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; | ||
| 665 | dev_replace->time_stopped = btrfs_get_seconds_since_1970(); | ||
| 666 | dev_replace->item_needs_writeback = 1; | ||
| 667 | btrfs_dev_replace_unlock(dev_replace); | ||
| 668 | btrfs_scrub_cancel(fs_info); | ||
| 669 | |||
| 670 | trans = btrfs_start_transaction(root, 0); | ||
| 671 | if (IS_ERR(trans)) { | ||
| 672 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 673 | return PTR_ERR(trans); | ||
| 674 | } | ||
| 675 | ret = btrfs_commit_transaction(trans, root); | ||
| 676 | WARN_ON(ret); | ||
| 677 | if (tgt_device) | ||
| 678 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | ||
| 679 | |||
| 680 | leave: | ||
| 681 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 682 | return result; | ||
| 683 | } | ||
| 684 | |||
| 685 | void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) | ||
| 686 | { | ||
| 687 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 688 | |||
| 689 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 690 | btrfs_dev_replace_lock(dev_replace); | ||
| 691 | switch (dev_replace->replace_state) { | ||
| 692 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 693 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 694 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 695 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 696 | break; | ||
| 697 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 698 | dev_replace->replace_state = | ||
| 699 | BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; | ||
| 700 | dev_replace->time_stopped = btrfs_get_seconds_since_1970(); | ||
| 701 | dev_replace->item_needs_writeback = 1; | ||
| 702 | pr_info("btrfs: suspending dev_replace for unmount\n"); | ||
| 703 | break; | ||
| 704 | } | ||
| 705 | |||
| 706 | btrfs_dev_replace_unlock(dev_replace); | ||
| 707 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 708 | } | ||
| 709 | |||
| 710 | /* resume dev_replace procedure that was interrupted by unmount */ | ||
| 711 | int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) | ||
| 712 | { | ||
| 713 | struct task_struct *task; | ||
| 714 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 715 | |||
| 716 | btrfs_dev_replace_lock(dev_replace); | ||
| 717 | switch (dev_replace->replace_state) { | ||
| 718 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 719 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 720 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 721 | btrfs_dev_replace_unlock(dev_replace); | ||
| 722 | return 0; | ||
| 723 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 724 | break; | ||
| 725 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 726 | dev_replace->replace_state = | ||
| 727 | BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; | ||
| 728 | break; | ||
| 729 | } | ||
| 730 | if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) { | ||
| 731 | pr_info("btrfs: cannot continue dev_replace, tgtdev is missing\n" | ||
| 732 | "btrfs: you may cancel the operation after 'mount -o degraded'\n"); | ||
| 733 | btrfs_dev_replace_unlock(dev_replace); | ||
| 734 | return 0; | ||
| 735 | } | ||
| 736 | btrfs_dev_replace_unlock(dev_replace); | ||
| 737 | |||
| 738 | WARN_ON(atomic_xchg( | ||
| 739 | &fs_info->mutually_exclusive_operation_running, 1)); | ||
| 740 | task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl"); | ||
| 741 | return PTR_RET(task); | ||
| 742 | } | ||
| 743 | |||
| 744 | static int btrfs_dev_replace_kthread(void *data) | ||
| 745 | { | ||
| 746 | struct btrfs_fs_info *fs_info = data; | ||
| 747 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 748 | struct btrfs_ioctl_dev_replace_args *status_args; | ||
| 749 | u64 progress; | ||
| 750 | |||
| 751 | status_args = kzalloc(sizeof(*status_args), GFP_NOFS); | ||
| 752 | if (status_args) { | ||
| 753 | btrfs_dev_replace_status(fs_info, status_args); | ||
| 754 | progress = status_args->status.progress_1000; | ||
| 755 | kfree(status_args); | ||
| 756 | do_div(progress, 10); | ||
| 757 | printk_in_rcu(KERN_INFO | ||
| 758 | "btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", | ||
| 759 | dev_replace->srcdev->missing ? "<missing disk>" : | ||
| 760 | rcu_str_deref(dev_replace->srcdev->name), | ||
| 761 | dev_replace->srcdev->devid, | ||
| 762 | dev_replace->tgtdev ? | ||
| 763 | rcu_str_deref(dev_replace->tgtdev->name) : | ||
| 764 | "<missing target disk>", | ||
| 765 | (unsigned int)progress); | ||
| 766 | } | ||
| 767 | btrfs_dev_replace_continue_on_mount(fs_info); | ||
| 768 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
| 769 | |||
| 770 | return 0; | ||
| 771 | } | ||
| 772 | |||
| 773 | static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info) | ||
| 774 | { | ||
| 775 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 776 | int ret; | ||
| 777 | |||
| 778 | ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid, | ||
| 779 | dev_replace->committed_cursor_left, | ||
| 780 | dev_replace->srcdev->total_bytes, | ||
| 781 | &dev_replace->scrub_progress, 0, 1); | ||
| 782 | ret = btrfs_dev_replace_finishing(fs_info, ret); | ||
| 783 | WARN_ON(ret); | ||
| 784 | return 0; | ||
| 785 | } | ||
| 786 | |||
| 787 | int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace) | ||
| 788 | { | ||
| 789 | if (!dev_replace->is_valid) | ||
| 790 | return 0; | ||
| 791 | |||
| 792 | switch (dev_replace->replace_state) { | ||
| 793 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 794 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 795 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 796 | return 0; | ||
| 797 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 798 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 799 | /* | ||
| 800 | * return true even if tgtdev is missing (this is | ||
| 801 | * something that can happen if the dev_replace | ||
| 802 | * procedure is suspended by an umount and then | ||
| 803 | * the tgtdev is missing (or "btrfs dev scan") was | ||
| 804 | * not called and the the filesystem is remounted | ||
| 805 | * in degraded state. This does not stop the | ||
| 806 | * dev_replace procedure. It needs to be canceled | ||
| 807 | * manually if the cancelation is wanted. | ||
| 808 | */ | ||
| 809 | break; | ||
| 810 | } | ||
| 811 | return 1; | ||
| 812 | } | ||
| 813 | |||
| 814 | void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace) | ||
| 815 | { | ||
| 816 | /* the beginning is just an optimization for the typical case */ | ||
| 817 | if (atomic_read(&dev_replace->nesting_level) == 0) { | ||
| 818 | acquire_lock: | ||
| 819 | /* this is not a nested case where the same thread | ||
| 820 | * is trying to acqurire the same lock twice */ | ||
| 821 | mutex_lock(&dev_replace->lock); | ||
| 822 | mutex_lock(&dev_replace->lock_management_lock); | ||
| 823 | dev_replace->lock_owner = current->pid; | ||
| 824 | atomic_inc(&dev_replace->nesting_level); | ||
| 825 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 826 | return; | ||
| 827 | } | ||
| 828 | |||
| 829 | mutex_lock(&dev_replace->lock_management_lock); | ||
| 830 | if (atomic_read(&dev_replace->nesting_level) > 0 && | ||
| 831 | dev_replace->lock_owner == current->pid) { | ||
| 832 | WARN_ON(!mutex_is_locked(&dev_replace->lock)); | ||
| 833 | atomic_inc(&dev_replace->nesting_level); | ||
| 834 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 835 | return; | ||
| 836 | } | ||
| 837 | |||
| 838 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 839 | goto acquire_lock; | ||
| 840 | } | ||
| 841 | |||
| 842 | void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) | ||
| 843 | { | ||
| 844 | WARN_ON(!mutex_is_locked(&dev_replace->lock)); | ||
| 845 | mutex_lock(&dev_replace->lock_management_lock); | ||
| 846 | WARN_ON(atomic_read(&dev_replace->nesting_level) < 1); | ||
| 847 | WARN_ON(dev_replace->lock_owner != current->pid); | ||
| 848 | atomic_dec(&dev_replace->nesting_level); | ||
| 849 | if (atomic_read(&dev_replace->nesting_level) == 0) { | ||
| 850 | dev_replace->lock_owner = 0; | ||
| 851 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 852 | mutex_unlock(&dev_replace->lock); | ||
| 853 | } else { | ||
| 854 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 855 | } | ||
| 856 | } | ||
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h new file mode 100644 index 000000000000..20035cbbf021 --- /dev/null +++ b/fs/btrfs/dev-replace.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) STRATO AG 2012. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #if !defined(__BTRFS_DEV_REPLACE__) | ||
| 20 | #define __BTRFS_DEV_REPLACE__ | ||
| 21 | |||
| 22 | struct btrfs_ioctl_dev_replace_args; | ||
| 23 | |||
| 24 | int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info); | ||
| 25 | int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, | ||
| 26 | struct btrfs_fs_info *fs_info); | ||
| 27 | void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info); | ||
| 28 | int btrfs_dev_replace_start(struct btrfs_root *root, | ||
| 29 | struct btrfs_ioctl_dev_replace_args *args); | ||
| 30 | void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | ||
| 31 | struct btrfs_ioctl_dev_replace_args *args); | ||
| 32 | int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, | ||
| 33 | struct btrfs_ioctl_dev_replace_args *args); | ||
| 34 | void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info); | ||
| 35 | int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info); | ||
| 36 | int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace); | ||
| 37 | void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace); | ||
| 38 | void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace); | ||
| 39 | |||
| 40 | static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value) | ||
| 41 | { | ||
| 42 | atomic64_inc(stat_value); | ||
| 43 | } | ||
| 44 | #endif | ||
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index c1a074d0696f..502c2158167c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
| @@ -213,6 +213,65 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
| 213 | return btrfs_match_dir_item_name(root, path, name, name_len); | 213 | return btrfs_match_dir_item_name(root, path, name, name_len); |
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | ||
| 217 | const char *name, int name_len) | ||
| 218 | { | ||
| 219 | int ret; | ||
| 220 | struct btrfs_key key; | ||
| 221 | struct btrfs_dir_item *di; | ||
| 222 | int data_size; | ||
| 223 | struct extent_buffer *leaf; | ||
| 224 | int slot; | ||
| 225 | struct btrfs_path *path; | ||
| 226 | |||
| 227 | |||
| 228 | path = btrfs_alloc_path(); | ||
| 229 | if (!path) | ||
| 230 | return -ENOMEM; | ||
| 231 | |||
| 232 | key.objectid = dir; | ||
| 233 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | ||
| 234 | key.offset = btrfs_name_hash(name, name_len); | ||
| 235 | |||
| 236 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 237 | |||
| 238 | /* return back any errors */ | ||
| 239 | if (ret < 0) | ||
| 240 | goto out; | ||
| 241 | |||
| 242 | /* nothing found, we're safe */ | ||
| 243 | if (ret > 0) { | ||
| 244 | ret = 0; | ||
| 245 | goto out; | ||
| 246 | } | ||
| 247 | |||
| 248 | /* we found an item, look for our name in the item */ | ||
| 249 | di = btrfs_match_dir_item_name(root, path, name, name_len); | ||
| 250 | if (di) { | ||
| 251 | /* our exact name was found */ | ||
| 252 | ret = -EEXIST; | ||
| 253 | goto out; | ||
| 254 | } | ||
| 255 | |||
| 256 | /* | ||
| 257 | * see if there is room in the item to insert this | ||
| 258 | * name | ||
| 259 | */ | ||
| 260 | data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item); | ||
| 261 | leaf = path->nodes[0]; | ||
| 262 | slot = path->slots[0]; | ||
| 263 | if (data_size + btrfs_item_size_nr(leaf, slot) + | ||
| 264 | sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 265 | ret = -EOVERFLOW; | ||
| 266 | } else { | ||
| 267 | /* plenty of insertion room */ | ||
| 268 | ret = 0; | ||
| 269 | } | ||
| 270 | out: | ||
| 271 | btrfs_free_path(path); | ||
| 272 | return ret; | ||
| 273 | } | ||
| 274 | |||
| 216 | /* | 275 | /* |
| 217 | * lookup a directory item based on index. 'dir' is the objectid | 276 | * lookup a directory item based on index. 'dir' is the objectid |
| 218 | * we're searching in, and 'mod' tells us if you plan on deleting the | 277 | * we're searching in, and 'mod' tells us if you plan on deleting the |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 22e98e04c2ea..a8f652dc940b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -45,6 +45,11 @@ | |||
| 45 | #include "inode-map.h" | 45 | #include "inode-map.h" |
| 46 | #include "check-integrity.h" | 46 | #include "check-integrity.h" |
| 47 | #include "rcu-string.h" | 47 | #include "rcu-string.h" |
| 48 | #include "dev-replace.h" | ||
| 49 | |||
| 50 | #ifdef CONFIG_X86 | ||
| 51 | #include <asm/cpufeature.h> | ||
| 52 | #endif | ||
| 48 | 53 | ||
| 49 | static struct extent_io_ops btree_extent_io_ops; | 54 | static struct extent_io_ops btree_extent_io_ops; |
| 50 | static void end_workqueue_fn(struct btrfs_work *work); | 55 | static void end_workqueue_fn(struct btrfs_work *work); |
| @@ -217,26 +222,16 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 217 | write_lock(&em_tree->lock); | 222 | write_lock(&em_tree->lock); |
| 218 | ret = add_extent_mapping(em_tree, em); | 223 | ret = add_extent_mapping(em_tree, em); |
| 219 | if (ret == -EEXIST) { | 224 | if (ret == -EEXIST) { |
| 220 | u64 failed_start = em->start; | ||
| 221 | u64 failed_len = em->len; | ||
| 222 | |||
| 223 | free_extent_map(em); | 225 | free_extent_map(em); |
| 224 | em = lookup_extent_mapping(em_tree, start, len); | 226 | em = lookup_extent_mapping(em_tree, start, len); |
| 225 | if (em) { | 227 | if (!em) |
| 226 | ret = 0; | 228 | em = ERR_PTR(-EIO); |
| 227 | } else { | ||
| 228 | em = lookup_extent_mapping(em_tree, failed_start, | ||
| 229 | failed_len); | ||
| 230 | ret = -EIO; | ||
| 231 | } | ||
| 232 | } else if (ret) { | 229 | } else if (ret) { |
| 233 | free_extent_map(em); | 230 | free_extent_map(em); |
| 234 | em = NULL; | 231 | em = ERR_PTR(ret); |
| 235 | } | 232 | } |
| 236 | write_unlock(&em_tree->lock); | 233 | write_unlock(&em_tree->lock); |
| 237 | 234 | ||
| 238 | if (ret) | ||
| 239 | em = ERR_PTR(ret); | ||
| 240 | out: | 235 | out: |
| 241 | return em; | 236 | return em; |
| 242 | } | 237 | } |
| @@ -393,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
| 393 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | 388 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) |
| 394 | break; | 389 | break; |
| 395 | 390 | ||
| 396 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 391 | num_copies = btrfs_num_copies(root->fs_info, |
| 397 | eb->start, eb->len); | 392 | eb->start, eb->len); |
| 398 | if (num_copies == 1) | 393 | if (num_copies == 1) |
| 399 | break; | 394 | break; |
| @@ -439,10 +434,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
| 439 | WARN_ON(1); | 434 | WARN_ON(1); |
| 440 | return 0; | 435 | return 0; |
| 441 | } | 436 | } |
| 442 | if (eb->pages[0] != page) { | ||
| 443 | WARN_ON(1); | ||
| 444 | return 0; | ||
| 445 | } | ||
| 446 | if (!PageUptodate(page)) { | 437 | if (!PageUptodate(page)) { |
| 447 | WARN_ON(1); | 438 | WARN_ON(1); |
| 448 | return 0; | 439 | return 0; |
| @@ -862,21 +853,37 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 862 | int mirror_num, unsigned long bio_flags, | 853 | int mirror_num, unsigned long bio_flags, |
| 863 | u64 bio_offset) | 854 | u64 bio_offset) |
| 864 | { | 855 | { |
| 856 | int ret; | ||
| 857 | |||
| 865 | /* | 858 | /* |
| 866 | * when we're called for a write, we're already in the async | 859 | * when we're called for a write, we're already in the async |
| 867 | * submission context. Just jump into btrfs_map_bio | 860 | * submission context. Just jump into btrfs_map_bio |
| 868 | */ | 861 | */ |
| 869 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); | 862 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); |
| 863 | if (ret) | ||
| 864 | bio_endio(bio, ret); | ||
| 865 | return ret; | ||
| 866 | } | ||
| 867 | |||
| 868 | static int check_async_write(struct inode *inode, unsigned long bio_flags) | ||
| 869 | { | ||
| 870 | if (bio_flags & EXTENT_BIO_TREE_LOG) | ||
| 871 | return 0; | ||
| 872 | #ifdef CONFIG_X86 | ||
| 873 | if (cpu_has_xmm4_2) | ||
| 874 | return 0; | ||
| 875 | #endif | ||
| 876 | return 1; | ||
| 870 | } | 877 | } |
| 871 | 878 | ||
| 872 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 879 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 873 | int mirror_num, unsigned long bio_flags, | 880 | int mirror_num, unsigned long bio_flags, |
| 874 | u64 bio_offset) | 881 | u64 bio_offset) |
| 875 | { | 882 | { |
| 883 | int async = check_async_write(inode, bio_flags); | ||
| 876 | int ret; | 884 | int ret; |
| 877 | 885 | ||
| 878 | if (!(rw & REQ_WRITE)) { | 886 | if (!(rw & REQ_WRITE)) { |
| 879 | |||
| 880 | /* | 887 | /* |
| 881 | * called for a read, do the setup so that checksum validation | 888 | * called for a read, do the setup so that checksum validation |
| 882 | * can happen in the async kernel threads | 889 | * can happen in the async kernel threads |
| @@ -884,20 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 884 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, | 891 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, |
| 885 | bio, 1); | 892 | bio, 1); |
| 886 | if (ret) | 893 | if (ret) |
| 887 | return ret; | 894 | goto out_w_error; |
| 888 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 895 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
| 889 | mirror_num, 0); | 896 | mirror_num, 0); |
| 897 | } else if (!async) { | ||
| 898 | ret = btree_csum_one_bio(bio); | ||
| 899 | if (ret) | ||
| 900 | goto out_w_error; | ||
| 901 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | ||
| 902 | mirror_num, 0); | ||
| 903 | } else { | ||
| 904 | /* | ||
| 905 | * kthread helpers are used to submit writes so that | ||
| 906 | * checksumming can happen in parallel across all CPUs | ||
| 907 | */ | ||
| 908 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
| 909 | inode, rw, bio, mirror_num, 0, | ||
| 910 | bio_offset, | ||
| 911 | __btree_submit_bio_start, | ||
| 912 | __btree_submit_bio_done); | ||
| 890 | } | 913 | } |
| 891 | 914 | ||
| 892 | /* | 915 | if (ret) { |
| 893 | * kthread helpers are used to submit writes so that checksumming | 916 | out_w_error: |
| 894 | * can happen in parallel across all CPUs | 917 | bio_endio(bio, ret); |
| 895 | */ | 918 | } |
| 896 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 919 | return ret; |
| 897 | inode, rw, bio, mirror_num, 0, | ||
| 898 | bio_offset, | ||
| 899 | __btree_submit_bio_start, | ||
| 900 | __btree_submit_bio_done); | ||
| 901 | } | 920 | } |
| 902 | 921 | ||
| 903 | #ifdef CONFIG_MIGRATION | 922 | #ifdef CONFIG_MIGRATION |
| @@ -982,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
| 982 | 1001 | ||
| 983 | static int btree_set_page_dirty(struct page *page) | 1002 | static int btree_set_page_dirty(struct page *page) |
| 984 | { | 1003 | { |
| 1004 | #ifdef DEBUG | ||
| 985 | struct extent_buffer *eb; | 1005 | struct extent_buffer *eb; |
| 986 | 1006 | ||
| 987 | BUG_ON(!PagePrivate(page)); | 1007 | BUG_ON(!PagePrivate(page)); |
| @@ -990,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page) | |||
| 990 | BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); | 1010 | BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); |
| 991 | BUG_ON(!atomic_read(&eb->refs)); | 1011 | BUG_ON(!atomic_read(&eb->refs)); |
| 992 | btrfs_assert_tree_locked(eb); | 1012 | btrfs_assert_tree_locked(eb); |
| 1013 | #endif | ||
| 993 | return __set_page_dirty_nobuffers(page); | 1014 | return __set_page_dirty_nobuffers(page); |
| 994 | } | 1015 | } |
| 995 | 1016 | ||
| @@ -1121,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 1121 | root->fs_info->dirty_metadata_bytes); | 1142 | root->fs_info->dirty_metadata_bytes); |
| 1122 | } | 1143 | } |
| 1123 | spin_unlock(&root->fs_info->delalloc_lock); | 1144 | spin_unlock(&root->fs_info->delalloc_lock); |
| 1124 | } | ||
| 1125 | 1145 | ||
| 1126 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | 1146 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ |
| 1127 | btrfs_set_lock_blocking(buf); | 1147 | btrfs_set_lock_blocking(buf); |
| 1128 | clear_extent_buffer_dirty(buf); | 1148 | clear_extent_buffer_dirty(buf); |
| 1149 | } | ||
| 1129 | } | 1150 | } |
| 1130 | } | 1151 | } |
| 1131 | 1152 | ||
| @@ -1168,8 +1189,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1168 | atomic_set(&root->log_commit[0], 0); | 1189 | atomic_set(&root->log_commit[0], 0); |
| 1169 | atomic_set(&root->log_commit[1], 0); | 1190 | atomic_set(&root->log_commit[1], 0); |
| 1170 | atomic_set(&root->log_writers, 0); | 1191 | atomic_set(&root->log_writers, 0); |
| 1192 | atomic_set(&root->log_batch, 0); | ||
| 1171 | atomic_set(&root->orphan_inodes, 0); | 1193 | atomic_set(&root->orphan_inodes, 0); |
| 1172 | root->log_batch = 0; | ||
| 1173 | root->log_transid = 0; | 1194 | root->log_transid = 0; |
| 1174 | root->last_log_commit = 0; | 1195 | root->last_log_commit = 0; |
| 1175 | extent_io_tree_init(&root->dirty_log_pages, | 1196 | extent_io_tree_init(&root->dirty_log_pages, |
| @@ -1185,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1185 | root->root_key.objectid = objectid; | 1206 | root->root_key.objectid = objectid; |
| 1186 | root->anon_dev = 0; | 1207 | root->anon_dev = 0; |
| 1187 | 1208 | ||
| 1188 | spin_lock_init(&root->root_times_lock); | 1209 | spin_lock_init(&root->root_item_lock); |
| 1189 | } | 1210 | } |
| 1190 | 1211 | ||
| 1191 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, | 1212 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, |
| @@ -1667,9 +1688,10 @@ static int transaction_kthread(void *arg) | |||
| 1667 | spin_unlock(&root->fs_info->trans_lock); | 1688 | spin_unlock(&root->fs_info->trans_lock); |
| 1668 | 1689 | ||
| 1669 | /* If the file system is aborted, this will always fail. */ | 1690 | /* If the file system is aborted, this will always fail. */ |
| 1670 | trans = btrfs_join_transaction(root); | 1691 | trans = btrfs_attach_transaction(root); |
| 1671 | if (IS_ERR(trans)) { | 1692 | if (IS_ERR(trans)) { |
| 1672 | cannot_commit = true; | 1693 | if (PTR_ERR(trans) != -ENOENT) |
| 1694 | cannot_commit = true; | ||
| 1673 | goto sleep; | 1695 | goto sleep; |
| 1674 | } | 1696 | } |
| 1675 | if (transid == trans->transid) { | 1697 | if (transid == trans->transid) { |
| @@ -1994,13 +2016,11 @@ int open_ctree(struct super_block *sb, | |||
| 1994 | INIT_LIST_HEAD(&fs_info->trans_list); | 2016 | INIT_LIST_HEAD(&fs_info->trans_list); |
| 1995 | INIT_LIST_HEAD(&fs_info->dead_roots); | 2017 | INIT_LIST_HEAD(&fs_info->dead_roots); |
| 1996 | INIT_LIST_HEAD(&fs_info->delayed_iputs); | 2018 | INIT_LIST_HEAD(&fs_info->delayed_iputs); |
| 1997 | INIT_LIST_HEAD(&fs_info->hashers); | ||
| 1998 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 2019 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
| 1999 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 2020 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
| 2000 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 2021 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
| 2001 | spin_lock_init(&fs_info->delalloc_lock); | 2022 | spin_lock_init(&fs_info->delalloc_lock); |
| 2002 | spin_lock_init(&fs_info->trans_lock); | 2023 | spin_lock_init(&fs_info->trans_lock); |
| 2003 | spin_lock_init(&fs_info->ref_cache_lock); | ||
| 2004 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 2024 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
| 2005 | spin_lock_init(&fs_info->delayed_iput_lock); | 2025 | spin_lock_init(&fs_info->delayed_iput_lock); |
| 2006 | spin_lock_init(&fs_info->defrag_inodes_lock); | 2026 | spin_lock_init(&fs_info->defrag_inodes_lock); |
| @@ -2014,12 +2034,15 @@ int open_ctree(struct super_block *sb, | |||
| 2014 | INIT_LIST_HEAD(&fs_info->space_info); | 2034 | INIT_LIST_HEAD(&fs_info->space_info); |
| 2015 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | 2035 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); |
| 2016 | btrfs_mapping_init(&fs_info->mapping_tree); | 2036 | btrfs_mapping_init(&fs_info->mapping_tree); |
| 2017 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | 2037 | btrfs_init_block_rsv(&fs_info->global_block_rsv, |
| 2018 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | 2038 | BTRFS_BLOCK_RSV_GLOBAL); |
| 2019 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | 2039 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv, |
| 2020 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | 2040 | BTRFS_BLOCK_RSV_DELALLOC); |
| 2021 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | 2041 | btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); |
| 2022 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv); | 2042 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK); |
| 2043 | btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY); | ||
| 2044 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv, | ||
| 2045 | BTRFS_BLOCK_RSV_DELOPS); | ||
| 2023 | atomic_set(&fs_info->nr_async_submits, 0); | 2046 | atomic_set(&fs_info->nr_async_submits, 0); |
| 2024 | atomic_set(&fs_info->async_delalloc_pages, 0); | 2047 | atomic_set(&fs_info->async_delalloc_pages, 0); |
| 2025 | atomic_set(&fs_info->async_submit_draining, 0); | 2048 | atomic_set(&fs_info->async_submit_draining, 0); |
| @@ -2121,6 +2144,11 @@ int open_ctree(struct super_block *sb, | |||
| 2121 | init_rwsem(&fs_info->extent_commit_sem); | 2144 | init_rwsem(&fs_info->extent_commit_sem); |
| 2122 | init_rwsem(&fs_info->cleanup_work_sem); | 2145 | init_rwsem(&fs_info->cleanup_work_sem); |
| 2123 | init_rwsem(&fs_info->subvol_sem); | 2146 | init_rwsem(&fs_info->subvol_sem); |
| 2147 | fs_info->dev_replace.lock_owner = 0; | ||
| 2148 | atomic_set(&fs_info->dev_replace.nesting_level, 0); | ||
| 2149 | mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); | ||
| 2150 | mutex_init(&fs_info->dev_replace.lock_management_lock); | ||
| 2151 | mutex_init(&fs_info->dev_replace.lock); | ||
| 2124 | 2152 | ||
| 2125 | spin_lock_init(&fs_info->qgroup_lock); | 2153 | spin_lock_init(&fs_info->qgroup_lock); |
| 2126 | fs_info->qgroup_tree = RB_ROOT; | 2154 | fs_info->qgroup_tree = RB_ROOT; |
| @@ -2269,6 +2297,10 @@ int open_ctree(struct super_block *sb, | |||
| 2269 | fs_info->thread_pool_size, | 2297 | fs_info->thread_pool_size, |
| 2270 | &fs_info->generic_worker); | 2298 | &fs_info->generic_worker); |
| 2271 | 2299 | ||
| 2300 | btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", | ||
| 2301 | fs_info->thread_pool_size, | ||
| 2302 | &fs_info->generic_worker); | ||
| 2303 | |||
| 2272 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 2304 | btrfs_init_workers(&fs_info->submit_workers, "submit", |
| 2273 | min_t(u64, fs_devices->num_devices, | 2305 | min_t(u64, fs_devices->num_devices, |
| 2274 | fs_info->thread_pool_size), | 2306 | fs_info->thread_pool_size), |
| @@ -2340,6 +2372,7 @@ int open_ctree(struct super_block *sb, | |||
| 2340 | ret |= btrfs_start_workers(&fs_info->delayed_workers); | 2372 | ret |= btrfs_start_workers(&fs_info->delayed_workers); |
| 2341 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2373 | ret |= btrfs_start_workers(&fs_info->caching_workers); |
| 2342 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2374 | ret |= btrfs_start_workers(&fs_info->readahead_workers); |
| 2375 | ret |= btrfs_start_workers(&fs_info->flush_workers); | ||
| 2343 | if (ret) { | 2376 | if (ret) { |
| 2344 | err = -ENOMEM; | 2377 | err = -ENOMEM; |
| 2345 | goto fail_sb_buffer; | 2378 | goto fail_sb_buffer; |
| @@ -2408,7 +2441,11 @@ int open_ctree(struct super_block *sb, | |||
| 2408 | goto fail_tree_roots; | 2441 | goto fail_tree_roots; |
| 2409 | } | 2442 | } |
| 2410 | 2443 | ||
| 2411 | btrfs_close_extra_devices(fs_devices); | 2444 | /* |
| 2445 | * keep the device that is marked to be the target device for the | ||
| 2446 | * dev_replace procedure | ||
| 2447 | */ | ||
| 2448 | btrfs_close_extra_devices(fs_info, fs_devices, 0); | ||
| 2412 | 2449 | ||
| 2413 | if (!fs_devices->latest_bdev) { | 2450 | if (!fs_devices->latest_bdev) { |
| 2414 | printk(KERN_CRIT "btrfs: failed to read devices on %s\n", | 2451 | printk(KERN_CRIT "btrfs: failed to read devices on %s\n", |
| @@ -2480,6 +2517,14 @@ retry_root_backup: | |||
| 2480 | goto fail_block_groups; | 2517 | goto fail_block_groups; |
| 2481 | } | 2518 | } |
| 2482 | 2519 | ||
| 2520 | ret = btrfs_init_dev_replace(fs_info); | ||
| 2521 | if (ret) { | ||
| 2522 | pr_err("btrfs: failed to init dev_replace: %d\n", ret); | ||
| 2523 | goto fail_block_groups; | ||
| 2524 | } | ||
| 2525 | |||
| 2526 | btrfs_close_extra_devices(fs_info, fs_devices, 1); | ||
| 2527 | |||
| 2483 | ret = btrfs_init_space_info(fs_info); | 2528 | ret = btrfs_init_space_info(fs_info); |
| 2484 | if (ret) { | 2529 | if (ret) { |
| 2485 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | 2530 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); |
| @@ -2491,6 +2536,15 @@ retry_root_backup: | |||
| 2491 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 2536 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
| 2492 | goto fail_block_groups; | 2537 | goto fail_block_groups; |
| 2493 | } | 2538 | } |
| 2539 | fs_info->num_tolerated_disk_barrier_failures = | ||
| 2540 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
| 2541 | if (fs_info->fs_devices->missing_devices > | ||
| 2542 | fs_info->num_tolerated_disk_barrier_failures && | ||
| 2543 | !(sb->s_flags & MS_RDONLY)) { | ||
| 2544 | printk(KERN_WARNING | ||
| 2545 | "Btrfs: too many missing devices, writeable mount is not allowed\n"); | ||
| 2546 | goto fail_block_groups; | ||
| 2547 | } | ||
| 2494 | 2548 | ||
| 2495 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 2549 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
| 2496 | "btrfs-cleaner"); | 2550 | "btrfs-cleaner"); |
| @@ -2619,6 +2673,13 @@ retry_root_backup: | |||
| 2619 | return ret; | 2673 | return ret; |
| 2620 | } | 2674 | } |
| 2621 | 2675 | ||
| 2676 | ret = btrfs_resume_dev_replace_async(fs_info); | ||
| 2677 | if (ret) { | ||
| 2678 | pr_warn("btrfs: failed to resume dev_replace\n"); | ||
| 2679 | close_ctree(tree_root); | ||
| 2680 | return ret; | ||
| 2681 | } | ||
| 2682 | |||
| 2622 | return 0; | 2683 | return 0; |
| 2623 | 2684 | ||
| 2624 | fail_qgroup: | 2685 | fail_qgroup: |
| @@ -2655,6 +2716,7 @@ fail_sb_buffer: | |||
| 2655 | btrfs_stop_workers(&fs_info->submit_workers); | 2716 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2656 | btrfs_stop_workers(&fs_info->delayed_workers); | 2717 | btrfs_stop_workers(&fs_info->delayed_workers); |
| 2657 | btrfs_stop_workers(&fs_info->caching_workers); | 2718 | btrfs_stop_workers(&fs_info->caching_workers); |
| 2719 | btrfs_stop_workers(&fs_info->flush_workers); | ||
| 2658 | fail_alloc: | 2720 | fail_alloc: |
| 2659 | fail_iput: | 2721 | fail_iput: |
| 2660 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2722 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| @@ -2874,12 +2936,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
| 2874 | printk_in_rcu("btrfs: disabling barriers on dev %s\n", | 2936 | printk_in_rcu("btrfs: disabling barriers on dev %s\n", |
| 2875 | rcu_str_deref(device->name)); | 2937 | rcu_str_deref(device->name)); |
| 2876 | device->nobarriers = 1; | 2938 | device->nobarriers = 1; |
| 2877 | } | 2939 | } else if (!bio_flagged(bio, BIO_UPTODATE)) { |
| 2878 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
| 2879 | ret = -EIO; | 2940 | ret = -EIO; |
| 2880 | if (!bio_flagged(bio, BIO_EOPNOTSUPP)) | 2941 | btrfs_dev_stat_inc_and_print(device, |
| 2881 | btrfs_dev_stat_inc_and_print(device, | 2942 | BTRFS_DEV_STAT_FLUSH_ERRS); |
| 2882 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
| 2883 | } | 2943 | } |
| 2884 | 2944 | ||
| 2885 | /* drop the reference from the wait == 0 run */ | 2945 | /* drop the reference from the wait == 0 run */ |
| @@ -2918,14 +2978,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 2918 | { | 2978 | { |
| 2919 | struct list_head *head; | 2979 | struct list_head *head; |
| 2920 | struct btrfs_device *dev; | 2980 | struct btrfs_device *dev; |
| 2921 | int errors = 0; | 2981 | int errors_send = 0; |
| 2982 | int errors_wait = 0; | ||
| 2922 | int ret; | 2983 | int ret; |
| 2923 | 2984 | ||
| 2924 | /* send down all the barriers */ | 2985 | /* send down all the barriers */ |
| 2925 | head = &info->fs_devices->devices; | 2986 | head = &info->fs_devices->devices; |
| 2926 | list_for_each_entry_rcu(dev, head, dev_list) { | 2987 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 2927 | if (!dev->bdev) { | 2988 | if (!dev->bdev) { |
| 2928 | errors++; | 2989 | errors_send++; |
| 2929 | continue; | 2990 | continue; |
| 2930 | } | 2991 | } |
| 2931 | if (!dev->in_fs_metadata || !dev->writeable) | 2992 | if (!dev->in_fs_metadata || !dev->writeable) |
| @@ -2933,13 +2994,13 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 2933 | 2994 | ||
| 2934 | ret = write_dev_flush(dev, 0); | 2995 | ret = write_dev_flush(dev, 0); |
| 2935 | if (ret) | 2996 | if (ret) |
| 2936 | errors++; | 2997 | errors_send++; |
| 2937 | } | 2998 | } |
| 2938 | 2999 | ||
| 2939 | /* wait for all the barriers */ | 3000 | /* wait for all the barriers */ |
| 2940 | list_for_each_entry_rcu(dev, head, dev_list) { | 3001 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 2941 | if (!dev->bdev) { | 3002 | if (!dev->bdev) { |
| 2942 | errors++; | 3003 | errors_wait++; |
| 2943 | continue; | 3004 | continue; |
| 2944 | } | 3005 | } |
| 2945 | if (!dev->in_fs_metadata || !dev->writeable) | 3006 | if (!dev->in_fs_metadata || !dev->writeable) |
| @@ -2947,13 +3008,87 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
| 2947 | 3008 | ||
| 2948 | ret = write_dev_flush(dev, 1); | 3009 | ret = write_dev_flush(dev, 1); |
| 2949 | if (ret) | 3010 | if (ret) |
| 2950 | errors++; | 3011 | errors_wait++; |
| 2951 | } | 3012 | } |
| 2952 | if (errors) | 3013 | if (errors_send > info->num_tolerated_disk_barrier_failures || |
| 3014 | errors_wait > info->num_tolerated_disk_barrier_failures) | ||
| 2953 | return -EIO; | 3015 | return -EIO; |
| 2954 | return 0; | 3016 | return 0; |
| 2955 | } | 3017 | } |
| 2956 | 3018 | ||
| 3019 | int btrfs_calc_num_tolerated_disk_barrier_failures( | ||
| 3020 | struct btrfs_fs_info *fs_info) | ||
| 3021 | { | ||
| 3022 | struct btrfs_ioctl_space_info space; | ||
| 3023 | struct btrfs_space_info *sinfo; | ||
| 3024 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, | ||
| 3025 | BTRFS_BLOCK_GROUP_SYSTEM, | ||
| 3026 | BTRFS_BLOCK_GROUP_METADATA, | ||
| 3027 | BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; | ||
| 3028 | int num_types = 4; | ||
| 3029 | int i; | ||
| 3030 | int c; | ||
| 3031 | int num_tolerated_disk_barrier_failures = | ||
| 3032 | (int)fs_info->fs_devices->num_devices; | ||
| 3033 | |||
| 3034 | for (i = 0; i < num_types; i++) { | ||
| 3035 | struct btrfs_space_info *tmp; | ||
| 3036 | |||
| 3037 | sinfo = NULL; | ||
| 3038 | rcu_read_lock(); | ||
| 3039 | list_for_each_entry_rcu(tmp, &fs_info->space_info, list) { | ||
| 3040 | if (tmp->flags == types[i]) { | ||
| 3041 | sinfo = tmp; | ||
| 3042 | break; | ||
| 3043 | } | ||
| 3044 | } | ||
| 3045 | rcu_read_unlock(); | ||
| 3046 | |||
| 3047 | if (!sinfo) | ||
| 3048 | continue; | ||
| 3049 | |||
| 3050 | down_read(&sinfo->groups_sem); | ||
| 3051 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | ||
| 3052 | if (!list_empty(&sinfo->block_groups[c])) { | ||
| 3053 | u64 flags; | ||
| 3054 | |||
| 3055 | btrfs_get_block_group_info( | ||
| 3056 | &sinfo->block_groups[c], &space); | ||
| 3057 | if (space.total_bytes == 0 || | ||
| 3058 | space.used_bytes == 0) | ||
| 3059 | continue; | ||
| 3060 | flags = space.flags; | ||
| 3061 | /* | ||
| 3062 | * return | ||
| 3063 | * 0: if dup, single or RAID0 is configured for | ||
| 3064 | * any of metadata, system or data, else | ||
| 3065 | * 1: if RAID5 is configured, or if RAID1 or | ||
| 3066 | * RAID10 is configured and only two mirrors | ||
| 3067 | * are used, else | ||
| 3068 | * 2: if RAID6 is configured, else | ||
| 3069 | * num_mirrors - 1: if RAID1 or RAID10 is | ||
| 3070 | * configured and more than | ||
| 3071 | * 2 mirrors are used. | ||
| 3072 | */ | ||
| 3073 | if (num_tolerated_disk_barrier_failures > 0 && | ||
| 3074 | ((flags & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3075 | BTRFS_BLOCK_GROUP_RAID0)) || | ||
| 3076 | ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) | ||
| 3077 | == 0))) | ||
| 3078 | num_tolerated_disk_barrier_failures = 0; | ||
| 3079 | else if (num_tolerated_disk_barrier_failures > 1 | ||
| 3080 | && | ||
| 3081 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3082 | BTRFS_BLOCK_GROUP_RAID10))) | ||
| 3083 | num_tolerated_disk_barrier_failures = 1; | ||
| 3084 | } | ||
| 3085 | } | ||
| 3086 | up_read(&sinfo->groups_sem); | ||
| 3087 | } | ||
| 3088 | |||
| 3089 | return num_tolerated_disk_barrier_failures; | ||
| 3090 | } | ||
| 3091 | |||
| 2957 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 3092 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
| 2958 | { | 3093 | { |
| 2959 | struct list_head *head; | 3094 | struct list_head *head; |
| @@ -2976,8 +3111,16 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
| 2976 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 3111 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 2977 | head = &root->fs_info->fs_devices->devices; | 3112 | head = &root->fs_info->fs_devices->devices; |
| 2978 | 3113 | ||
| 2979 | if (do_barriers) | 3114 | if (do_barriers) { |
| 2980 | barrier_all_devices(root->fs_info); | 3115 | ret = barrier_all_devices(root->fs_info); |
| 3116 | if (ret) { | ||
| 3117 | mutex_unlock( | ||
| 3118 | &root->fs_info->fs_devices->device_list_mutex); | ||
| 3119 | btrfs_error(root->fs_info, ret, | ||
| 3120 | "errors while submitting device barriers."); | ||
| 3121 | return ret; | ||
| 3122 | } | ||
| 3123 | } | ||
| 2981 | 3124 | ||
| 2982 | list_for_each_entry_rcu(dev, head, dev_list) { | 3125 | list_for_each_entry_rcu(dev, head, dev_list) { |
| 2983 | if (!dev->bdev) { | 3126 | if (!dev->bdev) { |
| @@ -3177,16 +3320,18 @@ int close_ctree(struct btrfs_root *root) | |||
| 3177 | smp_mb(); | 3320 | smp_mb(); |
| 3178 | 3321 | ||
| 3179 | /* pause restriper - we want to resume on mount */ | 3322 | /* pause restriper - we want to resume on mount */ |
| 3180 | btrfs_pause_balance(root->fs_info); | 3323 | btrfs_pause_balance(fs_info); |
| 3181 | 3324 | ||
| 3182 | btrfs_scrub_cancel(root); | 3325 | btrfs_dev_replace_suspend_for_unmount(fs_info); |
| 3326 | |||
| 3327 | btrfs_scrub_cancel(fs_info); | ||
| 3183 | 3328 | ||
| 3184 | /* wait for any defraggers to finish */ | 3329 | /* wait for any defraggers to finish */ |
| 3185 | wait_event(fs_info->transaction_wait, | 3330 | wait_event(fs_info->transaction_wait, |
| 3186 | (atomic_read(&fs_info->defrag_running) == 0)); | 3331 | (atomic_read(&fs_info->defrag_running) == 0)); |
| 3187 | 3332 | ||
| 3188 | /* clear out the rbtree of defraggable inodes */ | 3333 | /* clear out the rbtree of defraggable inodes */ |
| 3189 | btrfs_run_defrag_inodes(fs_info); | 3334 | btrfs_cleanup_defrag_inodes(fs_info); |
| 3190 | 3335 | ||
| 3191 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 3336 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
| 3192 | ret = btrfs_commit_super(root); | 3337 | ret = btrfs_commit_super(root); |
| @@ -3211,10 +3356,6 @@ int close_ctree(struct btrfs_root *root) | |||
| 3211 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 3356 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", |
| 3212 | (unsigned long long)fs_info->delalloc_bytes); | 3357 | (unsigned long long)fs_info->delalloc_bytes); |
| 3213 | } | 3358 | } |
| 3214 | if (fs_info->total_ref_cache_size) { | ||
| 3215 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", | ||
| 3216 | (unsigned long long)fs_info->total_ref_cache_size); | ||
| 3217 | } | ||
| 3218 | 3359 | ||
| 3219 | free_extent_buffer(fs_info->extent_root->node); | 3360 | free_extent_buffer(fs_info->extent_root->node); |
| 3220 | free_extent_buffer(fs_info->extent_root->commit_root); | 3361 | free_extent_buffer(fs_info->extent_root->commit_root); |
| @@ -3250,6 +3391,7 @@ int close_ctree(struct btrfs_root *root) | |||
| 3250 | btrfs_stop_workers(&fs_info->delayed_workers); | 3391 | btrfs_stop_workers(&fs_info->delayed_workers); |
| 3251 | btrfs_stop_workers(&fs_info->caching_workers); | 3392 | btrfs_stop_workers(&fs_info->caching_workers); |
| 3252 | btrfs_stop_workers(&fs_info->readahead_workers); | 3393 | btrfs_stop_workers(&fs_info->readahead_workers); |
| 3394 | btrfs_stop_workers(&fs_info->flush_workers); | ||
| 3253 | 3395 | ||
| 3254 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 3396 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 3255 | if (btrfs_test_opt(root, CHECK_INTEGRITY)) | 3397 | if (btrfs_test_opt(root, CHECK_INTEGRITY)) |
| @@ -3294,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
| 3294 | int was_dirty; | 3436 | int was_dirty; |
| 3295 | 3437 | ||
| 3296 | btrfs_assert_tree_locked(buf); | 3438 | btrfs_assert_tree_locked(buf); |
| 3297 | if (transid != root->fs_info->generation) { | 3439 | if (transid != root->fs_info->generation) |
| 3298 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " | 3440 | WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " |
| 3299 | "found %llu running %llu\n", | 3441 | "found %llu running %llu\n", |
| 3300 | (unsigned long long)buf->start, | 3442 | (unsigned long long)buf->start, |
| 3301 | (unsigned long long)transid, | 3443 | (unsigned long long)transid, |
| 3302 | (unsigned long long)root->fs_info->generation); | 3444 | (unsigned long long)root->fs_info->generation); |
| 3303 | WARN_ON(1); | ||
| 3304 | } | ||
| 3305 | was_dirty = set_extent_buffer_dirty(buf); | 3445 | was_dirty = set_extent_buffer_dirty(buf); |
| 3306 | if (!was_dirty) { | 3446 | if (!was_dirty) { |
| 3307 | spin_lock(&root->fs_info->delalloc_lock); | 3447 | spin_lock(&root->fs_info->delalloc_lock); |
| @@ -3310,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
| 3310 | } | 3450 | } |
| 3311 | } | 3451 | } |
| 3312 | 3452 | ||
| 3313 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 3453 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, |
| 3454 | int flush_delayed) | ||
| 3314 | { | 3455 | { |
| 3315 | /* | 3456 | /* |
| 3316 | * looks as though older kernels can get into trouble with | 3457 | * looks as though older kernels can get into trouble with |
| @@ -3322,36 +3463,26 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
| 3322 | if (current->flags & PF_MEMALLOC) | 3463 | if (current->flags & PF_MEMALLOC) |
| 3323 | return; | 3464 | return; |
| 3324 | 3465 | ||
| 3325 | btrfs_balance_delayed_items(root); | 3466 | if (flush_delayed) |
| 3467 | btrfs_balance_delayed_items(root); | ||
| 3326 | 3468 | ||
| 3327 | num_dirty = root->fs_info->dirty_metadata_bytes; | 3469 | num_dirty = root->fs_info->dirty_metadata_bytes; |
| 3328 | 3470 | ||
| 3329 | if (num_dirty > thresh) { | 3471 | if (num_dirty > thresh) { |
| 3330 | balance_dirty_pages_ratelimited_nr( | 3472 | balance_dirty_pages_ratelimited( |
| 3331 | root->fs_info->btree_inode->i_mapping, 1); | 3473 | root->fs_info->btree_inode->i_mapping); |
| 3332 | } | 3474 | } |
| 3333 | return; | 3475 | return; |
| 3334 | } | 3476 | } |
| 3335 | 3477 | ||
| 3336 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 3478 | void btrfs_btree_balance_dirty(struct btrfs_root *root) |
| 3337 | { | 3479 | { |
| 3338 | /* | 3480 | __btrfs_btree_balance_dirty(root, 1); |
| 3339 | * looks as though older kernels can get into trouble with | 3481 | } |
| 3340 | * this code, they end up stuck in balance_dirty_pages forever | ||
| 3341 | */ | ||
| 3342 | u64 num_dirty; | ||
| 3343 | unsigned long thresh = 32 * 1024 * 1024; | ||
| 3344 | |||
| 3345 | if (current->flags & PF_MEMALLOC) | ||
| 3346 | return; | ||
| 3347 | |||
| 3348 | num_dirty = root->fs_info->dirty_metadata_bytes; | ||
| 3349 | 3482 | ||
| 3350 | if (num_dirty > thresh) { | 3483 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root) |
| 3351 | balance_dirty_pages_ratelimited_nr( | 3484 | { |
| 3352 | root->fs_info->btree_inode->i_mapping, 1); | 3485 | __btrfs_btree_balance_dirty(root, 0); |
| 3353 | } | ||
| 3354 | return; | ||
| 3355 | } | 3486 | } |
| 3356 | 3487 | ||
| 3357 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | 3488 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) |
| @@ -3360,52 +3491,6 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
| 3360 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 3491 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
| 3361 | } | 3492 | } |
| 3362 | 3493 | ||
| 3363 | int btree_lock_page_hook(struct page *page, void *data, | ||
| 3364 | void (*flush_fn)(void *)) | ||
| 3365 | { | ||
| 3366 | struct inode *inode = page->mapping->host; | ||
| 3367 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3368 | struct extent_buffer *eb; | ||
| 3369 | |||
| 3370 | /* | ||
| 3371 | * We culled this eb but the page is still hanging out on the mapping, | ||
| 3372 | * carry on. | ||
| 3373 | */ | ||
| 3374 | if (!PagePrivate(page)) | ||
| 3375 | goto out; | ||
| 3376 | |||
| 3377 | eb = (struct extent_buffer *)page->private; | ||
| 3378 | if (!eb) { | ||
| 3379 | WARN_ON(1); | ||
| 3380 | goto out; | ||
| 3381 | } | ||
| 3382 | if (page != eb->pages[0]) | ||
| 3383 | goto out; | ||
| 3384 | |||
| 3385 | if (!btrfs_try_tree_write_lock(eb)) { | ||
| 3386 | flush_fn(data); | ||
| 3387 | btrfs_tree_lock(eb); | ||
| 3388 | } | ||
| 3389 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | ||
| 3390 | |||
| 3391 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
| 3392 | spin_lock(&root->fs_info->delalloc_lock); | ||
| 3393 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
| 3394 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
| 3395 | else | ||
| 3396 | WARN_ON(1); | ||
| 3397 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 3398 | } | ||
| 3399 | |||
| 3400 | btrfs_tree_unlock(eb); | ||
| 3401 | out: | ||
| 3402 | if (!trylock_page(page)) { | ||
| 3403 | flush_fn(data); | ||
| 3404 | lock_page(page); | ||
| 3405 | } | ||
| 3406 | return 0; | ||
| 3407 | } | ||
| 3408 | |||
| 3409 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 3494 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
| 3410 | int read_only) | 3495 | int read_only) |
| 3411 | { | 3496 | { |
| @@ -3608,7 +3693,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, | |||
| 3608 | 3693 | ||
| 3609 | while (1) { | 3694 | while (1) { |
| 3610 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | 3695 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, |
| 3611 | mark); | 3696 | mark, NULL); |
| 3612 | if (ret) | 3697 | if (ret) |
| 3613 | break; | 3698 | break; |
| 3614 | 3699 | ||
| @@ -3663,7 +3748,7 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | |||
| 3663 | again: | 3748 | again: |
| 3664 | while (1) { | 3749 | while (1) { |
| 3665 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3750 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
| 3666 | EXTENT_DIRTY); | 3751 | EXTENT_DIRTY, NULL); |
| 3667 | if (ret) | 3752 | if (ret) |
| 3668 | break; | 3753 | break; |
| 3669 | 3754 | ||
| @@ -3800,7 +3885,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
| 3800 | } | 3885 | } |
| 3801 | 3886 | ||
| 3802 | static struct extent_io_ops btree_extent_io_ops = { | 3887 | static struct extent_io_ops btree_extent_io_ops = { |
| 3803 | .write_cache_pages_lock_hook = btree_lock_page_hook, | ||
| 3804 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3888 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
| 3805 | .readpage_io_failed_hook = btree_io_failed_hook, | 3889 | .readpage_io_failed_hook = btree_io_failed_hook, |
| 3806 | .submit_bio_hook = btree_submit_bio_hook, | 3890 | .submit_bio_hook = btree_submit_bio_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c5b00a735fef..305c33efb0e3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -62,8 +62,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 62 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 62 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
| 63 | struct btrfs_key *location); | 63 | struct btrfs_key *location); |
| 64 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); | 64 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); |
| 65 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 65 | void btrfs_btree_balance_dirty(struct btrfs_root *root); |
| 66 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 66 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); |
| 67 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 67 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); |
| 68 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 68 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
| 69 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, | 69 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
| @@ -95,6 +95,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
| 95 | u64 objectid); | 95 | u64 objectid); |
| 96 | int btree_lock_page_hook(struct page *page, void *data, | 96 | int btree_lock_page_hook(struct page *page, void *data, |
| 97 | void (*flush_fn)(void *)); | 97 | void (*flush_fn)(void *)); |
| 98 | int btrfs_calc_num_tolerated_disk_barrier_failures( | ||
| 99 | struct btrfs_fs_info *fs_info); | ||
| 98 | 100 | ||
| 99 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 101 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 100 | void btrfs_init_lockdep(void); | 102 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ba58024d40d3..521e9d4424f6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include "volumes.h" | 33 | #include "volumes.h" |
| 34 | #include "locking.h" | 34 | #include "locking.h" |
| 35 | #include "free-space-cache.h" | 35 | #include "free-space-cache.h" |
| 36 | #include "math.h" | ||
| 36 | 37 | ||
| 37 | #undef SCRAMBLE_DELAYED_REFS | 38 | #undef SCRAMBLE_DELAYED_REFS |
| 38 | 39 | ||
| @@ -94,8 +95,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 94 | u64 flags, struct btrfs_disk_key *key, | 95 | u64 flags, struct btrfs_disk_key *key, |
| 95 | int level, struct btrfs_key *ins); | 96 | int level, struct btrfs_key *ins); |
| 96 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 97 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 97 | struct btrfs_root *extent_root, u64 alloc_bytes, | 98 | struct btrfs_root *extent_root, u64 flags, |
| 98 | u64 flags, int force); | 99 | int force); |
| 99 | static int find_next_key(struct btrfs_path *path, int level, | 100 | static int find_next_key(struct btrfs_path *path, int level, |
| 100 | struct btrfs_key *key); | 101 | struct btrfs_key *key); |
| 101 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 102 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| @@ -312,7 +313,8 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
| 312 | while (start < end) { | 313 | while (start < end) { |
| 313 | ret = find_first_extent_bit(info->pinned_extents, start, | 314 | ret = find_first_extent_bit(info->pinned_extents, start, |
| 314 | &extent_start, &extent_end, | 315 | &extent_start, &extent_end, |
| 315 | EXTENT_DIRTY | EXTENT_UPTODATE); | 316 | EXTENT_DIRTY | EXTENT_UPTODATE, |
| 317 | NULL); | ||
| 316 | if (ret) | 318 | if (ret) |
| 317 | break; | 319 | break; |
| 318 | 320 | ||
| @@ -648,24 +650,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info) | |||
| 648 | rcu_read_unlock(); | 650 | rcu_read_unlock(); |
| 649 | } | 651 | } |
| 650 | 652 | ||
| 651 | static u64 div_factor(u64 num, int factor) | ||
| 652 | { | ||
| 653 | if (factor == 10) | ||
| 654 | return num; | ||
| 655 | num *= factor; | ||
| 656 | do_div(num, 10); | ||
| 657 | return num; | ||
| 658 | } | ||
| 659 | |||
| 660 | static u64 div_factor_fine(u64 num, int factor) | ||
| 661 | { | ||
| 662 | if (factor == 100) | ||
| 663 | return num; | ||
| 664 | num *= factor; | ||
| 665 | do_div(num, 100); | ||
| 666 | return num; | ||
| 667 | } | ||
| 668 | |||
| 669 | u64 btrfs_find_block_group(struct btrfs_root *root, | 653 | u64 btrfs_find_block_group(struct btrfs_root *root, |
| 670 | u64 search_start, u64 search_hint, int owner) | 654 | u64 search_start, u64 search_hint, int owner) |
| 671 | { | 655 | { |
| @@ -1834,7 +1818,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
| 1834 | 1818 | ||
| 1835 | 1819 | ||
| 1836 | /* Tell the block device(s) that the sectors can be discarded */ | 1820 | /* Tell the block device(s) that the sectors can be discarded */ |
| 1837 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, | 1821 | ret = btrfs_map_block(root->fs_info, REQ_DISCARD, |
| 1838 | bytenr, &num_bytes, &bbio, 0); | 1822 | bytenr, &num_bytes, &bbio, 0); |
| 1839 | /* Error condition is -ENOMEM */ | 1823 | /* Error condition is -ENOMEM */ |
| 1840 | if (!ret) { | 1824 | if (!ret) { |
| @@ -2313,6 +2297,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
| 2313 | kfree(extent_op); | 2297 | kfree(extent_op); |
| 2314 | 2298 | ||
| 2315 | if (ret) { | 2299 | if (ret) { |
| 2300 | list_del_init(&locked_ref->cluster); | ||
| 2301 | mutex_unlock(&locked_ref->mutex); | ||
| 2302 | |||
| 2316 | printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); | 2303 | printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); |
| 2317 | spin_lock(&delayed_refs->lock); | 2304 | spin_lock(&delayed_refs->lock); |
| 2318 | return ret; | 2305 | return ret; |
| @@ -2355,16 +2342,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
| 2355 | count++; | 2342 | count++; |
| 2356 | 2343 | ||
| 2357 | if (ret) { | 2344 | if (ret) { |
| 2345 | if (locked_ref) { | ||
| 2346 | list_del_init(&locked_ref->cluster); | ||
| 2347 | mutex_unlock(&locked_ref->mutex); | ||
| 2348 | } | ||
| 2358 | printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); | 2349 | printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); |
| 2359 | spin_lock(&delayed_refs->lock); | 2350 | spin_lock(&delayed_refs->lock); |
| 2360 | return ret; | 2351 | return ret; |
| 2361 | } | 2352 | } |
| 2362 | 2353 | ||
| 2363 | next: | 2354 | next: |
| 2364 | do_chunk_alloc(trans, fs_info->extent_root, | ||
| 2365 | 2 * 1024 * 1024, | ||
| 2366 | btrfs_get_alloc_profile(root, 0), | ||
| 2367 | CHUNK_ALLOC_NO_FORCE); | ||
| 2368 | cond_resched(); | 2355 | cond_resched(); |
| 2369 | spin_lock(&delayed_refs->lock); | 2356 | spin_lock(&delayed_refs->lock); |
| 2370 | } | 2357 | } |
| @@ -2478,10 +2465,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
| 2478 | if (root == root->fs_info->extent_root) | 2465 | if (root == root->fs_info->extent_root) |
| 2479 | root = root->fs_info->tree_root; | 2466 | root = root->fs_info->tree_root; |
| 2480 | 2467 | ||
| 2481 | do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 2482 | 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0), | ||
| 2483 | CHUNK_ALLOC_NO_FORCE); | ||
| 2484 | |||
| 2485 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | 2468 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); |
| 2486 | 2469 | ||
| 2487 | delayed_refs = &trans->transaction->delayed_refs; | 2470 | delayed_refs = &trans->transaction->delayed_refs; |
| @@ -2551,6 +2534,12 @@ again: | |||
| 2551 | } | 2534 | } |
| 2552 | 2535 | ||
| 2553 | if (run_all) { | 2536 | if (run_all) { |
| 2537 | if (!list_empty(&trans->new_bgs)) { | ||
| 2538 | spin_unlock(&delayed_refs->lock); | ||
| 2539 | btrfs_create_pending_block_groups(trans, root); | ||
| 2540 | spin_lock(&delayed_refs->lock); | ||
| 2541 | } | ||
| 2542 | |||
| 2554 | node = rb_first(&delayed_refs->root); | 2543 | node = rb_first(&delayed_refs->root); |
| 2555 | if (!node) | 2544 | if (!node) |
| 2556 | goto out; | 2545 | goto out; |
| @@ -3406,7 +3395,6 @@ alloc: | |||
| 3406 | return PTR_ERR(trans); | 3395 | return PTR_ERR(trans); |
| 3407 | 3396 | ||
| 3408 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3397 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
| 3409 | bytes + 2 * 1024 * 1024, | ||
| 3410 | alloc_target, | 3398 | alloc_target, |
| 3411 | CHUNK_ALLOC_NO_FORCE); | 3399 | CHUNK_ALLOC_NO_FORCE); |
| 3412 | btrfs_end_transaction(trans, root); | 3400 | btrfs_end_transaction(trans, root); |
| @@ -3488,8 +3476,7 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
| 3488 | } | 3476 | } |
| 3489 | 3477 | ||
| 3490 | static int should_alloc_chunk(struct btrfs_root *root, | 3478 | static int should_alloc_chunk(struct btrfs_root *root, |
| 3491 | struct btrfs_space_info *sinfo, u64 alloc_bytes, | 3479 | struct btrfs_space_info *sinfo, int force) |
| 3492 | int force) | ||
| 3493 | { | 3480 | { |
| 3494 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | 3481 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; |
| 3495 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | 3482 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; |
| @@ -3504,7 +3491,8 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
| 3504 | * and purposes it's used space. Don't worry about locking the | 3491 | * and purposes it's used space. Don't worry about locking the |
| 3505 | * global_rsv, it doesn't change except when the transaction commits. | 3492 | * global_rsv, it doesn't change except when the transaction commits. |
| 3506 | */ | 3493 | */ |
| 3507 | num_allocated += global_rsv->size; | 3494 | if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) |
| 3495 | num_allocated += global_rsv->size; | ||
| 3508 | 3496 | ||
| 3509 | /* | 3497 | /* |
| 3510 | * in limited mode, we want to have some free space up to | 3498 | * in limited mode, we want to have some free space up to |
| @@ -3518,15 +3506,8 @@ static int should_alloc_chunk(struct btrfs_root *root, | |||
| 3518 | if (num_bytes - num_allocated < thresh) | 3506 | if (num_bytes - num_allocated < thresh) |
| 3519 | return 1; | 3507 | return 1; |
| 3520 | } | 3508 | } |
| 3521 | thresh = btrfs_super_total_bytes(root->fs_info->super_copy); | ||
| 3522 | |||
| 3523 | /* 256MB or 2% of the FS */ | ||
| 3524 | thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2)); | ||
| 3525 | /* system chunks need a much small threshold */ | ||
| 3526 | if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
| 3527 | thresh = 32 * 1024 * 1024; | ||
| 3528 | 3509 | ||
| 3529 | if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8)) | 3510 | if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8)) |
| 3530 | return 0; | 3511 | return 0; |
| 3531 | return 1; | 3512 | return 1; |
| 3532 | } | 3513 | } |
| @@ -3576,8 +3557,7 @@ static void check_system_chunk(struct btrfs_trans_handle *trans, | |||
| 3576 | } | 3557 | } |
| 3577 | 3558 | ||
| 3578 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3559 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 3579 | struct btrfs_root *extent_root, u64 alloc_bytes, | 3560 | struct btrfs_root *extent_root, u64 flags, int force) |
| 3580 | u64 flags, int force) | ||
| 3581 | { | 3561 | { |
| 3582 | struct btrfs_space_info *space_info; | 3562 | struct btrfs_space_info *space_info; |
| 3583 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3563 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
| @@ -3601,7 +3581,7 @@ again: | |||
| 3601 | return 0; | 3581 | return 0; |
| 3602 | } | 3582 | } |
| 3603 | 3583 | ||
| 3604 | if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { | 3584 | if (!should_alloc_chunk(extent_root, space_info, force)) { |
| 3605 | spin_unlock(&space_info->lock); | 3585 | spin_unlock(&space_info->lock); |
| 3606 | return 0; | 3586 | return 0; |
| 3607 | } else if (space_info->chunk_alloc) { | 3587 | } else if (space_info->chunk_alloc) { |
| @@ -3669,6 +3649,60 @@ out: | |||
| 3669 | return ret; | 3649 | return ret; |
| 3670 | } | 3650 | } |
| 3671 | 3651 | ||
| 3652 | static int can_overcommit(struct btrfs_root *root, | ||
| 3653 | struct btrfs_space_info *space_info, u64 bytes, | ||
| 3654 | enum btrfs_reserve_flush_enum flush) | ||
| 3655 | { | ||
| 3656 | u64 profile = btrfs_get_alloc_profile(root, 0); | ||
| 3657 | u64 avail; | ||
| 3658 | u64 used; | ||
| 3659 | |||
| 3660 | used = space_info->bytes_used + space_info->bytes_reserved + | ||
| 3661 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
| 3662 | space_info->bytes_may_use; | ||
| 3663 | |||
| 3664 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 3665 | avail = root->fs_info->free_chunk_space; | ||
| 3666 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 3667 | |||
| 3668 | /* | ||
| 3669 | * If we have dup, raid1 or raid10 then only half of the free | ||
| 3670 | * space is actually useable. | ||
| 3671 | */ | ||
| 3672 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3673 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3674 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 3675 | avail >>= 1; | ||
| 3676 | |||
| 3677 | /* | ||
| 3678 | * If we aren't flushing all things, let us overcommit up to | ||
| 3679 | * 1/2th of the space. If we can flush, don't let us overcommit | ||
| 3680 | * too much, let it overcommit up to 1/8 of the space. | ||
| 3681 | */ | ||
| 3682 | if (flush == BTRFS_RESERVE_FLUSH_ALL) | ||
| 3683 | avail >>= 3; | ||
| 3684 | else | ||
| 3685 | avail >>= 1; | ||
| 3686 | |||
| 3687 | if (used + bytes < space_info->total_bytes + avail) | ||
| 3688 | return 1; | ||
| 3689 | return 0; | ||
| 3690 | } | ||
| 3691 | |||
| 3692 | static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, | ||
| 3693 | unsigned long nr_pages, | ||
| 3694 | enum wb_reason reason) | ||
| 3695 | { | ||
| 3696 | if (!writeback_in_progress(sb->s_bdi) && | ||
| 3697 | down_read_trylock(&sb->s_umount)) { | ||
| 3698 | writeback_inodes_sb_nr(sb, nr_pages, reason); | ||
| 3699 | up_read(&sb->s_umount); | ||
| 3700 | return 1; | ||
| 3701 | } | ||
| 3702 | |||
| 3703 | return 0; | ||
| 3704 | } | ||
| 3705 | |||
| 3672 | /* | 3706 | /* |
| 3673 | * shrink metadata reservation for delalloc | 3707 | * shrink metadata reservation for delalloc |
| 3674 | */ | 3708 | */ |
| @@ -3683,6 +3717,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3683 | long time_left; | 3717 | long time_left; |
| 3684 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3718 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
| 3685 | int loops = 0; | 3719 | int loops = 0; |
| 3720 | enum btrfs_reserve_flush_enum flush; | ||
| 3686 | 3721 | ||
| 3687 | trans = (struct btrfs_trans_handle *)current->journal_info; | 3722 | trans = (struct btrfs_trans_handle *)current->journal_info; |
| 3688 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3723 | block_rsv = &root->fs_info->delalloc_block_rsv; |
| @@ -3693,21 +3728,30 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3693 | if (delalloc_bytes == 0) { | 3728 | if (delalloc_bytes == 0) { |
| 3694 | if (trans) | 3729 | if (trans) |
| 3695 | return; | 3730 | return; |
| 3696 | btrfs_wait_ordered_extents(root, 0, 0); | 3731 | btrfs_wait_ordered_extents(root, 0); |
| 3697 | return; | 3732 | return; |
| 3698 | } | 3733 | } |
| 3699 | 3734 | ||
| 3700 | while (delalloc_bytes && loops < 3) { | 3735 | while (delalloc_bytes && loops < 3) { |
| 3701 | max_reclaim = min(delalloc_bytes, to_reclaim); | 3736 | max_reclaim = min(delalloc_bytes, to_reclaim); |
| 3702 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 3737 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
| 3703 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, | 3738 | writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, |
| 3704 | WB_REASON_FS_FREE_SPACE); | 3739 | nr_pages, |
| 3740 | WB_REASON_FS_FREE_SPACE); | ||
| 3705 | 3741 | ||
| 3742 | /* | ||
| 3743 | * We need to wait for the async pages to actually start before | ||
| 3744 | * we do anything. | ||
| 3745 | */ | ||
| 3746 | wait_event(root->fs_info->async_submit_wait, | ||
| 3747 | !atomic_read(&root->fs_info->async_delalloc_pages)); | ||
| 3748 | |||
| 3749 | if (!trans) | ||
| 3750 | flush = BTRFS_RESERVE_FLUSH_ALL; | ||
| 3751 | else | ||
| 3752 | flush = BTRFS_RESERVE_NO_FLUSH; | ||
| 3706 | spin_lock(&space_info->lock); | 3753 | spin_lock(&space_info->lock); |
| 3707 | if (space_info->bytes_used + space_info->bytes_reserved + | 3754 | if (can_overcommit(root, space_info, orig, flush)) { |
| 3708 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
| 3709 | space_info->bytes_may_use + orig <= | ||
| 3710 | space_info->total_bytes) { | ||
| 3711 | spin_unlock(&space_info->lock); | 3755 | spin_unlock(&space_info->lock); |
| 3712 | break; | 3756 | break; |
| 3713 | } | 3757 | } |
| @@ -3715,7 +3759,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3715 | 3759 | ||
| 3716 | loops++; | 3760 | loops++; |
| 3717 | if (wait_ordered && !trans) { | 3761 | if (wait_ordered && !trans) { |
| 3718 | btrfs_wait_ordered_extents(root, 0, 0); | 3762 | btrfs_wait_ordered_extents(root, 0); |
| 3719 | } else { | 3763 | } else { |
| 3720 | time_left = schedule_timeout_killable(1); | 3764 | time_left = schedule_timeout_killable(1); |
| 3721 | if (time_left) | 3765 | if (time_left) |
| @@ -3784,11 +3828,12 @@ commit: | |||
| 3784 | } | 3828 | } |
| 3785 | 3829 | ||
| 3786 | enum flush_state { | 3830 | enum flush_state { |
| 3787 | FLUSH_DELALLOC = 1, | 3831 | FLUSH_DELAYED_ITEMS_NR = 1, |
| 3788 | FLUSH_DELALLOC_WAIT = 2, | 3832 | FLUSH_DELAYED_ITEMS = 2, |
| 3789 | FLUSH_DELAYED_ITEMS_NR = 3, | 3833 | FLUSH_DELALLOC = 3, |
| 3790 | FLUSH_DELAYED_ITEMS = 4, | 3834 | FLUSH_DELALLOC_WAIT = 4, |
| 3791 | COMMIT_TRANS = 5, | 3835 | ALLOC_CHUNK = 5, |
| 3836 | COMMIT_TRANS = 6, | ||
| 3792 | }; | 3837 | }; |
| 3793 | 3838 | ||
| 3794 | static int flush_space(struct btrfs_root *root, | 3839 | static int flush_space(struct btrfs_root *root, |
| @@ -3800,11 +3845,6 @@ static int flush_space(struct btrfs_root *root, | |||
| 3800 | int ret = 0; | 3845 | int ret = 0; |
| 3801 | 3846 | ||
| 3802 | switch (state) { | 3847 | switch (state) { |
| 3803 | case FLUSH_DELALLOC: | ||
| 3804 | case FLUSH_DELALLOC_WAIT: | ||
| 3805 | shrink_delalloc(root, num_bytes, orig_bytes, | ||
| 3806 | state == FLUSH_DELALLOC_WAIT); | ||
| 3807 | break; | ||
| 3808 | case FLUSH_DELAYED_ITEMS_NR: | 3848 | case FLUSH_DELAYED_ITEMS_NR: |
| 3809 | case FLUSH_DELAYED_ITEMS: | 3849 | case FLUSH_DELAYED_ITEMS: |
| 3810 | if (state == FLUSH_DELAYED_ITEMS_NR) { | 3850 | if (state == FLUSH_DELAYED_ITEMS_NR) { |
| @@ -3825,6 +3865,24 @@ static int flush_space(struct btrfs_root *root, | |||
| 3825 | ret = btrfs_run_delayed_items_nr(trans, root, nr); | 3865 | ret = btrfs_run_delayed_items_nr(trans, root, nr); |
| 3826 | btrfs_end_transaction(trans, root); | 3866 | btrfs_end_transaction(trans, root); |
| 3827 | break; | 3867 | break; |
| 3868 | case FLUSH_DELALLOC: | ||
| 3869 | case FLUSH_DELALLOC_WAIT: | ||
| 3870 | shrink_delalloc(root, num_bytes, orig_bytes, | ||
| 3871 | state == FLUSH_DELALLOC_WAIT); | ||
| 3872 | break; | ||
| 3873 | case ALLOC_CHUNK: | ||
| 3874 | trans = btrfs_join_transaction(root); | ||
| 3875 | if (IS_ERR(trans)) { | ||
| 3876 | ret = PTR_ERR(trans); | ||
| 3877 | break; | ||
| 3878 | } | ||
| 3879 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 3880 | btrfs_get_alloc_profile(root, 0), | ||
| 3881 | CHUNK_ALLOC_NO_FORCE); | ||
| 3882 | btrfs_end_transaction(trans, root); | ||
| 3883 | if (ret == -ENOSPC) | ||
| 3884 | ret = 0; | ||
| 3885 | break; | ||
| 3828 | case COMMIT_TRANS: | 3886 | case COMMIT_TRANS: |
| 3829 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); | 3887 | ret = may_commit_transaction(root, space_info, orig_bytes, 0); |
| 3830 | break; | 3888 | break; |
| @@ -3840,7 +3898,7 @@ static int flush_space(struct btrfs_root *root, | |||
| 3840 | * @root - the root we're allocating for | 3898 | * @root - the root we're allocating for |
| 3841 | * @block_rsv - the block_rsv we're allocating for | 3899 | * @block_rsv - the block_rsv we're allocating for |
| 3842 | * @orig_bytes - the number of bytes we want | 3900 | * @orig_bytes - the number of bytes we want |
| 3843 | * @flush - wether or not we can flush to make our reservation | 3901 | * @flush - whether or not we can flush to make our reservation |
| 3844 | * | 3902 | * |
| 3845 | * This will reserve orgi_bytes number of bytes from the space info associated | 3903 | * This will reserve orgi_bytes number of bytes from the space info associated |
| 3846 | * with the block_rsv. If there is not enough space it will make an attempt to | 3904 | * with the block_rsv. If there is not enough space it will make an attempt to |
| @@ -3851,24 +3909,25 @@ static int flush_space(struct btrfs_root *root, | |||
| 3851 | */ | 3909 | */ |
| 3852 | static int reserve_metadata_bytes(struct btrfs_root *root, | 3910 | static int reserve_metadata_bytes(struct btrfs_root *root, |
| 3853 | struct btrfs_block_rsv *block_rsv, | 3911 | struct btrfs_block_rsv *block_rsv, |
| 3854 | u64 orig_bytes, int flush) | 3912 | u64 orig_bytes, |
| 3913 | enum btrfs_reserve_flush_enum flush) | ||
| 3855 | { | 3914 | { |
| 3856 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3915 | struct btrfs_space_info *space_info = block_rsv->space_info; |
| 3857 | u64 used; | 3916 | u64 used; |
| 3858 | u64 num_bytes = orig_bytes; | 3917 | u64 num_bytes = orig_bytes; |
| 3859 | int flush_state = FLUSH_DELALLOC; | 3918 | int flush_state = FLUSH_DELAYED_ITEMS_NR; |
| 3860 | int ret = 0; | 3919 | int ret = 0; |
| 3861 | bool flushing = false; | 3920 | bool flushing = false; |
| 3862 | bool committed = false; | ||
| 3863 | 3921 | ||
| 3864 | again: | 3922 | again: |
| 3865 | ret = 0; | 3923 | ret = 0; |
| 3866 | spin_lock(&space_info->lock); | 3924 | spin_lock(&space_info->lock); |
| 3867 | /* | 3925 | /* |
| 3868 | * We only want to wait if somebody other than us is flushing and we are | 3926 | * We only want to wait if somebody other than us is flushing and we |
| 3869 | * actually alloed to flush. | 3927 | * are actually allowed to flush all things. |
| 3870 | */ | 3928 | */ |
| 3871 | while (flush && !flushing && space_info->flush) { | 3929 | while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing && |
| 3930 | space_info->flush) { | ||
| 3872 | spin_unlock(&space_info->lock); | 3931 | spin_unlock(&space_info->lock); |
| 3873 | /* | 3932 | /* |
| 3874 | * If we have a trans handle we can't wait because the flusher | 3933 | * If we have a trans handle we can't wait because the flusher |
| @@ -3922,80 +3981,52 @@ again: | |||
| 3922 | (orig_bytes * 2); | 3981 | (orig_bytes * 2); |
| 3923 | } | 3982 | } |
| 3924 | 3983 | ||
| 3925 | if (ret) { | 3984 | if (ret && can_overcommit(root, space_info, orig_bytes, flush)) { |
| 3926 | u64 profile = btrfs_get_alloc_profile(root, 0); | 3985 | space_info->bytes_may_use += orig_bytes; |
| 3927 | u64 avail; | 3986 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
| 3928 | 3987 | space_info->flags, orig_bytes, | |
| 3929 | /* | 3988 | 1); |
| 3930 | * If we have a lot of space that's pinned, don't bother doing | 3989 | ret = 0; |
| 3931 | * the overcommit dance yet and just commit the transaction. | ||
| 3932 | */ | ||
| 3933 | avail = (space_info->total_bytes - space_info->bytes_used) * 8; | ||
| 3934 | do_div(avail, 10); | ||
| 3935 | if (space_info->bytes_pinned >= avail && flush && !committed) { | ||
| 3936 | space_info->flush = 1; | ||
| 3937 | flushing = true; | ||
| 3938 | spin_unlock(&space_info->lock); | ||
| 3939 | ret = may_commit_transaction(root, space_info, | ||
| 3940 | orig_bytes, 1); | ||
| 3941 | if (ret) | ||
| 3942 | goto out; | ||
| 3943 | committed = true; | ||
| 3944 | goto again; | ||
| 3945 | } | ||
| 3946 | |||
| 3947 | spin_lock(&root->fs_info->free_chunk_lock); | ||
| 3948 | avail = root->fs_info->free_chunk_space; | ||
| 3949 | |||
| 3950 | /* | ||
| 3951 | * If we have dup, raid1 or raid10 then only half of the free | ||
| 3952 | * space is actually useable. | ||
| 3953 | */ | ||
| 3954 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3955 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3956 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 3957 | avail >>= 1; | ||
| 3958 | |||
| 3959 | /* | ||
| 3960 | * If we aren't flushing don't let us overcommit too much, say | ||
| 3961 | * 1/8th of the space. If we can flush, let it overcommit up to | ||
| 3962 | * 1/2 of the space. | ||
| 3963 | */ | ||
| 3964 | if (flush) | ||
| 3965 | avail >>= 3; | ||
| 3966 | else | ||
| 3967 | avail >>= 1; | ||
| 3968 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
| 3969 | |||
| 3970 | if (used + num_bytes < space_info->total_bytes + avail) { | ||
| 3971 | space_info->bytes_may_use += orig_bytes; | ||
| 3972 | trace_btrfs_space_reservation(root->fs_info, | ||
| 3973 | "space_info", space_info->flags, orig_bytes, 1); | ||
| 3974 | ret = 0; | ||
| 3975 | } | ||
| 3976 | } | 3990 | } |
| 3977 | 3991 | ||
| 3978 | /* | 3992 | /* |
| 3979 | * Couldn't make our reservation, save our place so while we're trying | 3993 | * Couldn't make our reservation, save our place so while we're trying |
| 3980 | * to reclaim space we can actually use it instead of somebody else | 3994 | * to reclaim space we can actually use it instead of somebody else |
| 3981 | * stealing it from us. | 3995 | * stealing it from us. |
| 3996 | * | ||
| 3997 | * We make the other tasks wait for the flush only when we can flush | ||
| 3998 | * all things. | ||
| 3982 | */ | 3999 | */ |
| 3983 | if (ret && flush) { | 4000 | if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { |
| 3984 | flushing = true; | 4001 | flushing = true; |
| 3985 | space_info->flush = 1; | 4002 | space_info->flush = 1; |
| 3986 | } | 4003 | } |
| 3987 | 4004 | ||
| 3988 | spin_unlock(&space_info->lock); | 4005 | spin_unlock(&space_info->lock); |
| 3989 | 4006 | ||
| 3990 | if (!ret || !flush) | 4007 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) |
| 3991 | goto out; | 4008 | goto out; |
| 3992 | 4009 | ||
| 3993 | ret = flush_space(root, space_info, num_bytes, orig_bytes, | 4010 | ret = flush_space(root, space_info, num_bytes, orig_bytes, |
| 3994 | flush_state); | 4011 | flush_state); |
| 3995 | flush_state++; | 4012 | flush_state++; |
| 4013 | |||
| 4014 | /* | ||
| 4015 | * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock | ||
| 4016 | * would happen. So skip delalloc flush. | ||
| 4017 | */ | ||
| 4018 | if (flush == BTRFS_RESERVE_FLUSH_LIMIT && | ||
| 4019 | (flush_state == FLUSH_DELALLOC || | ||
| 4020 | flush_state == FLUSH_DELALLOC_WAIT)) | ||
| 4021 | flush_state = ALLOC_CHUNK; | ||
| 4022 | |||
| 3996 | if (!ret) | 4023 | if (!ret) |
| 3997 | goto again; | 4024 | goto again; |
| 3998 | else if (flush_state <= COMMIT_TRANS) | 4025 | else if (flush == BTRFS_RESERVE_FLUSH_LIMIT && |
| 4026 | flush_state < COMMIT_TRANS) | ||
| 4027 | goto again; | ||
| 4028 | else if (flush == BTRFS_RESERVE_FLUSH_ALL && | ||
| 4029 | flush_state <= COMMIT_TRANS) | ||
| 3999 | goto again; | 4030 | goto again; |
| 4000 | 4031 | ||
| 4001 | out: | 4032 | out: |
| @@ -4114,13 +4145,15 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | |||
| 4114 | return 0; | 4145 | return 0; |
| 4115 | } | 4146 | } |
| 4116 | 4147 | ||
| 4117 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | 4148 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) |
| 4118 | { | 4149 | { |
| 4119 | memset(rsv, 0, sizeof(*rsv)); | 4150 | memset(rsv, 0, sizeof(*rsv)); |
| 4120 | spin_lock_init(&rsv->lock); | 4151 | spin_lock_init(&rsv->lock); |
| 4152 | rsv->type = type; | ||
| 4121 | } | 4153 | } |
| 4122 | 4154 | ||
| 4123 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | 4155 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root, |
| 4156 | unsigned short type) | ||
| 4124 | { | 4157 | { |
| 4125 | struct btrfs_block_rsv *block_rsv; | 4158 | struct btrfs_block_rsv *block_rsv; |
| 4126 | struct btrfs_fs_info *fs_info = root->fs_info; | 4159 | struct btrfs_fs_info *fs_info = root->fs_info; |
| @@ -4129,7 +4162,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
| 4129 | if (!block_rsv) | 4162 | if (!block_rsv) |
| 4130 | return NULL; | 4163 | return NULL; |
| 4131 | 4164 | ||
| 4132 | btrfs_init_block_rsv(block_rsv); | 4165 | btrfs_init_block_rsv(block_rsv, type); |
| 4133 | block_rsv->space_info = __find_space_info(fs_info, | 4166 | block_rsv->space_info = __find_space_info(fs_info, |
| 4134 | BTRFS_BLOCK_GROUP_METADATA); | 4167 | BTRFS_BLOCK_GROUP_METADATA); |
| 4135 | return block_rsv; | 4168 | return block_rsv; |
| @@ -4138,13 +4171,15 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | |||
| 4138 | void btrfs_free_block_rsv(struct btrfs_root *root, | 4171 | void btrfs_free_block_rsv(struct btrfs_root *root, |
| 4139 | struct btrfs_block_rsv *rsv) | 4172 | struct btrfs_block_rsv *rsv) |
| 4140 | { | 4173 | { |
| 4174 | if (!rsv) | ||
| 4175 | return; | ||
| 4141 | btrfs_block_rsv_release(root, rsv, (u64)-1); | 4176 | btrfs_block_rsv_release(root, rsv, (u64)-1); |
| 4142 | kfree(rsv); | 4177 | kfree(rsv); |
| 4143 | } | 4178 | } |
| 4144 | 4179 | ||
| 4145 | static inline int __block_rsv_add(struct btrfs_root *root, | 4180 | int btrfs_block_rsv_add(struct btrfs_root *root, |
| 4146 | struct btrfs_block_rsv *block_rsv, | 4181 | struct btrfs_block_rsv *block_rsv, u64 num_bytes, |
| 4147 | u64 num_bytes, int flush) | 4182 | enum btrfs_reserve_flush_enum flush) |
| 4148 | { | 4183 | { |
| 4149 | int ret; | 4184 | int ret; |
| 4150 | 4185 | ||
| @@ -4160,20 +4195,6 @@ static inline int __block_rsv_add(struct btrfs_root *root, | |||
| 4160 | return ret; | 4195 | return ret; |
| 4161 | } | 4196 | } |
| 4162 | 4197 | ||
| 4163 | int btrfs_block_rsv_add(struct btrfs_root *root, | ||
| 4164 | struct btrfs_block_rsv *block_rsv, | ||
| 4165 | u64 num_bytes) | ||
| 4166 | { | ||
| 4167 | return __block_rsv_add(root, block_rsv, num_bytes, 1); | ||
| 4168 | } | ||
| 4169 | |||
| 4170 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, | ||
| 4171 | struct btrfs_block_rsv *block_rsv, | ||
| 4172 | u64 num_bytes) | ||
| 4173 | { | ||
| 4174 | return __block_rsv_add(root, block_rsv, num_bytes, 0); | ||
| 4175 | } | ||
| 4176 | |||
| 4177 | int btrfs_block_rsv_check(struct btrfs_root *root, | 4198 | int btrfs_block_rsv_check(struct btrfs_root *root, |
| 4178 | struct btrfs_block_rsv *block_rsv, int min_factor) | 4199 | struct btrfs_block_rsv *block_rsv, int min_factor) |
| 4179 | { | 4200 | { |
| @@ -4192,9 +4213,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root, | |||
| 4192 | return ret; | 4213 | return ret; |
| 4193 | } | 4214 | } |
| 4194 | 4215 | ||
| 4195 | static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, | 4216 | int btrfs_block_rsv_refill(struct btrfs_root *root, |
| 4196 | struct btrfs_block_rsv *block_rsv, | 4217 | struct btrfs_block_rsv *block_rsv, u64 min_reserved, |
| 4197 | u64 min_reserved, int flush) | 4218 | enum btrfs_reserve_flush_enum flush) |
| 4198 | { | 4219 | { |
| 4199 | u64 num_bytes = 0; | 4220 | u64 num_bytes = 0; |
| 4200 | int ret = -ENOSPC; | 4221 | int ret = -ENOSPC; |
| @@ -4222,20 +4243,6 @@ static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, | |||
| 4222 | return ret; | 4243 | return ret; |
| 4223 | } | 4244 | } |
| 4224 | 4245 | ||
| 4225 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
| 4226 | struct btrfs_block_rsv *block_rsv, | ||
| 4227 | u64 min_reserved) | ||
| 4228 | { | ||
| 4229 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1); | ||
| 4230 | } | ||
| 4231 | |||
| 4232 | int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, | ||
| 4233 | struct btrfs_block_rsv *block_rsv, | ||
| 4234 | u64 min_reserved) | ||
| 4235 | { | ||
| 4236 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0); | ||
| 4237 | } | ||
| 4238 | |||
| 4239 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 4246 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
| 4240 | struct btrfs_block_rsv *dst_rsv, | 4247 | struct btrfs_block_rsv *dst_rsv, |
| 4241 | u64 num_bytes) | 4248 | u64 num_bytes) |
| @@ -4416,10 +4423,10 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
| 4416 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | 4423 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); |
| 4417 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | 4424 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; |
| 4418 | /* | 4425 | /* |
| 4419 | * two for root back/forward refs, two for directory entries | 4426 | * two for root back/forward refs, two for directory entries, |
| 4420 | * and one for root of the snapshot. | 4427 | * one for root of the snapshot and one for parent inode. |
| 4421 | */ | 4428 | */ |
| 4422 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); | 4429 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 6); |
| 4423 | dst_rsv->space_info = src_rsv->space_info; | 4430 | dst_rsv->space_info = src_rsv->space_info; |
| 4424 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 4431 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
| 4425 | } | 4432 | } |
| @@ -4526,17 +4533,27 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4526 | u64 csum_bytes; | 4533 | u64 csum_bytes; |
| 4527 | unsigned nr_extents = 0; | 4534 | unsigned nr_extents = 0; |
| 4528 | int extra_reserve = 0; | 4535 | int extra_reserve = 0; |
| 4529 | int flush = 1; | 4536 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; |
| 4530 | int ret; | 4537 | int ret; |
| 4538 | bool delalloc_lock = true; | ||
| 4531 | 4539 | ||
| 4532 | /* Need to be holding the i_mutex here if we aren't free space cache */ | 4540 | /* If we are a free space inode we need to not flush since we will be in |
| 4533 | if (btrfs_is_free_space_inode(inode)) | 4541 | * the middle of a transaction commit. We also don't need the delalloc |
| 4534 | flush = 0; | 4542 | * mutex since we won't race with anybody. We need this mostly to make |
| 4543 | * lockdep shut its filthy mouth. | ||
| 4544 | */ | ||
| 4545 | if (btrfs_is_free_space_inode(inode)) { | ||
| 4546 | flush = BTRFS_RESERVE_NO_FLUSH; | ||
| 4547 | delalloc_lock = false; | ||
| 4548 | } | ||
| 4535 | 4549 | ||
| 4536 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 4550 | if (flush != BTRFS_RESERVE_NO_FLUSH && |
| 4551 | btrfs_transaction_in_commit(root->fs_info)) | ||
| 4537 | schedule_timeout(1); | 4552 | schedule_timeout(1); |
| 4538 | 4553 | ||
| 4539 | mutex_lock(&BTRFS_I(inode)->delalloc_mutex); | 4554 | if (delalloc_lock) |
| 4555 | mutex_lock(&BTRFS_I(inode)->delalloc_mutex); | ||
| 4556 | |||
| 4540 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4557 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
| 4541 | 4558 | ||
| 4542 | spin_lock(&BTRFS_I(inode)->lock); | 4559 | spin_lock(&BTRFS_I(inode)->lock); |
| @@ -4566,7 +4583,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4566 | ret = btrfs_qgroup_reserve(root, num_bytes + | 4583 | ret = btrfs_qgroup_reserve(root, num_bytes + |
| 4567 | nr_extents * root->leafsize); | 4584 | nr_extents * root->leafsize); |
| 4568 | if (ret) { | 4585 | if (ret) { |
| 4569 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | 4586 | spin_lock(&BTRFS_I(inode)->lock); |
| 4587 | calc_csum_metadata_size(inode, num_bytes, 0); | ||
| 4588 | spin_unlock(&BTRFS_I(inode)->lock); | ||
| 4589 | if (delalloc_lock) | ||
| 4590 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
| 4570 | return ret; | 4591 | return ret; |
| 4571 | } | 4592 | } |
| 4572 | } | 4593 | } |
| @@ -4601,7 +4622,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4601 | btrfs_ino(inode), | 4622 | btrfs_ino(inode), |
| 4602 | to_free, 0); | 4623 | to_free, 0); |
| 4603 | } | 4624 | } |
| 4604 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | 4625 | if (root->fs_info->quota_enabled) { |
| 4626 | btrfs_qgroup_free(root, num_bytes + | ||
| 4627 | nr_extents * root->leafsize); | ||
| 4628 | } | ||
| 4629 | if (delalloc_lock) | ||
| 4630 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
| 4605 | return ret; | 4631 | return ret; |
| 4606 | } | 4632 | } |
| 4607 | 4633 | ||
| @@ -4613,7 +4639,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4613 | } | 4639 | } |
| 4614 | BTRFS_I(inode)->reserved_extents += nr_extents; | 4640 | BTRFS_I(inode)->reserved_extents += nr_extents; |
| 4615 | spin_unlock(&BTRFS_I(inode)->lock); | 4641 | spin_unlock(&BTRFS_I(inode)->lock); |
| 4616 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | 4642 | |
| 4643 | if (delalloc_lock) | ||
| 4644 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
| 4617 | 4645 | ||
| 4618 | if (to_reserve) | 4646 | if (to_reserve) |
| 4619 | trace_btrfs_space_reservation(root->fs_info,"delalloc", | 4647 | trace_btrfs_space_reservation(root->fs_info,"delalloc", |
| @@ -4963,9 +4991,13 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 4963 | { | 4991 | { |
| 4964 | struct btrfs_fs_info *fs_info = root->fs_info; | 4992 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 4965 | struct btrfs_block_group_cache *cache = NULL; | 4993 | struct btrfs_block_group_cache *cache = NULL; |
| 4994 | struct btrfs_space_info *space_info; | ||
| 4995 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
| 4966 | u64 len; | 4996 | u64 len; |
| 4997 | bool readonly; | ||
| 4967 | 4998 | ||
| 4968 | while (start <= end) { | 4999 | while (start <= end) { |
| 5000 | readonly = false; | ||
| 4969 | if (!cache || | 5001 | if (!cache || |
| 4970 | start >= cache->key.objectid + cache->key.offset) { | 5002 | start >= cache->key.objectid + cache->key.offset) { |
| 4971 | if (cache) | 5003 | if (cache) |
| @@ -4983,15 +5015,30 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 4983 | } | 5015 | } |
| 4984 | 5016 | ||
| 4985 | start += len; | 5017 | start += len; |
| 5018 | space_info = cache->space_info; | ||
| 4986 | 5019 | ||
| 4987 | spin_lock(&cache->space_info->lock); | 5020 | spin_lock(&space_info->lock); |
| 4988 | spin_lock(&cache->lock); | 5021 | spin_lock(&cache->lock); |
| 4989 | cache->pinned -= len; | 5022 | cache->pinned -= len; |
| 4990 | cache->space_info->bytes_pinned -= len; | 5023 | space_info->bytes_pinned -= len; |
| 4991 | if (cache->ro) | 5024 | if (cache->ro) { |
| 4992 | cache->space_info->bytes_readonly += len; | 5025 | space_info->bytes_readonly += len; |
| 5026 | readonly = true; | ||
| 5027 | } | ||
| 4993 | spin_unlock(&cache->lock); | 5028 | spin_unlock(&cache->lock); |
| 4994 | spin_unlock(&cache->space_info->lock); | 5029 | if (!readonly && global_rsv->space_info == space_info) { |
| 5030 | spin_lock(&global_rsv->lock); | ||
| 5031 | if (!global_rsv->full) { | ||
| 5032 | len = min(len, global_rsv->size - | ||
| 5033 | global_rsv->reserved); | ||
| 5034 | global_rsv->reserved += len; | ||
| 5035 | space_info->bytes_may_use += len; | ||
| 5036 | if (global_rsv->reserved >= global_rsv->size) | ||
| 5037 | global_rsv->full = 1; | ||
| 5038 | } | ||
| 5039 | spin_unlock(&global_rsv->lock); | ||
| 5040 | } | ||
| 5041 | spin_unlock(&space_info->lock); | ||
| 4995 | } | 5042 | } |
| 4996 | 5043 | ||
| 4997 | if (cache) | 5044 | if (cache) |
| @@ -5018,7 +5065,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 5018 | 5065 | ||
| 5019 | while (1) { | 5066 | while (1) { |
| 5020 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 5067 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
| 5021 | EXTENT_DIRTY); | 5068 | EXTENT_DIRTY, NULL); |
| 5022 | if (ret) | 5069 | if (ret) |
| 5023 | break; | 5070 | break; |
| 5024 | 5071 | ||
| @@ -5096,8 +5143,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5096 | ret = remove_extent_backref(trans, extent_root, path, | 5143 | ret = remove_extent_backref(trans, extent_root, path, |
| 5097 | NULL, refs_to_drop, | 5144 | NULL, refs_to_drop, |
| 5098 | is_data); | 5145 | is_data); |
| 5099 | if (ret) | 5146 | if (ret) { |
| 5100 | goto abort; | 5147 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5148 | goto out; | ||
| 5149 | } | ||
| 5101 | btrfs_release_path(path); | 5150 | btrfs_release_path(path); |
| 5102 | path->leave_spinning = 1; | 5151 | path->leave_spinning = 1; |
| 5103 | 5152 | ||
| @@ -5115,8 +5164,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5115 | btrfs_print_leaf(extent_root, | 5164 | btrfs_print_leaf(extent_root, |
| 5116 | path->nodes[0]); | 5165 | path->nodes[0]); |
| 5117 | } | 5166 | } |
| 5118 | if (ret < 0) | 5167 | if (ret < 0) { |
| 5119 | goto abort; | 5168 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5169 | goto out; | ||
| 5170 | } | ||
| 5120 | extent_slot = path->slots[0]; | 5171 | extent_slot = path->slots[0]; |
| 5121 | } | 5172 | } |
| 5122 | } else if (ret == -ENOENT) { | 5173 | } else if (ret == -ENOENT) { |
| @@ -5130,7 +5181,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5130 | (unsigned long long)owner_objectid, | 5181 | (unsigned long long)owner_objectid, |
| 5131 | (unsigned long long)owner_offset); | 5182 | (unsigned long long)owner_offset); |
| 5132 | } else { | 5183 | } else { |
| 5133 | goto abort; | 5184 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5185 | goto out; | ||
| 5134 | } | 5186 | } |
| 5135 | 5187 | ||
| 5136 | leaf = path->nodes[0]; | 5188 | leaf = path->nodes[0]; |
| @@ -5140,8 +5192,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5140 | BUG_ON(found_extent || extent_slot != path->slots[0]); | 5192 | BUG_ON(found_extent || extent_slot != path->slots[0]); |
| 5141 | ret = convert_extent_item_v0(trans, extent_root, path, | 5193 | ret = convert_extent_item_v0(trans, extent_root, path, |
| 5142 | owner_objectid, 0); | 5194 | owner_objectid, 0); |
| 5143 | if (ret < 0) | 5195 | if (ret < 0) { |
| 5144 | goto abort; | 5196 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5197 | goto out; | ||
| 5198 | } | ||
| 5145 | 5199 | ||
| 5146 | btrfs_release_path(path); | 5200 | btrfs_release_path(path); |
| 5147 | path->leave_spinning = 1; | 5201 | path->leave_spinning = 1; |
| @@ -5158,8 +5212,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5158 | (unsigned long long)bytenr); | 5212 | (unsigned long long)bytenr); |
| 5159 | btrfs_print_leaf(extent_root, path->nodes[0]); | 5213 | btrfs_print_leaf(extent_root, path->nodes[0]); |
| 5160 | } | 5214 | } |
| 5161 | if (ret < 0) | 5215 | if (ret < 0) { |
| 5162 | goto abort; | 5216 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5217 | goto out; | ||
| 5218 | } | ||
| 5219 | |||
| 5163 | extent_slot = path->slots[0]; | 5220 | extent_slot = path->slots[0]; |
| 5164 | leaf = path->nodes[0]; | 5221 | leaf = path->nodes[0]; |
| 5165 | item_size = btrfs_item_size_nr(leaf, extent_slot); | 5222 | item_size = btrfs_item_size_nr(leaf, extent_slot); |
| @@ -5196,8 +5253,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5196 | ret = remove_extent_backref(trans, extent_root, path, | 5253 | ret = remove_extent_backref(trans, extent_root, path, |
| 5197 | iref, refs_to_drop, | 5254 | iref, refs_to_drop, |
| 5198 | is_data); | 5255 | is_data); |
| 5199 | if (ret) | 5256 | if (ret) { |
| 5200 | goto abort; | 5257 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5258 | goto out; | ||
| 5259 | } | ||
| 5201 | } | 5260 | } |
| 5202 | } else { | 5261 | } else { |
| 5203 | if (found_extent) { | 5262 | if (found_extent) { |
| @@ -5214,27 +5273,29 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 5214 | 5273 | ||
| 5215 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 5274 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
| 5216 | num_to_del); | 5275 | num_to_del); |
| 5217 | if (ret) | 5276 | if (ret) { |
| 5218 | goto abort; | 5277 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5278 | goto out; | ||
| 5279 | } | ||
| 5219 | btrfs_release_path(path); | 5280 | btrfs_release_path(path); |
| 5220 | 5281 | ||
| 5221 | if (is_data) { | 5282 | if (is_data) { |
| 5222 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 5283 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
| 5223 | if (ret) | 5284 | if (ret) { |
| 5224 | goto abort; | 5285 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5286 | goto out; | ||
| 5287 | } | ||
| 5225 | } | 5288 | } |
| 5226 | 5289 | ||
| 5227 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); | 5290 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
| 5228 | if (ret) | 5291 | if (ret) { |
| 5229 | goto abort; | 5292 | btrfs_abort_transaction(trans, extent_root, ret); |
| 5293 | goto out; | ||
| 5294 | } | ||
| 5230 | } | 5295 | } |
| 5231 | out: | 5296 | out: |
| 5232 | btrfs_free_path(path); | 5297 | btrfs_free_path(path); |
| 5233 | return ret; | 5298 | return ret; |
| 5234 | |||
| 5235 | abort: | ||
| 5236 | btrfs_abort_transaction(trans, extent_root, ret); | ||
| 5237 | goto out; | ||
| 5238 | } | 5299 | } |
| 5239 | 5300 | ||
| 5240 | /* | 5301 | /* |
| @@ -5446,7 +5507,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
| 5446 | return 0; | 5507 | return 0; |
| 5447 | } | 5508 | } |
| 5448 | 5509 | ||
| 5449 | static int __get_block_group_index(u64 flags) | 5510 | int __get_raid_index(u64 flags) |
| 5450 | { | 5511 | { |
| 5451 | int index; | 5512 | int index; |
| 5452 | 5513 | ||
| @@ -5466,7 +5527,7 @@ static int __get_block_group_index(u64 flags) | |||
| 5466 | 5527 | ||
| 5467 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | 5528 | static int get_block_group_index(struct btrfs_block_group_cache *cache) |
| 5468 | { | 5529 | { |
| 5469 | return __get_block_group_index(cache->flags); | 5530 | return __get_raid_index(cache->flags); |
| 5470 | } | 5531 | } |
| 5471 | 5532 | ||
| 5472 | enum btrfs_loop_type { | 5533 | enum btrfs_loop_type { |
| @@ -5497,8 +5558,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 5497 | struct btrfs_block_group_cache *used_block_group; | 5558 | struct btrfs_block_group_cache *used_block_group; |
| 5498 | u64 search_start = 0; | 5559 | u64 search_start = 0; |
| 5499 | int empty_cluster = 2 * 1024 * 1024; | 5560 | int empty_cluster = 2 * 1024 * 1024; |
| 5500 | int allowed_chunk_alloc = 0; | ||
| 5501 | int done_chunk_alloc = 0; | ||
| 5502 | struct btrfs_space_info *space_info; | 5561 | struct btrfs_space_info *space_info; |
| 5503 | int loop = 0; | 5562 | int loop = 0; |
| 5504 | int index = 0; | 5563 | int index = 0; |
| @@ -5530,9 +5589,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 5530 | if (btrfs_mixed_space_info(space_info)) | 5589 | if (btrfs_mixed_space_info(space_info)) |
| 5531 | use_cluster = false; | 5590 | use_cluster = false; |
| 5532 | 5591 | ||
| 5533 | if (orig_root->ref_cows || empty_size) | ||
| 5534 | allowed_chunk_alloc = 1; | ||
| 5535 | |||
| 5536 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { | 5592 | if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { |
| 5537 | last_ptr = &root->fs_info->meta_alloc_cluster; | 5593 | last_ptr = &root->fs_info->meta_alloc_cluster; |
| 5538 | if (!btrfs_test_opt(root, SSD)) | 5594 | if (!btrfs_test_opt(root, SSD)) |
| @@ -5806,10 +5862,6 @@ checks: | |||
| 5806 | 5862 | ||
| 5807 | trace_btrfs_reserve_extent(orig_root, block_group, | 5863 | trace_btrfs_reserve_extent(orig_root, block_group, |
| 5808 | search_start, num_bytes); | 5864 | search_start, num_bytes); |
| 5809 | if (offset < search_start) | ||
| 5810 | btrfs_add_free_space(used_block_group, offset, | ||
| 5811 | search_start - offset); | ||
| 5812 | BUG_ON(offset > search_start); | ||
| 5813 | if (used_block_group != block_group) | 5865 | if (used_block_group != block_group) |
| 5814 | btrfs_put_block_group(used_block_group); | 5866 | btrfs_put_block_group(used_block_group); |
| 5815 | btrfs_put_block_group(block_group); | 5867 | btrfs_put_block_group(block_group); |
| @@ -5842,34 +5894,17 @@ loop: | |||
| 5842 | index = 0; | 5894 | index = 0; |
| 5843 | loop++; | 5895 | loop++; |
| 5844 | if (loop == LOOP_ALLOC_CHUNK) { | 5896 | if (loop == LOOP_ALLOC_CHUNK) { |
| 5845 | if (allowed_chunk_alloc) { | 5897 | ret = do_chunk_alloc(trans, root, data, |
| 5846 | ret = do_chunk_alloc(trans, root, num_bytes + | 5898 | CHUNK_ALLOC_FORCE); |
| 5847 | 2 * 1024 * 1024, data, | 5899 | /* |
| 5848 | CHUNK_ALLOC_LIMITED); | 5900 | * Do not bail out on ENOSPC since we |
| 5849 | /* | 5901 | * can do more things. |
| 5850 | * Do not bail out on ENOSPC since we | 5902 | */ |
| 5851 | * can do more things. | 5903 | if (ret < 0 && ret != -ENOSPC) { |
| 5852 | */ | 5904 | btrfs_abort_transaction(trans, |
| 5853 | if (ret < 0 && ret != -ENOSPC) { | 5905 | root, ret); |
| 5854 | btrfs_abort_transaction(trans, | 5906 | goto out; |
| 5855 | root, ret); | ||
| 5856 | goto out; | ||
| 5857 | } | ||
| 5858 | allowed_chunk_alloc = 0; | ||
| 5859 | if (ret == 1) | ||
| 5860 | done_chunk_alloc = 1; | ||
| 5861 | } else if (!done_chunk_alloc && | ||
| 5862 | space_info->force_alloc == | ||
| 5863 | CHUNK_ALLOC_NO_FORCE) { | ||
| 5864 | space_info->force_alloc = CHUNK_ALLOC_LIMITED; | ||
| 5865 | } | 5907 | } |
| 5866 | |||
| 5867 | /* | ||
| 5868 | * We didn't allocate a chunk, go ahead and drop the | ||
| 5869 | * empty size and loop again. | ||
| 5870 | */ | ||
| 5871 | if (!done_chunk_alloc) | ||
| 5872 | loop = LOOP_NO_EMPTY_SIZE; | ||
| 5873 | } | 5908 | } |
| 5874 | 5909 | ||
| 5875 | if (loop == LOOP_NO_EMPTY_SIZE) { | 5910 | if (loop == LOOP_NO_EMPTY_SIZE) { |
| @@ -5944,20 +5979,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
| 5944 | 5979 | ||
| 5945 | data = btrfs_get_alloc_profile(root, data); | 5980 | data = btrfs_get_alloc_profile(root, data); |
| 5946 | again: | 5981 | again: |
| 5947 | /* | ||
| 5948 | * the only place that sets empty_size is btrfs_realloc_node, which | ||
| 5949 | * is not called recursively on allocations | ||
| 5950 | */ | ||
| 5951 | if (empty_size || root->ref_cows) { | ||
| 5952 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 5953 | num_bytes + 2 * 1024 * 1024, data, | ||
| 5954 | CHUNK_ALLOC_NO_FORCE); | ||
| 5955 | if (ret < 0 && ret != -ENOSPC) { | ||
| 5956 | btrfs_abort_transaction(trans, root, ret); | ||
| 5957 | return ret; | ||
| 5958 | } | ||
| 5959 | } | ||
| 5960 | |||
| 5961 | WARN_ON(num_bytes < root->sectorsize); | 5982 | WARN_ON(num_bytes < root->sectorsize); |
| 5962 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5983 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
| 5963 | hint_byte, ins, data); | 5984 | hint_byte, ins, data); |
| @@ -5967,12 +5988,6 @@ again: | |||
| 5967 | num_bytes = num_bytes >> 1; | 5988 | num_bytes = num_bytes >> 1; |
| 5968 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 5989 | num_bytes = num_bytes & ~(root->sectorsize - 1); |
| 5969 | num_bytes = max(num_bytes, min_alloc_size); | 5990 | num_bytes = max(num_bytes, min_alloc_size); |
| 5970 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 5971 | num_bytes, data, CHUNK_ALLOC_FORCE); | ||
| 5972 | if (ret < 0 && ret != -ENOSPC) { | ||
| 5973 | btrfs_abort_transaction(trans, root, ret); | ||
| 5974 | return ret; | ||
| 5975 | } | ||
| 5976 | if (num_bytes == min_alloc_size) | 5991 | if (num_bytes == min_alloc_size) |
| 5977 | final_tried = true; | 5992 | final_tried = true; |
| 5978 | goto again; | 5993 | goto again; |
| @@ -6295,7 +6310,8 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
| 6295 | block_rsv = get_block_rsv(trans, root); | 6310 | block_rsv = get_block_rsv(trans, root); |
| 6296 | 6311 | ||
| 6297 | if (block_rsv->size == 0) { | 6312 | if (block_rsv->size == 0) { |
| 6298 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | 6313 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
| 6314 | BTRFS_RESERVE_NO_FLUSH); | ||
| 6299 | /* | 6315 | /* |
| 6300 | * If we couldn't reserve metadata bytes try and use some from | 6316 | * If we couldn't reserve metadata bytes try and use some from |
| 6301 | * the global reserve. | 6317 | * the global reserve. |
| @@ -6314,15 +6330,15 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
| 6314 | ret = block_rsv_use_bytes(block_rsv, blocksize); | 6330 | ret = block_rsv_use_bytes(block_rsv, blocksize); |
| 6315 | if (!ret) | 6331 | if (!ret) |
| 6316 | return block_rsv; | 6332 | return block_rsv; |
| 6317 | if (ret) { | 6333 | if (ret && !block_rsv->failfast) { |
| 6318 | static DEFINE_RATELIMIT_STATE(_rs, | 6334 | static DEFINE_RATELIMIT_STATE(_rs, |
| 6319 | DEFAULT_RATELIMIT_INTERVAL, | 6335 | DEFAULT_RATELIMIT_INTERVAL, |
| 6320 | /*DEFAULT_RATELIMIT_BURST*/ 2); | 6336 | /*DEFAULT_RATELIMIT_BURST*/ 2); |
| 6321 | if (__ratelimit(&_rs)) { | 6337 | if (__ratelimit(&_rs)) |
| 6322 | printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); | 6338 | WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n", |
| 6323 | WARN_ON(1); | 6339 | ret); |
| 6324 | } | 6340 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
| 6325 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | 6341 | BTRFS_RESERVE_NO_FLUSH); |
| 6326 | if (!ret) { | 6342 | if (!ret) { |
| 6327 | return block_rsv; | 6343 | return block_rsv; |
| 6328 | } else if (ret && block_rsv != global_rsv) { | 6344 | } else if (ret && block_rsv != global_rsv) { |
| @@ -7279,7 +7295,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
| 7279 | 7295 | ||
| 7280 | alloc_flags = update_block_group_flags(root, cache->flags); | 7296 | alloc_flags = update_block_group_flags(root, cache->flags); |
| 7281 | if (alloc_flags != cache->flags) { | 7297 | if (alloc_flags != cache->flags) { |
| 7282 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7298 | ret = do_chunk_alloc(trans, root, alloc_flags, |
| 7283 | CHUNK_ALLOC_FORCE); | 7299 | CHUNK_ALLOC_FORCE); |
| 7284 | if (ret < 0) | 7300 | if (ret < 0) |
| 7285 | goto out; | 7301 | goto out; |
| @@ -7289,7 +7305,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
| 7289 | if (!ret) | 7305 | if (!ret) |
| 7290 | goto out; | 7306 | goto out; |
| 7291 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 7307 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
| 7292 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7308 | ret = do_chunk_alloc(trans, root, alloc_flags, |
| 7293 | CHUNK_ALLOC_FORCE); | 7309 | CHUNK_ALLOC_FORCE); |
| 7294 | if (ret < 0) | 7310 | if (ret < 0) |
| 7295 | goto out; | 7311 | goto out; |
| @@ -7303,7 +7319,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 7303 | struct btrfs_root *root, u64 type) | 7319 | struct btrfs_root *root, u64 type) |
| 7304 | { | 7320 | { |
| 7305 | u64 alloc_flags = get_alloc_profile(root, type); | 7321 | u64 alloc_flags = get_alloc_profile(root, type); |
| 7306 | return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 7322 | return do_chunk_alloc(trans, root, alloc_flags, |
| 7307 | CHUNK_ALLOC_FORCE); | 7323 | CHUNK_ALLOC_FORCE); |
| 7308 | } | 7324 | } |
| 7309 | 7325 | ||
| @@ -7453,7 +7469,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7453 | */ | 7469 | */ |
| 7454 | target = get_restripe_target(root->fs_info, block_group->flags); | 7470 | target = get_restripe_target(root->fs_info, block_group->flags); |
| 7455 | if (target) { | 7471 | if (target) { |
| 7456 | index = __get_block_group_index(extended_to_chunk(target)); | 7472 | index = __get_raid_index(extended_to_chunk(target)); |
| 7457 | } else { | 7473 | } else { |
| 7458 | /* | 7474 | /* |
| 7459 | * this is just a balance, so if we were marked as full | 7475 | * this is just a balance, so if we were marked as full |
| @@ -7487,7 +7503,8 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7487 | * check to make sure we can actually find a chunk with enough | 7503 | * check to make sure we can actually find a chunk with enough |
| 7488 | * space to fit our block group in. | 7504 | * space to fit our block group in. |
| 7489 | */ | 7505 | */ |
| 7490 | if (device->total_bytes > device->bytes_used + min_free) { | 7506 | if (device->total_bytes > device->bytes_used + min_free && |
| 7507 | !device->is_tgtdev_for_dev_replace) { | ||
| 7491 | ret = find_free_dev_extent(device, min_free, | 7508 | ret = find_free_dev_extent(device, min_free, |
| 7492 | &dev_offset, NULL); | 7509 | &dev_offset, NULL); |
| 7493 | if (!ret) | 7510 | if (!ret) |
| @@ -7810,6 +7827,34 @@ error: | |||
| 7810 | return ret; | 7827 | return ret; |
| 7811 | } | 7828 | } |
| 7812 | 7829 | ||
| 7830 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | ||
| 7831 | struct btrfs_root *root) | ||
| 7832 | { | ||
| 7833 | struct btrfs_block_group_cache *block_group, *tmp; | ||
| 7834 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
| 7835 | struct btrfs_block_group_item item; | ||
| 7836 | struct btrfs_key key; | ||
| 7837 | int ret = 0; | ||
| 7838 | |||
| 7839 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, | ||
| 7840 | new_bg_list) { | ||
| 7841 | list_del_init(&block_group->new_bg_list); | ||
| 7842 | |||
| 7843 | if (ret) | ||
| 7844 | continue; | ||
| 7845 | |||
| 7846 | spin_lock(&block_group->lock); | ||
| 7847 | memcpy(&item, &block_group->item, sizeof(item)); | ||
| 7848 | memcpy(&key, &block_group->key, sizeof(key)); | ||
| 7849 | spin_unlock(&block_group->lock); | ||
| 7850 | |||
| 7851 | ret = btrfs_insert_item(trans, extent_root, &key, &item, | ||
| 7852 | sizeof(item)); | ||
| 7853 | if (ret) | ||
| 7854 | btrfs_abort_transaction(trans, extent_root, ret); | ||
| 7855 | } | ||
| 7856 | } | ||
| 7857 | |||
| 7813 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | 7858 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, |
| 7814 | struct btrfs_root *root, u64 bytes_used, | 7859 | struct btrfs_root *root, u64 bytes_used, |
| 7815 | u64 type, u64 chunk_objectid, u64 chunk_offset, | 7860 | u64 type, u64 chunk_objectid, u64 chunk_offset, |
| @@ -7843,6 +7888,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7843 | spin_lock_init(&cache->lock); | 7888 | spin_lock_init(&cache->lock); |
| 7844 | INIT_LIST_HEAD(&cache->list); | 7889 | INIT_LIST_HEAD(&cache->list); |
| 7845 | INIT_LIST_HEAD(&cache->cluster_list); | 7890 | INIT_LIST_HEAD(&cache->cluster_list); |
| 7891 | INIT_LIST_HEAD(&cache->new_bg_list); | ||
| 7846 | 7892 | ||
| 7847 | btrfs_init_free_space_ctl(cache); | 7893 | btrfs_init_free_space_ctl(cache); |
| 7848 | 7894 | ||
| @@ -7874,12 +7920,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7874 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 7920 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
| 7875 | BUG_ON(ret); /* Logic error */ | 7921 | BUG_ON(ret); /* Logic error */ |
| 7876 | 7922 | ||
| 7877 | ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, | 7923 | list_add_tail(&cache->new_bg_list, &trans->new_bgs); |
| 7878 | sizeof(cache->item)); | ||
| 7879 | if (ret) { | ||
| 7880 | btrfs_abort_transaction(trans, extent_root, ret); | ||
| 7881 | return ret; | ||
| 7882 | } | ||
| 7883 | 7924 | ||
| 7884 | set_avail_alloc_bits(extent_root->fs_info, type); | 7925 | set_avail_alloc_bits(extent_root->fs_info, type); |
| 7885 | 7926 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b08ea4717e9d..1b319df29eee 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -45,6 +45,7 @@ struct extent_page_data { | |||
| 45 | struct bio *bio; | 45 | struct bio *bio; |
| 46 | struct extent_io_tree *tree; | 46 | struct extent_io_tree *tree; |
| 47 | get_extent_t *get_extent; | 47 | get_extent_t *get_extent; |
| 48 | unsigned long bio_flags; | ||
| 48 | 49 | ||
| 49 | /* tells writepage not to lock the state bits for this range | 50 | /* tells writepage not to lock the state bits for this range |
| 50 | * it still does the unlocking | 51 | * it still does the unlocking |
| @@ -64,13 +65,13 @@ tree_fs_info(struct extent_io_tree *tree) | |||
| 64 | 65 | ||
| 65 | int __init extent_io_init(void) | 66 | int __init extent_io_init(void) |
| 66 | { | 67 | { |
| 67 | extent_state_cache = kmem_cache_create("extent_state", | 68 | extent_state_cache = kmem_cache_create("btrfs_extent_state", |
| 68 | sizeof(struct extent_state), 0, | 69 | sizeof(struct extent_state), 0, |
| 69 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 70 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 70 | if (!extent_state_cache) | 71 | if (!extent_state_cache) |
| 71 | return -ENOMEM; | 72 | return -ENOMEM; |
| 72 | 73 | ||
| 73 | extent_buffer_cache = kmem_cache_create("extent_buffers", | 74 | extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer", |
| 74 | sizeof(struct extent_buffer), 0, | 75 | sizeof(struct extent_buffer), 0, |
| 75 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 76 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 76 | if (!extent_buffer_cache) | 77 | if (!extent_buffer_cache) |
| @@ -340,12 +341,10 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 340 | { | 341 | { |
| 341 | struct rb_node *node; | 342 | struct rb_node *node; |
| 342 | 343 | ||
| 343 | if (end < start) { | 344 | if (end < start) |
| 344 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 345 | WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", |
| 345 | (unsigned long long)end, | 346 | (unsigned long long)end, |
| 346 | (unsigned long long)start); | 347 | (unsigned long long)start); |
| 347 | WARN_ON(1); | ||
| 348 | } | ||
| 349 | state->start = start; | 348 | state->start = start; |
| 350 | state->end = end; | 349 | state->end = end; |
| 351 | 350 | ||
| @@ -942,6 +941,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, | |||
| 942 | * @end: the end offset in bytes (inclusive) | 941 | * @end: the end offset in bytes (inclusive) |
| 943 | * @bits: the bits to set in this range | 942 | * @bits: the bits to set in this range |
| 944 | * @clear_bits: the bits to clear in this range | 943 | * @clear_bits: the bits to clear in this range |
| 944 | * @cached_state: state that we're going to cache | ||
| 945 | * @mask: the allocation mask | 945 | * @mask: the allocation mask |
| 946 | * | 946 | * |
| 947 | * This will go through and set bits for the given range. If any states exist | 947 | * This will go through and set bits for the given range. If any states exist |
| @@ -951,7 +951,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, | |||
| 951 | * boundary bits like LOCK. | 951 | * boundary bits like LOCK. |
| 952 | */ | 952 | */ |
| 953 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 953 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 954 | int bits, int clear_bits, gfp_t mask) | 954 | int bits, int clear_bits, |
| 955 | struct extent_state **cached_state, gfp_t mask) | ||
| 955 | { | 956 | { |
| 956 | struct extent_state *state; | 957 | struct extent_state *state; |
| 957 | struct extent_state *prealloc = NULL; | 958 | struct extent_state *prealloc = NULL; |
| @@ -968,6 +969,15 @@ again: | |||
| 968 | } | 969 | } |
| 969 | 970 | ||
| 970 | spin_lock(&tree->lock); | 971 | spin_lock(&tree->lock); |
| 972 | if (cached_state && *cached_state) { | ||
| 973 | state = *cached_state; | ||
| 974 | if (state->start <= start && state->end > start && | ||
| 975 | state->tree) { | ||
| 976 | node = &state->rb_node; | ||
| 977 | goto hit_next; | ||
| 978 | } | ||
| 979 | } | ||
| 980 | |||
| 971 | /* | 981 | /* |
| 972 | * this search will find all the extents that end after | 982 | * this search will find all the extents that end after |
| 973 | * our range starts. | 983 | * our range starts. |
| @@ -998,6 +1008,7 @@ hit_next: | |||
| 998 | */ | 1008 | */ |
| 999 | if (state->start == start && state->end <= end) { | 1009 | if (state->start == start && state->end <= end) { |
| 1000 | set_state_bits(tree, state, &bits); | 1010 | set_state_bits(tree, state, &bits); |
| 1011 | cache_state(state, cached_state); | ||
| 1001 | state = clear_state_bit(tree, state, &clear_bits, 0); | 1012 | state = clear_state_bit(tree, state, &clear_bits, 0); |
| 1002 | if (last_end == (u64)-1) | 1013 | if (last_end == (u64)-1) |
| 1003 | goto out; | 1014 | goto out; |
| @@ -1038,6 +1049,7 @@ hit_next: | |||
| 1038 | goto out; | 1049 | goto out; |
| 1039 | if (state->end <= end) { | 1050 | if (state->end <= end) { |
| 1040 | set_state_bits(tree, state, &bits); | 1051 | set_state_bits(tree, state, &bits); |
| 1052 | cache_state(state, cached_state); | ||
| 1041 | state = clear_state_bit(tree, state, &clear_bits, 0); | 1053 | state = clear_state_bit(tree, state, &clear_bits, 0); |
| 1042 | if (last_end == (u64)-1) | 1054 | if (last_end == (u64)-1) |
| 1043 | goto out; | 1055 | goto out; |
| @@ -1076,6 +1088,7 @@ hit_next: | |||
| 1076 | &bits); | 1088 | &bits); |
| 1077 | if (err) | 1089 | if (err) |
| 1078 | extent_io_tree_panic(tree, err); | 1090 | extent_io_tree_panic(tree, err); |
| 1091 | cache_state(prealloc, cached_state); | ||
| 1079 | prealloc = NULL; | 1092 | prealloc = NULL; |
| 1080 | start = this_end + 1; | 1093 | start = this_end + 1; |
| 1081 | goto search_again; | 1094 | goto search_again; |
| @@ -1098,6 +1111,7 @@ hit_next: | |||
| 1098 | extent_io_tree_panic(tree, err); | 1111 | extent_io_tree_panic(tree, err); |
| 1099 | 1112 | ||
| 1100 | set_state_bits(tree, prealloc, &bits); | 1113 | set_state_bits(tree, prealloc, &bits); |
| 1114 | cache_state(prealloc, cached_state); | ||
| 1101 | clear_state_bit(tree, prealloc, &clear_bits, 0); | 1115 | clear_state_bit(tree, prealloc, &clear_bits, 0); |
| 1102 | prealloc = NULL; | 1116 | prealloc = NULL; |
| 1103 | goto out; | 1117 | goto out; |
| @@ -1150,6 +1164,14 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 1150 | NULL, cached_state, mask); | 1164 | NULL, cached_state, mask); |
| 1151 | } | 1165 | } |
| 1152 | 1166 | ||
| 1167 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 1168 | struct extent_state **cached_state, gfp_t mask) | ||
| 1169 | { | ||
| 1170 | return set_extent_bit(tree, start, end, | ||
| 1171 | EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG, | ||
| 1172 | NULL, cached_state, mask); | ||
| 1173 | } | ||
| 1174 | |||
| 1153 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 1175 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 1154 | gfp_t mask) | 1176 | gfp_t mask) |
| 1155 | { | 1177 | { |
| @@ -1294,18 +1316,42 @@ out: | |||
| 1294 | * If nothing was found, 1 is returned. If found something, return 0. | 1316 | * If nothing was found, 1 is returned. If found something, return 0. |
| 1295 | */ | 1317 | */ |
| 1296 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 1318 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
| 1297 | u64 *start_ret, u64 *end_ret, int bits) | 1319 | u64 *start_ret, u64 *end_ret, int bits, |
| 1320 | struct extent_state **cached_state) | ||
| 1298 | { | 1321 | { |
| 1299 | struct extent_state *state; | 1322 | struct extent_state *state; |
| 1323 | struct rb_node *n; | ||
| 1300 | int ret = 1; | 1324 | int ret = 1; |
| 1301 | 1325 | ||
| 1302 | spin_lock(&tree->lock); | 1326 | spin_lock(&tree->lock); |
| 1327 | if (cached_state && *cached_state) { | ||
| 1328 | state = *cached_state; | ||
| 1329 | if (state->end == start - 1 && state->tree) { | ||
| 1330 | n = rb_next(&state->rb_node); | ||
| 1331 | while (n) { | ||
| 1332 | state = rb_entry(n, struct extent_state, | ||
| 1333 | rb_node); | ||
| 1334 | if (state->state & bits) | ||
| 1335 | goto got_it; | ||
| 1336 | n = rb_next(n); | ||
| 1337 | } | ||
| 1338 | free_extent_state(*cached_state); | ||
| 1339 | *cached_state = NULL; | ||
| 1340 | goto out; | ||
| 1341 | } | ||
| 1342 | free_extent_state(*cached_state); | ||
| 1343 | *cached_state = NULL; | ||
| 1344 | } | ||
| 1345 | |||
| 1303 | state = find_first_extent_bit_state(tree, start, bits); | 1346 | state = find_first_extent_bit_state(tree, start, bits); |
| 1347 | got_it: | ||
| 1304 | if (state) { | 1348 | if (state) { |
| 1349 | cache_state(state, cached_state); | ||
| 1305 | *start_ret = state->start; | 1350 | *start_ret = state->start; |
| 1306 | *end_ret = state->end; | 1351 | *end_ret = state->end; |
| 1307 | ret = 0; | 1352 | ret = 0; |
| 1308 | } | 1353 | } |
| 1354 | out: | ||
| 1309 | spin_unlock(&tree->lock); | 1355 | spin_unlock(&tree->lock); |
| 1310 | return ret; | 1356 | return ret; |
| 1311 | } | 1357 | } |
| @@ -1871,12 +1917,12 @@ static void repair_io_failure_callback(struct bio *bio, int err) | |||
| 1871 | * the standard behavior is to write all copies in a raid setup. here we only | 1917 | * the standard behavior is to write all copies in a raid setup. here we only |
| 1872 | * want to write the one bad copy. so we do the mapping for ourselves and issue | 1918 | * want to write the one bad copy. so we do the mapping for ourselves and issue |
| 1873 | * submit_bio directly. | 1919 | * submit_bio directly. |
| 1874 | * to avoid any synchonization issues, wait for the data after writing, which | 1920 | * to avoid any synchronization issues, wait for the data after writing, which |
| 1875 | * actually prevents the read that triggered the error from finishing. | 1921 | * actually prevents the read that triggered the error from finishing. |
| 1876 | * currently, there can be no more than two copies of every data bit. thus, | 1922 | * currently, there can be no more than two copies of every data bit. thus, |
| 1877 | * exactly one rewrite is required. | 1923 | * exactly one rewrite is required. |
| 1878 | */ | 1924 | */ |
| 1879 | int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | 1925 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, |
| 1880 | u64 length, u64 logical, struct page *page, | 1926 | u64 length, u64 logical, struct page *page, |
| 1881 | int mirror_num) | 1927 | int mirror_num) |
| 1882 | { | 1928 | { |
| @@ -1898,7 +1944,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
| 1898 | bio->bi_size = 0; | 1944 | bio->bi_size = 0; |
| 1899 | map_length = length; | 1945 | map_length = length; |
| 1900 | 1946 | ||
| 1901 | ret = btrfs_map_block(map_tree, WRITE, logical, | 1947 | ret = btrfs_map_block(fs_info, WRITE, logical, |
| 1902 | &map_length, &bbio, mirror_num); | 1948 | &map_length, &bbio, mirror_num); |
| 1903 | if (ret) { | 1949 | if (ret) { |
| 1904 | bio_put(bio); | 1950 | bio_put(bio); |
| @@ -1936,14 +1982,13 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
| 1936 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | 1982 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, |
| 1937 | int mirror_num) | 1983 | int mirror_num) |
| 1938 | { | 1984 | { |
| 1939 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
| 1940 | u64 start = eb->start; | 1985 | u64 start = eb->start; |
| 1941 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); | 1986 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); |
| 1942 | int ret = 0; | 1987 | int ret = 0; |
| 1943 | 1988 | ||
| 1944 | for (i = 0; i < num_pages; i++) { | 1989 | for (i = 0; i < num_pages; i++) { |
| 1945 | struct page *p = extent_buffer_page(eb, i); | 1990 | struct page *p = extent_buffer_page(eb, i); |
| 1946 | ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE, | 1991 | ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, |
| 1947 | start, p, mirror_num); | 1992 | start, p, mirror_num); |
| 1948 | if (ret) | 1993 | if (ret) |
| 1949 | break; | 1994 | break; |
| @@ -1962,7 +2007,7 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 1962 | u64 private; | 2007 | u64 private; |
| 1963 | u64 private_failure; | 2008 | u64 private_failure; |
| 1964 | struct io_failure_record *failrec; | 2009 | struct io_failure_record *failrec; |
| 1965 | struct btrfs_mapping_tree *map_tree; | 2010 | struct btrfs_fs_info *fs_info; |
| 1966 | struct extent_state *state; | 2011 | struct extent_state *state; |
| 1967 | int num_copies; | 2012 | int num_copies; |
| 1968 | int did_repair = 0; | 2013 | int did_repair = 0; |
| @@ -1998,11 +2043,11 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 1998 | spin_unlock(&BTRFS_I(inode)->io_tree.lock); | 2043 | spin_unlock(&BTRFS_I(inode)->io_tree.lock); |
| 1999 | 2044 | ||
| 2000 | if (state && state->start == failrec->start) { | 2045 | if (state && state->start == failrec->start) { |
| 2001 | map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; | 2046 | fs_info = BTRFS_I(inode)->root->fs_info; |
| 2002 | num_copies = btrfs_num_copies(map_tree, failrec->logical, | 2047 | num_copies = btrfs_num_copies(fs_info, failrec->logical, |
| 2003 | failrec->len); | 2048 | failrec->len); |
| 2004 | if (num_copies > 1) { | 2049 | if (num_copies > 1) { |
| 2005 | ret = repair_io_failure(map_tree, start, failrec->len, | 2050 | ret = repair_io_failure(fs_info, start, failrec->len, |
| 2006 | failrec->logical, page, | 2051 | failrec->logical, page, |
| 2007 | failrec->failed_mirror); | 2052 | failrec->failed_mirror); |
| 2008 | did_repair = !ret; | 2053 | did_repair = !ret; |
| @@ -2068,7 +2113,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, | |||
| 2068 | } | 2113 | } |
| 2069 | read_unlock(&em_tree->lock); | 2114 | read_unlock(&em_tree->lock); |
| 2070 | 2115 | ||
| 2071 | if (!em || IS_ERR(em)) { | 2116 | if (!em) { |
| 2072 | kfree(failrec); | 2117 | kfree(failrec); |
| 2073 | return -EIO; | 2118 | return -EIO; |
| 2074 | } | 2119 | } |
| @@ -2111,9 +2156,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, | |||
| 2111 | * clean_io_failure() clean all those errors at once. | 2156 | * clean_io_failure() clean all those errors at once. |
| 2112 | */ | 2157 | */ |
| 2113 | } | 2158 | } |
| 2114 | num_copies = btrfs_num_copies( | 2159 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, |
| 2115 | &BTRFS_I(inode)->root->fs_info->mapping_tree, | 2160 | failrec->logical, failrec->len); |
| 2116 | failrec->logical, failrec->len); | ||
| 2117 | if (num_copies == 1) { | 2161 | if (num_copies == 1) { |
| 2118 | /* | 2162 | /* |
| 2119 | * we only have a single copy of the data, so don't bother with | 2163 | * we only have a single copy of the data, so don't bother with |
| @@ -2304,8 +2348,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 2304 | struct extent_state *cached = NULL; | 2348 | struct extent_state *cached = NULL; |
| 2305 | struct extent_state *state; | 2349 | struct extent_state *state; |
| 2306 | 2350 | ||
| 2307 | pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " | 2351 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " |
| 2308 | "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err, | 2352 | "mirror=%ld\n", (u64)bio->bi_sector, err, |
| 2309 | (long int)bio->bi_bdev); | 2353 | (long int)bio->bi_bdev); |
| 2310 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2354 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 2311 | 2355 | ||
| @@ -2418,10 +2462,6 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | |||
| 2418 | return bio; | 2462 | return bio; |
| 2419 | } | 2463 | } |
| 2420 | 2464 | ||
| 2421 | /* | ||
| 2422 | * Since writes are async, they will only return -ENOMEM. | ||
| 2423 | * Reads can return the full range of I/O error conditions. | ||
| 2424 | */ | ||
| 2425 | static int __must_check submit_one_bio(int rw, struct bio *bio, | 2465 | static int __must_check submit_one_bio(int rw, struct bio *bio, |
| 2426 | int mirror_num, unsigned long bio_flags) | 2466 | int mirror_num, unsigned long bio_flags) |
| 2427 | { | 2467 | { |
| @@ -2709,12 +2749,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2709 | end_bio_extent_readpage, mirror_num, | 2749 | end_bio_extent_readpage, mirror_num, |
| 2710 | *bio_flags, | 2750 | *bio_flags, |
| 2711 | this_bio_flag); | 2751 | this_bio_flag); |
| 2712 | BUG_ON(ret == -ENOMEM); | 2752 | if (!ret) { |
| 2713 | nr++; | 2753 | nr++; |
| 2714 | *bio_flags = this_bio_flag; | 2754 | *bio_flags = this_bio_flag; |
| 2755 | } | ||
| 2715 | } | 2756 | } |
| 2716 | if (ret) | 2757 | if (ret) { |
| 2717 | SetPageError(page); | 2758 | SetPageError(page); |
| 2759 | unlock_extent(tree, cur, cur + iosize - 1); | ||
| 2760 | } | ||
| 2718 | cur = cur + iosize; | 2761 | cur = cur + iosize; |
| 2719 | pg_offset += iosize; | 2762 | pg_offset += iosize; |
| 2720 | } | 2763 | } |
| @@ -3161,12 +3204,16 @@ static int write_one_eb(struct extent_buffer *eb, | |||
| 3161 | struct block_device *bdev = fs_info->fs_devices->latest_bdev; | 3204 | struct block_device *bdev = fs_info->fs_devices->latest_bdev; |
| 3162 | u64 offset = eb->start; | 3205 | u64 offset = eb->start; |
| 3163 | unsigned long i, num_pages; | 3206 | unsigned long i, num_pages; |
| 3207 | unsigned long bio_flags = 0; | ||
| 3164 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); | 3208 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); |
| 3165 | int ret = 0; | 3209 | int ret = 0; |
| 3166 | 3210 | ||
| 3167 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3211 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
| 3168 | num_pages = num_extent_pages(eb->start, eb->len); | 3212 | num_pages = num_extent_pages(eb->start, eb->len); |
| 3169 | atomic_set(&eb->io_pages, num_pages); | 3213 | atomic_set(&eb->io_pages, num_pages); |
| 3214 | if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) | ||
| 3215 | bio_flags = EXTENT_BIO_TREE_LOG; | ||
| 3216 | |||
| 3170 | for (i = 0; i < num_pages; i++) { | 3217 | for (i = 0; i < num_pages; i++) { |
| 3171 | struct page *p = extent_buffer_page(eb, i); | 3218 | struct page *p = extent_buffer_page(eb, i); |
| 3172 | 3219 | ||
| @@ -3175,7 +3222,8 @@ static int write_one_eb(struct extent_buffer *eb, | |||
| 3175 | ret = submit_extent_page(rw, eb->tree, p, offset >> 9, | 3222 | ret = submit_extent_page(rw, eb->tree, p, offset >> 9, |
| 3176 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, | 3223 | PAGE_CACHE_SIZE, 0, bdev, &epd->bio, |
| 3177 | -1, end_bio_extent_buffer_writepage, | 3224 | -1, end_bio_extent_buffer_writepage, |
| 3178 | 0, 0, 0); | 3225 | 0, epd->bio_flags, bio_flags); |
| 3226 | epd->bio_flags = bio_flags; | ||
| 3179 | if (ret) { | 3227 | if (ret) { |
| 3180 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3228 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
| 3181 | SetPageError(p); | 3229 | SetPageError(p); |
| @@ -3210,6 +3258,7 @@ int btree_write_cache_pages(struct address_space *mapping, | |||
| 3210 | .tree = tree, | 3258 | .tree = tree, |
| 3211 | .extent_locked = 0, | 3259 | .extent_locked = 0, |
| 3212 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3260 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
| 3261 | .bio_flags = 0, | ||
| 3213 | }; | 3262 | }; |
| 3214 | int ret = 0; | 3263 | int ret = 0; |
| 3215 | int done = 0; | 3264 | int done = 0; |
| @@ -3254,19 +3303,34 @@ retry: | |||
| 3254 | break; | 3303 | break; |
| 3255 | } | 3304 | } |
| 3256 | 3305 | ||
| 3306 | spin_lock(&mapping->private_lock); | ||
| 3307 | if (!PagePrivate(page)) { | ||
| 3308 | spin_unlock(&mapping->private_lock); | ||
| 3309 | continue; | ||
| 3310 | } | ||
| 3311 | |||
| 3257 | eb = (struct extent_buffer *)page->private; | 3312 | eb = (struct extent_buffer *)page->private; |
| 3313 | |||
| 3314 | /* | ||
| 3315 | * Shouldn't happen and normally this would be a BUG_ON | ||
| 3316 | * but no sense in crashing the user's box for something | ||
| 3317 | * we can survive anyway. | ||
| 3318 | */ | ||
| 3258 | if (!eb) { | 3319 | if (!eb) { |
| 3320 | spin_unlock(&mapping->private_lock); | ||
| 3259 | WARN_ON(1); | 3321 | WARN_ON(1); |
| 3260 | continue; | 3322 | continue; |
| 3261 | } | 3323 | } |
| 3262 | 3324 | ||
| 3263 | if (eb == prev_eb) | 3325 | if (eb == prev_eb) { |
| 3326 | spin_unlock(&mapping->private_lock); | ||
| 3264 | continue; | 3327 | continue; |
| 3328 | } | ||
| 3265 | 3329 | ||
| 3266 | if (!atomic_inc_not_zero(&eb->refs)) { | 3330 | ret = atomic_inc_not_zero(&eb->refs); |
| 3267 | WARN_ON(1); | 3331 | spin_unlock(&mapping->private_lock); |
| 3332 | if (!ret) | ||
| 3268 | continue; | 3333 | continue; |
| 3269 | } | ||
| 3270 | 3334 | ||
| 3271 | prev_eb = eb; | 3335 | prev_eb = eb; |
| 3272 | ret = lock_extent_buffer_for_io(eb, fs_info, &epd); | 3336 | ret = lock_extent_buffer_for_io(eb, fs_info, &epd); |
| @@ -3457,7 +3521,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd) | |||
| 3457 | if (epd->sync_io) | 3521 | if (epd->sync_io) |
| 3458 | rw = WRITE_SYNC; | 3522 | rw = WRITE_SYNC; |
| 3459 | 3523 | ||
| 3460 | ret = submit_one_bio(rw, epd->bio, 0, 0); | 3524 | ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags); |
| 3461 | BUG_ON(ret < 0); /* -ENOMEM */ | 3525 | BUG_ON(ret < 0); /* -ENOMEM */ |
| 3462 | epd->bio = NULL; | 3526 | epd->bio = NULL; |
| 3463 | } | 3527 | } |
| @@ -3480,6 +3544,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
| 3480 | .get_extent = get_extent, | 3544 | .get_extent = get_extent, |
| 3481 | .extent_locked = 0, | 3545 | .extent_locked = 0, |
| 3482 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3546 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
| 3547 | .bio_flags = 0, | ||
| 3483 | }; | 3548 | }; |
| 3484 | 3549 | ||
| 3485 | ret = __extent_writepage(page, wbc, &epd); | 3550 | ret = __extent_writepage(page, wbc, &epd); |
| @@ -3504,6 +3569,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
| 3504 | .get_extent = get_extent, | 3569 | .get_extent = get_extent, |
| 3505 | .extent_locked = 1, | 3570 | .extent_locked = 1, |
| 3506 | .sync_io = mode == WB_SYNC_ALL, | 3571 | .sync_io = mode == WB_SYNC_ALL, |
| 3572 | .bio_flags = 0, | ||
| 3507 | }; | 3573 | }; |
| 3508 | struct writeback_control wbc_writepages = { | 3574 | struct writeback_control wbc_writepages = { |
| 3509 | .sync_mode = mode, | 3575 | .sync_mode = mode, |
| @@ -3543,6 +3609,7 @@ int extent_writepages(struct extent_io_tree *tree, | |||
| 3543 | .get_extent = get_extent, | 3609 | .get_extent = get_extent, |
| 3544 | .extent_locked = 0, | 3610 | .extent_locked = 0, |
| 3545 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3611 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
| 3612 | .bio_flags = 0, | ||
| 3546 | }; | 3613 | }; |
| 3547 | 3614 | ||
| 3548 | ret = extent_write_cache_pages(tree, mapping, wbc, | 3615 | ret = extent_write_cache_pages(tree, mapping, wbc, |
| @@ -3920,18 +3987,6 @@ out: | |||
| 3920 | return ret; | 3987 | return ret; |
| 3921 | } | 3988 | } |
| 3922 | 3989 | ||
| 3923 | inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
| 3924 | unsigned long i) | ||
| 3925 | { | ||
| 3926 | return eb->pages[i]; | ||
| 3927 | } | ||
| 3928 | |||
| 3929 | inline unsigned long num_extent_pages(u64 start, u64 len) | ||
| 3930 | { | ||
| 3931 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | ||
| 3932 | (start >> PAGE_CACHE_SHIFT); | ||
| 3933 | } | ||
| 3934 | |||
| 3935 | static void __free_extent_buffer(struct extent_buffer *eb) | 3990 | static void __free_extent_buffer(struct extent_buffer *eb) |
| 3936 | { | 3991 | { |
| 3937 | #if LEAK_DEBUG | 3992 | #if LEAK_DEBUG |
| @@ -4047,8 +4102,8 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) | |||
| 4047 | 4102 | ||
| 4048 | return eb; | 4103 | return eb; |
| 4049 | err: | 4104 | err: |
| 4050 | for (i--; i > 0; i--) | 4105 | for (; i > 0; i--) |
| 4051 | __free_page(eb->pages[i]); | 4106 | __free_page(eb->pages[i - 1]); |
| 4052 | __free_extent_buffer(eb); | 4107 | __free_extent_buffer(eb); |
| 4053 | return NULL; | 4108 | return NULL; |
| 4054 | } | 4109 | } |
| @@ -4192,10 +4247,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
| 4192 | 4247 | ||
| 4193 | for (i = 0; i < num_pages; i++, index++) { | 4248 | for (i = 0; i < num_pages; i++, index++) { |
| 4194 | p = find_or_create_page(mapping, index, GFP_NOFS); | 4249 | p = find_or_create_page(mapping, index, GFP_NOFS); |
| 4195 | if (!p) { | 4250 | if (!p) |
| 4196 | WARN_ON(1); | ||
| 4197 | goto free_eb; | 4251 | goto free_eb; |
| 4198 | } | ||
| 4199 | 4252 | ||
| 4200 | spin_lock(&mapping->private_lock); | 4253 | spin_lock(&mapping->private_lock); |
| 4201 | if (PagePrivate(p)) { | 4254 | if (PagePrivate(p)) { |
| @@ -4338,7 +4391,6 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | |||
| 4338 | 4391 | ||
| 4339 | /* Should be safe to release our pages at this point */ | 4392 | /* Should be safe to release our pages at this point */ |
| 4340 | btrfs_release_extent_buffer_page(eb, 0); | 4393 | btrfs_release_extent_buffer_page(eb, 0); |
| 4341 | |||
| 4342 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | 4394 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); |
| 4343 | return 1; | 4395 | return 1; |
| 4344 | } | 4396 | } |
| @@ -4661,10 +4713,9 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
| 4661 | } | 4713 | } |
| 4662 | 4714 | ||
| 4663 | if (start + min_len > eb->len) { | 4715 | if (start + min_len > eb->len) { |
| 4664 | printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, " | 4716 | WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " |
| 4665 | "wanted %lu %lu\n", (unsigned long long)eb->start, | 4717 | "wanted %lu %lu\n", (unsigned long long)eb->start, |
| 4666 | eb->len, start, min_len); | 4718 | eb->len, start, min_len); |
| 4667 | WARN_ON(1); | ||
| 4668 | return -EINVAL; | 4719 | return -EINVAL; |
| 4669 | } | 4720 | } |
| 4670 | 4721 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 25900af5b15d..2eacfabd3263 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | * type for this bio | 27 | * type for this bio |
| 28 | */ | 28 | */ |
| 29 | #define EXTENT_BIO_COMPRESSED 1 | 29 | #define EXTENT_BIO_COMPRESSED 1 |
| 30 | #define EXTENT_BIO_TREE_LOG 2 | ||
| 30 | #define EXTENT_BIO_FLAG_SHIFT 16 | 31 | #define EXTENT_BIO_FLAG_SHIFT 16 |
| 31 | 32 | ||
| 32 | /* these are bit numbers for test/set bit */ | 33 | /* these are bit numbers for test/set bit */ |
| @@ -232,11 +233,15 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 232 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 233 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 233 | gfp_t mask); | 234 | gfp_t mask); |
| 234 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 235 | int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 235 | int bits, int clear_bits, gfp_t mask); | 236 | int bits, int clear_bits, |
| 237 | struct extent_state **cached_state, gfp_t mask); | ||
| 236 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 238 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
| 237 | struct extent_state **cached_state, gfp_t mask); | 239 | struct extent_state **cached_state, gfp_t mask); |
| 240 | int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 241 | struct extent_state **cached_state, gfp_t mask); | ||
| 238 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 242 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
| 239 | u64 *start_ret, u64 *end_ret, int bits); | 243 | u64 *start_ret, u64 *end_ret, int bits, |
| 244 | struct extent_state **cached_state); | ||
| 240 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, | 245 | struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, |
| 241 | u64 start, int bits); | 246 | u64 start, int bits); |
| 242 | int extent_invalidatepage(struct extent_io_tree *tree, | 247 | int extent_invalidatepage(struct extent_io_tree *tree, |
| @@ -277,8 +282,18 @@ void free_extent_buffer_stale(struct extent_buffer *eb); | |||
| 277 | int read_extent_buffer_pages(struct extent_io_tree *tree, | 282 | int read_extent_buffer_pages(struct extent_io_tree *tree, |
| 278 | struct extent_buffer *eb, u64 start, int wait, | 283 | struct extent_buffer *eb, u64 start, int wait, |
| 279 | get_extent_t *get_extent, int mirror_num); | 284 | get_extent_t *get_extent, int mirror_num); |
| 280 | unsigned long num_extent_pages(u64 start, u64 len); | 285 | |
| 281 | struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i); | 286 | static inline unsigned long num_extent_pages(u64 start, u64 len) |
| 287 | { | ||
| 288 | return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - | ||
| 289 | (start >> PAGE_CACHE_SHIFT); | ||
| 290 | } | ||
| 291 | |||
| 292 | static inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
| 293 | unsigned long i) | ||
| 294 | { | ||
| 295 | return eb->pages[i]; | ||
| 296 | } | ||
| 282 | 297 | ||
| 283 | static inline void extent_buffer_get(struct extent_buffer *eb) | 298 | static inline void extent_buffer_get(struct extent_buffer *eb) |
| 284 | { | 299 | { |
| @@ -322,9 +337,9 @@ struct bio * | |||
| 322 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | 337 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
| 323 | gfp_t gfp_flags); | 338 | gfp_t gfp_flags); |
| 324 | 339 | ||
| 325 | struct btrfs_mapping_tree; | 340 | struct btrfs_fs_info; |
| 326 | 341 | ||
| 327 | int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | 342 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, |
| 328 | u64 length, u64 logical, struct page *page, | 343 | u64 length, u64 logical, struct page *page, |
| 329 | int mirror_num); | 344 | int mirror_num); |
| 330 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); | 345 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 7c97b3301459..f169d6b11d7f 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -11,7 +11,7 @@ static struct kmem_cache *extent_map_cache; | |||
| 11 | 11 | ||
| 12 | int __init extent_map_init(void) | 12 | int __init extent_map_init(void) |
| 13 | { | 13 | { |
| 14 | extent_map_cache = kmem_cache_create("extent_map", | 14 | extent_map_cache = kmem_cache_create("btrfs_extent_map", |
| 15 | sizeof(struct extent_map), 0, | 15 | sizeof(struct extent_map), 0, |
| 16 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 16 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 17 | if (!extent_map_cache) | 17 | if (!extent_map_cache) |
| @@ -35,6 +35,7 @@ void extent_map_exit(void) | |||
| 35 | void extent_map_tree_init(struct extent_map_tree *tree) | 35 | void extent_map_tree_init(struct extent_map_tree *tree) |
| 36 | { | 36 | { |
| 37 | tree->map = RB_ROOT; | 37 | tree->map = RB_ROOT; |
| 38 | INIT_LIST_HEAD(&tree->modified_extents); | ||
| 38 | rwlock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| @@ -48,13 +49,15 @@ void extent_map_tree_init(struct extent_map_tree *tree) | |||
| 48 | struct extent_map *alloc_extent_map(void) | 49 | struct extent_map *alloc_extent_map(void) |
| 49 | { | 50 | { |
| 50 | struct extent_map *em; | 51 | struct extent_map *em; |
| 51 | em = kmem_cache_alloc(extent_map_cache, GFP_NOFS); | 52 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); |
| 52 | if (!em) | 53 | if (!em) |
| 53 | return NULL; | 54 | return NULL; |
| 54 | em->in_tree = 0; | 55 | em->in_tree = 0; |
| 55 | em->flags = 0; | 56 | em->flags = 0; |
| 56 | em->compress_type = BTRFS_COMPRESS_NONE; | 57 | em->compress_type = BTRFS_COMPRESS_NONE; |
| 58 | em->generation = 0; | ||
| 57 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
| 60 | INIT_LIST_HEAD(&em->list); | ||
| 58 | return em; | 61 | return em; |
| 59 | } | 62 | } |
| 60 | 63 | ||
| @@ -72,6 +75,7 @@ void free_extent_map(struct extent_map *em) | |||
| 72 | WARN_ON(atomic_read(&em->refs) == 0); | 75 | WARN_ON(atomic_read(&em->refs) == 0); |
| 73 | if (atomic_dec_and_test(&em->refs)) { | 76 | if (atomic_dec_and_test(&em->refs)) { |
| 74 | WARN_ON(em->in_tree); | 77 | WARN_ON(em->in_tree); |
| 78 | WARN_ON(!list_empty(&em->list)); | ||
| 75 | kmem_cache_free(extent_map_cache, em); | 79 | kmem_cache_free(extent_map_cache, em); |
| 76 | } | 80 | } |
| 77 | } | 81 | } |
| @@ -194,10 +198,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 194 | merge = rb_entry(rb, struct extent_map, rb_node); | 198 | merge = rb_entry(rb, struct extent_map, rb_node); |
| 195 | if (rb && mergable_maps(merge, em)) { | 199 | if (rb && mergable_maps(merge, em)) { |
| 196 | em->start = merge->start; | 200 | em->start = merge->start; |
| 201 | em->orig_start = merge->orig_start; | ||
| 197 | em->len += merge->len; | 202 | em->len += merge->len; |
| 198 | em->block_len += merge->block_len; | 203 | em->block_len += merge->block_len; |
| 199 | em->block_start = merge->block_start; | 204 | em->block_start = merge->block_start; |
| 200 | merge->in_tree = 0; | 205 | merge->in_tree = 0; |
| 206 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; | ||
| 207 | em->mod_start = merge->mod_start; | ||
| 208 | em->generation = max(em->generation, merge->generation); | ||
| 209 | list_move(&em->list, &tree->modified_extents); | ||
| 210 | |||
| 211 | list_del_init(&merge->list); | ||
| 201 | rb_erase(&merge->rb_node, &tree->map); | 212 | rb_erase(&merge->rb_node, &tree->map); |
| 202 | free_extent_map(merge); | 213 | free_extent_map(merge); |
| 203 | } | 214 | } |
| @@ -211,14 +222,30 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 211 | em->block_len += merge->len; | 222 | em->block_len += merge->len; |
| 212 | rb_erase(&merge->rb_node, &tree->map); | 223 | rb_erase(&merge->rb_node, &tree->map); |
| 213 | merge->in_tree = 0; | 224 | merge->in_tree = 0; |
| 225 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; | ||
| 226 | em->generation = max(em->generation, merge->generation); | ||
| 227 | list_del_init(&merge->list); | ||
| 214 | free_extent_map(merge); | 228 | free_extent_map(merge); |
| 215 | } | 229 | } |
| 216 | } | 230 | } |
| 217 | 231 | ||
| 218 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | 232 | /** |
| 233 | * unpin_extent_cache - unpin an extent from the cache | ||
| 234 | * @tree: tree to unpin the extent in | ||
| 235 | * @start: logical offset in the file | ||
| 236 | * @len: length of the extent | ||
| 237 | * @gen: generation that this extent has been modified in | ||
| 238 | * | ||
| 239 | * Called after an extent has been written to disk properly. Set the generation | ||
| 240 | * to the generation that actually added the file item to the inode so we know | ||
| 241 | * we need to sync this extent when we call fsync(). | ||
| 242 | */ | ||
| 243 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, | ||
| 244 | u64 gen) | ||
| 219 | { | 245 | { |
| 220 | int ret = 0; | 246 | int ret = 0; |
| 221 | struct extent_map *em; | 247 | struct extent_map *em; |
| 248 | bool prealloc = false; | ||
| 222 | 249 | ||
| 223 | write_lock(&tree->lock); | 250 | write_lock(&tree->lock); |
| 224 | em = lookup_extent_mapping(tree, start, len); | 251 | em = lookup_extent_mapping(tree, start, len); |
| @@ -228,10 +255,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | |||
| 228 | if (!em) | 255 | if (!em) |
| 229 | goto out; | 256 | goto out; |
| 230 | 257 | ||
| 258 | list_move(&em->list, &tree->modified_extents); | ||
| 259 | em->generation = gen; | ||
| 231 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 260 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 261 | em->mod_start = em->start; | ||
| 262 | em->mod_len = em->len; | ||
| 263 | |||
| 264 | if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) { | ||
| 265 | prealloc = true; | ||
| 266 | clear_bit(EXTENT_FLAG_FILLING, &em->flags); | ||
| 267 | } | ||
| 232 | 268 | ||
| 233 | try_merge_map(tree, em); | 269 | try_merge_map(tree, em); |
| 234 | 270 | ||
| 271 | if (prealloc) { | ||
| 272 | em->mod_start = em->start; | ||
| 273 | em->mod_len = em->len; | ||
| 274 | } | ||
| 275 | |||
| 235 | free_extent_map(em); | 276 | free_extent_map(em); |
| 236 | out: | 277 | out: |
| 237 | write_unlock(&tree->lock); | 278 | write_unlock(&tree->lock); |
| @@ -269,6 +310,9 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 269 | } | 310 | } |
| 270 | atomic_inc(&em->refs); | 311 | atomic_inc(&em->refs); |
| 271 | 312 | ||
| 313 | em->mod_start = em->start; | ||
| 314 | em->mod_len = em->len; | ||
| 315 | |||
| 272 | try_merge_map(tree, em); | 316 | try_merge_map(tree, em); |
| 273 | out: | 317 | out: |
| 274 | return ret; | 318 | return ret; |
| @@ -358,6 +402,8 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
| 358 | 402 | ||
| 359 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | 403 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); |
| 360 | rb_erase(&em->rb_node, &tree->map); | 404 | rb_erase(&em->rb_node, &tree->map); |
| 405 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | ||
| 406 | list_del_init(&em->list); | ||
| 361 | em->in_tree = 0; | 407 | em->in_tree = 0; |
| 362 | return ret; | 408 | return ret; |
| 363 | } | 409 | } |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 1195f09761fe..922943ce29e8 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -13,6 +13,8 @@ | |||
| 13 | #define EXTENT_FLAG_COMPRESSED 1 | 13 | #define EXTENT_FLAG_COMPRESSED 1 |
| 14 | #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ | 14 | #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ |
| 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ | 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ |
| 16 | #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ | ||
| 17 | #define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ | ||
| 16 | 18 | ||
| 17 | struct extent_map { | 19 | struct extent_map { |
| 18 | struct rb_node rb_node; | 20 | struct rb_node rb_node; |
| @@ -20,18 +22,24 @@ struct extent_map { | |||
| 20 | /* all of these are in bytes */ | 22 | /* all of these are in bytes */ |
| 21 | u64 start; | 23 | u64 start; |
| 22 | u64 len; | 24 | u64 len; |
| 25 | u64 mod_start; | ||
| 26 | u64 mod_len; | ||
| 23 | u64 orig_start; | 27 | u64 orig_start; |
| 28 | u64 orig_block_len; | ||
| 24 | u64 block_start; | 29 | u64 block_start; |
| 25 | u64 block_len; | 30 | u64 block_len; |
| 31 | u64 generation; | ||
| 26 | unsigned long flags; | 32 | unsigned long flags; |
| 27 | struct block_device *bdev; | 33 | struct block_device *bdev; |
| 28 | atomic_t refs; | 34 | atomic_t refs; |
| 29 | unsigned int in_tree; | 35 | unsigned int in_tree; |
| 30 | unsigned int compress_type; | 36 | unsigned int compress_type; |
| 37 | struct list_head list; | ||
| 31 | }; | 38 | }; |
| 32 | 39 | ||
| 33 | struct extent_map_tree { | 40 | struct extent_map_tree { |
| 34 | struct rb_root map; | 41 | struct rb_root map; |
| 42 | struct list_head modified_extents; | ||
| 35 | rwlock_t lock; | 43 | rwlock_t lock; |
| 36 | }; | 44 | }; |
| 37 | 45 | ||
| @@ -60,7 +68,7 @@ struct extent_map *alloc_extent_map(void); | |||
| 60 | void free_extent_map(struct extent_map *em); | 68 | void free_extent_map(struct extent_map *em); |
| 61 | int __init extent_map_init(void); | 69 | int __init extent_map_init(void); |
| 62 | void extent_map_exit(void); | 70 | void extent_map_exit(void); |
| 63 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); | 71 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); |
| 64 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | 72 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, |
| 65 | u64 start, u64 len); | 73 | u64 start, u64 len); |
| 66 | #endif | 74 | #endif |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 857d93cd01dc..bd38cef42358 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -25,11 +25,12 @@ | |||
| 25 | #include "transaction.h" | 25 | #include "transaction.h" |
| 26 | #include "print-tree.h" | 26 | #include "print-tree.h" |
| 27 | 27 | ||
| 28 | #define __MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \ | 28 | #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ |
| 29 | sizeof(struct btrfs_item) * 2) / \ | 29 | sizeof(struct btrfs_item) * 2) / \ |
| 30 | size) - 1)) | 30 | size) - 1)) |
| 31 | 31 | ||
| 32 | #define MAX_CSUM_ITEMS(r, size) (min(__MAX_CSUM_ITEMS(r, size), PAGE_CACHE_SIZE)) | 32 | #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ |
| 33 | PAGE_CACHE_SIZE)) | ||
| 33 | 34 | ||
| 34 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ | 35 | #define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \ |
| 35 | sizeof(struct btrfs_ordered_sum)) / \ | 36 | sizeof(struct btrfs_ordered_sum)) / \ |
| @@ -132,7 +133,6 @@ fail: | |||
| 132 | return ERR_PTR(ret); | 133 | return ERR_PTR(ret); |
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | |||
| 136 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | 136 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, |
| 137 | struct btrfs_root *root, | 137 | struct btrfs_root *root, |
| 138 | struct btrfs_path *path, u64 objectid, | 138 | struct btrfs_path *path, u64 objectid, |
| @@ -150,6 +150,26 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
| 150 | return ret; | 150 | return ret; |
| 151 | } | 151 | } |
| 152 | 152 | ||
| 153 | u64 btrfs_file_extent_length(struct btrfs_path *path) | ||
| 154 | { | ||
| 155 | int extent_type; | ||
| 156 | struct btrfs_file_extent_item *fi; | ||
| 157 | u64 len; | ||
| 158 | |||
| 159 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 160 | struct btrfs_file_extent_item); | ||
| 161 | extent_type = btrfs_file_extent_type(path->nodes[0], fi); | ||
| 162 | |||
| 163 | if (extent_type == BTRFS_FILE_EXTENT_REG || | ||
| 164 | extent_type == BTRFS_FILE_EXTENT_PREALLOC) | ||
| 165 | len = btrfs_file_extent_num_bytes(path->nodes[0], fi); | ||
| 166 | else if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
| 167 | len = btrfs_file_extent_inline_len(path->nodes[0], fi); | ||
| 168 | else | ||
| 169 | BUG(); | ||
| 170 | |||
| 171 | return len; | ||
| 172 | } | ||
| 153 | 173 | ||
| 154 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | 174 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
| 155 | struct inode *inode, struct bio *bio, | 175 | struct inode *inode, struct bio *bio, |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f6b40e86121b..77061bf43edb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -39,7 +39,9 @@ | |||
| 39 | #include "tree-log.h" | 39 | #include "tree-log.h" |
| 40 | #include "locking.h" | 40 | #include "locking.h" |
| 41 | #include "compat.h" | 41 | #include "compat.h" |
| 42 | #include "volumes.h" | ||
| 42 | 43 | ||
| 44 | static struct kmem_cache *btrfs_inode_defrag_cachep; | ||
| 43 | /* | 45 | /* |
| 44 | * when auto defrag is enabled we | 46 | * when auto defrag is enabled we |
| 45 | * queue up these defrag structs to remember which | 47 | * queue up these defrag structs to remember which |
| @@ -89,7 +91,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1, | |||
| 89 | * If an existing record is found the defrag item you | 91 | * If an existing record is found the defrag item you |
| 90 | * pass in is freed | 92 | * pass in is freed |
| 91 | */ | 93 | */ |
| 92 | static void __btrfs_add_inode_defrag(struct inode *inode, | 94 | static int __btrfs_add_inode_defrag(struct inode *inode, |
| 93 | struct inode_defrag *defrag) | 95 | struct inode_defrag *defrag) |
| 94 | { | 96 | { |
| 95 | struct btrfs_root *root = BTRFS_I(inode)->root; | 97 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| @@ -117,18 +119,24 @@ static void __btrfs_add_inode_defrag(struct inode *inode, | |||
| 117 | entry->transid = defrag->transid; | 119 | entry->transid = defrag->transid; |
| 118 | if (defrag->last_offset > entry->last_offset) | 120 | if (defrag->last_offset > entry->last_offset) |
| 119 | entry->last_offset = defrag->last_offset; | 121 | entry->last_offset = defrag->last_offset; |
| 120 | goto exists; | 122 | return -EEXIST; |
| 121 | } | 123 | } |
| 122 | } | 124 | } |
| 123 | set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); | 125 | set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
| 124 | rb_link_node(&defrag->rb_node, parent, p); | 126 | rb_link_node(&defrag->rb_node, parent, p); |
| 125 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | 127 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); |
| 126 | return; | 128 | return 0; |
| 129 | } | ||
| 127 | 130 | ||
| 128 | exists: | 131 | static inline int __need_auto_defrag(struct btrfs_root *root) |
| 129 | kfree(defrag); | 132 | { |
| 130 | return; | 133 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) |
| 134 | return 0; | ||
| 131 | 135 | ||
| 136 | if (btrfs_fs_closing(root->fs_info)) | ||
| 137 | return 0; | ||
| 138 | |||
| 139 | return 1; | ||
| 132 | } | 140 | } |
| 133 | 141 | ||
| 134 | /* | 142 | /* |
| @@ -141,11 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 141 | struct btrfs_root *root = BTRFS_I(inode)->root; | 149 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 142 | struct inode_defrag *defrag; | 150 | struct inode_defrag *defrag; |
| 143 | u64 transid; | 151 | u64 transid; |
| 152 | int ret; | ||
| 144 | 153 | ||
| 145 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | 154 | if (!__need_auto_defrag(root)) |
| 146 | return 0; | ||
| 147 | |||
| 148 | if (btrfs_fs_closing(root->fs_info)) | ||
| 149 | return 0; | 155 | return 0; |
| 150 | 156 | ||
| 151 | if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) | 157 | if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) |
| @@ -156,7 +162,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 156 | else | 162 | else |
| 157 | transid = BTRFS_I(inode)->root->last_trans; | 163 | transid = BTRFS_I(inode)->root->last_trans; |
| 158 | 164 | ||
| 159 | defrag = kzalloc(sizeof(*defrag), GFP_NOFS); | 165 | defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS); |
| 160 | if (!defrag) | 166 | if (!defrag) |
| 161 | return -ENOMEM; | 167 | return -ENOMEM; |
| 162 | 168 | ||
| @@ -165,20 +171,56 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 165 | defrag->root = root->root_key.objectid; | 171 | defrag->root = root->root_key.objectid; |
| 166 | 172 | ||
| 167 | spin_lock(&root->fs_info->defrag_inodes_lock); | 173 | spin_lock(&root->fs_info->defrag_inodes_lock); |
| 168 | if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) | 174 | if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) { |
| 169 | __btrfs_add_inode_defrag(inode, defrag); | 175 | /* |
| 170 | else | 176 | * If we set IN_DEFRAG flag and evict the inode from memory, |
| 171 | kfree(defrag); | 177 | * and then re-read this inode, this new inode doesn't have |
| 178 | * IN_DEFRAG flag. At the case, we may find the existed defrag. | ||
| 179 | */ | ||
| 180 | ret = __btrfs_add_inode_defrag(inode, defrag); | ||
| 181 | if (ret) | ||
| 182 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 183 | } else { | ||
| 184 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 185 | } | ||
| 172 | spin_unlock(&root->fs_info->defrag_inodes_lock); | 186 | spin_unlock(&root->fs_info->defrag_inodes_lock); |
| 173 | return 0; | 187 | return 0; |
| 174 | } | 188 | } |
| 175 | 189 | ||
| 176 | /* | 190 | /* |
| 177 | * must be called with the defrag_inodes lock held | 191 | * Requeue the defrag object. If there is a defrag object that points to |
| 192 | * the same inode in the tree, we will merge them together (by | ||
| 193 | * __btrfs_add_inode_defrag()) and free the one that we want to requeue. | ||
| 194 | */ | ||
| 195 | void btrfs_requeue_inode_defrag(struct inode *inode, | ||
| 196 | struct inode_defrag *defrag) | ||
| 197 | { | ||
| 198 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 199 | int ret; | ||
| 200 | |||
| 201 | if (!__need_auto_defrag(root)) | ||
| 202 | goto out; | ||
| 203 | |||
| 204 | /* | ||
| 205 | * Here we don't check the IN_DEFRAG flag, because we need merge | ||
| 206 | * them together. | ||
| 207 | */ | ||
| 208 | spin_lock(&root->fs_info->defrag_inodes_lock); | ||
| 209 | ret = __btrfs_add_inode_defrag(inode, defrag); | ||
| 210 | spin_unlock(&root->fs_info->defrag_inodes_lock); | ||
| 211 | if (ret) | ||
| 212 | goto out; | ||
| 213 | return; | ||
| 214 | out: | ||
| 215 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 216 | } | ||
| 217 | |||
| 218 | /* | ||
| 219 | * pick the defragable inode that we want, if it doesn't exist, we will get | ||
| 220 | * the next one. | ||
| 178 | */ | 221 | */ |
| 179 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, | 222 | static struct inode_defrag * |
| 180 | u64 root, u64 ino, | 223 | btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino) |
| 181 | struct rb_node **next) | ||
| 182 | { | 224 | { |
| 183 | struct inode_defrag *entry = NULL; | 225 | struct inode_defrag *entry = NULL; |
| 184 | struct inode_defrag tmp; | 226 | struct inode_defrag tmp; |
| @@ -189,7 +231,8 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, | |||
| 189 | tmp.ino = ino; | 231 | tmp.ino = ino; |
| 190 | tmp.root = root; | 232 | tmp.root = root; |
| 191 | 233 | ||
| 192 | p = info->defrag_inodes.rb_node; | 234 | spin_lock(&fs_info->defrag_inodes_lock); |
| 235 | p = fs_info->defrag_inodes.rb_node; | ||
| 193 | while (p) { | 236 | while (p) { |
| 194 | parent = p; | 237 | parent = p; |
| 195 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 238 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
| @@ -200,52 +243,131 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, | |||
| 200 | else if (ret > 0) | 243 | else if (ret > 0) |
| 201 | p = parent->rb_right; | 244 | p = parent->rb_right; |
| 202 | else | 245 | else |
| 203 | return entry; | 246 | goto out; |
| 204 | } | 247 | } |
| 205 | 248 | ||
| 206 | if (next) { | 249 | if (parent && __compare_inode_defrag(&tmp, entry) > 0) { |
| 207 | while (parent && __compare_inode_defrag(&tmp, entry) > 0) { | 250 | parent = rb_next(parent); |
| 208 | parent = rb_next(parent); | 251 | if (parent) |
| 209 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 252 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
| 210 | } | 253 | else |
| 211 | *next = parent; | 254 | entry = NULL; |
| 212 | } | 255 | } |
| 213 | return NULL; | 256 | out: |
| 257 | if (entry) | ||
| 258 | rb_erase(parent, &fs_info->defrag_inodes); | ||
| 259 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 260 | return entry; | ||
| 214 | } | 261 | } |
| 215 | 262 | ||
| 216 | /* | 263 | void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info) |
| 217 | * run through the list of inodes in the FS that need | ||
| 218 | * defragging | ||
| 219 | */ | ||
| 220 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | ||
| 221 | { | 264 | { |
| 222 | struct inode_defrag *defrag; | 265 | struct inode_defrag *defrag; |
| 266 | struct rb_node *node; | ||
| 267 | |||
| 268 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 269 | node = rb_first(&fs_info->defrag_inodes); | ||
| 270 | while (node) { | ||
| 271 | rb_erase(node, &fs_info->defrag_inodes); | ||
| 272 | defrag = rb_entry(node, struct inode_defrag, rb_node); | ||
| 273 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 274 | |||
| 275 | if (need_resched()) { | ||
| 276 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 277 | cond_resched(); | ||
| 278 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 279 | } | ||
| 280 | |||
| 281 | node = rb_first(&fs_info->defrag_inodes); | ||
| 282 | } | ||
| 283 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 284 | } | ||
| 285 | |||
| 286 | #define BTRFS_DEFRAG_BATCH 1024 | ||
| 287 | |||
| 288 | static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | ||
| 289 | struct inode_defrag *defrag) | ||
| 290 | { | ||
| 223 | struct btrfs_root *inode_root; | 291 | struct btrfs_root *inode_root; |
| 224 | struct inode *inode; | 292 | struct inode *inode; |
| 225 | struct rb_node *n; | ||
| 226 | struct btrfs_key key; | 293 | struct btrfs_key key; |
| 227 | struct btrfs_ioctl_defrag_range_args range; | 294 | struct btrfs_ioctl_defrag_range_args range; |
| 228 | u64 first_ino = 0; | ||
| 229 | u64 root_objectid = 0; | ||
| 230 | int num_defrag; | 295 | int num_defrag; |
| 231 | int defrag_batch = 1024; | ||
| 232 | 296 | ||
| 297 | /* get the inode */ | ||
| 298 | key.objectid = defrag->root; | ||
| 299 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 300 | key.offset = (u64)-1; | ||
| 301 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
| 302 | if (IS_ERR(inode_root)) { | ||
| 303 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 304 | return PTR_ERR(inode_root); | ||
| 305 | } | ||
| 306 | |||
| 307 | key.objectid = defrag->ino; | ||
| 308 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 309 | key.offset = 0; | ||
| 310 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | ||
| 311 | if (IS_ERR(inode)) { | ||
| 312 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 313 | return PTR_ERR(inode); | ||
| 314 | } | ||
| 315 | |||
| 316 | /* do a chunk of defrag */ | ||
| 317 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); | ||
| 233 | memset(&range, 0, sizeof(range)); | 318 | memset(&range, 0, sizeof(range)); |
| 234 | range.len = (u64)-1; | 319 | range.len = (u64)-1; |
| 320 | range.start = defrag->last_offset; | ||
| 321 | |||
| 322 | sb_start_write(fs_info->sb); | ||
| 323 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | ||
| 324 | BTRFS_DEFRAG_BATCH); | ||
| 325 | sb_end_write(fs_info->sb); | ||
| 326 | /* | ||
| 327 | * if we filled the whole defrag batch, there | ||
| 328 | * must be more work to do. Queue this defrag | ||
| 329 | * again | ||
| 330 | */ | ||
| 331 | if (num_defrag == BTRFS_DEFRAG_BATCH) { | ||
| 332 | defrag->last_offset = range.start; | ||
| 333 | btrfs_requeue_inode_defrag(inode, defrag); | ||
| 334 | } else if (defrag->last_offset && !defrag->cycled) { | ||
| 335 | /* | ||
| 336 | * we didn't fill our defrag batch, but | ||
| 337 | * we didn't start at zero. Make sure we loop | ||
| 338 | * around to the start of the file. | ||
| 339 | */ | ||
| 340 | defrag->last_offset = 0; | ||
| 341 | defrag->cycled = 1; | ||
| 342 | btrfs_requeue_inode_defrag(inode, defrag); | ||
| 343 | } else { | ||
| 344 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 345 | } | ||
| 346 | |||
| 347 | iput(inode); | ||
| 348 | return 0; | ||
| 349 | } | ||
| 350 | |||
| 351 | /* | ||
| 352 | * run through the list of inodes in the FS that need | ||
| 353 | * defragging | ||
| 354 | */ | ||
| 355 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | ||
| 356 | { | ||
| 357 | struct inode_defrag *defrag; | ||
| 358 | u64 first_ino = 0; | ||
| 359 | u64 root_objectid = 0; | ||
| 235 | 360 | ||
| 236 | atomic_inc(&fs_info->defrag_running); | 361 | atomic_inc(&fs_info->defrag_running); |
| 237 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 238 | while(1) { | 362 | while(1) { |
| 239 | n = NULL; | 363 | if (!__need_auto_defrag(fs_info->tree_root)) |
| 364 | break; | ||
| 240 | 365 | ||
| 241 | /* find an inode to defrag */ | 366 | /* find an inode to defrag */ |
| 242 | defrag = btrfs_find_defrag_inode(fs_info, root_objectid, | 367 | defrag = btrfs_pick_defrag_inode(fs_info, root_objectid, |
| 243 | first_ino, &n); | 368 | first_ino); |
| 244 | if (!defrag) { | 369 | if (!defrag) { |
| 245 | if (n) { | 370 | if (root_objectid || first_ino) { |
| 246 | defrag = rb_entry(n, struct inode_defrag, | ||
| 247 | rb_node); | ||
| 248 | } else if (root_objectid || first_ino) { | ||
| 249 | root_objectid = 0; | 371 | root_objectid = 0; |
| 250 | first_ino = 0; | 372 | first_ino = 0; |
| 251 | continue; | 373 | continue; |
| @@ -254,70 +376,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
| 254 | } | 376 | } |
| 255 | } | 377 | } |
| 256 | 378 | ||
| 257 | /* remove it from the rbtree */ | ||
| 258 | first_ino = defrag->ino + 1; | 379 | first_ino = defrag->ino + 1; |
| 259 | root_objectid = defrag->root; | 380 | root_objectid = defrag->root; |
| 260 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | ||
| 261 | |||
| 262 | if (btrfs_fs_closing(fs_info)) | ||
| 263 | goto next_free; | ||
| 264 | 381 | ||
| 265 | spin_unlock(&fs_info->defrag_inodes_lock); | 382 | __btrfs_run_defrag_inode(fs_info, defrag); |
| 266 | |||
| 267 | /* get the inode */ | ||
| 268 | key.objectid = defrag->root; | ||
| 269 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 270 | key.offset = (u64)-1; | ||
| 271 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
| 272 | if (IS_ERR(inode_root)) | ||
| 273 | goto next; | ||
| 274 | |||
| 275 | key.objectid = defrag->ino; | ||
| 276 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 277 | key.offset = 0; | ||
| 278 | |||
| 279 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | ||
| 280 | if (IS_ERR(inode)) | ||
| 281 | goto next; | ||
| 282 | |||
| 283 | /* do a chunk of defrag */ | ||
| 284 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); | ||
| 285 | range.start = defrag->last_offset; | ||
| 286 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | ||
| 287 | defrag_batch); | ||
| 288 | /* | ||
| 289 | * if we filled the whole defrag batch, there | ||
| 290 | * must be more work to do. Queue this defrag | ||
| 291 | * again | ||
| 292 | */ | ||
| 293 | if (num_defrag == defrag_batch) { | ||
| 294 | defrag->last_offset = range.start; | ||
| 295 | __btrfs_add_inode_defrag(inode, defrag); | ||
| 296 | /* | ||
| 297 | * we don't want to kfree defrag, we added it back to | ||
| 298 | * the rbtree | ||
| 299 | */ | ||
| 300 | defrag = NULL; | ||
| 301 | } else if (defrag->last_offset && !defrag->cycled) { | ||
| 302 | /* | ||
| 303 | * we didn't fill our defrag batch, but | ||
| 304 | * we didn't start at zero. Make sure we loop | ||
| 305 | * around to the start of the file. | ||
| 306 | */ | ||
| 307 | defrag->last_offset = 0; | ||
| 308 | defrag->cycled = 1; | ||
| 309 | __btrfs_add_inode_defrag(inode, defrag); | ||
| 310 | defrag = NULL; | ||
| 311 | } | ||
| 312 | |||
| 313 | iput(inode); | ||
| 314 | next: | ||
| 315 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 316 | next_free: | ||
| 317 | kfree(defrag); | ||
| 318 | } | 383 | } |
| 319 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 320 | |||
| 321 | atomic_dec(&fs_info->defrag_running); | 384 | atomic_dec(&fs_info->defrag_running); |
| 322 | 385 | ||
| 323 | /* | 386 | /* |
| @@ -458,14 +521,15 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | |||
| 458 | * this drops all the extents in the cache that intersect the range | 521 | * this drops all the extents in the cache that intersect the range |
| 459 | * [start, end]. Existing extents are split as required. | 522 | * [start, end]. Existing extents are split as required. |
| 460 | */ | 523 | */ |
| 461 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 524 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 462 | int skip_pinned) | 525 | int skip_pinned) |
| 463 | { | 526 | { |
| 464 | struct extent_map *em; | 527 | struct extent_map *em; |
| 465 | struct extent_map *split = NULL; | 528 | struct extent_map *split = NULL; |
| 466 | struct extent_map *split2 = NULL; | 529 | struct extent_map *split2 = NULL; |
| 467 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 530 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
| 468 | u64 len = end - start + 1; | 531 | u64 len = end - start + 1; |
| 532 | u64 gen; | ||
| 469 | int ret; | 533 | int ret; |
| 470 | int testend = 1; | 534 | int testend = 1; |
| 471 | unsigned long flags; | 535 | unsigned long flags; |
| @@ -477,11 +541,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 477 | testend = 0; | 541 | testend = 0; |
| 478 | } | 542 | } |
| 479 | while (1) { | 543 | while (1) { |
| 544 | int no_splits = 0; | ||
| 545 | |||
| 480 | if (!split) | 546 | if (!split) |
| 481 | split = alloc_extent_map(); | 547 | split = alloc_extent_map(); |
| 482 | if (!split2) | 548 | if (!split2) |
| 483 | split2 = alloc_extent_map(); | 549 | split2 = alloc_extent_map(); |
| 484 | BUG_ON(!split || !split2); /* -ENOMEM */ | 550 | if (!split || !split2) |
| 551 | no_splits = 1; | ||
| 485 | 552 | ||
| 486 | write_lock(&em_tree->lock); | 553 | write_lock(&em_tree->lock); |
| 487 | em = lookup_extent_mapping(em_tree, start, len); | 554 | em = lookup_extent_mapping(em_tree, start, len); |
| @@ -490,6 +557,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 490 | break; | 557 | break; |
| 491 | } | 558 | } |
| 492 | flags = em->flags; | 559 | flags = em->flags; |
| 560 | gen = em->generation; | ||
| 493 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | 561 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { |
| 494 | if (testend && em->start + em->len >= start + len) { | 562 | if (testend && em->start + em->len >= start + len) { |
| 495 | free_extent_map(em); | 563 | free_extent_map(em); |
| @@ -506,6 +574,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 506 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 574 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 507 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 575 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 508 | remove_extent_mapping(em_tree, em); | 576 | remove_extent_mapping(em_tree, em); |
| 577 | if (no_splits) | ||
| 578 | goto next; | ||
| 509 | 579 | ||
| 510 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | 580 | if (em->block_start < EXTENT_MAP_LAST_BYTE && |
| 511 | em->start < start) { | 581 | em->start < start) { |
| @@ -518,12 +588,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 518 | split->block_len = em->block_len; | 588 | split->block_len = em->block_len; |
| 519 | else | 589 | else |
| 520 | split->block_len = split->len; | 590 | split->block_len = split->len; |
| 521 | 591 | split->orig_block_len = max(split->block_len, | |
| 592 | em->orig_block_len); | ||
| 593 | split->generation = gen; | ||
| 522 | split->bdev = em->bdev; | 594 | split->bdev = em->bdev; |
| 523 | split->flags = flags; | 595 | split->flags = flags; |
| 524 | split->compress_type = em->compress_type; | 596 | split->compress_type = em->compress_type; |
| 525 | ret = add_extent_mapping(em_tree, split); | 597 | ret = add_extent_mapping(em_tree, split); |
| 526 | BUG_ON(ret); /* Logic error */ | 598 | BUG_ON(ret); /* Logic error */ |
| 599 | list_move(&split->list, &em_tree->modified_extents); | ||
| 527 | free_extent_map(split); | 600 | free_extent_map(split); |
| 528 | split = split2; | 601 | split = split2; |
| 529 | split2 = NULL; | 602 | split2 = NULL; |
| @@ -537,6 +610,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 537 | split->bdev = em->bdev; | 610 | split->bdev = em->bdev; |
| 538 | split->flags = flags; | 611 | split->flags = flags; |
| 539 | split->compress_type = em->compress_type; | 612 | split->compress_type = em->compress_type; |
| 613 | split->generation = gen; | ||
| 614 | split->orig_block_len = max(em->block_len, | ||
| 615 | em->orig_block_len); | ||
| 540 | 616 | ||
| 541 | if (compressed) { | 617 | if (compressed) { |
| 542 | split->block_len = em->block_len; | 618 | split->block_len = em->block_len; |
| @@ -545,14 +621,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 545 | } else { | 621 | } else { |
| 546 | split->block_len = split->len; | 622 | split->block_len = split->len; |
| 547 | split->block_start = em->block_start + diff; | 623 | split->block_start = em->block_start + diff; |
| 548 | split->orig_start = split->start; | 624 | split->orig_start = em->orig_start; |
| 549 | } | 625 | } |
| 550 | 626 | ||
| 551 | ret = add_extent_mapping(em_tree, split); | 627 | ret = add_extent_mapping(em_tree, split); |
| 552 | BUG_ON(ret); /* Logic error */ | 628 | BUG_ON(ret); /* Logic error */ |
| 629 | list_move(&split->list, &em_tree->modified_extents); | ||
| 553 | free_extent_map(split); | 630 | free_extent_map(split); |
| 554 | split = NULL; | 631 | split = NULL; |
| 555 | } | 632 | } |
| 633 | next: | ||
| 556 | write_unlock(&em_tree->lock); | 634 | write_unlock(&em_tree->lock); |
| 557 | 635 | ||
| 558 | /* once for us */ | 636 | /* once for us */ |
| @@ -564,7 +642,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 564 | free_extent_map(split); | 642 | free_extent_map(split); |
| 565 | if (split2) | 643 | if (split2) |
| 566 | free_extent_map(split2); | 644 | free_extent_map(split2); |
| 567 | return 0; | ||
| 568 | } | 645 | } |
| 569 | 646 | ||
| 570 | /* | 647 | /* |
| @@ -576,13 +653,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 576 | * it is either truncated or split. Anything entirely inside the range | 653 | * it is either truncated or split. Anything entirely inside the range |
| 577 | * is deleted from the tree. | 654 | * is deleted from the tree. |
| 578 | */ | 655 | */ |
| 579 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | 656 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 580 | u64 start, u64 end, u64 *hint_byte, int drop_cache) | 657 | struct btrfs_root *root, struct inode *inode, |
| 658 | struct btrfs_path *path, u64 start, u64 end, | ||
| 659 | u64 *drop_end, int drop_cache) | ||
| 581 | { | 660 | { |
| 582 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 583 | struct extent_buffer *leaf; | 661 | struct extent_buffer *leaf; |
| 584 | struct btrfs_file_extent_item *fi; | 662 | struct btrfs_file_extent_item *fi; |
| 585 | struct btrfs_path *path; | ||
| 586 | struct btrfs_key key; | 663 | struct btrfs_key key; |
| 587 | struct btrfs_key new_key; | 664 | struct btrfs_key new_key; |
| 588 | u64 ino = btrfs_ino(inode); | 665 | u64 ino = btrfs_ino(inode); |
| @@ -597,14 +674,12 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | |||
| 597 | int recow; | 674 | int recow; |
| 598 | int ret; | 675 | int ret; |
| 599 | int modify_tree = -1; | 676 | int modify_tree = -1; |
| 677 | int update_refs = (root->ref_cows || root == root->fs_info->tree_root); | ||
| 678 | int found = 0; | ||
| 600 | 679 | ||
| 601 | if (drop_cache) | 680 | if (drop_cache) |
| 602 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 681 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
| 603 | 682 | ||
| 604 | path = btrfs_alloc_path(); | ||
| 605 | if (!path) | ||
| 606 | return -ENOMEM; | ||
| 607 | |||
| 608 | if (start >= BTRFS_I(inode)->disk_i_size) | 683 | if (start >= BTRFS_I(inode)->disk_i_size) |
| 609 | modify_tree = 0; | 684 | modify_tree = 0; |
| 610 | 685 | ||
| @@ -666,6 +741,7 @@ next_slot: | |||
| 666 | goto next_slot; | 741 | goto next_slot; |
| 667 | } | 742 | } |
| 668 | 743 | ||
| 744 | found = 1; | ||
| 669 | search_start = max(key.offset, start); | 745 | search_start = max(key.offset, start); |
| 670 | if (recow || !modify_tree) { | 746 | if (recow || !modify_tree) { |
| 671 | modify_tree = -1; | 747 | modify_tree = -1; |
| @@ -707,14 +783,13 @@ next_slot: | |||
| 707 | extent_end - start); | 783 | extent_end - start); |
| 708 | btrfs_mark_buffer_dirty(leaf); | 784 | btrfs_mark_buffer_dirty(leaf); |
| 709 | 785 | ||
| 710 | if (disk_bytenr > 0) { | 786 | if (update_refs && disk_bytenr > 0) { |
| 711 | ret = btrfs_inc_extent_ref(trans, root, | 787 | ret = btrfs_inc_extent_ref(trans, root, |
| 712 | disk_bytenr, num_bytes, 0, | 788 | disk_bytenr, num_bytes, 0, |
| 713 | root->root_key.objectid, | 789 | root->root_key.objectid, |
| 714 | new_key.objectid, | 790 | new_key.objectid, |
| 715 | start - extent_offset, 0); | 791 | start - extent_offset, 0); |
| 716 | BUG_ON(ret); /* -ENOMEM */ | 792 | BUG_ON(ret); /* -ENOMEM */ |
| 717 | *hint_byte = disk_bytenr; | ||
| 718 | } | 793 | } |
| 719 | key.offset = start; | 794 | key.offset = start; |
| 720 | } | 795 | } |
| @@ -734,10 +809,8 @@ next_slot: | |||
| 734 | btrfs_set_file_extent_num_bytes(leaf, fi, | 809 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 735 | extent_end - end); | 810 | extent_end - end); |
| 736 | btrfs_mark_buffer_dirty(leaf); | 811 | btrfs_mark_buffer_dirty(leaf); |
| 737 | if (disk_bytenr > 0) { | 812 | if (update_refs && disk_bytenr > 0) |
| 738 | inode_sub_bytes(inode, end - key.offset); | 813 | inode_sub_bytes(inode, end - key.offset); |
| 739 | *hint_byte = disk_bytenr; | ||
| 740 | } | ||
| 741 | break; | 814 | break; |
| 742 | } | 815 | } |
| 743 | 816 | ||
| @@ -753,10 +826,8 @@ next_slot: | |||
| 753 | btrfs_set_file_extent_num_bytes(leaf, fi, | 826 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 754 | start - key.offset); | 827 | start - key.offset); |
| 755 | btrfs_mark_buffer_dirty(leaf); | 828 | btrfs_mark_buffer_dirty(leaf); |
| 756 | if (disk_bytenr > 0) { | 829 | if (update_refs && disk_bytenr > 0) |
| 757 | inode_sub_bytes(inode, extent_end - start); | 830 | inode_sub_bytes(inode, extent_end - start); |
| 758 | *hint_byte = disk_bytenr; | ||
| 759 | } | ||
| 760 | if (end == extent_end) | 831 | if (end == extent_end) |
| 761 | break; | 832 | break; |
| 762 | 833 | ||
| @@ -777,12 +848,13 @@ next_slot: | |||
| 777 | del_nr++; | 848 | del_nr++; |
| 778 | } | 849 | } |
| 779 | 850 | ||
| 780 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 851 | if (update_refs && |
| 852 | extent_type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 781 | inode_sub_bytes(inode, | 853 | inode_sub_bytes(inode, |
| 782 | extent_end - key.offset); | 854 | extent_end - key.offset); |
| 783 | extent_end = ALIGN(extent_end, | 855 | extent_end = ALIGN(extent_end, |
| 784 | root->sectorsize); | 856 | root->sectorsize); |
| 785 | } else if (disk_bytenr > 0) { | 857 | } else if (update_refs && disk_bytenr > 0) { |
| 786 | ret = btrfs_free_extent(trans, root, | 858 | ret = btrfs_free_extent(trans, root, |
| 787 | disk_bytenr, num_bytes, 0, | 859 | disk_bytenr, num_bytes, 0, |
| 788 | root->root_key.objectid, | 860 | root->root_key.objectid, |
| @@ -791,7 +863,6 @@ next_slot: | |||
| 791 | BUG_ON(ret); /* -ENOMEM */ | 863 | BUG_ON(ret); /* -ENOMEM */ |
| 792 | inode_sub_bytes(inode, | 864 | inode_sub_bytes(inode, |
| 793 | extent_end - key.offset); | 865 | extent_end - key.offset); |
| 794 | *hint_byte = disk_bytenr; | ||
| 795 | } | 866 | } |
| 796 | 867 | ||
| 797 | if (end == extent_end) | 868 | if (end == extent_end) |
| @@ -806,7 +877,7 @@ next_slot: | |||
| 806 | del_nr); | 877 | del_nr); |
| 807 | if (ret) { | 878 | if (ret) { |
| 808 | btrfs_abort_transaction(trans, root, ret); | 879 | btrfs_abort_transaction(trans, root, ret); |
| 809 | goto out; | 880 | break; |
| 810 | } | 881 | } |
| 811 | 882 | ||
| 812 | del_nr = 0; | 883 | del_nr = 0; |
| @@ -825,7 +896,24 @@ next_slot: | |||
| 825 | btrfs_abort_transaction(trans, root, ret); | 896 | btrfs_abort_transaction(trans, root, ret); |
| 826 | } | 897 | } |
| 827 | 898 | ||
| 828 | out: | 899 | if (drop_end) |
| 900 | *drop_end = found ? min(end, extent_end) : end; | ||
| 901 | btrfs_release_path(path); | ||
| 902 | return ret; | ||
| 903 | } | ||
| 904 | |||
| 905 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | ||
| 906 | struct btrfs_root *root, struct inode *inode, u64 start, | ||
| 907 | u64 end, int drop_cache) | ||
| 908 | { | ||
| 909 | struct btrfs_path *path; | ||
| 910 | int ret; | ||
| 911 | |||
| 912 | path = btrfs_alloc_path(); | ||
| 913 | if (!path) | ||
| 914 | return -ENOMEM; | ||
| 915 | ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, | ||
| 916 | drop_cache); | ||
| 829 | btrfs_free_path(path); | 917 | btrfs_free_path(path); |
| 830 | return ret; | 918 | return ret; |
| 831 | } | 919 | } |
| @@ -892,8 +980,6 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | |||
| 892 | int ret; | 980 | int ret; |
| 893 | u64 ino = btrfs_ino(inode); | 981 | u64 ino = btrfs_ino(inode); |
| 894 | 982 | ||
| 895 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
| 896 | |||
| 897 | path = btrfs_alloc_path(); | 983 | path = btrfs_alloc_path(); |
| 898 | if (!path) | 984 | if (!path) |
| 899 | return -ENOMEM; | 985 | return -ENOMEM; |
| @@ -935,12 +1021,16 @@ again: | |||
| 935 | btrfs_set_item_key_safe(trans, root, path, &new_key); | 1021 | btrfs_set_item_key_safe(trans, root, path, &new_key); |
| 936 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1022 | fi = btrfs_item_ptr(leaf, path->slots[0], |
| 937 | struct btrfs_file_extent_item); | 1023 | struct btrfs_file_extent_item); |
| 1024 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 1025 | trans->transid); | ||
| 938 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1026 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 939 | extent_end - end); | 1027 | extent_end - end); |
| 940 | btrfs_set_file_extent_offset(leaf, fi, | 1028 | btrfs_set_file_extent_offset(leaf, fi, |
| 941 | end - orig_offset); | 1029 | end - orig_offset); |
| 942 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, | 1030 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, |
| 943 | struct btrfs_file_extent_item); | 1031 | struct btrfs_file_extent_item); |
| 1032 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 1033 | trans->transid); | ||
| 944 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1034 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 945 | end - other_start); | 1035 | end - other_start); |
| 946 | btrfs_mark_buffer_dirty(leaf); | 1036 | btrfs_mark_buffer_dirty(leaf); |
| @@ -958,12 +1048,16 @@ again: | |||
| 958 | struct btrfs_file_extent_item); | 1048 | struct btrfs_file_extent_item); |
| 959 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1049 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 960 | start - key.offset); | 1050 | start - key.offset); |
| 1051 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 1052 | trans->transid); | ||
| 961 | path->slots[0]++; | 1053 | path->slots[0]++; |
| 962 | new_key.offset = start; | 1054 | new_key.offset = start; |
| 963 | btrfs_set_item_key_safe(trans, root, path, &new_key); | 1055 | btrfs_set_item_key_safe(trans, root, path, &new_key); |
| 964 | 1056 | ||
| 965 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1057 | fi = btrfs_item_ptr(leaf, path->slots[0], |
| 966 | struct btrfs_file_extent_item); | 1058 | struct btrfs_file_extent_item); |
| 1059 | btrfs_set_file_extent_generation(leaf, fi, | ||
| 1060 | trans->transid); | ||
| 967 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1061 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 968 | other_end - start); | 1062 | other_end - start); |
| 969 | btrfs_set_file_extent_offset(leaf, fi, | 1063 | btrfs_set_file_extent_offset(leaf, fi, |
| @@ -991,12 +1085,14 @@ again: | |||
| 991 | leaf = path->nodes[0]; | 1085 | leaf = path->nodes[0]; |
| 992 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, | 1086 | fi = btrfs_item_ptr(leaf, path->slots[0] - 1, |
| 993 | struct btrfs_file_extent_item); | 1087 | struct btrfs_file_extent_item); |
| 1088 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 994 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1089 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 995 | split - key.offset); | 1090 | split - key.offset); |
| 996 | 1091 | ||
| 997 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1092 | fi = btrfs_item_ptr(leaf, path->slots[0], |
| 998 | struct btrfs_file_extent_item); | 1093 | struct btrfs_file_extent_item); |
| 999 | 1094 | ||
| 1095 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 1000 | btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); | 1096 | btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); |
| 1001 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1097 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 1002 | extent_end - split); | 1098 | extent_end - split); |
| @@ -1056,12 +1152,14 @@ again: | |||
| 1056 | struct btrfs_file_extent_item); | 1152 | struct btrfs_file_extent_item); |
| 1057 | btrfs_set_file_extent_type(leaf, fi, | 1153 | btrfs_set_file_extent_type(leaf, fi, |
| 1058 | BTRFS_FILE_EXTENT_REG); | 1154 | BTRFS_FILE_EXTENT_REG); |
| 1155 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 1059 | btrfs_mark_buffer_dirty(leaf); | 1156 | btrfs_mark_buffer_dirty(leaf); |
| 1060 | } else { | 1157 | } else { |
| 1061 | fi = btrfs_item_ptr(leaf, del_slot - 1, | 1158 | fi = btrfs_item_ptr(leaf, del_slot - 1, |
| 1062 | struct btrfs_file_extent_item); | 1159 | struct btrfs_file_extent_item); |
| 1063 | btrfs_set_file_extent_type(leaf, fi, | 1160 | btrfs_set_file_extent_type(leaf, fi, |
| 1064 | BTRFS_FILE_EXTENT_REG); | 1161 | BTRFS_FILE_EXTENT_REG); |
| 1162 | btrfs_set_file_extent_generation(leaf, fi, trans->transid); | ||
| 1065 | btrfs_set_file_extent_num_bytes(leaf, fi, | 1163 | btrfs_set_file_extent_num_bytes(leaf, fi, |
| 1066 | extent_end - key.offset); | 1164 | extent_end - key.offset); |
| 1067 | btrfs_mark_buffer_dirty(leaf); | 1165 | btrfs_mark_buffer_dirty(leaf); |
| @@ -1173,8 +1271,8 @@ again: | |||
| 1173 | 1271 | ||
| 1174 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, | 1272 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, |
| 1175 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 1273 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
| 1176 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | 1274 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, |
| 1177 | GFP_NOFS); | 1275 | 0, 0, &cached_state, GFP_NOFS); |
| 1178 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1276 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
| 1179 | start_pos, last_pos - 1, &cached_state, | 1277 | start_pos, last_pos - 1, &cached_state, |
| 1180 | GFP_NOFS); | 1278 | GFP_NOFS); |
| @@ -1314,10 +1412,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1314 | 1412 | ||
| 1315 | cond_resched(); | 1413 | cond_resched(); |
| 1316 | 1414 | ||
| 1317 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1415 | balance_dirty_pages_ratelimited(inode->i_mapping); |
| 1318 | dirty_pages); | ||
| 1319 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1416 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) |
| 1320 | btrfs_btree_balance_dirty(root, 1); | 1417 | btrfs_btree_balance_dirty(root); |
| 1321 | 1418 | ||
| 1322 | pos += copied; | 1419 | pos += copied; |
| 1323 | num_written += copied; | 1420 | num_written += copied; |
| @@ -1366,6 +1463,24 @@ out: | |||
| 1366 | return written ? written : err; | 1463 | return written ? written : err; |
| 1367 | } | 1464 | } |
| 1368 | 1465 | ||
| 1466 | static void update_time_for_write(struct inode *inode) | ||
| 1467 | { | ||
| 1468 | struct timespec now; | ||
| 1469 | |||
| 1470 | if (IS_NOCMTIME(inode)) | ||
| 1471 | return; | ||
| 1472 | |||
| 1473 | now = current_fs_time(inode->i_sb); | ||
| 1474 | if (!timespec_equal(&inode->i_mtime, &now)) | ||
| 1475 | inode->i_mtime = now; | ||
| 1476 | |||
| 1477 | if (!timespec_equal(&inode->i_ctime, &now)) | ||
| 1478 | inode->i_ctime = now; | ||
| 1479 | |||
| 1480 | if (IS_I_VERSION(inode)) | ||
| 1481 | inode_inc_iversion(inode); | ||
| 1482 | } | ||
| 1483 | |||
| 1369 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | 1484 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
| 1370 | const struct iovec *iov, | 1485 | const struct iovec *iov, |
| 1371 | unsigned long nr_segs, loff_t pos) | 1486 | unsigned long nr_segs, loff_t pos) |
| @@ -1378,6 +1493,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1378 | ssize_t num_written = 0; | 1493 | ssize_t num_written = 0; |
| 1379 | ssize_t err = 0; | 1494 | ssize_t err = 0; |
| 1380 | size_t count, ocount; | 1495 | size_t count, ocount; |
| 1496 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); | ||
| 1381 | 1497 | ||
| 1382 | sb_start_write(inode->i_sb); | 1498 | sb_start_write(inode->i_sb); |
| 1383 | 1499 | ||
| @@ -1420,11 +1536,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1420 | goto out; | 1536 | goto out; |
| 1421 | } | 1537 | } |
| 1422 | 1538 | ||
| 1423 | err = file_update_time(file); | 1539 | /* |
| 1424 | if (err) { | 1540 | * We reserve space for updating the inode when we reserve space for the |
| 1425 | mutex_unlock(&inode->i_mutex); | 1541 | * extent we are going to write, so we will enospc out there. We don't |
| 1426 | goto out; | 1542 | * need to start yet another transaction to update the inode as we will |
| 1427 | } | 1543 | * update the inode when we finish writing whatever data we write. |
| 1544 | */ | ||
| 1545 | update_time_for_write(inode); | ||
| 1428 | 1546 | ||
| 1429 | start_pos = round_down(pos, root->sectorsize); | 1547 | start_pos = round_down(pos, root->sectorsize); |
| 1430 | if (start_pos > i_size_read(inode)) { | 1548 | if (start_pos > i_size_read(inode)) { |
| @@ -1435,6 +1553,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1435 | } | 1553 | } |
| 1436 | } | 1554 | } |
| 1437 | 1555 | ||
| 1556 | if (sync) | ||
| 1557 | atomic_inc(&BTRFS_I(inode)->sync_writers); | ||
| 1558 | |||
| 1438 | if (unlikely(file->f_flags & O_DIRECT)) { | 1559 | if (unlikely(file->f_flags & O_DIRECT)) { |
| 1439 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | 1560 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, |
| 1440 | pos, ppos, count, ocount); | 1561 | pos, ppos, count, ocount); |
| @@ -1461,14 +1582,21 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1461 | * this will either be one more than the running transaction | 1582 | * this will either be one more than the running transaction |
| 1462 | * or the generation used for the next transaction if there isn't | 1583 | * or the generation used for the next transaction if there isn't |
| 1463 | * one running right now. | 1584 | * one running right now. |
| 1585 | * | ||
| 1586 | * We also have to set last_sub_trans to the current log transid, | ||
| 1587 | * otherwise subsequent syncs to a file that's been synced in this | ||
| 1588 | * transaction will appear to have already occured. | ||
| 1464 | */ | 1589 | */ |
| 1465 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 1590 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
| 1591 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | ||
| 1466 | if (num_written > 0 || num_written == -EIOCBQUEUED) { | 1592 | if (num_written > 0 || num_written == -EIOCBQUEUED) { |
| 1467 | err = generic_write_sync(file, pos, num_written); | 1593 | err = generic_write_sync(file, pos, num_written); |
| 1468 | if (err < 0 && num_written > 0) | 1594 | if (err < 0 && num_written > 0) |
| 1469 | num_written = err; | 1595 | num_written = err; |
| 1470 | } | 1596 | } |
| 1471 | out: | 1597 | out: |
| 1598 | if (sync) | ||
| 1599 | atomic_dec(&BTRFS_I(inode)->sync_writers); | ||
| 1472 | sb_end_write(inode->i_sb); | 1600 | sb_end_write(inode->i_sb); |
| 1473 | current->backing_dev_info = NULL; | 1601 | current->backing_dev_info = NULL; |
| 1474 | return num_written ? num_written : err; | 1602 | return num_written ? num_written : err; |
| @@ -1514,16 +1642,26 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1514 | 1642 | ||
| 1515 | trace_btrfs_sync_file(file, datasync); | 1643 | trace_btrfs_sync_file(file, datasync); |
| 1516 | 1644 | ||
| 1645 | /* | ||
| 1646 | * We write the dirty pages in the range and wait until they complete | ||
| 1647 | * out of the ->i_mutex. If so, we can flush the dirty pages by | ||
| 1648 | * multi-task, and make the performance up. | ||
| 1649 | */ | ||
| 1650 | atomic_inc(&BTRFS_I(inode)->sync_writers); | ||
| 1651 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
| 1652 | atomic_dec(&BTRFS_I(inode)->sync_writers); | ||
| 1653 | if (ret) | ||
| 1654 | return ret; | ||
| 1655 | |||
| 1517 | mutex_lock(&inode->i_mutex); | 1656 | mutex_lock(&inode->i_mutex); |
| 1518 | 1657 | ||
| 1519 | /* | 1658 | /* |
| 1520 | * we wait first, since the writeback may change the inode, also wait | 1659 | * We flush the dirty pages again to avoid some dirty pages in the |
| 1521 | * ordered range does a filemape_write_and_wait_range which is why we | 1660 | * range being left. |
| 1522 | * don't do it above like other file systems. | ||
| 1523 | */ | 1661 | */ |
| 1524 | root->log_batch++; | 1662 | atomic_inc(&root->log_batch); |
| 1525 | btrfs_wait_ordered_range(inode, start, end); | 1663 | btrfs_wait_ordered_range(inode, start, end - start + 1); |
| 1526 | root->log_batch++; | 1664 | atomic_inc(&root->log_batch); |
| 1527 | 1665 | ||
| 1528 | /* | 1666 | /* |
| 1529 | * check the transaction that last modified this inode | 1667 | * check the transaction that last modified this inode |
| @@ -1544,6 +1682,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1544 | BTRFS_I(inode)->last_trans <= | 1682 | BTRFS_I(inode)->last_trans <= |
| 1545 | root->fs_info->last_trans_committed) { | 1683 | root->fs_info->last_trans_committed) { |
| 1546 | BTRFS_I(inode)->last_trans = 0; | 1684 | BTRFS_I(inode)->last_trans = 0; |
| 1685 | |||
| 1686 | /* | ||
| 1687 | * We'v had everything committed since the last time we were | ||
| 1688 | * modified so clear this flag in case it was set for whatever | ||
| 1689 | * reason, it's no longer relevant. | ||
| 1690 | */ | ||
| 1691 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 1692 | &BTRFS_I(inode)->runtime_flags); | ||
| 1547 | mutex_unlock(&inode->i_mutex); | 1693 | mutex_unlock(&inode->i_mutex); |
| 1548 | goto out; | 1694 | goto out; |
| 1549 | } | 1695 | } |
| @@ -1615,6 +1761,329 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
| 1615 | return 0; | 1761 | return 0; |
| 1616 | } | 1762 | } |
| 1617 | 1763 | ||
| 1764 | static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, | ||
| 1765 | int slot, u64 start, u64 end) | ||
| 1766 | { | ||
| 1767 | struct btrfs_file_extent_item *fi; | ||
| 1768 | struct btrfs_key key; | ||
| 1769 | |||
| 1770 | if (slot < 0 || slot >= btrfs_header_nritems(leaf)) | ||
| 1771 | return 0; | ||
| 1772 | |||
| 1773 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 1774 | if (key.objectid != btrfs_ino(inode) || | ||
| 1775 | key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 1776 | return 0; | ||
| 1777 | |||
| 1778 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
| 1779 | |||
| 1780 | if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) | ||
| 1781 | return 0; | ||
| 1782 | |||
| 1783 | if (btrfs_file_extent_disk_bytenr(leaf, fi)) | ||
| 1784 | return 0; | ||
| 1785 | |||
| 1786 | if (key.offset == end) | ||
| 1787 | return 1; | ||
| 1788 | if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) | ||
| 1789 | return 1; | ||
| 1790 | return 0; | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, | ||
| 1794 | struct btrfs_path *path, u64 offset, u64 end) | ||
| 1795 | { | ||
| 1796 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1797 | struct extent_buffer *leaf; | ||
| 1798 | struct btrfs_file_extent_item *fi; | ||
| 1799 | struct extent_map *hole_em; | ||
| 1800 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 1801 | struct btrfs_key key; | ||
| 1802 | int ret; | ||
| 1803 | |||
| 1804 | key.objectid = btrfs_ino(inode); | ||
| 1805 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
| 1806 | key.offset = offset; | ||
| 1807 | |||
| 1808 | |||
| 1809 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
| 1810 | if (ret < 0) | ||
| 1811 | return ret; | ||
| 1812 | BUG_ON(!ret); | ||
| 1813 | |||
| 1814 | leaf = path->nodes[0]; | ||
| 1815 | if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) { | ||
| 1816 | u64 num_bytes; | ||
| 1817 | |||
| 1818 | path->slots[0]--; | ||
| 1819 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 1820 | struct btrfs_file_extent_item); | ||
| 1821 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + | ||
| 1822 | end - offset; | ||
| 1823 | btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); | ||
| 1824 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | ||
| 1825 | btrfs_set_file_extent_offset(leaf, fi, 0); | ||
| 1826 | btrfs_mark_buffer_dirty(leaf); | ||
| 1827 | goto out; | ||
| 1828 | } | ||
| 1829 | |||
| 1830 | if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { | ||
| 1831 | u64 num_bytes; | ||
| 1832 | |||
| 1833 | path->slots[0]++; | ||
| 1834 | key.offset = offset; | ||
| 1835 | btrfs_set_item_key_safe(trans, root, path, &key); | ||
| 1836 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 1837 | struct btrfs_file_extent_item); | ||
| 1838 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - | ||
| 1839 | offset; | ||
| 1840 | btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); | ||
| 1841 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | ||
| 1842 | btrfs_set_file_extent_offset(leaf, fi, 0); | ||
| 1843 | btrfs_mark_buffer_dirty(leaf); | ||
| 1844 | goto out; | ||
| 1845 | } | ||
| 1846 | btrfs_release_path(path); | ||
| 1847 | |||
| 1848 | ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, | ||
| 1849 | 0, 0, end - offset, 0, end - offset, | ||
| 1850 | 0, 0, 0); | ||
| 1851 | if (ret) | ||
| 1852 | return ret; | ||
| 1853 | |||
| 1854 | out: | ||
| 1855 | btrfs_release_path(path); | ||
| 1856 | |||
| 1857 | hole_em = alloc_extent_map(); | ||
| 1858 | if (!hole_em) { | ||
| 1859 | btrfs_drop_extent_cache(inode, offset, end - 1, 0); | ||
| 1860 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 1861 | &BTRFS_I(inode)->runtime_flags); | ||
| 1862 | } else { | ||
| 1863 | hole_em->start = offset; | ||
| 1864 | hole_em->len = end - offset; | ||
| 1865 | hole_em->orig_start = offset; | ||
| 1866 | |||
| 1867 | hole_em->block_start = EXTENT_MAP_HOLE; | ||
| 1868 | hole_em->block_len = 0; | ||
| 1869 | hole_em->orig_block_len = 0; | ||
| 1870 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 1871 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | ||
| 1872 | hole_em->generation = trans->transid; | ||
| 1873 | |||
| 1874 | do { | ||
| 1875 | btrfs_drop_extent_cache(inode, offset, end - 1, 0); | ||
| 1876 | write_lock(&em_tree->lock); | ||
| 1877 | ret = add_extent_mapping(em_tree, hole_em); | ||
| 1878 | if (!ret) | ||
| 1879 | list_move(&hole_em->list, | ||
| 1880 | &em_tree->modified_extents); | ||
| 1881 | write_unlock(&em_tree->lock); | ||
| 1882 | } while (ret == -EEXIST); | ||
| 1883 | free_extent_map(hole_em); | ||
| 1884 | if (ret) | ||
| 1885 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 1886 | &BTRFS_I(inode)->runtime_flags); | ||
| 1887 | } | ||
| 1888 | |||
| 1889 | return 0; | ||
| 1890 | } | ||
| 1891 | |||
| 1892 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | ||
| 1893 | { | ||
| 1894 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1895 | struct extent_state *cached_state = NULL; | ||
| 1896 | struct btrfs_path *path; | ||
| 1897 | struct btrfs_block_rsv *rsv; | ||
| 1898 | struct btrfs_trans_handle *trans; | ||
| 1899 | u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); | ||
| 1900 | u64 lockend = round_down(offset + len, | ||
| 1901 | BTRFS_I(inode)->root->sectorsize) - 1; | ||
| 1902 | u64 cur_offset = lockstart; | ||
| 1903 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
| 1904 | u64 drop_end; | ||
| 1905 | int ret = 0; | ||
| 1906 | int err = 0; | ||
| 1907 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | ||
| 1908 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | ||
| 1909 | |||
| 1910 | btrfs_wait_ordered_range(inode, offset, len); | ||
| 1911 | |||
| 1912 | mutex_lock(&inode->i_mutex); | ||
| 1913 | /* | ||
| 1914 | * We needn't truncate any page which is beyond the end of the file | ||
| 1915 | * because we are sure there is no data there. | ||
| 1916 | */ | ||
| 1917 | /* | ||
| 1918 | * Only do this if we are in the same page and we aren't doing the | ||
| 1919 | * entire page. | ||
| 1920 | */ | ||
| 1921 | if (same_page && len < PAGE_CACHE_SIZE) { | ||
| 1922 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) | ||
| 1923 | ret = btrfs_truncate_page(inode, offset, len, 0); | ||
| 1924 | mutex_unlock(&inode->i_mutex); | ||
| 1925 | return ret; | ||
| 1926 | } | ||
| 1927 | |||
| 1928 | /* zero back part of the first page */ | ||
| 1929 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | ||
| 1930 | ret = btrfs_truncate_page(inode, offset, 0, 0); | ||
| 1931 | if (ret) { | ||
| 1932 | mutex_unlock(&inode->i_mutex); | ||
| 1933 | return ret; | ||
| 1934 | } | ||
| 1935 | } | ||
| 1936 | |||
| 1937 | /* zero the front end of the last page */ | ||
| 1938 | if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | ||
| 1939 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | ||
| 1940 | if (ret) { | ||
| 1941 | mutex_unlock(&inode->i_mutex); | ||
| 1942 | return ret; | ||
| 1943 | } | ||
| 1944 | } | ||
| 1945 | |||
| 1946 | if (lockend < lockstart) { | ||
| 1947 | mutex_unlock(&inode->i_mutex); | ||
| 1948 | return 0; | ||
| 1949 | } | ||
| 1950 | |||
| 1951 | while (1) { | ||
| 1952 | struct btrfs_ordered_extent *ordered; | ||
| 1953 | |||
| 1954 | truncate_pagecache_range(inode, lockstart, lockend); | ||
| 1955 | |||
| 1956 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
| 1957 | 0, &cached_state); | ||
| 1958 | ordered = btrfs_lookup_first_ordered_extent(inode, lockend); | ||
| 1959 | |||
| 1960 | /* | ||
| 1961 | * We need to make sure we have no ordered extents in this range | ||
| 1962 | * and nobody raced in and read a page in this range, if we did | ||
| 1963 | * we need to try again. | ||
| 1964 | */ | ||
| 1965 | if ((!ordered || | ||
| 1966 | (ordered->file_offset + ordered->len < lockstart || | ||
| 1967 | ordered->file_offset > lockend)) && | ||
| 1968 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
| 1969 | lockend, EXTENT_UPTODATE, 0, | ||
| 1970 | cached_state)) { | ||
| 1971 | if (ordered) | ||
| 1972 | btrfs_put_ordered_extent(ordered); | ||
| 1973 | break; | ||
| 1974 | } | ||
| 1975 | if (ordered) | ||
| 1976 | btrfs_put_ordered_extent(ordered); | ||
| 1977 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, | ||
| 1978 | lockend, &cached_state, GFP_NOFS); | ||
| 1979 | btrfs_wait_ordered_range(inode, lockstart, | ||
| 1980 | lockend - lockstart + 1); | ||
| 1981 | } | ||
| 1982 | |||
| 1983 | path = btrfs_alloc_path(); | ||
| 1984 | if (!path) { | ||
| 1985 | ret = -ENOMEM; | ||
| 1986 | goto out; | ||
| 1987 | } | ||
| 1988 | |||
| 1989 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); | ||
| 1990 | if (!rsv) { | ||
| 1991 | ret = -ENOMEM; | ||
| 1992 | goto out_free; | ||
| 1993 | } | ||
| 1994 | rsv->size = btrfs_calc_trunc_metadata_size(root, 1); | ||
| 1995 | rsv->failfast = 1; | ||
| 1996 | |||
| 1997 | /* | ||
| 1998 | * 1 - update the inode | ||
| 1999 | * 1 - removing the extents in the range | ||
| 2000 | * 1 - adding the hole extent | ||
| 2001 | */ | ||
| 2002 | trans = btrfs_start_transaction(root, 3); | ||
| 2003 | if (IS_ERR(trans)) { | ||
| 2004 | err = PTR_ERR(trans); | ||
| 2005 | goto out_free; | ||
| 2006 | } | ||
| 2007 | |||
| 2008 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, | ||
| 2009 | min_size); | ||
| 2010 | BUG_ON(ret); | ||
| 2011 | trans->block_rsv = rsv; | ||
| 2012 | |||
| 2013 | while (cur_offset < lockend) { | ||
| 2014 | ret = __btrfs_drop_extents(trans, root, inode, path, | ||
| 2015 | cur_offset, lockend + 1, | ||
| 2016 | &drop_end, 1); | ||
| 2017 | if (ret != -ENOSPC) | ||
| 2018 | break; | ||
| 2019 | |||
| 2020 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 2021 | |||
| 2022 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | ||
| 2023 | if (ret) { | ||
| 2024 | err = ret; | ||
| 2025 | break; | ||
| 2026 | } | ||
| 2027 | |||
| 2028 | cur_offset = drop_end; | ||
| 2029 | |||
| 2030 | ret = btrfs_update_inode(trans, root, inode); | ||
| 2031 | if (ret) { | ||
| 2032 | err = ret; | ||
| 2033 | break; | ||
| 2034 | } | ||
| 2035 | |||
| 2036 | btrfs_end_transaction(trans, root); | ||
| 2037 | btrfs_btree_balance_dirty(root); | ||
| 2038 | |||
| 2039 | trans = btrfs_start_transaction(root, 3); | ||
| 2040 | if (IS_ERR(trans)) { | ||
| 2041 | ret = PTR_ERR(trans); | ||
| 2042 | trans = NULL; | ||
| 2043 | break; | ||
| 2044 | } | ||
| 2045 | |||
| 2046 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | ||
| 2047 | rsv, min_size); | ||
| 2048 | BUG_ON(ret); /* shouldn't happen */ | ||
| 2049 | trans->block_rsv = rsv; | ||
| 2050 | } | ||
| 2051 | |||
| 2052 | if (ret) { | ||
| 2053 | err = ret; | ||
| 2054 | goto out_trans; | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 2058 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | ||
| 2059 | if (ret) { | ||
| 2060 | err = ret; | ||
| 2061 | goto out_trans; | ||
| 2062 | } | ||
| 2063 | |||
| 2064 | out_trans: | ||
| 2065 | if (!trans) | ||
| 2066 | goto out_free; | ||
| 2067 | |||
| 2068 | inode_inc_iversion(inode); | ||
| 2069 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 2070 | |||
| 2071 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 2072 | ret = btrfs_update_inode(trans, root, inode); | ||
| 2073 | btrfs_end_transaction(trans, root); | ||
| 2074 | btrfs_btree_balance_dirty(root); | ||
| 2075 | out_free: | ||
| 2076 | btrfs_free_path(path); | ||
| 2077 | btrfs_free_block_rsv(root, rsv); | ||
| 2078 | out: | ||
| 2079 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
| 2080 | &cached_state, GFP_NOFS); | ||
| 2081 | mutex_unlock(&inode->i_mutex); | ||
| 2082 | if (ret && !err) | ||
| 2083 | err = ret; | ||
| 2084 | return err; | ||
| 2085 | } | ||
| 2086 | |||
| 1618 | static long btrfs_fallocate(struct file *file, int mode, | 2087 | static long btrfs_fallocate(struct file *file, int mode, |
| 1619 | loff_t offset, loff_t len) | 2088 | loff_t offset, loff_t len) |
| 1620 | { | 2089 | { |
| @@ -1626,22 +2095,25 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1626 | u64 alloc_end; | 2095 | u64 alloc_end; |
| 1627 | u64 alloc_hint = 0; | 2096 | u64 alloc_hint = 0; |
| 1628 | u64 locked_end; | 2097 | u64 locked_end; |
| 1629 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
| 1630 | struct extent_map *em; | 2098 | struct extent_map *em; |
| 2099 | int blocksize = BTRFS_I(inode)->root->sectorsize; | ||
| 1631 | int ret; | 2100 | int ret; |
| 1632 | 2101 | ||
| 1633 | alloc_start = offset & ~mask; | 2102 | alloc_start = round_down(offset, blocksize); |
| 1634 | alloc_end = (offset + len + mask) & ~mask; | 2103 | alloc_end = round_up(offset + len, blocksize); |
| 1635 | 2104 | ||
| 1636 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | 2105 | /* Make sure we aren't being give some crap mode */ |
| 1637 | if (mode & ~FALLOC_FL_KEEP_SIZE) | 2106 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
| 1638 | return -EOPNOTSUPP; | 2107 | return -EOPNOTSUPP; |
| 1639 | 2108 | ||
| 2109 | if (mode & FALLOC_FL_PUNCH_HOLE) | ||
| 2110 | return btrfs_punch_hole(inode, offset, len); | ||
| 2111 | |||
| 1640 | /* | 2112 | /* |
| 1641 | * Make sure we have enough space before we do the | 2113 | * Make sure we have enough space before we do the |
| 1642 | * allocation. | 2114 | * allocation. |
| 1643 | */ | 2115 | */ |
| 1644 | ret = btrfs_check_data_free_space(inode, len); | 2116 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); |
| 1645 | if (ret) | 2117 | if (ret) |
| 1646 | return ret; | 2118 | return ret; |
| 1647 | 2119 | ||
| @@ -1709,7 +2181,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1709 | } | 2181 | } |
| 1710 | last_byte = min(extent_map_end(em), alloc_end); | 2182 | last_byte = min(extent_map_end(em), alloc_end); |
| 1711 | actual_end = min_t(u64, extent_map_end(em), offset + len); | 2183 | actual_end = min_t(u64, extent_map_end(em), offset + len); |
| 1712 | last_byte = (last_byte + mask) & ~mask; | 2184 | last_byte = ALIGN(last_byte, blocksize); |
| 1713 | 2185 | ||
| 1714 | if (em->block_start == EXTENT_MAP_HOLE || | 2186 | if (em->block_start == EXTENT_MAP_HOLE || |
| 1715 | (cur_offset >= inode->i_size && | 2187 | (cur_offset >= inode->i_size && |
| @@ -1748,11 +2220,11 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1748 | out: | 2220 | out: |
| 1749 | mutex_unlock(&inode->i_mutex); | 2221 | mutex_unlock(&inode->i_mutex); |
| 1750 | /* Let go of our reservation. */ | 2222 | /* Let go of our reservation. */ |
| 1751 | btrfs_free_reserved_data_space(inode, len); | 2223 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
| 1752 | return ret; | 2224 | return ret; |
| 1753 | } | 2225 | } |
| 1754 | 2226 | ||
| 1755 | static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | 2227 | static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) |
| 1756 | { | 2228 | { |
| 1757 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2229 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1758 | struct extent_map *em; | 2230 | struct extent_map *em; |
| @@ -1786,7 +2258,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | |||
| 1786 | * before the position we want in case there is outstanding delalloc | 2258 | * before the position we want in case there is outstanding delalloc |
| 1787 | * going on here. | 2259 | * going on here. |
| 1788 | */ | 2260 | */ |
| 1789 | if (origin == SEEK_HOLE && start != 0) { | 2261 | if (whence == SEEK_HOLE && start != 0) { |
| 1790 | if (start <= root->sectorsize) | 2262 | if (start <= root->sectorsize) |
| 1791 | em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, | 2263 | em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, |
| 1792 | root->sectorsize, 0); | 2264 | root->sectorsize, 0); |
| @@ -1820,13 +2292,13 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | |||
| 1820 | } | 2292 | } |
| 1821 | } | 2293 | } |
| 1822 | 2294 | ||
| 1823 | if (origin == SEEK_HOLE) { | 2295 | if (whence == SEEK_HOLE) { |
| 1824 | *offset = start; | 2296 | *offset = start; |
| 1825 | free_extent_map(em); | 2297 | free_extent_map(em); |
| 1826 | break; | 2298 | break; |
| 1827 | } | 2299 | } |
| 1828 | } else { | 2300 | } else { |
| 1829 | if (origin == SEEK_DATA) { | 2301 | if (whence == SEEK_DATA) { |
| 1830 | if (em->block_start == EXTENT_MAP_DELALLOC) { | 2302 | if (em->block_start == EXTENT_MAP_DELALLOC) { |
| 1831 | if (start >= inode->i_size) { | 2303 | if (start >= inode->i_size) { |
| 1832 | free_extent_map(em); | 2304 | free_extent_map(em); |
| @@ -1863,16 +2335,16 @@ out: | |||
| 1863 | return ret; | 2335 | return ret; |
| 1864 | } | 2336 | } |
| 1865 | 2337 | ||
| 1866 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | 2338 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) |
| 1867 | { | 2339 | { |
| 1868 | struct inode *inode = file->f_mapping->host; | 2340 | struct inode *inode = file->f_mapping->host; |
| 1869 | int ret; | 2341 | int ret; |
| 1870 | 2342 | ||
| 1871 | mutex_lock(&inode->i_mutex); | 2343 | mutex_lock(&inode->i_mutex); |
| 1872 | switch (origin) { | 2344 | switch (whence) { |
| 1873 | case SEEK_END: | 2345 | case SEEK_END: |
| 1874 | case SEEK_CUR: | 2346 | case SEEK_CUR: |
| 1875 | offset = generic_file_llseek(file, offset, origin); | 2347 | offset = generic_file_llseek(file, offset, whence); |
| 1876 | goto out; | 2348 | goto out; |
| 1877 | case SEEK_DATA: | 2349 | case SEEK_DATA: |
| 1878 | case SEEK_HOLE: | 2350 | case SEEK_HOLE: |
| @@ -1881,7 +2353,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | |||
| 1881 | return -ENXIO; | 2353 | return -ENXIO; |
| 1882 | } | 2354 | } |
| 1883 | 2355 | ||
| 1884 | ret = find_desired_extent(inode, &offset, origin); | 2356 | ret = find_desired_extent(inode, &offset, whence); |
| 1885 | if (ret) { | 2357 | if (ret) { |
| 1886 | mutex_unlock(&inode->i_mutex); | 2358 | mutex_unlock(&inode->i_mutex); |
| 1887 | return ret; | 2359 | return ret; |
| @@ -1924,3 +2396,21 @@ const struct file_operations btrfs_file_operations = { | |||
| 1924 | .compat_ioctl = btrfs_ioctl, | 2396 | .compat_ioctl = btrfs_ioctl, |
| 1925 | #endif | 2397 | #endif |
| 1926 | }; | 2398 | }; |
| 2399 | |||
| 2400 | void btrfs_auto_defrag_exit(void) | ||
| 2401 | { | ||
| 2402 | if (btrfs_inode_defrag_cachep) | ||
| 2403 | kmem_cache_destroy(btrfs_inode_defrag_cachep); | ||
| 2404 | } | ||
| 2405 | |||
| 2406 | int btrfs_auto_defrag_init(void) | ||
| 2407 | { | ||
| 2408 | btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", | ||
| 2409 | sizeof(struct inode_defrag), 0, | ||
| 2410 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
| 2411 | NULL); | ||
| 2412 | if (!btrfs_inode_defrag_cachep) | ||
| 2413 | return -ENOMEM; | ||
| 2414 | |||
| 2415 | return 0; | ||
| 2416 | } | ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6b10acfc2f5c..59ea2e4349c9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -307,7 +307,6 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl) | |||
| 307 | 307 | ||
| 308 | static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) | 308 | static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) |
| 309 | { | 309 | { |
| 310 | WARN_ON(io_ctl->cur); | ||
| 311 | BUG_ON(io_ctl->index >= io_ctl->num_pages); | 310 | BUG_ON(io_ctl->index >= io_ctl->num_pages); |
| 312 | io_ctl->page = io_ctl->pages[io_ctl->index++]; | 311 | io_ctl->page = io_ctl->pages[io_ctl->index++]; |
| 313 | io_ctl->cur = kmap(io_ctl->page); | 312 | io_ctl->cur = kmap(io_ctl->page); |
| @@ -966,7 +965,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 966 | block_group->key.offset)) { | 965 | block_group->key.offset)) { |
| 967 | ret = find_first_extent_bit(unpin, start, | 966 | ret = find_first_extent_bit(unpin, start, |
| 968 | &extent_start, &extent_end, | 967 | &extent_start, &extent_end, |
| 969 | EXTENT_DIRTY); | 968 | EXTENT_DIRTY, NULL); |
| 970 | if (ret) { | 969 | if (ret) { |
| 971 | ret = 0; | 970 | ret = 0; |
| 972 | break; | 971 | break; |
| @@ -1250,18 +1249,13 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl, | |||
| 1250 | * if previous extent entry covers the offset, | 1249 | * if previous extent entry covers the offset, |
| 1251 | * we should return it instead of the bitmap entry | 1250 | * we should return it instead of the bitmap entry |
| 1252 | */ | 1251 | */ |
| 1253 | n = &entry->offset_index; | 1252 | n = rb_prev(&entry->offset_index); |
| 1254 | while (1) { | 1253 | if (n) { |
| 1255 | n = rb_prev(n); | ||
| 1256 | if (!n) | ||
| 1257 | break; | ||
| 1258 | prev = rb_entry(n, struct btrfs_free_space, | 1254 | prev = rb_entry(n, struct btrfs_free_space, |
| 1259 | offset_index); | 1255 | offset_index); |
| 1260 | if (!prev->bitmap) { | 1256 | if (!prev->bitmap && |
| 1261 | if (prev->offset + prev->bytes > offset) | 1257 | prev->offset + prev->bytes > offset) |
| 1262 | entry = prev; | 1258 | entry = prev; |
| 1263 | break; | ||
| 1264 | } | ||
| 1265 | } | 1259 | } |
| 1266 | } | 1260 | } |
| 1267 | return entry; | 1261 | return entry; |
| @@ -1287,18 +1281,13 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl, | |||
| 1287 | } | 1281 | } |
| 1288 | 1282 | ||
| 1289 | if (entry->bitmap) { | 1283 | if (entry->bitmap) { |
| 1290 | n = &entry->offset_index; | 1284 | n = rb_prev(&entry->offset_index); |
| 1291 | while (1) { | 1285 | if (n) { |
| 1292 | n = rb_prev(n); | ||
| 1293 | if (!n) | ||
| 1294 | break; | ||
| 1295 | prev = rb_entry(n, struct btrfs_free_space, | 1286 | prev = rb_entry(n, struct btrfs_free_space, |
| 1296 | offset_index); | 1287 | offset_index); |
| 1297 | if (!prev->bitmap) { | 1288 | if (!prev->bitmap && |
| 1298 | if (prev->offset + prev->bytes > offset) | 1289 | prev->offset + prev->bytes > offset) |
| 1299 | return prev; | 1290 | return prev; |
| 1300 | break; | ||
| 1301 | } | ||
| 1302 | } | 1291 | } |
| 1303 | if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) | 1292 | if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) |
| 1304 | return entry; | 1293 | return entry; |
| @@ -1364,7 +1353,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
| 1364 | u64 bitmap_bytes; | 1353 | u64 bitmap_bytes; |
| 1365 | u64 extent_bytes; | 1354 | u64 extent_bytes; |
| 1366 | u64 size = block_group->key.offset; | 1355 | u64 size = block_group->key.offset; |
| 1367 | u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; | 1356 | u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; |
| 1368 | int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); | 1357 | int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); |
| 1369 | 1358 | ||
| 1370 | BUG_ON(ctl->total_bitmaps > max_bitmaps); | 1359 | BUG_ON(ctl->total_bitmaps > max_bitmaps); |
| @@ -1454,9 +1443,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl, | |||
| 1454 | max_t(u64, *offset, bitmap_info->offset)); | 1443 | max_t(u64, *offset, bitmap_info->offset)); |
| 1455 | bits = bytes_to_bits(*bytes, ctl->unit); | 1444 | bits = bytes_to_bits(*bytes, ctl->unit); |
| 1456 | 1445 | ||
| 1457 | for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); | 1446 | for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { |
| 1458 | i < BITS_PER_BITMAP; | ||
| 1459 | i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) { | ||
| 1460 | next_zero = find_next_zero_bit(bitmap_info->bitmap, | 1447 | next_zero = find_next_zero_bit(bitmap_info->bitmap, |
| 1461 | BITS_PER_BITMAP, i); | 1448 | BITS_PER_BITMAP, i); |
| 1462 | if ((next_zero - i) >= bits) { | 1449 | if ((next_zero - i) >= bits) { |
| @@ -1652,8 +1639,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, | |||
| 1652 | * some block groups are so tiny they can't be enveloped by a bitmap, so | 1639 | * some block groups are so tiny they can't be enveloped by a bitmap, so |
| 1653 | * don't even bother to create a bitmap for this | 1640 | * don't even bother to create a bitmap for this |
| 1654 | */ | 1641 | */ |
| 1655 | if (BITS_PER_BITMAP * block_group->sectorsize > | 1642 | if (BITS_PER_BITMAP * ctl->unit > block_group->key.offset) |
| 1656 | block_group->key.offset) | ||
| 1657 | return false; | 1643 | return false; |
| 1658 | 1644 | ||
| 1659 | return true; | 1645 | return true; |
| @@ -2300,16 +2286,14 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
| 2300 | unsigned long total_found = 0; | 2286 | unsigned long total_found = 0; |
| 2301 | int ret; | 2287 | int ret; |
| 2302 | 2288 | ||
| 2303 | i = offset_to_bit(entry->offset, block_group->sectorsize, | 2289 | i = offset_to_bit(entry->offset, ctl->unit, |
| 2304 | max_t(u64, offset, entry->offset)); | 2290 | max_t(u64, offset, entry->offset)); |
| 2305 | want_bits = bytes_to_bits(bytes, block_group->sectorsize); | 2291 | want_bits = bytes_to_bits(bytes, ctl->unit); |
| 2306 | min_bits = bytes_to_bits(min_bytes, block_group->sectorsize); | 2292 | min_bits = bytes_to_bits(min_bytes, ctl->unit); |
| 2307 | 2293 | ||
| 2308 | again: | 2294 | again: |
| 2309 | found_bits = 0; | 2295 | found_bits = 0; |
| 2310 | for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); | 2296 | for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { |
| 2311 | i < BITS_PER_BITMAP; | ||
| 2312 | i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { | ||
| 2313 | next_zero = find_next_zero_bit(entry->bitmap, | 2297 | next_zero = find_next_zero_bit(entry->bitmap, |
| 2314 | BITS_PER_BITMAP, i); | 2298 | BITS_PER_BITMAP, i); |
| 2315 | if (next_zero - i >= min_bits) { | 2299 | if (next_zero - i >= min_bits) { |
| @@ -2329,23 +2313,22 @@ again: | |||
| 2329 | 2313 | ||
| 2330 | total_found += found_bits; | 2314 | total_found += found_bits; |
| 2331 | 2315 | ||
| 2332 | if (cluster->max_size < found_bits * block_group->sectorsize) | 2316 | if (cluster->max_size < found_bits * ctl->unit) |
| 2333 | cluster->max_size = found_bits * block_group->sectorsize; | 2317 | cluster->max_size = found_bits * ctl->unit; |
| 2334 | 2318 | ||
| 2335 | if (total_found < want_bits || cluster->max_size < cont1_bytes) { | 2319 | if (total_found < want_bits || cluster->max_size < cont1_bytes) { |
| 2336 | i = next_zero + 1; | 2320 | i = next_zero + 1; |
| 2337 | goto again; | 2321 | goto again; |
| 2338 | } | 2322 | } |
| 2339 | 2323 | ||
| 2340 | cluster->window_start = start * block_group->sectorsize + | 2324 | cluster->window_start = start * ctl->unit + entry->offset; |
| 2341 | entry->offset; | ||
| 2342 | rb_erase(&entry->offset_index, &ctl->free_space_offset); | 2325 | rb_erase(&entry->offset_index, &ctl->free_space_offset); |
| 2343 | ret = tree_insert_offset(&cluster->root, entry->offset, | 2326 | ret = tree_insert_offset(&cluster->root, entry->offset, |
| 2344 | &entry->offset_index, 1); | 2327 | &entry->offset_index, 1); |
| 2345 | BUG_ON(ret); /* -EEXIST; Logic error */ | 2328 | BUG_ON(ret); /* -EEXIST; Logic error */ |
| 2346 | 2329 | ||
| 2347 | trace_btrfs_setup_cluster(block_group, cluster, | 2330 | trace_btrfs_setup_cluster(block_group, cluster, |
| 2348 | total_found * block_group->sectorsize, 1); | 2331 | total_found * ctl->unit, 1); |
| 2349 | return 0; | 2332 | return 0; |
| 2350 | } | 2333 | } |
| 2351 | 2334 | ||
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index db2ff9773b99..1d982812ab67 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h | |||
| @@ -24,4 +24,14 @@ static inline u64 btrfs_name_hash(const char *name, int len) | |||
| 24 | { | 24 | { |
| 25 | return crc32c((u32)~1, name, len); | 25 | return crc32c((u32)~1, name, len); |
| 26 | } | 26 | } |
| 27 | |||
| 28 | /* | ||
| 29 | * Figure the key offset of an extended inode ref | ||
| 30 | */ | ||
| 31 | static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, | ||
| 32 | int len) | ||
| 33 | { | ||
| 34 | return (u64) crc32c(parent_objectid, name, len); | ||
| 35 | } | ||
| 36 | |||
| 27 | #endif | 37 | #endif |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index a13cf1a96c73..48b8fda93132 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | #include "ctree.h" | 19 | #include "ctree.h" |
| 20 | #include "disk-io.h" | 20 | #include "disk-io.h" |
| 21 | #include "hash.h" | ||
| 21 | #include "transaction.h" | 22 | #include "transaction.h" |
| 22 | #include "print-tree.h" | 23 | #include "print-tree.h" |
| 23 | 24 | ||
| @@ -50,18 +51,57 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
| 50 | return 0; | 51 | return 0; |
| 51 | } | 52 | } |
| 52 | 53 | ||
| 53 | struct btrfs_inode_ref * | 54 | int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid, |
| 55 | const char *name, int name_len, | ||
| 56 | struct btrfs_inode_extref **extref_ret) | ||
| 57 | { | ||
| 58 | struct extent_buffer *leaf; | ||
| 59 | struct btrfs_inode_extref *extref; | ||
| 60 | unsigned long ptr; | ||
| 61 | unsigned long name_ptr; | ||
| 62 | u32 item_size; | ||
| 63 | u32 cur_offset = 0; | ||
| 64 | int ref_name_len; | ||
| 65 | |||
| 66 | leaf = path->nodes[0]; | ||
| 67 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 68 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 69 | |||
| 70 | /* | ||
| 71 | * Search all extended backrefs in this item. We're only | ||
| 72 | * looking through any collisions so most of the time this is | ||
| 73 | * just going to compare against one buffer. If all is well, | ||
| 74 | * we'll return success and the inode ref object. | ||
| 75 | */ | ||
| 76 | while (cur_offset < item_size) { | ||
| 77 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); | ||
| 78 | name_ptr = (unsigned long)(&extref->name); | ||
| 79 | ref_name_len = btrfs_inode_extref_name_len(leaf, extref); | ||
| 80 | |||
| 81 | if (ref_name_len == name_len && | ||
| 82 | btrfs_inode_extref_parent(leaf, extref) == ref_objectid && | ||
| 83 | (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)) { | ||
| 84 | if (extref_ret) | ||
| 85 | *extref_ret = extref; | ||
| 86 | return 1; | ||
| 87 | } | ||
| 88 | |||
| 89 | cur_offset += ref_name_len + sizeof(*extref); | ||
| 90 | } | ||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | |||
| 94 | static struct btrfs_inode_ref * | ||
| 54 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | 95 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, |
| 55 | struct btrfs_root *root, | 96 | struct btrfs_root *root, |
| 56 | struct btrfs_path *path, | 97 | struct btrfs_path *path, |
| 57 | const char *name, int name_len, | 98 | const char *name, int name_len, |
| 58 | u64 inode_objectid, u64 ref_objectid, int mod) | 99 | u64 inode_objectid, u64 ref_objectid, int ins_len, |
| 100 | int cow) | ||
| 59 | { | 101 | { |
| 102 | int ret; | ||
| 60 | struct btrfs_key key; | 103 | struct btrfs_key key; |
| 61 | struct btrfs_inode_ref *ref; | 104 | struct btrfs_inode_ref *ref; |
| 62 | int ins_len = mod < 0 ? -1 : 0; | ||
| 63 | int cow = mod != 0; | ||
| 64 | int ret; | ||
| 65 | 105 | ||
| 66 | key.objectid = inode_objectid; | 106 | key.objectid = inode_objectid; |
| 67 | key.type = BTRFS_INODE_REF_KEY; | 107 | key.type = BTRFS_INODE_REF_KEY; |
| @@ -77,13 +117,150 @@ btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | |||
| 77 | return ref; | 117 | return ref; |
| 78 | } | 118 | } |
| 79 | 119 | ||
| 80 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 120 | /* Returns NULL if no extref found */ |
| 121 | struct btrfs_inode_extref * | ||
| 122 | btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, | ||
| 123 | struct btrfs_root *root, | ||
| 124 | struct btrfs_path *path, | ||
| 125 | const char *name, int name_len, | ||
| 126 | u64 inode_objectid, u64 ref_objectid, int ins_len, | ||
| 127 | int cow) | ||
| 128 | { | ||
| 129 | int ret; | ||
| 130 | struct btrfs_key key; | ||
| 131 | struct btrfs_inode_extref *extref; | ||
| 132 | |||
| 133 | key.objectid = inode_objectid; | ||
| 134 | key.type = BTRFS_INODE_EXTREF_KEY; | ||
| 135 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
| 136 | |||
| 137 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
| 138 | if (ret < 0) | ||
| 139 | return ERR_PTR(ret); | ||
| 140 | if (ret > 0) | ||
| 141 | return NULL; | ||
| 142 | if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref)) | ||
| 143 | return NULL; | ||
| 144 | return extref; | ||
| 145 | } | ||
| 146 | |||
| 147 | int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans, | ||
| 148 | struct btrfs_root *root, | ||
| 149 | struct btrfs_path *path, | ||
| 150 | const char *name, int name_len, | ||
| 151 | u64 inode_objectid, u64 ref_objectid, int mod, | ||
| 152 | u64 *ret_index) | ||
| 153 | { | ||
| 154 | struct btrfs_inode_ref *ref; | ||
| 155 | struct btrfs_inode_extref *extref; | ||
| 156 | int ins_len = mod < 0 ? -1 : 0; | ||
| 157 | int cow = mod != 0; | ||
| 158 | |||
| 159 | ref = btrfs_lookup_inode_ref(trans, root, path, name, name_len, | ||
| 160 | inode_objectid, ref_objectid, ins_len, | ||
| 161 | cow); | ||
| 162 | if (IS_ERR(ref)) | ||
| 163 | return PTR_ERR(ref); | ||
| 164 | |||
| 165 | if (ref != NULL) { | ||
| 166 | *ret_index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
| 167 | return 0; | ||
| 168 | } | ||
| 169 | |||
| 170 | btrfs_release_path(path); | ||
| 171 | |||
| 172 | extref = btrfs_lookup_inode_extref(trans, root, path, name, | ||
| 173 | name_len, inode_objectid, | ||
| 174 | ref_objectid, ins_len, cow); | ||
| 175 | if (IS_ERR(extref)) | ||
| 176 | return PTR_ERR(extref); | ||
| 177 | |||
| 178 | if (extref) { | ||
| 179 | *ret_index = btrfs_inode_extref_index(path->nodes[0], extref); | ||
| 180 | return 0; | ||
| 181 | } | ||
| 182 | |||
| 183 | return -ENOENT; | ||
| 184 | } | ||
| 185 | |||
| 186 | int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, | ||
| 81 | struct btrfs_root *root, | 187 | struct btrfs_root *root, |
| 82 | const char *name, int name_len, | 188 | const char *name, int name_len, |
| 83 | u64 inode_objectid, u64 ref_objectid, u64 *index) | 189 | u64 inode_objectid, u64 ref_objectid, u64 *index) |
| 84 | { | 190 | { |
| 85 | struct btrfs_path *path; | 191 | struct btrfs_path *path; |
| 86 | struct btrfs_key key; | 192 | struct btrfs_key key; |
| 193 | struct btrfs_inode_extref *extref; | ||
| 194 | struct extent_buffer *leaf; | ||
| 195 | int ret; | ||
| 196 | int del_len = name_len + sizeof(*extref); | ||
| 197 | unsigned long ptr; | ||
| 198 | unsigned long item_start; | ||
| 199 | u32 item_size; | ||
| 200 | |||
| 201 | key.objectid = inode_objectid; | ||
| 202 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); | ||
| 203 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
| 204 | |||
| 205 | path = btrfs_alloc_path(); | ||
| 206 | if (!path) | ||
| 207 | return -ENOMEM; | ||
| 208 | |||
| 209 | path->leave_spinning = 1; | ||
| 210 | |||
| 211 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
| 212 | if (ret > 0) | ||
| 213 | ret = -ENOENT; | ||
| 214 | if (ret < 0) | ||
| 215 | goto out; | ||
| 216 | |||
| 217 | /* | ||
| 218 | * Sanity check - did we find the right item for this name? | ||
| 219 | * This should always succeed so error here will make the FS | ||
| 220 | * readonly. | ||
| 221 | */ | ||
| 222 | if (!btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
| 223 | name, name_len, &extref)) { | ||
| 224 | btrfs_std_error(root->fs_info, -ENOENT); | ||
| 225 | ret = -EROFS; | ||
| 226 | goto out; | ||
| 227 | } | ||
| 228 | |||
| 229 | leaf = path->nodes[0]; | ||
| 230 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 231 | if (index) | ||
| 232 | *index = btrfs_inode_extref_index(leaf, extref); | ||
| 233 | |||
| 234 | if (del_len == item_size) { | ||
| 235 | /* | ||
| 236 | * Common case only one ref in the item, remove the | ||
| 237 | * whole item. | ||
| 238 | */ | ||
| 239 | ret = btrfs_del_item(trans, root, path); | ||
| 240 | goto out; | ||
| 241 | } | ||
| 242 | |||
| 243 | ptr = (unsigned long)extref; | ||
| 244 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 245 | |||
| 246 | memmove_extent_buffer(leaf, ptr, ptr + del_len, | ||
| 247 | item_size - (ptr + del_len - item_start)); | ||
| 248 | |||
| 249 | btrfs_truncate_item(trans, root, path, item_size - del_len, 1); | ||
| 250 | |||
| 251 | out: | ||
| 252 | btrfs_free_path(path); | ||
| 253 | |||
| 254 | return ret; | ||
| 255 | } | ||
| 256 | |||
| 257 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | ||
| 258 | struct btrfs_root *root, | ||
| 259 | const char *name, int name_len, | ||
| 260 | u64 inode_objectid, u64 ref_objectid, u64 *index) | ||
| 261 | { | ||
| 262 | struct btrfs_path *path; | ||
| 263 | struct btrfs_key key; | ||
| 87 | struct btrfs_inode_ref *ref; | 264 | struct btrfs_inode_ref *ref; |
| 88 | struct extent_buffer *leaf; | 265 | struct extent_buffer *leaf; |
| 89 | unsigned long ptr; | 266 | unsigned long ptr; |
| @@ -91,6 +268,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 91 | u32 item_size; | 268 | u32 item_size; |
| 92 | u32 sub_item_len; | 269 | u32 sub_item_len; |
| 93 | int ret; | 270 | int ret; |
| 271 | int search_ext_refs = 0; | ||
| 94 | int del_len = name_len + sizeof(*ref); | 272 | int del_len = name_len + sizeof(*ref); |
| 95 | 273 | ||
| 96 | key.objectid = inode_objectid; | 274 | key.objectid = inode_objectid; |
| @@ -106,12 +284,14 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 106 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 284 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
| 107 | if (ret > 0) { | 285 | if (ret > 0) { |
| 108 | ret = -ENOENT; | 286 | ret = -ENOENT; |
| 287 | search_ext_refs = 1; | ||
| 109 | goto out; | 288 | goto out; |
| 110 | } else if (ret < 0) { | 289 | } else if (ret < 0) { |
| 111 | goto out; | 290 | goto out; |
| 112 | } | 291 | } |
| 113 | if (!find_name_in_backref(path, name, name_len, &ref)) { | 292 | if (!find_name_in_backref(path, name, name_len, &ref)) { |
| 114 | ret = -ENOENT; | 293 | ret = -ENOENT; |
| 294 | search_ext_refs = 1; | ||
| 115 | goto out; | 295 | goto out; |
| 116 | } | 296 | } |
| 117 | leaf = path->nodes[0]; | 297 | leaf = path->nodes[0]; |
| @@ -129,8 +309,78 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 129 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); | 309 | item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); |
| 130 | memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, | 310 | memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, |
| 131 | item_size - (ptr + sub_item_len - item_start)); | 311 | item_size - (ptr + sub_item_len - item_start)); |
| 132 | btrfs_truncate_item(trans, root, path, | 312 | btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1); |
| 133 | item_size - sub_item_len, 1); | 313 | out: |
| 314 | btrfs_free_path(path); | ||
| 315 | |||
| 316 | if (search_ext_refs) { | ||
| 317 | /* | ||
| 318 | * No refs were found, or we could not find the | ||
| 319 | * name in our ref array. Find and remove the extended | ||
| 320 | * inode ref then. | ||
| 321 | */ | ||
| 322 | return btrfs_del_inode_extref(trans, root, name, name_len, | ||
| 323 | inode_objectid, ref_objectid, index); | ||
| 324 | } | ||
| 325 | |||
| 326 | return ret; | ||
| 327 | } | ||
| 328 | |||
| 329 | /* | ||
| 330 | * btrfs_insert_inode_extref() - Inserts an extended inode ref into a tree. | ||
| 331 | * | ||
| 332 | * The caller must have checked against BTRFS_LINK_MAX already. | ||
| 333 | */ | ||
| 334 | static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, | ||
| 335 | struct btrfs_root *root, | ||
| 336 | const char *name, int name_len, | ||
| 337 | u64 inode_objectid, u64 ref_objectid, u64 index) | ||
| 338 | { | ||
| 339 | struct btrfs_inode_extref *extref; | ||
| 340 | int ret; | ||
| 341 | int ins_len = name_len + sizeof(*extref); | ||
| 342 | unsigned long ptr; | ||
| 343 | struct btrfs_path *path; | ||
| 344 | struct btrfs_key key; | ||
| 345 | struct extent_buffer *leaf; | ||
| 346 | struct btrfs_item *item; | ||
| 347 | |||
| 348 | key.objectid = inode_objectid; | ||
| 349 | key.type = BTRFS_INODE_EXTREF_KEY; | ||
| 350 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | ||
| 351 | |||
| 352 | path = btrfs_alloc_path(); | ||
| 353 | if (!path) | ||
| 354 | return -ENOMEM; | ||
| 355 | |||
| 356 | path->leave_spinning = 1; | ||
| 357 | ret = btrfs_insert_empty_item(trans, root, path, &key, | ||
| 358 | ins_len); | ||
| 359 | if (ret == -EEXIST) { | ||
| 360 | if (btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
| 361 | name, name_len, NULL)) | ||
| 362 | goto out; | ||
| 363 | |||
| 364 | btrfs_extend_item(trans, root, path, ins_len); | ||
| 365 | ret = 0; | ||
| 366 | } | ||
| 367 | if (ret < 0) | ||
| 368 | goto out; | ||
| 369 | |||
| 370 | leaf = path->nodes[0]; | ||
| 371 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
| 372 | ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char); | ||
| 373 | ptr += btrfs_item_size(leaf, item) - ins_len; | ||
| 374 | extref = (struct btrfs_inode_extref *)ptr; | ||
| 375 | |||
| 376 | btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len); | ||
| 377 | btrfs_set_inode_extref_index(path->nodes[0], extref, index); | ||
| 378 | btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid); | ||
| 379 | |||
| 380 | ptr = (unsigned long)&extref->name; | ||
| 381 | write_extent_buffer(path->nodes[0], name, ptr, name_len); | ||
| 382 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
| 383 | |||
| 134 | out: | 384 | out: |
| 135 | btrfs_free_path(path); | 385 | btrfs_free_path(path); |
| 136 | return ret; | 386 | return ret; |
| @@ -191,6 +441,19 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
| 191 | 441 | ||
| 192 | out: | 442 | out: |
| 193 | btrfs_free_path(path); | 443 | btrfs_free_path(path); |
| 444 | |||
| 445 | if (ret == -EMLINK) { | ||
| 446 | struct btrfs_super_block *disk_super = root->fs_info->super_copy; | ||
| 447 | /* We ran out of space in the ref array. Need to | ||
| 448 | * add an extended ref. */ | ||
| 449 | if (btrfs_super_incompat_flags(disk_super) | ||
| 450 | & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) | ||
| 451 | ret = btrfs_insert_inode_extref(trans, root, name, | ||
| 452 | name_len, | ||
| 453 | inode_objectid, | ||
| 454 | ref_objectid, index); | ||
| 455 | } | ||
| 456 | |||
| 194 | return ret; | 457 | return ret; |
| 195 | } | 458 | } |
| 196 | 459 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b1a1c929ba80..d26f67a59e36 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
| @@ -434,8 +434,9 @@ int btrfs_save_ino_cache(struct btrfs_root *root, | |||
| 434 | * 3 items for pre-allocation | 434 | * 3 items for pre-allocation |
| 435 | */ | 435 | */ |
| 436 | trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); | 436 | trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); |
| 437 | ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv, | 437 | ret = btrfs_block_rsv_add(root, trans->block_rsv, |
| 438 | trans->bytes_reserved); | 438 | trans->bytes_reserved, |
| 439 | BTRFS_RESERVE_NO_FLUSH); | ||
| 439 | if (ret) | 440 | if (ret) |
| 440 | goto out; | 441 | goto out; |
| 441 | trace_btrfs_space_reservation(root->fs_info, "ino_cache", | 442 | trace_btrfs_space_reservation(root->fs_info, "ino_cache", |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a6ed6944e50c..67ed24ae86bb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -71,6 +71,7 @@ static const struct file_operations btrfs_dir_file_operations; | |||
| 71 | static struct extent_io_ops btrfs_extent_io_ops; | 71 | static struct extent_io_ops btrfs_extent_io_ops; |
| 72 | 72 | ||
| 73 | static struct kmem_cache *btrfs_inode_cachep; | 73 | static struct kmem_cache *btrfs_inode_cachep; |
| 74 | static struct kmem_cache *btrfs_delalloc_work_cachep; | ||
| 74 | struct kmem_cache *btrfs_trans_handle_cachep; | 75 | struct kmem_cache *btrfs_trans_handle_cachep; |
| 75 | struct kmem_cache *btrfs_transaction_cachep; | 76 | struct kmem_cache *btrfs_transaction_cachep; |
| 76 | struct kmem_cache *btrfs_path_cachep; | 77 | struct kmem_cache *btrfs_path_cachep; |
| @@ -94,8 +95,10 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 94 | struct page *locked_page, | 95 | struct page *locked_page, |
| 95 | u64 start, u64 end, int *page_started, | 96 | u64 start, u64 end, int *page_started, |
| 96 | unsigned long *nr_written, int unlock); | 97 | unsigned long *nr_written, int unlock); |
| 97 | static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, | 98 | static struct extent_map *create_pinned_em(struct inode *inode, u64 start, |
| 98 | struct btrfs_root *root, struct inode *inode); | 99 | u64 len, u64 orig_start, |
| 100 | u64 block_start, u64 block_len, | ||
| 101 | u64 orig_block_len, int type); | ||
| 99 | 102 | ||
| 100 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | 103 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, |
| 101 | struct inode *inode, struct inode *dir, | 104 | struct inode *inode, struct inode *dir, |
| @@ -230,7 +233,6 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 230 | u64 inline_len = actual_end - start; | 233 | u64 inline_len = actual_end - start; |
| 231 | u64 aligned_end = (end + root->sectorsize - 1) & | 234 | u64 aligned_end = (end + root->sectorsize - 1) & |
| 232 | ~((u64)root->sectorsize - 1); | 235 | ~((u64)root->sectorsize - 1); |
| 233 | u64 hint_byte; | ||
| 234 | u64 data_len = inline_len; | 236 | u64 data_len = inline_len; |
| 235 | int ret; | 237 | int ret; |
| 236 | 238 | ||
| @@ -247,8 +249,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 247 | return 1; | 249 | return 1; |
| 248 | } | 250 | } |
| 249 | 251 | ||
| 250 | ret = btrfs_drop_extents(trans, inode, start, aligned_end, | 252 | ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); |
| 251 | &hint_byte, 1); | ||
| 252 | if (ret) | 253 | if (ret) |
| 253 | return ret; | 254 | return ret; |
| 254 | 255 | ||
| @@ -664,7 +665,7 @@ retry: | |||
| 664 | async_extent->compressed_size, | 665 | async_extent->compressed_size, |
| 665 | async_extent->compressed_size, | 666 | async_extent->compressed_size, |
| 666 | 0, alloc_hint, &ins, 1); | 667 | 0, alloc_hint, &ins, 1); |
| 667 | if (ret) | 668 | if (ret && ret != -ENOSPC) |
| 668 | btrfs_abort_transaction(trans, root, ret); | 669 | btrfs_abort_transaction(trans, root, ret); |
| 669 | btrfs_end_transaction(trans, root); | 670 | btrfs_end_transaction(trans, root); |
| 670 | } | 671 | } |
| @@ -702,14 +703,19 @@ retry: | |||
| 702 | 703 | ||
| 703 | em->block_start = ins.objectid; | 704 | em->block_start = ins.objectid; |
| 704 | em->block_len = ins.offset; | 705 | em->block_len = ins.offset; |
| 706 | em->orig_block_len = ins.offset; | ||
| 705 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 707 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 706 | em->compress_type = async_extent->compress_type; | 708 | em->compress_type = async_extent->compress_type; |
| 707 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 709 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 708 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 710 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 711 | em->generation = -1; | ||
| 709 | 712 | ||
| 710 | while (1) { | 713 | while (1) { |
| 711 | write_lock(&em_tree->lock); | 714 | write_lock(&em_tree->lock); |
| 712 | ret = add_extent_mapping(em_tree, em); | 715 | ret = add_extent_mapping(em_tree, em); |
| 716 | if (!ret) | ||
| 717 | list_move(&em->list, | ||
| 718 | &em_tree->modified_extents); | ||
| 713 | write_unlock(&em_tree->lock); | 719 | write_unlock(&em_tree->lock); |
| 714 | if (ret != -EEXIST) { | 720 | if (ret != -EEXIST) { |
| 715 | free_extent_map(em); | 721 | free_extent_map(em); |
| @@ -807,14 +813,14 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | |||
| 807 | * required to start IO on it. It may be clean and already done with | 813 | * required to start IO on it. It may be clean and already done with |
| 808 | * IO when we return. | 814 | * IO when we return. |
| 809 | */ | 815 | */ |
| 810 | static noinline int cow_file_range(struct inode *inode, | 816 | static noinline int __cow_file_range(struct btrfs_trans_handle *trans, |
| 811 | struct page *locked_page, | 817 | struct inode *inode, |
| 812 | u64 start, u64 end, int *page_started, | 818 | struct btrfs_root *root, |
| 813 | unsigned long *nr_written, | 819 | struct page *locked_page, |
| 814 | int unlock) | 820 | u64 start, u64 end, int *page_started, |
| 821 | unsigned long *nr_written, | ||
| 822 | int unlock) | ||
| 815 | { | 823 | { |
| 816 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 817 | struct btrfs_trans_handle *trans; | ||
| 818 | u64 alloc_hint = 0; | 824 | u64 alloc_hint = 0; |
| 819 | u64 num_bytes; | 825 | u64 num_bytes; |
| 820 | unsigned long ram_size; | 826 | unsigned long ram_size; |
| @@ -827,25 +833,10 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 827 | int ret = 0; | 833 | int ret = 0; |
| 828 | 834 | ||
| 829 | BUG_ON(btrfs_is_free_space_inode(inode)); | 835 | BUG_ON(btrfs_is_free_space_inode(inode)); |
| 830 | trans = btrfs_join_transaction(root); | ||
| 831 | if (IS_ERR(trans)) { | ||
| 832 | extent_clear_unlock_delalloc(inode, | ||
| 833 | &BTRFS_I(inode)->io_tree, | ||
| 834 | start, end, locked_page, | ||
| 835 | EXTENT_CLEAR_UNLOCK_PAGE | | ||
| 836 | EXTENT_CLEAR_UNLOCK | | ||
| 837 | EXTENT_CLEAR_DELALLOC | | ||
| 838 | EXTENT_CLEAR_DIRTY | | ||
| 839 | EXTENT_SET_WRITEBACK | | ||
| 840 | EXTENT_END_WRITEBACK); | ||
| 841 | return PTR_ERR(trans); | ||
| 842 | } | ||
| 843 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 844 | 836 | ||
| 845 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 837 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
| 846 | num_bytes = max(blocksize, num_bytes); | 838 | num_bytes = max(blocksize, num_bytes); |
| 847 | disk_num_bytes = num_bytes; | 839 | disk_num_bytes = num_bytes; |
| 848 | ret = 0; | ||
| 849 | 840 | ||
| 850 | /* if this is a small write inside eof, kick off defrag */ | 841 | /* if this is a small write inside eof, kick off defrag */ |
| 851 | if (num_bytes < 64 * 1024 && | 842 | if (num_bytes < 64 * 1024 && |
| @@ -904,12 +895,17 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 904 | 895 | ||
| 905 | em->block_start = ins.objectid; | 896 | em->block_start = ins.objectid; |
| 906 | em->block_len = ins.offset; | 897 | em->block_len = ins.offset; |
| 898 | em->orig_block_len = ins.offset; | ||
| 907 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 899 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 908 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 900 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 901 | em->generation = -1; | ||
| 909 | 902 | ||
| 910 | while (1) { | 903 | while (1) { |
| 911 | write_lock(&em_tree->lock); | 904 | write_lock(&em_tree->lock); |
| 912 | ret = add_extent_mapping(em_tree, em); | 905 | ret = add_extent_mapping(em_tree, em); |
| 906 | if (!ret) | ||
| 907 | list_move(&em->list, | ||
| 908 | &em_tree->modified_extents); | ||
| 913 | write_unlock(&em_tree->lock); | 909 | write_unlock(&em_tree->lock); |
| 914 | if (ret != -EEXIST) { | 910 | if (ret != -EEXIST) { |
| 915 | free_extent_map(em); | 911 | free_extent_map(em); |
| @@ -956,11 +952,9 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 956 | alloc_hint = ins.objectid + ins.offset; | 952 | alloc_hint = ins.objectid + ins.offset; |
| 957 | start += cur_alloc_size; | 953 | start += cur_alloc_size; |
| 958 | } | 954 | } |
| 959 | ret = 0; | ||
| 960 | out: | 955 | out: |
| 961 | btrfs_end_transaction(trans, root); | ||
| 962 | |||
| 963 | return ret; | 956 | return ret; |
| 957 | |||
| 964 | out_unlock: | 958 | out_unlock: |
| 965 | extent_clear_unlock_delalloc(inode, | 959 | extent_clear_unlock_delalloc(inode, |
| 966 | &BTRFS_I(inode)->io_tree, | 960 | &BTRFS_I(inode)->io_tree, |
| @@ -975,6 +969,39 @@ out_unlock: | |||
| 975 | goto out; | 969 | goto out; |
| 976 | } | 970 | } |
| 977 | 971 | ||
| 972 | static noinline int cow_file_range(struct inode *inode, | ||
| 973 | struct page *locked_page, | ||
| 974 | u64 start, u64 end, int *page_started, | ||
| 975 | unsigned long *nr_written, | ||
| 976 | int unlock) | ||
| 977 | { | ||
| 978 | struct btrfs_trans_handle *trans; | ||
| 979 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 980 | int ret; | ||
| 981 | |||
| 982 | trans = btrfs_join_transaction(root); | ||
| 983 | if (IS_ERR(trans)) { | ||
| 984 | extent_clear_unlock_delalloc(inode, | ||
| 985 | &BTRFS_I(inode)->io_tree, | ||
| 986 | start, end, locked_page, | ||
| 987 | EXTENT_CLEAR_UNLOCK_PAGE | | ||
| 988 | EXTENT_CLEAR_UNLOCK | | ||
| 989 | EXTENT_CLEAR_DELALLOC | | ||
| 990 | EXTENT_CLEAR_DIRTY | | ||
| 991 | EXTENT_SET_WRITEBACK | | ||
| 992 | EXTENT_END_WRITEBACK); | ||
| 993 | return PTR_ERR(trans); | ||
| 994 | } | ||
| 995 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 996 | |||
| 997 | ret = __cow_file_range(trans, inode, root, locked_page, start, end, | ||
| 998 | page_started, nr_written, unlock); | ||
| 999 | |||
| 1000 | btrfs_end_transaction(trans, root); | ||
| 1001 | |||
| 1002 | return ret; | ||
| 1003 | } | ||
| 1004 | |||
| 978 | /* | 1005 | /* |
| 979 | * work queue call back to started compression on a file and pages | 1006 | * work queue call back to started compression on a file and pages |
| 980 | */ | 1007 | */ |
| @@ -1130,6 +1157,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
| 1130 | u64 extent_offset; | 1157 | u64 extent_offset; |
| 1131 | u64 disk_bytenr; | 1158 | u64 disk_bytenr; |
| 1132 | u64 num_bytes; | 1159 | u64 num_bytes; |
| 1160 | u64 disk_num_bytes; | ||
| 1133 | int extent_type; | 1161 | int extent_type; |
| 1134 | int ret, err; | 1162 | int ret, err; |
| 1135 | int type; | 1163 | int type; |
| @@ -1232,6 +1260,8 @@ next_slot: | |||
| 1232 | extent_offset = btrfs_file_extent_offset(leaf, fi); | 1260 | extent_offset = btrfs_file_extent_offset(leaf, fi); |
| 1233 | extent_end = found_key.offset + | 1261 | extent_end = found_key.offset + |
| 1234 | btrfs_file_extent_num_bytes(leaf, fi); | 1262 | btrfs_file_extent_num_bytes(leaf, fi); |
| 1263 | disk_num_bytes = | ||
| 1264 | btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
| 1235 | if (extent_end <= start) { | 1265 | if (extent_end <= start) { |
| 1236 | path->slots[0]++; | 1266 | path->slots[0]++; |
| 1237 | goto next_slot; | 1267 | goto next_slot; |
| @@ -1285,9 +1315,9 @@ out_check: | |||
| 1285 | 1315 | ||
| 1286 | btrfs_release_path(path); | 1316 | btrfs_release_path(path); |
| 1287 | if (cow_start != (u64)-1) { | 1317 | if (cow_start != (u64)-1) { |
| 1288 | ret = cow_file_range(inode, locked_page, cow_start, | 1318 | ret = __cow_file_range(trans, inode, root, locked_page, |
| 1289 | found_key.offset - 1, page_started, | 1319 | cow_start, found_key.offset - 1, |
| 1290 | nr_written, 1); | 1320 | page_started, nr_written, 1); |
| 1291 | if (ret) { | 1321 | if (ret) { |
| 1292 | btrfs_abort_transaction(trans, root, ret); | 1322 | btrfs_abort_transaction(trans, root, ret); |
| 1293 | goto error; | 1323 | goto error; |
| @@ -1302,15 +1332,21 @@ out_check: | |||
| 1302 | em = alloc_extent_map(); | 1332 | em = alloc_extent_map(); |
| 1303 | BUG_ON(!em); /* -ENOMEM */ | 1333 | BUG_ON(!em); /* -ENOMEM */ |
| 1304 | em->start = cur_offset; | 1334 | em->start = cur_offset; |
| 1305 | em->orig_start = em->start; | 1335 | em->orig_start = found_key.offset - extent_offset; |
| 1306 | em->len = num_bytes; | 1336 | em->len = num_bytes; |
| 1307 | em->block_len = num_bytes; | 1337 | em->block_len = num_bytes; |
| 1308 | em->block_start = disk_bytenr; | 1338 | em->block_start = disk_bytenr; |
| 1339 | em->orig_block_len = disk_num_bytes; | ||
| 1309 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1340 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 1310 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1341 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 1342 | set_bit(EXTENT_FLAG_FILLING, &em->flags); | ||
| 1343 | em->generation = -1; | ||
| 1311 | while (1) { | 1344 | while (1) { |
| 1312 | write_lock(&em_tree->lock); | 1345 | write_lock(&em_tree->lock); |
| 1313 | ret = add_extent_mapping(em_tree, em); | 1346 | ret = add_extent_mapping(em_tree, em); |
| 1347 | if (!ret) | ||
| 1348 | list_move(&em->list, | ||
| 1349 | &em_tree->modified_extents); | ||
| 1314 | write_unlock(&em_tree->lock); | 1350 | write_unlock(&em_tree->lock); |
| 1315 | if (ret != -EEXIST) { | 1351 | if (ret != -EEXIST) { |
| 1316 | free_extent_map(em); | 1352 | free_extent_map(em); |
| @@ -1355,8 +1391,9 @@ out_check: | |||
| 1355 | } | 1391 | } |
| 1356 | 1392 | ||
| 1357 | if (cow_start != (u64)-1) { | 1393 | if (cow_start != (u64)-1) { |
| 1358 | ret = cow_file_range(inode, locked_page, cow_start, end, | 1394 | ret = __cow_file_range(trans, inode, root, locked_page, |
| 1359 | page_started, nr_written, 1); | 1395 | cow_start, end, |
| 1396 | page_started, nr_written, 1); | ||
| 1360 | if (ret) { | 1397 | if (ret) { |
| 1361 | btrfs_abort_transaction(trans, root, ret); | 1398 | btrfs_abort_transaction(trans, root, ret); |
| 1362 | goto error; | 1399 | goto error; |
| @@ -1364,11 +1401,7 @@ out_check: | |||
| 1364 | } | 1401 | } |
| 1365 | 1402 | ||
| 1366 | error: | 1403 | error: |
| 1367 | if (nolock) { | 1404 | err = btrfs_end_transaction(trans, root); |
| 1368 | err = btrfs_end_transaction_nolock(trans, root); | ||
| 1369 | } else { | ||
| 1370 | err = btrfs_end_transaction(trans, root); | ||
| 1371 | } | ||
| 1372 | if (!ret) | 1405 | if (!ret) |
| 1373 | ret = err; | 1406 | ret = err; |
| 1374 | 1407 | ||
| @@ -1538,7 +1571,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
| 1538 | unsigned long bio_flags) | 1571 | unsigned long bio_flags) |
| 1539 | { | 1572 | { |
| 1540 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 1573 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
| 1541 | struct btrfs_mapping_tree *map_tree; | ||
| 1542 | u64 logical = (u64)bio->bi_sector << 9; | 1574 | u64 logical = (u64)bio->bi_sector << 9; |
| 1543 | u64 length = 0; | 1575 | u64 length = 0; |
| 1544 | u64 map_length; | 1576 | u64 map_length; |
| @@ -1548,11 +1580,10 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
| 1548 | return 0; | 1580 | return 0; |
| 1549 | 1581 | ||
| 1550 | length = bio->bi_size; | 1582 | length = bio->bi_size; |
| 1551 | map_tree = &root->fs_info->mapping_tree; | ||
| 1552 | map_length = length; | 1583 | map_length = length; |
| 1553 | ret = btrfs_map_block(map_tree, READ, logical, | 1584 | ret = btrfs_map_block(root->fs_info, READ, logical, |
| 1554 | &map_length, NULL, 0); | 1585 | &map_length, NULL, 0); |
| 1555 | /* Will always return 0 or 1 with map_multi == NULL */ | 1586 | /* Will always return 0 with map_multi == NULL */ |
| 1556 | BUG_ON(ret < 0); | 1587 | BUG_ON(ret < 0); |
| 1557 | if (map_length < length + size) | 1588 | if (map_length < length + size) |
| 1558 | return 1; | 1589 | return 1; |
| @@ -1593,7 +1624,12 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 1593 | u64 bio_offset) | 1624 | u64 bio_offset) |
| 1594 | { | 1625 | { |
| 1595 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1626 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1596 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 1627 | int ret; |
| 1628 | |||
| 1629 | ret = btrfs_map_bio(root, rw, bio, mirror_num, 1); | ||
| 1630 | if (ret) | ||
| 1631 | bio_endio(bio, ret); | ||
| 1632 | return ret; | ||
| 1597 | } | 1633 | } |
| 1598 | 1634 | ||
| 1599 | /* | 1635 | /* |
| @@ -1608,6 +1644,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 1608 | int ret = 0; | 1644 | int ret = 0; |
| 1609 | int skip_sum; | 1645 | int skip_sum; |
| 1610 | int metadata = 0; | 1646 | int metadata = 0; |
| 1647 | int async = !atomic_read(&BTRFS_I(inode)->sync_writers); | ||
| 1611 | 1648 | ||
| 1612 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1649 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
| 1613 | 1650 | ||
| @@ -1617,31 +1654,43 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 1617 | if (!(rw & REQ_WRITE)) { | 1654 | if (!(rw & REQ_WRITE)) { |
| 1618 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); | 1655 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); |
| 1619 | if (ret) | 1656 | if (ret) |
| 1620 | return ret; | 1657 | goto out; |
| 1621 | 1658 | ||
| 1622 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1659 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
| 1623 | return btrfs_submit_compressed_read(inode, bio, | 1660 | ret = btrfs_submit_compressed_read(inode, bio, |
| 1624 | mirror_num, bio_flags); | 1661 | mirror_num, |
| 1662 | bio_flags); | ||
| 1663 | goto out; | ||
| 1625 | } else if (!skip_sum) { | 1664 | } else if (!skip_sum) { |
| 1626 | ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); | 1665 | ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); |
| 1627 | if (ret) | 1666 | if (ret) |
| 1628 | return ret; | 1667 | goto out; |
| 1629 | } | 1668 | } |
| 1630 | goto mapit; | 1669 | goto mapit; |
| 1631 | } else if (!skip_sum) { | 1670 | } else if (async && !skip_sum) { |
| 1632 | /* csum items have already been cloned */ | 1671 | /* csum items have already been cloned */ |
| 1633 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | 1672 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) |
| 1634 | goto mapit; | 1673 | goto mapit; |
| 1635 | /* we're doing a write, do the async checksumming */ | 1674 | /* we're doing a write, do the async checksumming */ |
| 1636 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1675 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
| 1637 | inode, rw, bio, mirror_num, | 1676 | inode, rw, bio, mirror_num, |
| 1638 | bio_flags, bio_offset, | 1677 | bio_flags, bio_offset, |
| 1639 | __btrfs_submit_bio_start, | 1678 | __btrfs_submit_bio_start, |
| 1640 | __btrfs_submit_bio_done); | 1679 | __btrfs_submit_bio_done); |
| 1680 | goto out; | ||
| 1681 | } else if (!skip_sum) { | ||
| 1682 | ret = btrfs_csum_one_bio(root, inode, bio, 0, 0); | ||
| 1683 | if (ret) | ||
| 1684 | goto out; | ||
| 1641 | } | 1685 | } |
| 1642 | 1686 | ||
| 1643 | mapit: | 1687 | mapit: |
| 1644 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); | 1688 | ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); |
| 1689 | |||
| 1690 | out: | ||
| 1691 | if (ret < 0) | ||
| 1692 | bio_endio(bio, ret); | ||
| 1693 | return ret; | ||
| 1645 | } | 1694 | } |
| 1646 | 1695 | ||
| 1647 | /* | 1696 | /* |
| @@ -1664,8 +1713,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | |||
| 1664 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 1713 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 1665 | struct extent_state **cached_state) | 1714 | struct extent_state **cached_state) |
| 1666 | { | 1715 | { |
| 1667 | if ((end & (PAGE_CACHE_SIZE - 1)) == 0) | 1716 | WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0); |
| 1668 | WARN_ON(1); | ||
| 1669 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, | 1717 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, |
| 1670 | cached_state, GFP_NOFS); | 1718 | cached_state, GFP_NOFS); |
| 1671 | } | 1719 | } |
| @@ -1785,7 +1833,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1785 | struct btrfs_path *path; | 1833 | struct btrfs_path *path; |
| 1786 | struct extent_buffer *leaf; | 1834 | struct extent_buffer *leaf; |
| 1787 | struct btrfs_key ins; | 1835 | struct btrfs_key ins; |
| 1788 | u64 hint; | ||
| 1789 | int ret; | 1836 | int ret; |
| 1790 | 1837 | ||
| 1791 | path = btrfs_alloc_path(); | 1838 | path = btrfs_alloc_path(); |
| @@ -1803,8 +1850,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1803 | * the caller is expected to unpin it and allow it to be merged | 1850 | * the caller is expected to unpin it and allow it to be merged |
| 1804 | * with the others. | 1851 | * with the others. |
| 1805 | */ | 1852 | */ |
| 1806 | ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, | 1853 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
| 1807 | &hint, 0); | 1854 | file_pos + num_bytes, 0); |
| 1808 | if (ret) | 1855 | if (ret) |
| 1809 | goto out; | 1856 | goto out; |
| 1810 | 1857 | ||
| @@ -1828,10 +1875,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1828 | btrfs_set_file_extent_encryption(leaf, fi, encryption); | 1875 | btrfs_set_file_extent_encryption(leaf, fi, encryption); |
| 1829 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); | 1876 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); |
| 1830 | 1877 | ||
| 1831 | btrfs_unlock_up_safe(path, 1); | ||
| 1832 | btrfs_set_lock_blocking(leaf); | ||
| 1833 | |||
| 1834 | btrfs_mark_buffer_dirty(leaf); | 1878 | btrfs_mark_buffer_dirty(leaf); |
| 1879 | btrfs_release_path(path); | ||
| 1835 | 1880 | ||
| 1836 | inode_add_bytes(inode, num_bytes); | 1881 | inode_add_bytes(inode, num_bytes); |
| 1837 | 1882 | ||
| @@ -1877,22 +1922,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 1877 | 1922 | ||
| 1878 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1923 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
| 1879 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ | 1924 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ |
| 1880 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1925 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| 1881 | if (!ret) { | 1926 | if (nolock) |
| 1882 | if (nolock) | 1927 | trans = btrfs_join_transaction_nolock(root); |
| 1883 | trans = btrfs_join_transaction_nolock(root); | 1928 | else |
| 1884 | else | 1929 | trans = btrfs_join_transaction(root); |
| 1885 | trans = btrfs_join_transaction(root); | 1930 | if (IS_ERR(trans)) { |
| 1886 | if (IS_ERR(trans)) { | 1931 | ret = PTR_ERR(trans); |
| 1887 | ret = PTR_ERR(trans); | 1932 | trans = NULL; |
| 1888 | trans = NULL; | 1933 | goto out; |
| 1889 | goto out; | ||
| 1890 | } | ||
| 1891 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 1892 | ret = btrfs_update_inode_fallback(trans, root, inode); | ||
| 1893 | if (ret) /* -ENOMEM or corruption */ | ||
| 1894 | btrfs_abort_transaction(trans, root, ret); | ||
| 1895 | } | 1934 | } |
| 1935 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 1936 | ret = btrfs_update_inode_fallback(trans, root, inode); | ||
| 1937 | if (ret) /* -ENOMEM or corruption */ | ||
| 1938 | btrfs_abort_transaction(trans, root, ret); | ||
| 1896 | goto out; | 1939 | goto out; |
| 1897 | } | 1940 | } |
| 1898 | 1941 | ||
| @@ -1929,11 +1972,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 1929 | ordered_extent->len, | 1972 | ordered_extent->len, |
| 1930 | compress_type, 0, 0, | 1973 | compress_type, 0, 0, |
| 1931 | BTRFS_FILE_EXTENT_REG); | 1974 | BTRFS_FILE_EXTENT_REG); |
| 1932 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
| 1933 | ordered_extent->file_offset, | ||
| 1934 | ordered_extent->len); | ||
| 1935 | } | 1975 | } |
| 1936 | 1976 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | |
| 1977 | ordered_extent->file_offset, ordered_extent->len, | ||
| 1978 | trans->transid); | ||
| 1937 | if (ret < 0) { | 1979 | if (ret < 0) { |
| 1938 | btrfs_abort_transaction(trans, root, ret); | 1980 | btrfs_abort_transaction(trans, root, ret); |
| 1939 | goto out_unlock; | 1981 | goto out_unlock; |
| @@ -1942,13 +1984,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 1942 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1984 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
| 1943 | &ordered_extent->list); | 1985 | &ordered_extent->list); |
| 1944 | 1986 | ||
| 1945 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1987 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| 1946 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1988 | ret = btrfs_update_inode_fallback(trans, root, inode); |
| 1947 | ret = btrfs_update_inode_fallback(trans, root, inode); | 1989 | if (ret) { /* -ENOMEM or corruption */ |
| 1948 | if (ret) { /* -ENOMEM or corruption */ | 1990 | btrfs_abort_transaction(trans, root, ret); |
| 1949 | btrfs_abort_transaction(trans, root, ret); | 1991 | goto out_unlock; |
| 1950 | goto out_unlock; | ||
| 1951 | } | ||
| 1952 | } | 1992 | } |
| 1953 | ret = 0; | 1993 | ret = 0; |
| 1954 | out_unlock: | 1994 | out_unlock: |
| @@ -1958,12 +1998,8 @@ out_unlock: | |||
| 1958 | out: | 1998 | out: |
| 1959 | if (root != root->fs_info->tree_root) | 1999 | if (root != root->fs_info->tree_root) |
| 1960 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 2000 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
| 1961 | if (trans) { | 2001 | if (trans) |
| 1962 | if (nolock) | 2002 | btrfs_end_transaction(trans, root); |
| 1963 | btrfs_end_transaction_nolock(trans, root); | ||
| 1964 | else | ||
| 1965 | btrfs_end_transaction(trans, root); | ||
| 1966 | } | ||
| 1967 | 2003 | ||
| 1968 | if (ret) | 2004 | if (ret) |
| 1969 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, | 2005 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, |
| @@ -2119,7 +2155,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
| 2119 | if (empty) | 2155 | if (empty) |
| 2120 | return; | 2156 | return; |
| 2121 | 2157 | ||
| 2122 | down_read(&root->fs_info->cleanup_work_sem); | ||
| 2123 | spin_lock(&fs_info->delayed_iput_lock); | 2158 | spin_lock(&fs_info->delayed_iput_lock); |
| 2124 | list_splice_init(&fs_info->delayed_iputs, &list); | 2159 | list_splice_init(&fs_info->delayed_iputs, &list); |
| 2125 | spin_unlock(&fs_info->delayed_iput_lock); | 2160 | spin_unlock(&fs_info->delayed_iput_lock); |
| @@ -2130,7 +2165,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
| 2130 | iput(delayed->inode); | 2165 | iput(delayed->inode); |
| 2131 | kfree(delayed); | 2166 | kfree(delayed); |
| 2132 | } | 2167 | } |
| 2133 | up_read(&root->fs_info->cleanup_work_sem); | ||
| 2134 | } | 2168 | } |
| 2135 | 2169 | ||
| 2136 | enum btrfs_orphan_cleanup_state { | 2170 | enum btrfs_orphan_cleanup_state { |
| @@ -2198,7 +2232,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2198 | int ret; | 2232 | int ret; |
| 2199 | 2233 | ||
| 2200 | if (!root->orphan_block_rsv) { | 2234 | if (!root->orphan_block_rsv) { |
| 2201 | block_rsv = btrfs_alloc_block_rsv(root); | 2235 | block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
| 2202 | if (!block_rsv) | 2236 | if (!block_rsv) |
| 2203 | return -ENOMEM; | 2237 | return -ENOMEM; |
| 2204 | } | 2238 | } |
| @@ -2225,7 +2259,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2225 | insert = 1; | 2259 | insert = 1; |
| 2226 | #endif | 2260 | #endif |
| 2227 | insert = 1; | 2261 | insert = 1; |
| 2228 | atomic_dec(&root->orphan_inodes); | 2262 | atomic_inc(&root->orphan_inodes); |
| 2229 | } | 2263 | } |
| 2230 | 2264 | ||
| 2231 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, | 2265 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
| @@ -2590,6 +2624,18 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
| 2590 | 2624 | ||
| 2591 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); | 2625 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); |
| 2592 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); | 2626 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); |
| 2627 | BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); | ||
| 2628 | |||
| 2629 | /* | ||
| 2630 | * If we were modified in the current generation and evicted from memory | ||
| 2631 | * and then re-read we need to do a full sync since we don't have any | ||
| 2632 | * idea about which extents were modified before we were evicted from | ||
| 2633 | * cache. | ||
| 2634 | */ | ||
| 2635 | if (BTRFS_I(inode)->last_trans == root->fs_info->generation) | ||
| 2636 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 2637 | &BTRFS_I(inode)->runtime_flags); | ||
| 2638 | |||
| 2593 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); | 2639 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); |
| 2594 | inode->i_generation = BTRFS_I(inode)->generation; | 2640 | inode->i_generation = BTRFS_I(inode)->generation; |
| 2595 | inode->i_rdev = 0; | 2641 | inode->i_rdev = 0; |
| @@ -2747,8 +2793,9 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
| 2747 | return btrfs_update_inode_item(trans, root, inode); | 2793 | return btrfs_update_inode_item(trans, root, inode); |
| 2748 | } | 2794 | } |
| 2749 | 2795 | ||
| 2750 | static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, | 2796 | noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, |
| 2751 | struct btrfs_root *root, struct inode *inode) | 2797 | struct btrfs_root *root, |
| 2798 | struct inode *inode) | ||
| 2752 | { | 2799 | { |
| 2753 | int ret; | 2800 | int ret; |
| 2754 | 2801 | ||
| @@ -2894,7 +2941,6 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 2894 | struct btrfs_trans_handle *trans; | 2941 | struct btrfs_trans_handle *trans; |
| 2895 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2942 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 2896 | struct btrfs_path *path; | 2943 | struct btrfs_path *path; |
| 2897 | struct btrfs_inode_ref *ref; | ||
| 2898 | struct btrfs_dir_item *di; | 2944 | struct btrfs_dir_item *di; |
| 2899 | struct inode *inode = dentry->d_inode; | 2945 | struct inode *inode = dentry->d_inode; |
| 2900 | u64 index; | 2946 | u64 index; |
| @@ -3008,17 +3054,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
| 3008 | } | 3054 | } |
| 3009 | btrfs_release_path(path); | 3055 | btrfs_release_path(path); |
| 3010 | 3056 | ||
| 3011 | ref = btrfs_lookup_inode_ref(trans, root, path, | 3057 | ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name, |
| 3012 | dentry->d_name.name, dentry->d_name.len, | 3058 | dentry->d_name.len, ino, dir_ino, 0, |
| 3013 | ino, dir_ino, 0); | 3059 | &index); |
| 3014 | if (IS_ERR(ref)) { | 3060 | if (ret) { |
| 3015 | err = PTR_ERR(ref); | 3061 | err = ret; |
| 3016 | goto out; | 3062 | goto out; |
| 3017 | } | 3063 | } |
| 3018 | BUG_ON(!ref); /* Logic error */ | 3064 | |
| 3019 | if (check_path_shared(root, path)) | 3065 | if (check_path_shared(root, path)) |
| 3020 | goto out; | 3066 | goto out; |
| 3021 | index = btrfs_inode_ref_index(path->nodes[0], ref); | 3067 | |
| 3022 | btrfs_release_path(path); | 3068 | btrfs_release_path(path); |
| 3023 | 3069 | ||
| 3024 | /* | 3070 | /* |
| @@ -3061,7 +3107,7 @@ out: | |||
| 3061 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | 3107 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, |
| 3062 | struct btrfs_root *root) | 3108 | struct btrfs_root *root) |
| 3063 | { | 3109 | { |
| 3064 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | 3110 | if (trans->block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL) { |
| 3065 | btrfs_block_rsv_release(root, trans->block_rsv, | 3111 | btrfs_block_rsv_release(root, trans->block_rsv, |
| 3066 | trans->bytes_reserved); | 3112 | trans->bytes_reserved); |
| 3067 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 3113 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| @@ -3077,7 +3123,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 3077 | struct btrfs_trans_handle *trans; | 3123 | struct btrfs_trans_handle *trans; |
| 3078 | struct inode *inode = dentry->d_inode; | 3124 | struct inode *inode = dentry->d_inode; |
| 3079 | int ret; | 3125 | int ret; |
| 3080 | unsigned long nr = 0; | ||
| 3081 | 3126 | ||
| 3082 | trans = __unlink_start_trans(dir, dentry); | 3127 | trans = __unlink_start_trans(dir, dentry); |
| 3083 | if (IS_ERR(trans)) | 3128 | if (IS_ERR(trans)) |
| @@ -3097,9 +3142,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 3097 | } | 3142 | } |
| 3098 | 3143 | ||
| 3099 | out: | 3144 | out: |
| 3100 | nr = trans->blocks_used; | ||
| 3101 | __unlink_end_trans(trans, root); | 3145 | __unlink_end_trans(trans, root); |
| 3102 | btrfs_btree_balance_dirty(root, nr); | 3146 | btrfs_btree_balance_dirty(root); |
| 3103 | return ret; | 3147 | return ret; |
| 3104 | } | 3148 | } |
| 3105 | 3149 | ||
| @@ -3189,11 +3233,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 3189 | int err = 0; | 3233 | int err = 0; |
| 3190 | struct btrfs_root *root = BTRFS_I(dir)->root; | 3234 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 3191 | struct btrfs_trans_handle *trans; | 3235 | struct btrfs_trans_handle *trans; |
| 3192 | unsigned long nr = 0; | ||
| 3193 | 3236 | ||
| 3194 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || | 3237 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 3195 | btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | ||
| 3196 | return -ENOTEMPTY; | 3238 | return -ENOTEMPTY; |
| 3239 | if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) | ||
| 3240 | return -EPERM; | ||
| 3197 | 3241 | ||
| 3198 | trans = __unlink_start_trans(dir, dentry); | 3242 | trans = __unlink_start_trans(dir, dentry); |
| 3199 | if (IS_ERR(trans)) | 3243 | if (IS_ERR(trans)) |
| @@ -3217,9 +3261,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 3217 | if (!err) | 3261 | if (!err) |
| 3218 | btrfs_i_size_write(inode, 0); | 3262 | btrfs_i_size_write(inode, 0); |
| 3219 | out: | 3263 | out: |
| 3220 | nr = trans->blocks_used; | ||
| 3221 | __unlink_end_trans(trans, root); | 3264 | __unlink_end_trans(trans, root); |
| 3222 | btrfs_btree_balance_dirty(root, nr); | 3265 | btrfs_btree_balance_dirty(root); |
| 3223 | 3266 | ||
| 3224 | return err; | 3267 | return err; |
| 3225 | } | 3268 | } |
| @@ -3267,8 +3310,13 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 3267 | return -ENOMEM; | 3310 | return -ENOMEM; |
| 3268 | path->reada = -1; | 3311 | path->reada = -1; |
| 3269 | 3312 | ||
| 3313 | /* | ||
| 3314 | * We want to drop from the next block forward in case this new size is | ||
| 3315 | * not block aligned since we will be keeping the last block of the | ||
| 3316 | * extent just the way it is. | ||
| 3317 | */ | ||
| 3270 | if (root->ref_cows || root == root->fs_info->tree_root) | 3318 | if (root->ref_cows || root == root->fs_info->tree_root) |
| 3271 | btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); | 3319 | btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0); |
| 3272 | 3320 | ||
| 3273 | /* | 3321 | /* |
| 3274 | * This function is also used to drop the items in the log tree before | 3322 | * This function is also used to drop the items in the log tree before |
| @@ -3429,12 +3477,6 @@ delete: | |||
| 3429 | 3477 | ||
| 3430 | if (path->slots[0] == 0 || | 3478 | if (path->slots[0] == 0 || |
| 3431 | path->slots[0] != pending_del_slot) { | 3479 | path->slots[0] != pending_del_slot) { |
| 3432 | if (root->ref_cows && | ||
| 3433 | BTRFS_I(inode)->location.objectid != | ||
| 3434 | BTRFS_FREE_INO_OBJECTID) { | ||
| 3435 | err = -EAGAIN; | ||
| 3436 | goto out; | ||
| 3437 | } | ||
| 3438 | if (pending_del_nr) { | 3480 | if (pending_del_nr) { |
| 3439 | ret = btrfs_del_items(trans, root, path, | 3481 | ret = btrfs_del_items(trans, root, path, |
| 3440 | pending_del_slot, | 3482 | pending_del_slot, |
| @@ -3465,12 +3507,20 @@ error: | |||
| 3465 | } | 3507 | } |
| 3466 | 3508 | ||
| 3467 | /* | 3509 | /* |
| 3468 | * taken from block_truncate_page, but does cow as it zeros out | 3510 | * btrfs_truncate_page - read, zero a chunk and write a page |
| 3469 | * any bytes left in the last page in the file. | 3511 | * @inode - inode that we're zeroing |
| 3512 | * @from - the offset to start zeroing | ||
| 3513 | * @len - the length to zero, 0 to zero the entire range respective to the | ||
| 3514 | * offset | ||
| 3515 | * @front - zero up to the offset instead of from the offset on | ||
| 3516 | * | ||
| 3517 | * This will find the page for the "from" offset and cow the page and zero the | ||
| 3518 | * part we want to zero. This is used with truncate and hole punching. | ||
| 3470 | */ | 3519 | */ |
| 3471 | static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | 3520 | int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, |
| 3521 | int front) | ||
| 3472 | { | 3522 | { |
| 3473 | struct inode *inode = mapping->host; | 3523 | struct address_space *mapping = inode->i_mapping; |
| 3474 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3524 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3475 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3525 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 3476 | struct btrfs_ordered_extent *ordered; | 3526 | struct btrfs_ordered_extent *ordered; |
| @@ -3485,17 +3535,18 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 3485 | u64 page_start; | 3535 | u64 page_start; |
| 3486 | u64 page_end; | 3536 | u64 page_end; |
| 3487 | 3537 | ||
| 3488 | if ((offset & (blocksize - 1)) == 0) | 3538 | if ((offset & (blocksize - 1)) == 0 && |
| 3539 | (!len || ((len & (blocksize - 1)) == 0))) | ||
| 3489 | goto out; | 3540 | goto out; |
| 3490 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 3541 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 3491 | if (ret) | 3542 | if (ret) |
| 3492 | goto out; | 3543 | goto out; |
| 3493 | 3544 | ||
| 3494 | ret = -ENOMEM; | ||
| 3495 | again: | 3545 | again: |
| 3496 | page = find_or_create_page(mapping, index, mask); | 3546 | page = find_or_create_page(mapping, index, mask); |
| 3497 | if (!page) { | 3547 | if (!page) { |
| 3498 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3548 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 3549 | ret = -ENOMEM; | ||
| 3499 | goto out; | 3550 | goto out; |
| 3500 | } | 3551 | } |
| 3501 | 3552 | ||
| @@ -3532,7 +3583,8 @@ again: | |||
| 3532 | } | 3583 | } |
| 3533 | 3584 | ||
| 3534 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 3585 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, |
| 3535 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | 3586 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 3587 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, | ||
| 3536 | 0, 0, &cached_state, GFP_NOFS); | 3588 | 0, 0, &cached_state, GFP_NOFS); |
| 3537 | 3589 | ||
| 3538 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, | 3590 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, |
| @@ -3543,10 +3595,14 @@ again: | |||
| 3543 | goto out_unlock; | 3595 | goto out_unlock; |
| 3544 | } | 3596 | } |
| 3545 | 3597 | ||
| 3546 | ret = 0; | ||
| 3547 | if (offset != PAGE_CACHE_SIZE) { | 3598 | if (offset != PAGE_CACHE_SIZE) { |
| 3599 | if (!len) | ||
| 3600 | len = PAGE_CACHE_SIZE - offset; | ||
| 3548 | kaddr = kmap(page); | 3601 | kaddr = kmap(page); |
| 3549 | memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); | 3602 | if (front) |
| 3603 | memset(kaddr, 0, offset); | ||
| 3604 | else | ||
| 3605 | memset(kaddr + offset, 0, len); | ||
| 3550 | flush_dcache_page(page); | 3606 | flush_dcache_page(page); |
| 3551 | kunmap(page); | 3607 | kunmap(page); |
| 3552 | } | 3608 | } |
| @@ -3577,6 +3633,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3577 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3633 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 3578 | struct extent_map *em = NULL; | 3634 | struct extent_map *em = NULL; |
| 3579 | struct extent_state *cached_state = NULL; | 3635 | struct extent_state *cached_state = NULL; |
| 3636 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 3580 | u64 mask = root->sectorsize - 1; | 3637 | u64 mask = root->sectorsize - 1; |
| 3581 | u64 hole_start = (oldsize + mask) & ~mask; | 3638 | u64 hole_start = (oldsize + mask) & ~mask; |
| 3582 | u64 block_end = (size + mask) & ~mask; | 3639 | u64 block_end = (size + mask) & ~mask; |
| @@ -3613,7 +3670,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3613 | last_byte = min(extent_map_end(em), block_end); | 3670 | last_byte = min(extent_map_end(em), block_end); |
| 3614 | last_byte = (last_byte + mask) & ~mask; | 3671 | last_byte = (last_byte + mask) & ~mask; |
| 3615 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | 3672 | if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { |
| 3616 | u64 hint_byte = 0; | 3673 | struct extent_map *hole_em; |
| 3617 | hole_size = last_byte - cur_offset; | 3674 | hole_size = last_byte - cur_offset; |
| 3618 | 3675 | ||
| 3619 | trans = btrfs_start_transaction(root, 3); | 3676 | trans = btrfs_start_transaction(root, 3); |
| @@ -3622,9 +3679,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3622 | break; | 3679 | break; |
| 3623 | } | 3680 | } |
| 3624 | 3681 | ||
| 3625 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3682 | err = btrfs_drop_extents(trans, root, inode, |
| 3626 | cur_offset + hole_size, | 3683 | cur_offset, |
| 3627 | &hint_byte, 1); | 3684 | cur_offset + hole_size, 1); |
| 3628 | if (err) { | 3685 | if (err) { |
| 3629 | btrfs_abort_transaction(trans, root, err); | 3686 | btrfs_abort_transaction(trans, root, err); |
| 3630 | btrfs_end_transaction(trans, root); | 3687 | btrfs_end_transaction(trans, root); |
| @@ -3641,9 +3698,40 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3641 | break; | 3698 | break; |
| 3642 | } | 3699 | } |
| 3643 | 3700 | ||
| 3644 | btrfs_drop_extent_cache(inode, hole_start, | 3701 | btrfs_drop_extent_cache(inode, cur_offset, |
| 3645 | last_byte - 1, 0); | 3702 | cur_offset + hole_size - 1, 0); |
| 3703 | hole_em = alloc_extent_map(); | ||
| 3704 | if (!hole_em) { | ||
| 3705 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 3706 | &BTRFS_I(inode)->runtime_flags); | ||
| 3707 | goto next; | ||
| 3708 | } | ||
| 3709 | hole_em->start = cur_offset; | ||
| 3710 | hole_em->len = hole_size; | ||
| 3711 | hole_em->orig_start = cur_offset; | ||
| 3712 | |||
| 3713 | hole_em->block_start = EXTENT_MAP_HOLE; | ||
| 3714 | hole_em->block_len = 0; | ||
| 3715 | hole_em->orig_block_len = 0; | ||
| 3716 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 3717 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | ||
| 3718 | hole_em->generation = trans->transid; | ||
| 3646 | 3719 | ||
| 3720 | while (1) { | ||
| 3721 | write_lock(&em_tree->lock); | ||
| 3722 | err = add_extent_mapping(em_tree, hole_em); | ||
| 3723 | if (!err) | ||
| 3724 | list_move(&hole_em->list, | ||
| 3725 | &em_tree->modified_extents); | ||
| 3726 | write_unlock(&em_tree->lock); | ||
| 3727 | if (err != -EEXIST) | ||
| 3728 | break; | ||
| 3729 | btrfs_drop_extent_cache(inode, cur_offset, | ||
| 3730 | cur_offset + | ||
| 3731 | hole_size - 1, 0); | ||
| 3732 | } | ||
| 3733 | free_extent_map(hole_em); | ||
| 3734 | next: | ||
| 3647 | btrfs_update_inode(trans, root, inode); | 3735 | btrfs_update_inode(trans, root, inode); |
| 3648 | btrfs_end_transaction(trans, root); | 3736 | btrfs_end_transaction(trans, root); |
| 3649 | } | 3737 | } |
| @@ -3740,7 +3828,6 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3740 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3828 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3741 | struct btrfs_block_rsv *rsv, *global_rsv; | 3829 | struct btrfs_block_rsv *rsv, *global_rsv; |
| 3742 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 3830 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
| 3743 | unsigned long nr; | ||
| 3744 | int ret; | 3831 | int ret; |
| 3745 | 3832 | ||
| 3746 | trace_btrfs_inode_evict(inode); | 3833 | trace_btrfs_inode_evict(inode); |
| @@ -3768,29 +3855,26 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3768 | goto no_delete; | 3855 | goto no_delete; |
| 3769 | } | 3856 | } |
| 3770 | 3857 | ||
| 3771 | rsv = btrfs_alloc_block_rsv(root); | 3858 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
| 3772 | if (!rsv) { | 3859 | if (!rsv) { |
| 3773 | btrfs_orphan_del(NULL, inode); | 3860 | btrfs_orphan_del(NULL, inode); |
| 3774 | goto no_delete; | 3861 | goto no_delete; |
| 3775 | } | 3862 | } |
| 3776 | rsv->size = min_size; | 3863 | rsv->size = min_size; |
| 3864 | rsv->failfast = 1; | ||
| 3777 | global_rsv = &root->fs_info->global_block_rsv; | 3865 | global_rsv = &root->fs_info->global_block_rsv; |
| 3778 | 3866 | ||
| 3779 | btrfs_i_size_write(inode, 0); | 3867 | btrfs_i_size_write(inode, 0); |
| 3780 | 3868 | ||
| 3781 | /* | 3869 | /* |
| 3782 | * This is a bit simpler than btrfs_truncate since | 3870 | * This is a bit simpler than btrfs_truncate since we've already |
| 3783 | * | 3871 | * reserved our space for our orphan item in the unlink, so we just |
| 3784 | * 1) We've already reserved our space for our orphan item in the | 3872 | * need to reserve some slack space in case we add bytes and update |
| 3785 | * unlink. | 3873 | * inode item when doing the truncate. |
| 3786 | * 2) We're going to delete the inode item, so we don't need to update | ||
| 3787 | * it at all. | ||
| 3788 | * | ||
| 3789 | * So we just need to reserve some slack space in case we add bytes when | ||
| 3790 | * doing the truncate. | ||
| 3791 | */ | 3874 | */ |
| 3792 | while (1) { | 3875 | while (1) { |
| 3793 | ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); | 3876 | ret = btrfs_block_rsv_refill(root, rsv, min_size, |
| 3877 | BTRFS_RESERVE_FLUSH_LIMIT); | ||
| 3794 | 3878 | ||
| 3795 | /* | 3879 | /* |
| 3796 | * Try and steal from the global reserve since we will | 3880 | * Try and steal from the global reserve since we will |
| @@ -3808,7 +3892,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3808 | goto no_delete; | 3892 | goto no_delete; |
| 3809 | } | 3893 | } |
| 3810 | 3894 | ||
| 3811 | trans = btrfs_start_transaction(root, 0); | 3895 | trans = btrfs_start_transaction_lflush(root, 1); |
| 3812 | if (IS_ERR(trans)) { | 3896 | if (IS_ERR(trans)) { |
| 3813 | btrfs_orphan_del(NULL, inode); | 3897 | btrfs_orphan_del(NULL, inode); |
| 3814 | btrfs_free_block_rsv(root, rsv); | 3898 | btrfs_free_block_rsv(root, rsv); |
| @@ -3818,13 +3902,16 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3818 | trans->block_rsv = rsv; | 3902 | trans->block_rsv = rsv; |
| 3819 | 3903 | ||
| 3820 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3904 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); |
| 3821 | if (ret != -EAGAIN) | 3905 | if (ret != -ENOSPC) |
| 3822 | break; | 3906 | break; |
| 3823 | 3907 | ||
| 3824 | nr = trans->blocks_used; | 3908 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 3909 | ret = btrfs_update_inode(trans, root, inode); | ||
| 3910 | BUG_ON(ret); | ||
| 3911 | |||
| 3825 | btrfs_end_transaction(trans, root); | 3912 | btrfs_end_transaction(trans, root); |
| 3826 | trans = NULL; | 3913 | trans = NULL; |
| 3827 | btrfs_btree_balance_dirty(root, nr); | 3914 | btrfs_btree_balance_dirty(root); |
| 3828 | } | 3915 | } |
| 3829 | 3916 | ||
| 3830 | btrfs_free_block_rsv(root, rsv); | 3917 | btrfs_free_block_rsv(root, rsv); |
| @@ -3840,9 +3927,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3840 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) | 3927 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) |
| 3841 | btrfs_return_ino(root, btrfs_ino(inode)); | 3928 | btrfs_return_ino(root, btrfs_ino(inode)); |
| 3842 | 3929 | ||
| 3843 | nr = trans->blocks_used; | ||
| 3844 | btrfs_end_transaction(trans, root); | 3930 | btrfs_end_transaction(trans, root); |
| 3845 | btrfs_btree_balance_dirty(root, nr); | 3931 | btrfs_btree_balance_dirty(root); |
| 3846 | no_delete: | 3932 | no_delete: |
| 3847 | clear_inode(inode); | 3933 | clear_inode(inode); |
| 3848 | return; | 3934 | return; |
| @@ -4470,10 +4556,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 4470 | trans = btrfs_join_transaction(root); | 4556 | trans = btrfs_join_transaction(root); |
| 4471 | if (IS_ERR(trans)) | 4557 | if (IS_ERR(trans)) |
| 4472 | return PTR_ERR(trans); | 4558 | return PTR_ERR(trans); |
| 4473 | if (nolock) | 4559 | ret = btrfs_commit_transaction(trans, root); |
| 4474 | ret = btrfs_end_transaction_nolock(trans, root); | ||
| 4475 | else | ||
| 4476 | ret = btrfs_commit_transaction(trans, root); | ||
| 4477 | } | 4560 | } |
| 4478 | return ret; | 4561 | return ret; |
| 4479 | } | 4562 | } |
| @@ -4671,6 +4754,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4671 | BTRFS_I(inode)->generation = trans->transid; | 4754 | BTRFS_I(inode)->generation = trans->transid; |
| 4672 | inode->i_generation = BTRFS_I(inode)->generation; | 4755 | inode->i_generation = BTRFS_I(inode)->generation; |
| 4673 | 4756 | ||
| 4757 | /* | ||
| 4758 | * We could have gotten an inode number from somebody who was fsynced | ||
| 4759 | * and then removed in this same transaction, so let's just set full | ||
| 4760 | * sync since it will be a full sync anyway and this will blow away the | ||
| 4761 | * old info in the log. | ||
| 4762 | */ | ||
| 4763 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); | ||
| 4764 | |||
| 4674 | if (S_ISDIR(mode)) | 4765 | if (S_ISDIR(mode)) |
| 4675 | owner = 0; | 4766 | owner = 0; |
| 4676 | else | 4767 | else |
| @@ -4680,6 +4771,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4680 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 4771 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |
| 4681 | key[0].offset = 0; | 4772 | key[0].offset = 0; |
| 4682 | 4773 | ||
| 4774 | /* | ||
| 4775 | * Start new inodes with an inode_ref. This is slightly more | ||
| 4776 | * efficient for small numbers of hard links since they will | ||
| 4777 | * be packed into one item. Extended refs will kick in if we | ||
| 4778 | * add more hard links than can fit in the ref item. | ||
| 4779 | */ | ||
| 4683 | key[1].objectid = objectid; | 4780 | key[1].objectid = objectid; |
| 4684 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | 4781 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); |
| 4685 | key[1].offset = ref_objectid; | 4782 | key[1].offset = ref_objectid; |
| @@ -4721,8 +4818,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4721 | if (S_ISREG(mode)) { | 4818 | if (S_ISREG(mode)) { |
| 4722 | if (btrfs_test_opt(root, NODATASUM)) | 4819 | if (btrfs_test_opt(root, NODATASUM)) |
| 4723 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4820 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
| 4724 | if (btrfs_test_opt(root, NODATACOW) || | 4821 | if (btrfs_test_opt(root, NODATACOW)) |
| 4725 | (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) | ||
| 4726 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | 4822 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
| 4727 | } | 4823 | } |
| 4728 | 4824 | ||
| @@ -4788,7 +4884,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
| 4788 | ret = btrfs_insert_dir_item(trans, root, name, name_len, | 4884 | ret = btrfs_insert_dir_item(trans, root, name, name_len, |
| 4789 | parent_inode, &key, | 4885 | parent_inode, &key, |
| 4790 | btrfs_inode_type(inode), index); | 4886 | btrfs_inode_type(inode), index); |
| 4791 | if (ret == -EEXIST) | 4887 | if (ret == -EEXIST || ret == -EOVERFLOW) |
| 4792 | goto fail_dir_item; | 4888 | goto fail_dir_item; |
| 4793 | else if (ret) { | 4889 | else if (ret) { |
| 4794 | btrfs_abort_transaction(trans, root, ret); | 4890 | btrfs_abort_transaction(trans, root, ret); |
| @@ -4843,7 +4939,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4843 | int err; | 4939 | int err; |
| 4844 | int drop_inode = 0; | 4940 | int drop_inode = 0; |
| 4845 | u64 objectid; | 4941 | u64 objectid; |
| 4846 | unsigned long nr = 0; | ||
| 4847 | u64 index = 0; | 4942 | u64 index = 0; |
| 4848 | 4943 | ||
| 4849 | if (!new_valid_dev(rdev)) | 4944 | if (!new_valid_dev(rdev)) |
| @@ -4876,6 +4971,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4876 | goto out_unlock; | 4971 | goto out_unlock; |
| 4877 | } | 4972 | } |
| 4878 | 4973 | ||
| 4974 | err = btrfs_update_inode(trans, root, inode); | ||
| 4975 | if (err) { | ||
| 4976 | drop_inode = 1; | ||
| 4977 | goto out_unlock; | ||
| 4978 | } | ||
| 4979 | |||
| 4879 | /* | 4980 | /* |
| 4880 | * If the active LSM wants to access the inode during | 4981 | * If the active LSM wants to access the inode during |
| 4881 | * d_instantiate it needs these. Smack checks to see | 4982 | * d_instantiate it needs these. Smack checks to see |
| @@ -4893,9 +4994,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4893 | d_instantiate(dentry, inode); | 4994 | d_instantiate(dentry, inode); |
| 4894 | } | 4995 | } |
| 4895 | out_unlock: | 4996 | out_unlock: |
| 4896 | nr = trans->blocks_used; | ||
| 4897 | btrfs_end_transaction(trans, root); | 4997 | btrfs_end_transaction(trans, root); |
| 4898 | btrfs_btree_balance_dirty(root, nr); | 4998 | btrfs_btree_balance_dirty(root); |
| 4899 | if (drop_inode) { | 4999 | if (drop_inode) { |
| 4900 | inode_dec_link_count(inode); | 5000 | inode_dec_link_count(inode); |
| 4901 | iput(inode); | 5001 | iput(inode); |
| @@ -4909,9 +5009,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4909 | struct btrfs_trans_handle *trans; | 5009 | struct btrfs_trans_handle *trans; |
| 4910 | struct btrfs_root *root = BTRFS_I(dir)->root; | 5010 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 4911 | struct inode *inode = NULL; | 5011 | struct inode *inode = NULL; |
| 4912 | int drop_inode = 0; | 5012 | int drop_inode_on_err = 0; |
| 4913 | int err; | 5013 | int err; |
| 4914 | unsigned long nr = 0; | ||
| 4915 | u64 objectid; | 5014 | u64 objectid; |
| 4916 | u64 index = 0; | 5015 | u64 index = 0; |
| 4917 | 5016 | ||
| @@ -4935,12 +5034,15 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4935 | err = PTR_ERR(inode); | 5034 | err = PTR_ERR(inode); |
| 4936 | goto out_unlock; | 5035 | goto out_unlock; |
| 4937 | } | 5036 | } |
| 5037 | drop_inode_on_err = 1; | ||
| 4938 | 5038 | ||
| 4939 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | 5039 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
| 4940 | if (err) { | 5040 | if (err) |
| 4941 | drop_inode = 1; | 5041 | goto out_unlock; |
| 5042 | |||
| 5043 | err = btrfs_update_inode(trans, root, inode); | ||
| 5044 | if (err) | ||
| 4942 | goto out_unlock; | 5045 | goto out_unlock; |
| 4943 | } | ||
| 4944 | 5046 | ||
| 4945 | /* | 5047 | /* |
| 4946 | * If the active LSM wants to access the inode during | 5048 | * If the active LSM wants to access the inode during |
| @@ -4953,21 +5055,20 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4953 | 5055 | ||
| 4954 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 5056 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
| 4955 | if (err) | 5057 | if (err) |
| 4956 | drop_inode = 1; | 5058 | goto out_unlock; |
| 4957 | else { | 5059 | |
| 4958 | inode->i_mapping->a_ops = &btrfs_aops; | 5060 | inode->i_mapping->a_ops = &btrfs_aops; |
| 4959 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | 5061 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; |
| 4960 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 5062 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
| 4961 | d_instantiate(dentry, inode); | 5063 | d_instantiate(dentry, inode); |
| 4962 | } | 5064 | |
| 4963 | out_unlock: | 5065 | out_unlock: |
| 4964 | nr = trans->blocks_used; | ||
| 4965 | btrfs_end_transaction(trans, root); | 5066 | btrfs_end_transaction(trans, root); |
| 4966 | if (drop_inode) { | 5067 | if (err && drop_inode_on_err) { |
| 4967 | inode_dec_link_count(inode); | 5068 | inode_dec_link_count(inode); |
| 4968 | iput(inode); | 5069 | iput(inode); |
| 4969 | } | 5070 | } |
| 4970 | btrfs_btree_balance_dirty(root, nr); | 5071 | btrfs_btree_balance_dirty(root); |
| 4971 | return err; | 5072 | return err; |
| 4972 | } | 5073 | } |
| 4973 | 5074 | ||
| @@ -4978,7 +5079,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4978 | struct btrfs_root *root = BTRFS_I(dir)->root; | 5079 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 4979 | struct inode *inode = old_dentry->d_inode; | 5080 | struct inode *inode = old_dentry->d_inode; |
| 4980 | u64 index; | 5081 | u64 index; |
| 4981 | unsigned long nr = 0; | ||
| 4982 | int err; | 5082 | int err; |
| 4983 | int drop_inode = 0; | 5083 | int drop_inode = 0; |
| 4984 | 5084 | ||
| @@ -4986,7 +5086,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4986 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 5086 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
| 4987 | return -EXDEV; | 5087 | return -EXDEV; |
| 4988 | 5088 | ||
| 4989 | if (inode->i_nlink == ~0U) | 5089 | if (inode->i_nlink >= BTRFS_LINK_MAX) |
| 4990 | return -EMLINK; | 5090 | return -EMLINK; |
| 4991 | 5091 | ||
| 4992 | err = btrfs_set_inode_index(dir, &index); | 5092 | err = btrfs_set_inode_index(dir, &index); |
| @@ -5008,6 +5108,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5008 | inode_inc_iversion(inode); | 5108 | inode_inc_iversion(inode); |
| 5009 | inode->i_ctime = CURRENT_TIME; | 5109 | inode->i_ctime = CURRENT_TIME; |
| 5010 | ihold(inode); | 5110 | ihold(inode); |
| 5111 | set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); | ||
| 5011 | 5112 | ||
| 5012 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); | 5113 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); |
| 5013 | 5114 | ||
| @@ -5022,14 +5123,13 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5022 | btrfs_log_new_name(trans, inode, NULL, parent); | 5123 | btrfs_log_new_name(trans, inode, NULL, parent); |
| 5023 | } | 5124 | } |
| 5024 | 5125 | ||
| 5025 | nr = trans->blocks_used; | ||
| 5026 | btrfs_end_transaction(trans, root); | 5126 | btrfs_end_transaction(trans, root); |
| 5027 | fail: | 5127 | fail: |
| 5028 | if (drop_inode) { | 5128 | if (drop_inode) { |
| 5029 | inode_dec_link_count(inode); | 5129 | inode_dec_link_count(inode); |
| 5030 | iput(inode); | 5130 | iput(inode); |
| 5031 | } | 5131 | } |
| 5032 | btrfs_btree_balance_dirty(root, nr); | 5132 | btrfs_btree_balance_dirty(root); |
| 5033 | return err; | 5133 | return err; |
| 5034 | } | 5134 | } |
| 5035 | 5135 | ||
| @@ -5042,7 +5142,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 5042 | int drop_on_err = 0; | 5142 | int drop_on_err = 0; |
| 5043 | u64 objectid = 0; | 5143 | u64 objectid = 0; |
| 5044 | u64 index = 0; | 5144 | u64 index = 0; |
| 5045 | unsigned long nr = 1; | ||
| 5046 | 5145 | ||
| 5047 | /* | 5146 | /* |
| 5048 | * 2 items for inode and ref | 5147 | * 2 items for inode and ref |
| @@ -5088,11 +5187,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 5088 | drop_on_err = 0; | 5187 | drop_on_err = 0; |
| 5089 | 5188 | ||
| 5090 | out_fail: | 5189 | out_fail: |
| 5091 | nr = trans->blocks_used; | ||
| 5092 | btrfs_end_transaction(trans, root); | 5190 | btrfs_end_transaction(trans, root); |
| 5093 | if (drop_on_err) | 5191 | if (drop_on_err) |
| 5094 | iput(inode); | 5192 | iput(inode); |
| 5095 | btrfs_btree_balance_dirty(root, nr); | 5193 | btrfs_btree_balance_dirty(root); |
| 5096 | return err; | 5194 | return err; |
| 5097 | } | 5195 | } |
| 5098 | 5196 | ||
| @@ -5286,6 +5384,7 @@ again: | |||
| 5286 | if (start + len <= found_key.offset) | 5384 | if (start + len <= found_key.offset) |
| 5287 | goto not_found; | 5385 | goto not_found; |
| 5288 | em->start = start; | 5386 | em->start = start; |
| 5387 | em->orig_start = start; | ||
| 5289 | em->len = found_key.offset - start; | 5388 | em->len = found_key.offset - start; |
| 5290 | goto not_found_em; | 5389 | goto not_found_em; |
| 5291 | } | 5390 | } |
| @@ -5296,6 +5395,8 @@ again: | |||
| 5296 | em->len = extent_end - extent_start; | 5395 | em->len = extent_end - extent_start; |
| 5297 | em->orig_start = extent_start - | 5396 | em->orig_start = extent_start - |
| 5298 | btrfs_file_extent_offset(leaf, item); | 5397 | btrfs_file_extent_offset(leaf, item); |
| 5398 | em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, | ||
| 5399 | item); | ||
| 5299 | bytenr = btrfs_file_extent_disk_bytenr(leaf, item); | 5400 | bytenr = btrfs_file_extent_disk_bytenr(leaf, item); |
| 5300 | if (bytenr == 0) { | 5401 | if (bytenr == 0) { |
| 5301 | em->block_start = EXTENT_MAP_HOLE; | 5402 | em->block_start = EXTENT_MAP_HOLE; |
| @@ -5305,8 +5406,7 @@ again: | |||
| 5305 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5406 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 5306 | em->compress_type = compress_type; | 5407 | em->compress_type = compress_type; |
| 5307 | em->block_start = bytenr; | 5408 | em->block_start = bytenr; |
| 5308 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5409 | em->block_len = em->orig_block_len; |
| 5309 | item); | ||
| 5310 | } else { | 5410 | } else { |
| 5311 | bytenr += btrfs_file_extent_offset(leaf, item); | 5411 | bytenr += btrfs_file_extent_offset(leaf, item); |
| 5312 | em->block_start = bytenr; | 5412 | em->block_start = bytenr; |
| @@ -5336,7 +5436,8 @@ again: | |||
| 5336 | em->start = extent_start + extent_offset; | 5436 | em->start = extent_start + extent_offset; |
| 5337 | em->len = (copy_size + root->sectorsize - 1) & | 5437 | em->len = (copy_size + root->sectorsize - 1) & |
| 5338 | ~((u64)root->sectorsize - 1); | 5438 | ~((u64)root->sectorsize - 1); |
| 5339 | em->orig_start = EXTENT_MAP_INLINE; | 5439 | em->orig_block_len = em->len; |
| 5440 | em->orig_start = em->start; | ||
| 5340 | if (compress_type) { | 5441 | if (compress_type) { |
| 5341 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5442 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 5342 | em->compress_type = compress_type; | 5443 | em->compress_type = compress_type; |
| @@ -5385,11 +5486,11 @@ again: | |||
| 5385 | extent_map_end(em) - 1, NULL, GFP_NOFS); | 5486 | extent_map_end(em) - 1, NULL, GFP_NOFS); |
| 5386 | goto insert; | 5487 | goto insert; |
| 5387 | } else { | 5488 | } else { |
| 5388 | printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); | 5489 | WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type); |
| 5389 | WARN_ON(1); | ||
| 5390 | } | 5490 | } |
| 5391 | not_found: | 5491 | not_found: |
| 5392 | em->start = start; | 5492 | em->start = start; |
| 5493 | em->orig_start = start; | ||
| 5393 | em->len = len; | 5494 | em->len = len; |
| 5394 | not_found_em: | 5495 | not_found_em: |
| 5395 | em->block_start = EXTENT_MAP_HOLE; | 5496 | em->block_start = EXTENT_MAP_HOLE; |
| @@ -5450,7 +5551,8 @@ insert: | |||
| 5450 | write_unlock(&em_tree->lock); | 5551 | write_unlock(&em_tree->lock); |
| 5451 | out: | 5552 | out: |
| 5452 | 5553 | ||
| 5453 | trace_btrfs_get_extent(root, em); | 5554 | if (em) |
| 5555 | trace_btrfs_get_extent(root, em); | ||
| 5454 | 5556 | ||
| 5455 | if (path) | 5557 | if (path) |
| 5456 | btrfs_free_path(path); | 5558 | btrfs_free_path(path); |
| @@ -5590,38 +5692,19 @@ out: | |||
| 5590 | } | 5692 | } |
| 5591 | 5693 | ||
| 5592 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | 5694 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
| 5593 | struct extent_map *em, | ||
| 5594 | u64 start, u64 len) | 5695 | u64 start, u64 len) |
| 5595 | { | 5696 | { |
| 5596 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5697 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 5597 | struct btrfs_trans_handle *trans; | 5698 | struct btrfs_trans_handle *trans; |
| 5598 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5699 | struct extent_map *em; |
| 5599 | struct btrfs_key ins; | 5700 | struct btrfs_key ins; |
| 5600 | u64 alloc_hint; | 5701 | u64 alloc_hint; |
| 5601 | int ret; | 5702 | int ret; |
| 5602 | bool insert = false; | ||
| 5603 | |||
| 5604 | /* | ||
| 5605 | * Ok if the extent map we looked up is a hole and is for the exact | ||
| 5606 | * range we want, there is no reason to allocate a new one, however if | ||
| 5607 | * it is not right then we need to free this one and drop the cache for | ||
| 5608 | * our range. | ||
| 5609 | */ | ||
| 5610 | if (em->block_start != EXTENT_MAP_HOLE || em->start != start || | ||
| 5611 | em->len != len) { | ||
| 5612 | free_extent_map(em); | ||
| 5613 | em = NULL; | ||
| 5614 | insert = true; | ||
| 5615 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
| 5616 | } | ||
| 5617 | 5703 | ||
| 5618 | trans = btrfs_join_transaction(root); | 5704 | trans = btrfs_join_transaction(root); |
| 5619 | if (IS_ERR(trans)) | 5705 | if (IS_ERR(trans)) |
| 5620 | return ERR_CAST(trans); | 5706 | return ERR_CAST(trans); |
| 5621 | 5707 | ||
| 5622 | if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024) | ||
| 5623 | btrfs_add_inode_defrag(trans, inode); | ||
| 5624 | |||
| 5625 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 5708 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
| 5626 | 5709 | ||
| 5627 | alloc_hint = get_extent_allocation_hint(inode, start, len); | 5710 | alloc_hint = get_extent_allocation_hint(inode, start, len); |
| @@ -5632,37 +5715,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
| 5632 | goto out; | 5715 | goto out; |
| 5633 | } | 5716 | } |
| 5634 | 5717 | ||
| 5635 | if (!em) { | 5718 | em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, |
| 5636 | em = alloc_extent_map(); | 5719 | ins.offset, ins.offset, 0); |
| 5637 | if (!em) { | 5720 | if (IS_ERR(em)) |
| 5638 | em = ERR_PTR(-ENOMEM); | 5721 | goto out; |
| 5639 | goto out; | ||
| 5640 | } | ||
| 5641 | } | ||
| 5642 | |||
| 5643 | em->start = start; | ||
| 5644 | em->orig_start = em->start; | ||
| 5645 | em->len = ins.offset; | ||
| 5646 | |||
| 5647 | em->block_start = ins.objectid; | ||
| 5648 | em->block_len = ins.offset; | ||
| 5649 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 5650 | |||
| 5651 | /* | ||
| 5652 | * We need to do this because if we're using the original em we searched | ||
| 5653 | * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that. | ||
| 5654 | */ | ||
| 5655 | em->flags = 0; | ||
| 5656 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 5657 | |||
| 5658 | while (insert) { | ||
| 5659 | write_lock(&em_tree->lock); | ||
| 5660 | ret = add_extent_mapping(em_tree, em); | ||
| 5661 | write_unlock(&em_tree->lock); | ||
| 5662 | if (ret != -EEXIST) | ||
| 5663 | break; | ||
| 5664 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
| 5665 | } | ||
| 5666 | 5722 | ||
| 5667 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | 5723 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, |
| 5668 | ins.offset, ins.offset, 0); | 5724 | ins.offset, ins.offset, 0); |
| @@ -5836,6 +5892,53 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
| 5836 | return ret; | 5892 | return ret; |
| 5837 | } | 5893 | } |
| 5838 | 5894 | ||
| 5895 | static struct extent_map *create_pinned_em(struct inode *inode, u64 start, | ||
| 5896 | u64 len, u64 orig_start, | ||
| 5897 | u64 block_start, u64 block_len, | ||
| 5898 | u64 orig_block_len, int type) | ||
| 5899 | { | ||
| 5900 | struct extent_map_tree *em_tree; | ||
| 5901 | struct extent_map *em; | ||
| 5902 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5903 | int ret; | ||
| 5904 | |||
| 5905 | em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 5906 | em = alloc_extent_map(); | ||
| 5907 | if (!em) | ||
| 5908 | return ERR_PTR(-ENOMEM); | ||
| 5909 | |||
| 5910 | em->start = start; | ||
| 5911 | em->orig_start = orig_start; | ||
| 5912 | em->len = len; | ||
| 5913 | em->block_len = block_len; | ||
| 5914 | em->block_start = block_start; | ||
| 5915 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 5916 | em->orig_block_len = orig_block_len; | ||
| 5917 | em->generation = -1; | ||
| 5918 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 5919 | if (type == BTRFS_ORDERED_PREALLOC) | ||
| 5920 | set_bit(EXTENT_FLAG_FILLING, &em->flags); | ||
| 5921 | |||
| 5922 | do { | ||
| 5923 | btrfs_drop_extent_cache(inode, em->start, | ||
| 5924 | em->start + em->len - 1, 0); | ||
| 5925 | write_lock(&em_tree->lock); | ||
| 5926 | ret = add_extent_mapping(em_tree, em); | ||
| 5927 | if (!ret) | ||
| 5928 | list_move(&em->list, | ||
| 5929 | &em_tree->modified_extents); | ||
| 5930 | write_unlock(&em_tree->lock); | ||
| 5931 | } while (ret == -EEXIST); | ||
| 5932 | |||
| 5933 | if (ret) { | ||
| 5934 | free_extent_map(em); | ||
| 5935 | return ERR_PTR(ret); | ||
| 5936 | } | ||
| 5937 | |||
| 5938 | return em; | ||
| 5939 | } | ||
| 5940 | |||
| 5941 | |||
| 5839 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | 5942 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, |
| 5840 | struct buffer_head *bh_result, int create) | 5943 | struct buffer_head *bh_result, int create) |
| 5841 | { | 5944 | { |
| @@ -5950,6 +6053,21 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
| 5950 | goto must_cow; | 6053 | goto must_cow; |
| 5951 | 6054 | ||
| 5952 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | 6055 | if (can_nocow_odirect(trans, inode, start, len) == 1) { |
| 6056 | u64 orig_start = em->orig_start; | ||
| 6057 | u64 orig_block_len = em->orig_block_len; | ||
| 6058 | |||
| 6059 | if (type == BTRFS_ORDERED_PREALLOC) { | ||
| 6060 | free_extent_map(em); | ||
| 6061 | em = create_pinned_em(inode, start, len, | ||
| 6062 | orig_start, | ||
| 6063 | block_start, len, | ||
| 6064 | orig_block_len, type); | ||
| 6065 | if (IS_ERR(em)) { | ||
| 6066 | btrfs_end_transaction(trans, root); | ||
| 6067 | goto unlock_err; | ||
| 6068 | } | ||
| 6069 | } | ||
| 6070 | |||
| 5953 | ret = btrfs_add_ordered_extent_dio(inode, start, | 6071 | ret = btrfs_add_ordered_extent_dio(inode, start, |
| 5954 | block_start, len, len, type); | 6072 | block_start, len, len, type); |
| 5955 | btrfs_end_transaction(trans, root); | 6073 | btrfs_end_transaction(trans, root); |
| @@ -5967,7 +6085,8 @@ must_cow: | |||
| 5967 | * it above | 6085 | * it above |
| 5968 | */ | 6086 | */ |
| 5969 | len = bh_result->b_size; | 6087 | len = bh_result->b_size; |
| 5970 | em = btrfs_new_extent_direct(inode, em, start, len); | 6088 | free_extent_map(em); |
| 6089 | em = btrfs_new_extent_direct(inode, start, len); | ||
| 5971 | if (IS_ERR(em)) { | 6090 | if (IS_ERR(em)) { |
| 5972 | ret = PTR_ERR(em); | 6091 | ret = PTR_ERR(em); |
| 5973 | goto unlock_err; | 6092 | goto unlock_err; |
| @@ -5999,7 +6118,8 @@ unlock: | |||
| 5999 | if (lockstart < lockend) { | 6118 | if (lockstart < lockend) { |
| 6000 | if (create && len < lockend - lockstart) { | 6119 | if (create && len < lockend - lockstart) { |
| 6001 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 6120 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
| 6002 | lockstart + len - 1, unlock_bits, 1, 0, | 6121 | lockstart + len - 1, |
| 6122 | unlock_bits | EXTENT_DEFRAG, 1, 0, | ||
| 6003 | &cached_state, GFP_NOFS); | 6123 | &cached_state, GFP_NOFS); |
| 6004 | /* | 6124 | /* |
| 6005 | * Beside unlock, we also need to cleanup reserved space | 6125 | * Beside unlock, we also need to cleanup reserved space |
| @@ -6007,8 +6127,8 @@ unlock: | |||
| 6007 | */ | 6127 | */ |
| 6008 | clear_extent_bit(&BTRFS_I(inode)->io_tree, | 6128 | clear_extent_bit(&BTRFS_I(inode)->io_tree, |
| 6009 | lockstart + len, lockend, | 6129 | lockstart + len, lockend, |
| 6010 | unlock_bits | EXTENT_DO_ACCOUNTING, | 6130 | unlock_bits | EXTENT_DO_ACCOUNTING | |
| 6011 | 1, 0, NULL, GFP_NOFS); | 6131 | EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS); |
| 6012 | } else { | 6132 | } else { |
| 6013 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 6133 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
| 6014 | lockend, unlock_bits, 1, 0, | 6134 | lockend, unlock_bits, 1, 0, |
| @@ -6207,6 +6327,9 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
| 6207 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6327 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 6208 | int ret; | 6328 | int ret; |
| 6209 | 6329 | ||
| 6330 | if (async_submit) | ||
| 6331 | async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); | ||
| 6332 | |||
| 6210 | bio_get(bio); | 6333 | bio_get(bio); |
| 6211 | 6334 | ||
| 6212 | if (!write) { | 6335 | if (!write) { |
| @@ -6251,7 +6374,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
| 6251 | { | 6374 | { |
| 6252 | struct inode *inode = dip->inode; | 6375 | struct inode *inode = dip->inode; |
| 6253 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6376 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 6254 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
| 6255 | struct bio *bio; | 6377 | struct bio *bio; |
| 6256 | struct bio *orig_bio = dip->orig_bio; | 6378 | struct bio *orig_bio = dip->orig_bio; |
| 6257 | struct bio_vec *bvec = orig_bio->bi_io_vec; | 6379 | struct bio_vec *bvec = orig_bio->bi_io_vec; |
| @@ -6264,7 +6386,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
| 6264 | int async_submit = 0; | 6386 | int async_submit = 0; |
| 6265 | 6387 | ||
| 6266 | map_length = orig_bio->bi_size; | 6388 | map_length = orig_bio->bi_size; |
| 6267 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | 6389 | ret = btrfs_map_block(root->fs_info, READ, start_sector << 9, |
| 6268 | &map_length, NULL, 0); | 6390 | &map_length, NULL, 0); |
| 6269 | if (ret) { | 6391 | if (ret) { |
| 6270 | bio_put(orig_bio); | 6392 | bio_put(orig_bio); |
| @@ -6318,7 +6440,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
| 6318 | bio->bi_end_io = btrfs_end_dio_bio; | 6440 | bio->bi_end_io = btrfs_end_dio_bio; |
| 6319 | 6441 | ||
| 6320 | map_length = orig_bio->bi_size; | 6442 | map_length = orig_bio->bi_size; |
| 6321 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | 6443 | ret = btrfs_map_block(root->fs_info, READ, |
| 6444 | start_sector << 9, | ||
| 6322 | &map_length, NULL, 0); | 6445 | &map_length, NULL, 0); |
| 6323 | if (ret) { | 6446 | if (ret) { |
| 6324 | bio_put(bio); | 6447 | bio_put(bio); |
| @@ -6471,9 +6594,17 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 6471 | btrfs_submit_direct, 0); | 6594 | btrfs_submit_direct, 0); |
| 6472 | } | 6595 | } |
| 6473 | 6596 | ||
| 6597 | #define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) | ||
| 6598 | |||
| 6474 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 6599 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| 6475 | __u64 start, __u64 len) | 6600 | __u64 start, __u64 len) |
| 6476 | { | 6601 | { |
| 6602 | int ret; | ||
| 6603 | |||
| 6604 | ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS); | ||
| 6605 | if (ret) | ||
| 6606 | return ret; | ||
| 6607 | |||
| 6477 | return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap); | 6608 | return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap); |
| 6478 | } | 6609 | } |
| 6479 | 6610 | ||
| @@ -6573,8 +6704,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 6573 | */ | 6704 | */ |
| 6574 | clear_extent_bit(tree, page_start, page_end, | 6705 | clear_extent_bit(tree, page_start, page_end, |
| 6575 | EXTENT_DIRTY | EXTENT_DELALLOC | | 6706 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 6576 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, | 6707 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | |
| 6577 | &cached_state, GFP_NOFS); | 6708 | EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS); |
| 6578 | /* | 6709 | /* |
| 6579 | * whoever cleared the private bit is responsible | 6710 | * whoever cleared the private bit is responsible |
| 6580 | * for the finish_ordered_io | 6711 | * for the finish_ordered_io |
| @@ -6590,7 +6721,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 6590 | } | 6721 | } |
| 6591 | clear_extent_bit(tree, page_start, page_end, | 6722 | clear_extent_bit(tree, page_start, page_end, |
| 6592 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 6723 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 6593 | EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS); | 6724 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, |
| 6725 | &cached_state, GFP_NOFS); | ||
| 6594 | __btrfs_releasepage(page, GFP_NOFS); | 6726 | __btrfs_releasepage(page, GFP_NOFS); |
| 6595 | 6727 | ||
| 6596 | ClearPageChecked(page); | 6728 | ClearPageChecked(page); |
| @@ -6687,7 +6819,8 @@ again: | |||
| 6687 | * prepare_pages in the normal write path. | 6819 | * prepare_pages in the normal write path. |
| 6688 | */ | 6820 | */ |
| 6689 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 6821 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, |
| 6690 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | 6822 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 6823 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, | ||
| 6691 | 0, 0, &cached_state, GFP_NOFS); | 6824 | 0, 0, &cached_state, GFP_NOFS); |
| 6692 | 6825 | ||
| 6693 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, | 6826 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end, |
| @@ -6718,6 +6851,7 @@ again: | |||
| 6718 | 6851 | ||
| 6719 | BTRFS_I(inode)->last_trans = root->fs_info->generation; | 6852 | BTRFS_I(inode)->last_trans = root->fs_info->generation; |
| 6720 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | 6853 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
| 6854 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; | ||
| 6721 | 6855 | ||
| 6722 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6856 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
| 6723 | 6857 | ||
| @@ -6741,11 +6875,10 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6741 | int ret; | 6875 | int ret; |
| 6742 | int err = 0; | 6876 | int err = 0; |
| 6743 | struct btrfs_trans_handle *trans; | 6877 | struct btrfs_trans_handle *trans; |
| 6744 | unsigned long nr; | ||
| 6745 | u64 mask = root->sectorsize - 1; | 6878 | u64 mask = root->sectorsize - 1; |
| 6746 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 6879 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
| 6747 | 6880 | ||
| 6748 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6881 | ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); |
| 6749 | if (ret) | 6882 | if (ret) |
| 6750 | return ret; | 6883 | return ret; |
| 6751 | 6884 | ||
| @@ -6788,10 +6921,11 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6788 | * 3) fs_info->trans_block_rsv - this will have 1 items worth left for | 6921 | * 3) fs_info->trans_block_rsv - this will have 1 items worth left for |
| 6789 | * updating the inode. | 6922 | * updating the inode. |
| 6790 | */ | 6923 | */ |
| 6791 | rsv = btrfs_alloc_block_rsv(root); | 6924 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
| 6792 | if (!rsv) | 6925 | if (!rsv) |
| 6793 | return -ENOMEM; | 6926 | return -ENOMEM; |
| 6794 | rsv->size = min_size; | 6927 | rsv->size = min_size; |
| 6928 | rsv->failfast = 1; | ||
| 6795 | 6929 | ||
| 6796 | /* | 6930 | /* |
| 6797 | * 1 for the truncate slack space | 6931 | * 1 for the truncate slack space |
| @@ -6837,36 +6971,21 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6837 | &BTRFS_I(inode)->runtime_flags)) | 6971 | &BTRFS_I(inode)->runtime_flags)) |
| 6838 | btrfs_add_ordered_operation(trans, root, inode); | 6972 | btrfs_add_ordered_operation(trans, root, inode); |
| 6839 | 6973 | ||
| 6840 | while (1) { | 6974 | /* |
| 6841 | ret = btrfs_block_rsv_refill(root, rsv, min_size); | 6975 | * So if we truncate and then write and fsync we normally would just |
| 6842 | if (ret) { | 6976 | * write the extents that changed, which is a problem if we need to |
| 6843 | /* | 6977 | * first truncate that entire inode. So set this flag so we write out |
| 6844 | * This can only happen with the original transaction we | 6978 | * all of the extents in the inode to the sync log so we're completely |
| 6845 | * started above, every other time we shouldn't have a | 6979 | * safe. |
| 6846 | * transaction started yet. | 6980 | */ |
| 6847 | */ | 6981 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); |
| 6848 | if (ret == -EAGAIN) | 6982 | trans->block_rsv = rsv; |
| 6849 | goto end_trans; | ||
| 6850 | err = ret; | ||
| 6851 | break; | ||
| 6852 | } | ||
| 6853 | |||
| 6854 | if (!trans) { | ||
| 6855 | /* Just need the 1 for updating the inode */ | ||
| 6856 | trans = btrfs_start_transaction(root, 1); | ||
| 6857 | if (IS_ERR(trans)) { | ||
| 6858 | ret = err = PTR_ERR(trans); | ||
| 6859 | trans = NULL; | ||
| 6860 | break; | ||
| 6861 | } | ||
| 6862 | } | ||
| 6863 | |||
| 6864 | trans->block_rsv = rsv; | ||
| 6865 | 6983 | ||
| 6984 | while (1) { | ||
| 6866 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6985 | ret = btrfs_truncate_inode_items(trans, root, inode, |
| 6867 | inode->i_size, | 6986 | inode->i_size, |
| 6868 | BTRFS_EXTENT_DATA_KEY); | 6987 | BTRFS_EXTENT_DATA_KEY); |
| 6869 | if (ret != -EAGAIN) { | 6988 | if (ret != -ENOSPC) { |
| 6870 | err = ret; | 6989 | err = ret; |
| 6871 | break; | 6990 | break; |
| 6872 | } | 6991 | } |
| @@ -6877,11 +6996,21 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6877 | err = ret; | 6996 | err = ret; |
| 6878 | break; | 6997 | break; |
| 6879 | } | 6998 | } |
| 6880 | end_trans: | 6999 | |
| 6881 | nr = trans->blocks_used; | ||
| 6882 | btrfs_end_transaction(trans, root); | 7000 | btrfs_end_transaction(trans, root); |
| 6883 | trans = NULL; | 7001 | btrfs_btree_balance_dirty(root); |
| 6884 | btrfs_btree_balance_dirty(root, nr); | 7002 | |
| 7003 | trans = btrfs_start_transaction(root, 2); | ||
| 7004 | if (IS_ERR(trans)) { | ||
| 7005 | ret = err = PTR_ERR(trans); | ||
| 7006 | trans = NULL; | ||
| 7007 | break; | ||
| 7008 | } | ||
| 7009 | |||
| 7010 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | ||
| 7011 | rsv, min_size); | ||
| 7012 | BUG_ON(ret); /* shouldn't happen */ | ||
| 7013 | trans->block_rsv = rsv; | ||
| 6885 | } | 7014 | } |
| 6886 | 7015 | ||
| 6887 | if (ret == 0 && inode->i_nlink > 0) { | 7016 | if (ret == 0 && inode->i_nlink > 0) { |
| @@ -6903,9 +7032,8 @@ end_trans: | |||
| 6903 | if (ret && !err) | 7032 | if (ret && !err) |
| 6904 | err = ret; | 7033 | err = ret; |
| 6905 | 7034 | ||
| 6906 | nr = trans->blocks_used; | ||
| 6907 | ret = btrfs_end_transaction(trans, root); | 7035 | ret = btrfs_end_transaction(trans, root); |
| 6908 | btrfs_btree_balance_dirty(root, nr); | 7036 | btrfs_btree_balance_dirty(root); |
| 6909 | } | 7037 | } |
| 6910 | 7038 | ||
| 6911 | out: | 7039 | out: |
| @@ -6965,6 +7093,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6965 | ei->csum_bytes = 0; | 7093 | ei->csum_bytes = 0; |
| 6966 | ei->index_cnt = (u64)-1; | 7094 | ei->index_cnt = (u64)-1; |
| 6967 | ei->last_unlink_trans = 0; | 7095 | ei->last_unlink_trans = 0; |
| 7096 | ei->last_log_commit = 0; | ||
| 6968 | 7097 | ||
| 6969 | spin_lock_init(&ei->lock); | 7098 | spin_lock_init(&ei->lock); |
| 6970 | ei->outstanding_extents = 0; | 7099 | ei->outstanding_extents = 0; |
| @@ -6981,6 +7110,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6981 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); | 7110 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); |
| 6982 | ei->io_tree.track_uptodate = 1; | 7111 | ei->io_tree.track_uptodate = 1; |
| 6983 | ei->io_failure_tree.track_uptodate = 1; | 7112 | ei->io_failure_tree.track_uptodate = 1; |
| 7113 | atomic_set(&ei->sync_writers, 0); | ||
| 6984 | mutex_init(&ei->log_mutex); | 7114 | mutex_init(&ei->log_mutex); |
| 6985 | mutex_init(&ei->delalloc_mutex); | 7115 | mutex_init(&ei->delalloc_mutex); |
| 6986 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 7116 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| @@ -7091,40 +7221,49 @@ void btrfs_destroy_cachep(void) | |||
| 7091 | kmem_cache_destroy(btrfs_path_cachep); | 7221 | kmem_cache_destroy(btrfs_path_cachep); |
| 7092 | if (btrfs_free_space_cachep) | 7222 | if (btrfs_free_space_cachep) |
| 7093 | kmem_cache_destroy(btrfs_free_space_cachep); | 7223 | kmem_cache_destroy(btrfs_free_space_cachep); |
| 7224 | if (btrfs_delalloc_work_cachep) | ||
| 7225 | kmem_cache_destroy(btrfs_delalloc_work_cachep); | ||
| 7094 | } | 7226 | } |
| 7095 | 7227 | ||
| 7096 | int btrfs_init_cachep(void) | 7228 | int btrfs_init_cachep(void) |
| 7097 | { | 7229 | { |
| 7098 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", | 7230 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode", |
| 7099 | sizeof(struct btrfs_inode), 0, | 7231 | sizeof(struct btrfs_inode), 0, |
| 7100 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); | 7232 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); |
| 7101 | if (!btrfs_inode_cachep) | 7233 | if (!btrfs_inode_cachep) |
| 7102 | goto fail; | 7234 | goto fail; |
| 7103 | 7235 | ||
| 7104 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", | 7236 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle", |
| 7105 | sizeof(struct btrfs_trans_handle), 0, | 7237 | sizeof(struct btrfs_trans_handle), 0, |
| 7106 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7238 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 7107 | if (!btrfs_trans_handle_cachep) | 7239 | if (!btrfs_trans_handle_cachep) |
| 7108 | goto fail; | 7240 | goto fail; |
| 7109 | 7241 | ||
| 7110 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", | 7242 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction", |
| 7111 | sizeof(struct btrfs_transaction), 0, | 7243 | sizeof(struct btrfs_transaction), 0, |
| 7112 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7244 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 7113 | if (!btrfs_transaction_cachep) | 7245 | if (!btrfs_transaction_cachep) |
| 7114 | goto fail; | 7246 | goto fail; |
| 7115 | 7247 | ||
| 7116 | btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", | 7248 | btrfs_path_cachep = kmem_cache_create("btrfs_path", |
| 7117 | sizeof(struct btrfs_path), 0, | 7249 | sizeof(struct btrfs_path), 0, |
| 7118 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7250 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 7119 | if (!btrfs_path_cachep) | 7251 | if (!btrfs_path_cachep) |
| 7120 | goto fail; | 7252 | goto fail; |
| 7121 | 7253 | ||
| 7122 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache", | 7254 | btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space", |
| 7123 | sizeof(struct btrfs_free_space), 0, | 7255 | sizeof(struct btrfs_free_space), 0, |
| 7124 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | 7256 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
| 7125 | if (!btrfs_free_space_cachep) | 7257 | if (!btrfs_free_space_cachep) |
| 7126 | goto fail; | 7258 | goto fail; |
| 7127 | 7259 | ||
| 7260 | btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work", | ||
| 7261 | sizeof(struct btrfs_delalloc_work), 0, | ||
| 7262 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
| 7263 | NULL); | ||
| 7264 | if (!btrfs_delalloc_work_cachep) | ||
| 7265 | goto fail; | ||
| 7266 | |||
| 7128 | return 0; | 7267 | return 0; |
| 7129 | fail: | 7268 | fail: |
| 7130 | btrfs_destroy_cachep(); | 7269 | btrfs_destroy_cachep(); |
| @@ -7196,6 +7335,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 7196 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 7335 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
| 7197 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 7336 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 7198 | return -ENOTEMPTY; | 7337 | return -ENOTEMPTY; |
| 7338 | |||
| 7339 | |||
| 7340 | /* check for collisions, even if the name isn't there */ | ||
| 7341 | ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, | ||
| 7342 | new_dentry->d_name.name, | ||
| 7343 | new_dentry->d_name.len); | ||
| 7344 | |||
| 7345 | if (ret) { | ||
| 7346 | if (ret == -EEXIST) { | ||
| 7347 | /* we shouldn't get | ||
| 7348 | * eexist without a new_inode */ | ||
| 7349 | if (!new_inode) { | ||
| 7350 | WARN_ON(1); | ||
| 7351 | return ret; | ||
| 7352 | } | ||
| 7353 | } else { | ||
| 7354 | /* maybe -EOVERFLOW */ | ||
| 7355 | return ret; | ||
| 7356 | } | ||
| 7357 | } | ||
| 7358 | ret = 0; | ||
| 7359 | |||
| 7199 | /* | 7360 | /* |
| 7200 | * we're using rename to replace one file with another. | 7361 | * we're using rename to replace one file with another. |
| 7201 | * and the replacement file is large. Start IO on it now so | 7362 | * and the replacement file is large. Start IO on it now so |
| @@ -7335,6 +7496,49 @@ out_notrans: | |||
| 7335 | return ret; | 7496 | return ret; |
| 7336 | } | 7497 | } |
| 7337 | 7498 | ||
| 7499 | static void btrfs_run_delalloc_work(struct btrfs_work *work) | ||
| 7500 | { | ||
| 7501 | struct btrfs_delalloc_work *delalloc_work; | ||
| 7502 | |||
| 7503 | delalloc_work = container_of(work, struct btrfs_delalloc_work, | ||
| 7504 | work); | ||
| 7505 | if (delalloc_work->wait) | ||
| 7506 | btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1); | ||
| 7507 | else | ||
| 7508 | filemap_flush(delalloc_work->inode->i_mapping); | ||
| 7509 | |||
| 7510 | if (delalloc_work->delay_iput) | ||
| 7511 | btrfs_add_delayed_iput(delalloc_work->inode); | ||
| 7512 | else | ||
| 7513 | iput(delalloc_work->inode); | ||
| 7514 | complete(&delalloc_work->completion); | ||
| 7515 | } | ||
| 7516 | |||
| 7517 | struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | ||
| 7518 | int wait, int delay_iput) | ||
| 7519 | { | ||
| 7520 | struct btrfs_delalloc_work *work; | ||
| 7521 | |||
| 7522 | work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS); | ||
| 7523 | if (!work) | ||
| 7524 | return NULL; | ||
| 7525 | |||
| 7526 | init_completion(&work->completion); | ||
| 7527 | INIT_LIST_HEAD(&work->list); | ||
| 7528 | work->inode = inode; | ||
| 7529 | work->wait = wait; | ||
| 7530 | work->delay_iput = delay_iput; | ||
| 7531 | work->work.func = btrfs_run_delalloc_work; | ||
| 7532 | |||
| 7533 | return work; | ||
| 7534 | } | ||
| 7535 | |||
| 7536 | void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | ||
| 7537 | { | ||
| 7538 | wait_for_completion(&work->completion); | ||
| 7539 | kmem_cache_free(btrfs_delalloc_work_cachep, work); | ||
| 7540 | } | ||
| 7541 | |||
| 7338 | /* | 7542 | /* |
| 7339 | * some fairly slow code that needs optimization. This walks the list | 7543 | * some fairly slow code that needs optimization. This walks the list |
| 7340 | * of all the inodes with pending delalloc and forces them to disk. | 7544 | * of all the inodes with pending delalloc and forces them to disk. |
| @@ -7344,10 +7548,15 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 7344 | struct list_head *head = &root->fs_info->delalloc_inodes; | 7548 | struct list_head *head = &root->fs_info->delalloc_inodes; |
| 7345 | struct btrfs_inode *binode; | 7549 | struct btrfs_inode *binode; |
| 7346 | struct inode *inode; | 7550 | struct inode *inode; |
| 7551 | struct btrfs_delalloc_work *work, *next; | ||
| 7552 | struct list_head works; | ||
| 7553 | int ret = 0; | ||
| 7347 | 7554 | ||
| 7348 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 7555 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
| 7349 | return -EROFS; | 7556 | return -EROFS; |
| 7350 | 7557 | ||
| 7558 | INIT_LIST_HEAD(&works); | ||
| 7559 | |||
| 7351 | spin_lock(&root->fs_info->delalloc_lock); | 7560 | spin_lock(&root->fs_info->delalloc_lock); |
| 7352 | while (!list_empty(head)) { | 7561 | while (!list_empty(head)) { |
| 7353 | binode = list_entry(head->next, struct btrfs_inode, | 7562 | binode = list_entry(head->next, struct btrfs_inode, |
| @@ -7357,11 +7566,14 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 7357 | list_del_init(&binode->delalloc_inodes); | 7566 | list_del_init(&binode->delalloc_inodes); |
| 7358 | spin_unlock(&root->fs_info->delalloc_lock); | 7567 | spin_unlock(&root->fs_info->delalloc_lock); |
| 7359 | if (inode) { | 7568 | if (inode) { |
| 7360 | filemap_flush(inode->i_mapping); | 7569 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); |
| 7361 | if (delay_iput) | 7570 | if (!work) { |
| 7362 | btrfs_add_delayed_iput(inode); | 7571 | ret = -ENOMEM; |
| 7363 | else | 7572 | goto out; |
| 7364 | iput(inode); | 7573 | } |
| 7574 | list_add_tail(&work->list, &works); | ||
| 7575 | btrfs_queue_worker(&root->fs_info->flush_workers, | ||
| 7576 | &work->work); | ||
| 7365 | } | 7577 | } |
| 7366 | cond_resched(); | 7578 | cond_resched(); |
| 7367 | spin_lock(&root->fs_info->delalloc_lock); | 7579 | spin_lock(&root->fs_info->delalloc_lock); |
| @@ -7380,7 +7592,12 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 7380 | atomic_read(&root->fs_info->async_delalloc_pages) == 0)); | 7592 | atomic_read(&root->fs_info->async_delalloc_pages) == 0)); |
| 7381 | } | 7593 | } |
| 7382 | atomic_dec(&root->fs_info->async_submit_draining); | 7594 | atomic_dec(&root->fs_info->async_submit_draining); |
| 7383 | return 0; | 7595 | out: |
| 7596 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 7597 | list_del_init(&work->list); | ||
| 7598 | btrfs_wait_and_free_delalloc_work(work); | ||
| 7599 | } | ||
| 7600 | return ret; | ||
| 7384 | } | 7601 | } |
| 7385 | 7602 | ||
| 7386 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 7603 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
| @@ -7400,7 +7617,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 7400 | unsigned long ptr; | 7617 | unsigned long ptr; |
| 7401 | struct btrfs_file_extent_item *ei; | 7618 | struct btrfs_file_extent_item *ei; |
| 7402 | struct extent_buffer *leaf; | 7619 | struct extent_buffer *leaf; |
| 7403 | unsigned long nr = 0; | ||
| 7404 | 7620 | ||
| 7405 | name_len = strlen(symname) + 1; | 7621 | name_len = strlen(symname) + 1; |
| 7406 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 7622 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
| @@ -7498,13 +7714,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 7498 | out_unlock: | 7714 | out_unlock: |
| 7499 | if (!err) | 7715 | if (!err) |
| 7500 | d_instantiate(dentry, inode); | 7716 | d_instantiate(dentry, inode); |
| 7501 | nr = trans->blocks_used; | ||
| 7502 | btrfs_end_transaction(trans, root); | 7717 | btrfs_end_transaction(trans, root); |
| 7503 | if (drop_inode) { | 7718 | if (drop_inode) { |
| 7504 | inode_dec_link_count(inode); | 7719 | inode_dec_link_count(inode); |
| 7505 | iput(inode); | 7720 | iput(inode); |
| 7506 | } | 7721 | } |
| 7507 | btrfs_btree_balance_dirty(root, nr); | 7722 | btrfs_btree_balance_dirty(root); |
| 7508 | return err; | 7723 | return err; |
| 7509 | } | 7724 | } |
| 7510 | 7725 | ||
| @@ -7513,6 +7728,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
| 7513 | loff_t actual_len, u64 *alloc_hint, | 7728 | loff_t actual_len, u64 *alloc_hint, |
| 7514 | struct btrfs_trans_handle *trans) | 7729 | struct btrfs_trans_handle *trans) |
| 7515 | { | 7730 | { |
| 7731 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 7732 | struct extent_map *em; | ||
| 7516 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7733 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 7517 | struct btrfs_key ins; | 7734 | struct btrfs_key ins; |
| 7518 | u64 cur_offset = start; | 7735 | u64 cur_offset = start; |
| @@ -7553,6 +7770,38 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
| 7553 | btrfs_drop_extent_cache(inode, cur_offset, | 7770 | btrfs_drop_extent_cache(inode, cur_offset, |
| 7554 | cur_offset + ins.offset -1, 0); | 7771 | cur_offset + ins.offset -1, 0); |
| 7555 | 7772 | ||
| 7773 | em = alloc_extent_map(); | ||
| 7774 | if (!em) { | ||
| 7775 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 7776 | &BTRFS_I(inode)->runtime_flags); | ||
| 7777 | goto next; | ||
| 7778 | } | ||
| 7779 | |||
| 7780 | em->start = cur_offset; | ||
| 7781 | em->orig_start = cur_offset; | ||
| 7782 | em->len = ins.offset; | ||
| 7783 | em->block_start = ins.objectid; | ||
| 7784 | em->block_len = ins.offset; | ||
| 7785 | em->orig_block_len = ins.offset; | ||
| 7786 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 7787 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
| 7788 | em->generation = trans->transid; | ||
| 7789 | |||
| 7790 | while (1) { | ||
| 7791 | write_lock(&em_tree->lock); | ||
| 7792 | ret = add_extent_mapping(em_tree, em); | ||
| 7793 | if (!ret) | ||
| 7794 | list_move(&em->list, | ||
| 7795 | &em_tree->modified_extents); | ||
| 7796 | write_unlock(&em_tree->lock); | ||
| 7797 | if (ret != -EEXIST) | ||
| 7798 | break; | ||
| 7799 | btrfs_drop_extent_cache(inode, cur_offset, | ||
| 7800 | cur_offset + ins.offset - 1, | ||
| 7801 | 0); | ||
| 7802 | } | ||
| 7803 | free_extent_map(em); | ||
| 7804 | next: | ||
| 7556 | num_bytes -= ins.offset; | 7805 | num_bytes -= ins.offset; |
| 7557 | cur_offset += ins.offset; | 7806 | cur_offset += ins.offset; |
| 7558 | *alloc_hint = ins.objectid + ins.offset; | 7807 | *alloc_hint = ins.objectid + ins.offset; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 47127c1bd290..4b4516770f05 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -55,6 +55,7 @@ | |||
| 55 | #include "backref.h" | 55 | #include "backref.h" |
| 56 | #include "rcu-string.h" | 56 | #include "rcu-string.h" |
| 57 | #include "send.h" | 57 | #include "send.h" |
| 58 | #include "dev-replace.h" | ||
| 58 | 59 | ||
| 59 | /* Mask out flags that are inappropriate for the given type of inode. */ | 60 | /* Mask out flags that are inappropriate for the given type of inode. */ |
| 60 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) | 61 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) |
| @@ -140,8 +141,11 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) | |||
| 140 | BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; | 141 | BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; |
| 141 | } | 142 | } |
| 142 | 143 | ||
| 143 | if (flags & BTRFS_INODE_NODATACOW) | 144 | if (flags & BTRFS_INODE_NODATACOW) { |
| 144 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | 145 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
| 146 | if (S_ISREG(inode->i_mode)) | ||
| 147 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | ||
| 148 | } | ||
| 145 | 149 | ||
| 146 | btrfs_update_iflags(inode); | 150 | btrfs_update_iflags(inode); |
| 147 | } | 151 | } |
| @@ -181,6 +185,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
| 181 | int ret; | 185 | int ret; |
| 182 | u64 ip_oldflags; | 186 | u64 ip_oldflags; |
| 183 | unsigned int i_oldflags; | 187 | unsigned int i_oldflags; |
| 188 | umode_t mode; | ||
| 184 | 189 | ||
| 185 | if (btrfs_root_readonly(root)) | 190 | if (btrfs_root_readonly(root)) |
| 186 | return -EROFS; | 191 | return -EROFS; |
| @@ -203,6 +208,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
| 203 | 208 | ||
| 204 | ip_oldflags = ip->flags; | 209 | ip_oldflags = ip->flags; |
| 205 | i_oldflags = inode->i_flags; | 210 | i_oldflags = inode->i_flags; |
| 211 | mode = inode->i_mode; | ||
| 206 | 212 | ||
| 207 | flags = btrfs_mask_flags(inode->i_mode, flags); | 213 | flags = btrfs_mask_flags(inode->i_mode, flags); |
| 208 | oldflags = btrfs_flags_to_ioctl(ip->flags); | 214 | oldflags = btrfs_flags_to_ioctl(ip->flags); |
| @@ -237,10 +243,31 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
| 237 | ip->flags |= BTRFS_INODE_DIRSYNC; | 243 | ip->flags |= BTRFS_INODE_DIRSYNC; |
| 238 | else | 244 | else |
| 239 | ip->flags &= ~BTRFS_INODE_DIRSYNC; | 245 | ip->flags &= ~BTRFS_INODE_DIRSYNC; |
| 240 | if (flags & FS_NOCOW_FL) | 246 | if (flags & FS_NOCOW_FL) { |
| 241 | ip->flags |= BTRFS_INODE_NODATACOW; | 247 | if (S_ISREG(mode)) { |
| 242 | else | 248 | /* |
| 243 | ip->flags &= ~BTRFS_INODE_NODATACOW; | 249 | * It's safe to turn csums off here, no extents exist. |
| 250 | * Otherwise we want the flag to reflect the real COW | ||
| 251 | * status of the file and will not set it. | ||
| 252 | */ | ||
| 253 | if (inode->i_size == 0) | ||
| 254 | ip->flags |= BTRFS_INODE_NODATACOW | ||
| 255 | | BTRFS_INODE_NODATASUM; | ||
| 256 | } else { | ||
| 257 | ip->flags |= BTRFS_INODE_NODATACOW; | ||
| 258 | } | ||
| 259 | } else { | ||
| 260 | /* | ||
| 261 | * Revert back under same assuptions as above | ||
| 262 | */ | ||
| 263 | if (S_ISREG(mode)) { | ||
| 264 | if (inode->i_size == 0) | ||
| 265 | ip->flags &= ~(BTRFS_INODE_NODATACOW | ||
| 266 | | BTRFS_INODE_NODATASUM); | ||
| 267 | } else { | ||
| 268 | ip->flags &= ~BTRFS_INODE_NODATACOW; | ||
| 269 | } | ||
| 270 | } | ||
| 244 | 271 | ||
| 245 | /* | 272 | /* |
| 246 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS | 273 | * The COMPRESS flag can only be changed by users, while the NOCOMPRESS |
| @@ -320,7 +347,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) | |||
| 320 | return -EOPNOTSUPP; | 347 | return -EOPNOTSUPP; |
| 321 | if (copy_from_user(&range, arg, sizeof(range))) | 348 | if (copy_from_user(&range, arg, sizeof(range))) |
| 322 | return -EFAULT; | 349 | return -EFAULT; |
| 323 | if (range.start > total_bytes) | 350 | if (range.start > total_bytes || |
| 351 | range.len < fs_info->sb->s_blocksize) | ||
| 324 | return -EINVAL; | 352 | return -EINVAL; |
| 325 | 353 | ||
| 326 | range.len = min(range.len, total_bytes - range.start); | 354 | range.len = min(range.len, total_bytes - range.start); |
| @@ -516,7 +544,8 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 516 | if (!pending_snapshot) | 544 | if (!pending_snapshot) |
| 517 | return -ENOMEM; | 545 | return -ENOMEM; |
| 518 | 546 | ||
| 519 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 547 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
| 548 | BTRFS_BLOCK_RSV_TEMP); | ||
| 520 | pending_snapshot->dentry = dentry; | 549 | pending_snapshot->dentry = dentry; |
| 521 | pending_snapshot->root = root; | 550 | pending_snapshot->root = root; |
| 522 | pending_snapshot->readonly = readonly; | 551 | pending_snapshot->readonly = readonly; |
| @@ -525,7 +554,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 525 | *inherit = NULL; /* take responsibility to free it */ | 554 | *inherit = NULL; /* take responsibility to free it */ |
| 526 | } | 555 | } |
| 527 | 556 | ||
| 528 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 557 | trans = btrfs_start_transaction(root->fs_info->extent_root, 6); |
| 529 | if (IS_ERR(trans)) { | 558 | if (IS_ERR(trans)) { |
| 530 | ret = PTR_ERR(trans); | 559 | ret = PTR_ERR(trans); |
| 531 | goto fail; | 560 | goto fail; |
| @@ -546,7 +575,12 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 546 | ret = btrfs_commit_transaction(trans, | 575 | ret = btrfs_commit_transaction(trans, |
| 547 | root->fs_info->extent_root); | 576 | root->fs_info->extent_root); |
| 548 | } | 577 | } |
| 549 | BUG_ON(ret); | 578 | if (ret) { |
| 579 | /* cleanup_transaction has freed this for us */ | ||
| 580 | if (trans->aborted) | ||
| 581 | pending_snapshot = NULL; | ||
| 582 | goto fail; | ||
| 583 | } | ||
| 550 | 584 | ||
| 551 | ret = pending_snapshot->error; | 585 | ret = pending_snapshot->error; |
| 552 | if (ret) | 586 | if (ret) |
| @@ -614,7 +648,7 @@ static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) | |||
| 614 | return -ENOENT; | 648 | return -ENOENT; |
| 615 | 649 | ||
| 616 | BUG_ON(victim->d_parent->d_inode != dir); | 650 | BUG_ON(victim->d_parent->d_inode != dir); |
| 617 | audit_inode_child(victim, dir); | 651 | audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); |
| 618 | 652 | ||
| 619 | error = inode_permission(dir, MAY_WRITE | MAY_EXEC); | 653 | error = inode_permission(dir, MAY_WRITE | MAY_EXEC); |
| 620 | if (error) | 654 | if (error) |
| @@ -679,6 +713,16 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
| 679 | if (error) | 713 | if (error) |
| 680 | goto out_dput; | 714 | goto out_dput; |
| 681 | 715 | ||
| 716 | /* | ||
| 717 | * even if this name doesn't exist, we may get hash collisions. | ||
| 718 | * check for them now when we can safely fail | ||
| 719 | */ | ||
| 720 | error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, | ||
| 721 | dir->i_ino, name, | ||
| 722 | namelen); | ||
| 723 | if (error) | ||
| 724 | goto out_dput; | ||
| 725 | |||
| 682 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); | 726 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
| 683 | 727 | ||
| 684 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) | 728 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) |
| @@ -1022,8 +1066,8 @@ again: | |||
| 1022 | page_start, page_end - 1, 0, &cached_state); | 1066 | page_start, page_end - 1, 0, &cached_state); |
| 1023 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, | 1067 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, |
| 1024 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | 1068 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
| 1025 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | 1069 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, |
| 1026 | GFP_NOFS); | 1070 | &cached_state, GFP_NOFS); |
| 1027 | 1071 | ||
| 1028 | if (i_done != page_cnt) { | 1072 | if (i_done != page_cnt) { |
| 1029 | spin_lock(&BTRFS_I(inode)->lock); | 1073 | spin_lock(&BTRFS_I(inode)->lock); |
| @@ -1034,8 +1078,8 @@ again: | |||
| 1034 | } | 1078 | } |
| 1035 | 1079 | ||
| 1036 | 1080 | ||
| 1037 | btrfs_set_extent_delalloc(inode, page_start, page_end - 1, | 1081 | set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, |
| 1038 | &cached_state); | 1082 | &cached_state, GFP_NOFS); |
| 1039 | 1083 | ||
| 1040 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1084 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
| 1041 | page_start, page_end - 1, &cached_state, | 1085 | page_start, page_end - 1, &cached_state, |
| @@ -1199,7 +1243,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1199 | } | 1243 | } |
| 1200 | 1244 | ||
| 1201 | defrag_count += ret; | 1245 | defrag_count += ret; |
| 1202 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); | 1246 | balance_dirty_pages_ratelimited(inode->i_mapping); |
| 1203 | mutex_unlock(&inode->i_mutex); | 1247 | mutex_unlock(&inode->i_mutex); |
| 1204 | 1248 | ||
| 1205 | if (newer_than) { | 1249 | if (newer_than) { |
| @@ -1267,12 +1311,13 @@ out_ra: | |||
| 1267 | return ret; | 1311 | return ret; |
| 1268 | } | 1312 | } |
| 1269 | 1313 | ||
| 1270 | static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | 1314 | static noinline int btrfs_ioctl_resize(struct file *file, |
| 1271 | void __user *arg) | 1315 | void __user *arg) |
| 1272 | { | 1316 | { |
| 1273 | u64 new_size; | 1317 | u64 new_size; |
| 1274 | u64 old_size; | 1318 | u64 old_size; |
| 1275 | u64 devid = 1; | 1319 | u64 devid = 1; |
| 1320 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 1276 | struct btrfs_ioctl_vol_args *vol_args; | 1321 | struct btrfs_ioctl_vol_args *vol_args; |
| 1277 | struct btrfs_trans_handle *trans; | 1322 | struct btrfs_trans_handle *trans; |
| 1278 | struct btrfs_device *device = NULL; | 1323 | struct btrfs_device *device = NULL; |
| @@ -1287,13 +1332,17 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 1287 | if (!capable(CAP_SYS_ADMIN)) | 1332 | if (!capable(CAP_SYS_ADMIN)) |
| 1288 | return -EPERM; | 1333 | return -EPERM; |
| 1289 | 1334 | ||
| 1290 | mutex_lock(&root->fs_info->volume_mutex); | 1335 | ret = mnt_want_write_file(file); |
| 1291 | if (root->fs_info->balance_ctl) { | 1336 | if (ret) |
| 1292 | printk(KERN_INFO "btrfs: balance in progress\n"); | 1337 | return ret; |
| 1293 | ret = -EINVAL; | 1338 | |
| 1294 | goto out; | 1339 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
| 1340 | 1)) { | ||
| 1341 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 1342 | return -EINPROGRESS; | ||
| 1295 | } | 1343 | } |
| 1296 | 1344 | ||
| 1345 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 1297 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1346 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 1298 | if (IS_ERR(vol_args)) { | 1347 | if (IS_ERR(vol_args)) { |
| 1299 | ret = PTR_ERR(vol_args); | 1348 | ret = PTR_ERR(vol_args); |
| @@ -1313,7 +1362,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 1313 | printk(KERN_INFO "btrfs: resizing devid %llu\n", | 1362 | printk(KERN_INFO "btrfs: resizing devid %llu\n", |
| 1314 | (unsigned long long)devid); | 1363 | (unsigned long long)devid); |
| 1315 | } | 1364 | } |
| 1316 | device = btrfs_find_device(root, devid, NULL, NULL); | 1365 | device = btrfs_find_device(root->fs_info, devid, NULL, NULL); |
| 1317 | if (!device) { | 1366 | if (!device) { |
| 1318 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", | 1367 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", |
| 1319 | (unsigned long long)devid); | 1368 | (unsigned long long)devid); |
| @@ -1345,6 +1394,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 1345 | } | 1394 | } |
| 1346 | } | 1395 | } |
| 1347 | 1396 | ||
| 1397 | if (device->is_tgtdev_for_dev_replace) { | ||
| 1398 | ret = -EINVAL; | ||
| 1399 | goto out_free; | ||
| 1400 | } | ||
| 1401 | |||
| 1348 | old_size = device->total_bytes; | 1402 | old_size = device->total_bytes; |
| 1349 | 1403 | ||
| 1350 | if (mod < 0) { | 1404 | if (mod < 0) { |
| @@ -1383,12 +1437,14 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 1383 | btrfs_commit_transaction(trans, root); | 1437 | btrfs_commit_transaction(trans, root); |
| 1384 | } else if (new_size < old_size) { | 1438 | } else if (new_size < old_size) { |
| 1385 | ret = btrfs_shrink_device(device, new_size); | 1439 | ret = btrfs_shrink_device(device, new_size); |
| 1386 | } | 1440 | } /* equal, nothing need to do */ |
| 1387 | 1441 | ||
| 1388 | out_free: | 1442 | out_free: |
| 1389 | kfree(vol_args); | 1443 | kfree(vol_args); |
| 1390 | out: | 1444 | out: |
| 1391 | mutex_unlock(&root->fs_info->volume_mutex); | 1445 | mutex_unlock(&root->fs_info->volume_mutex); |
| 1446 | mnt_drop_write_file(file); | ||
| 1447 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
| 1392 | return ret; | 1448 | return ret; |
| 1393 | } | 1449 | } |
| 1394 | 1450 | ||
| @@ -2130,9 +2186,17 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 2130 | if (btrfs_root_readonly(root)) | 2186 | if (btrfs_root_readonly(root)) |
| 2131 | return -EROFS; | 2187 | return -EROFS; |
| 2132 | 2188 | ||
| 2189 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, | ||
| 2190 | 1)) { | ||
| 2191 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 2192 | return -EINPROGRESS; | ||
| 2193 | } | ||
| 2133 | ret = mnt_want_write_file(file); | 2194 | ret = mnt_want_write_file(file); |
| 2134 | if (ret) | 2195 | if (ret) { |
| 2196 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, | ||
| 2197 | 0); | ||
| 2135 | return ret; | 2198 | return ret; |
| 2199 | } | ||
| 2136 | 2200 | ||
| 2137 | switch (inode->i_mode & S_IFMT) { | 2201 | switch (inode->i_mode & S_IFMT) { |
| 2138 | case S_IFDIR: | 2202 | case S_IFDIR: |
| @@ -2184,6 +2248,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 2184 | } | 2248 | } |
| 2185 | out: | 2249 | out: |
| 2186 | mnt_drop_write_file(file); | 2250 | mnt_drop_write_file(file); |
| 2251 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
| 2187 | return ret; | 2252 | return ret; |
| 2188 | } | 2253 | } |
| 2189 | 2254 | ||
| @@ -2195,13 +2260,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
| 2195 | if (!capable(CAP_SYS_ADMIN)) | 2260 | if (!capable(CAP_SYS_ADMIN)) |
| 2196 | return -EPERM; | 2261 | return -EPERM; |
| 2197 | 2262 | ||
| 2198 | mutex_lock(&root->fs_info->volume_mutex); | 2263 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
| 2199 | if (root->fs_info->balance_ctl) { | 2264 | 1)) { |
| 2200 | printk(KERN_INFO "btrfs: balance in progress\n"); | 2265 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); |
| 2201 | ret = -EINVAL; | 2266 | return -EINPROGRESS; |
| 2202 | goto out; | ||
| 2203 | } | 2267 | } |
| 2204 | 2268 | ||
| 2269 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 2205 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2270 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 2206 | if (IS_ERR(vol_args)) { | 2271 | if (IS_ERR(vol_args)) { |
| 2207 | ret = PTR_ERR(vol_args); | 2272 | ret = PTR_ERR(vol_args); |
| @@ -2214,27 +2279,31 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
| 2214 | kfree(vol_args); | 2279 | kfree(vol_args); |
| 2215 | out: | 2280 | out: |
| 2216 | mutex_unlock(&root->fs_info->volume_mutex); | 2281 | mutex_unlock(&root->fs_info->volume_mutex); |
| 2282 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
| 2217 | return ret; | 2283 | return ret; |
| 2218 | } | 2284 | } |
| 2219 | 2285 | ||
| 2220 | static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | 2286 | static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) |
| 2221 | { | 2287 | { |
| 2288 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 2222 | struct btrfs_ioctl_vol_args *vol_args; | 2289 | struct btrfs_ioctl_vol_args *vol_args; |
| 2223 | int ret; | 2290 | int ret; |
| 2224 | 2291 | ||
| 2225 | if (!capable(CAP_SYS_ADMIN)) | 2292 | if (!capable(CAP_SYS_ADMIN)) |
| 2226 | return -EPERM; | 2293 | return -EPERM; |
| 2227 | 2294 | ||
| 2228 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 2295 | ret = mnt_want_write_file(file); |
| 2229 | return -EROFS; | 2296 | if (ret) |
| 2297 | return ret; | ||
| 2230 | 2298 | ||
| 2231 | mutex_lock(&root->fs_info->volume_mutex); | 2299 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
| 2232 | if (root->fs_info->balance_ctl) { | 2300 | 1)) { |
| 2233 | printk(KERN_INFO "btrfs: balance in progress\n"); | 2301 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); |
| 2234 | ret = -EINVAL; | 2302 | mnt_drop_write_file(file); |
| 2235 | goto out; | 2303 | return -EINPROGRESS; |
| 2236 | } | 2304 | } |
| 2237 | 2305 | ||
| 2306 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 2238 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2307 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 2239 | if (IS_ERR(vol_args)) { | 2308 | if (IS_ERR(vol_args)) { |
| 2240 | ret = PTR_ERR(vol_args); | 2309 | ret = PTR_ERR(vol_args); |
| @@ -2247,6 +2316,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
| 2247 | kfree(vol_args); | 2316 | kfree(vol_args); |
| 2248 | out: | 2317 | out: |
| 2249 | mutex_unlock(&root->fs_info->volume_mutex); | 2318 | mutex_unlock(&root->fs_info->volume_mutex); |
| 2319 | mnt_drop_write_file(file); | ||
| 2320 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
| 2250 | return ret; | 2321 | return ret; |
| 2251 | } | 2322 | } |
| 2252 | 2323 | ||
| @@ -2302,7 +2373,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
| 2302 | s_uuid = di_args->uuid; | 2373 | s_uuid = di_args->uuid; |
| 2303 | 2374 | ||
| 2304 | mutex_lock(&fs_devices->device_list_mutex); | 2375 | mutex_lock(&fs_devices->device_list_mutex); |
| 2305 | dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL); | 2376 | dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL); |
| 2306 | mutex_unlock(&fs_devices->device_list_mutex); | 2377 | mutex_unlock(&fs_devices->device_list_mutex); |
| 2307 | 2378 | ||
| 2308 | if (!dev) { | 2379 | if (!dev) { |
| @@ -2351,7 +2422,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2351 | int ret; | 2422 | int ret; |
| 2352 | u64 len = olen; | 2423 | u64 len = olen; |
| 2353 | u64 bs = root->fs_info->sb->s_blocksize; | 2424 | u64 bs = root->fs_info->sb->s_blocksize; |
| 2354 | u64 hint_byte; | ||
| 2355 | 2425 | ||
| 2356 | /* | 2426 | /* |
| 2357 | * TODO: | 2427 | * TODO: |
| @@ -2456,13 +2526,13 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2456 | another, and lock file content */ | 2526 | another, and lock file content */ |
| 2457 | while (1) { | 2527 | while (1) { |
| 2458 | struct btrfs_ordered_extent *ordered; | 2528 | struct btrfs_ordered_extent *ordered; |
| 2459 | lock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2529 | lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
| 2460 | ordered = btrfs_lookup_first_ordered_extent(src, off+len); | 2530 | ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); |
| 2461 | if (!ordered && | 2531 | if (!ordered && |
| 2462 | !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len, | 2532 | !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, |
| 2463 | EXTENT_DELALLOC, 0, NULL)) | 2533 | EXTENT_DELALLOC, 0, NULL)) |
| 2464 | break; | 2534 | break; |
| 2465 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2535 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
| 2466 | if (ordered) | 2536 | if (ordered) |
| 2467 | btrfs_put_ordered_extent(ordered); | 2537 | btrfs_put_ordered_extent(ordered); |
| 2468 | btrfs_wait_ordered_range(src, off, len); | 2538 | btrfs_wait_ordered_range(src, off, len); |
| @@ -2536,7 +2606,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2536 | btrfs_release_path(path); | 2606 | btrfs_release_path(path); |
| 2537 | 2607 | ||
| 2538 | if (key.offset + datal <= off || | 2608 | if (key.offset + datal <= off || |
| 2539 | key.offset >= off+len) | 2609 | key.offset >= off + len - 1) |
| 2540 | goto next; | 2610 | goto next; |
| 2541 | 2611 | ||
| 2542 | memcpy(&new_key, &key, sizeof(new_key)); | 2612 | memcpy(&new_key, &key, sizeof(new_key)); |
| @@ -2574,10 +2644,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2574 | datal -= off - key.offset; | 2644 | datal -= off - key.offset; |
| 2575 | } | 2645 | } |
| 2576 | 2646 | ||
| 2577 | ret = btrfs_drop_extents(trans, inode, | 2647 | ret = btrfs_drop_extents(trans, root, inode, |
| 2578 | new_key.offset, | 2648 | new_key.offset, |
| 2579 | new_key.offset + datal, | 2649 | new_key.offset + datal, |
| 2580 | &hint_byte, 1); | 2650 | 1); |
| 2581 | if (ret) { | 2651 | if (ret) { |
| 2582 | btrfs_abort_transaction(trans, root, | 2652 | btrfs_abort_transaction(trans, root, |
| 2583 | ret); | 2653 | ret); |
| @@ -2637,8 +2707,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2637 | new_key.offset += skip; | 2707 | new_key.offset += skip; |
| 2638 | } | 2708 | } |
| 2639 | 2709 | ||
| 2640 | if (key.offset + datal > off+len) | 2710 | if (key.offset + datal > off + len) |
| 2641 | trim = key.offset + datal - (off+len); | 2711 | trim = key.offset + datal - (off + len); |
| 2642 | 2712 | ||
| 2643 | if (comp && (skip || trim)) { | 2713 | if (comp && (skip || trim)) { |
| 2644 | ret = -EINVAL; | 2714 | ret = -EINVAL; |
| @@ -2648,10 +2718,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2648 | size -= skip + trim; | 2718 | size -= skip + trim; |
| 2649 | datal -= skip + trim; | 2719 | datal -= skip + trim; |
| 2650 | 2720 | ||
| 2651 | ret = btrfs_drop_extents(trans, inode, | 2721 | ret = btrfs_drop_extents(trans, root, inode, |
| 2652 | new_key.offset, | 2722 | new_key.offset, |
| 2653 | new_key.offset + datal, | 2723 | new_key.offset + datal, |
| 2654 | &hint_byte, 1); | 2724 | 1); |
| 2655 | if (ret) { | 2725 | if (ret) { |
| 2656 | btrfs_abort_transaction(trans, root, | 2726 | btrfs_abort_transaction(trans, root, |
| 2657 | ret); | 2727 | ret); |
| @@ -2715,7 +2785,7 @@ next: | |||
| 2715 | ret = 0; | 2785 | ret = 0; |
| 2716 | out: | 2786 | out: |
| 2717 | btrfs_release_path(path); | 2787 | btrfs_release_path(path); |
| 2718 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len); | 2788 | unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); |
| 2719 | out_unlock: | 2789 | out_unlock: |
| 2720 | mutex_unlock(&src->i_mutex); | 2790 | mutex_unlock(&src->i_mutex); |
| 2721 | mutex_unlock(&inode->i_mutex); | 2791 | mutex_unlock(&inode->i_mutex); |
| @@ -2796,12 +2866,19 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2796 | struct btrfs_disk_key disk_key; | 2866 | struct btrfs_disk_key disk_key; |
| 2797 | u64 objectid = 0; | 2867 | u64 objectid = 0; |
| 2798 | u64 dir_id; | 2868 | u64 dir_id; |
| 2869 | int ret; | ||
| 2799 | 2870 | ||
| 2800 | if (!capable(CAP_SYS_ADMIN)) | 2871 | if (!capable(CAP_SYS_ADMIN)) |
| 2801 | return -EPERM; | 2872 | return -EPERM; |
| 2802 | 2873 | ||
| 2803 | if (copy_from_user(&objectid, argp, sizeof(objectid))) | 2874 | ret = mnt_want_write_file(file); |
| 2804 | return -EFAULT; | 2875 | if (ret) |
| 2876 | return ret; | ||
| 2877 | |||
| 2878 | if (copy_from_user(&objectid, argp, sizeof(objectid))) { | ||
| 2879 | ret = -EFAULT; | ||
| 2880 | goto out; | ||
| 2881 | } | ||
| 2805 | 2882 | ||
| 2806 | if (!objectid) | 2883 | if (!objectid) |
| 2807 | objectid = root->root_key.objectid; | 2884 | objectid = root->root_key.objectid; |
| @@ -2811,21 +2888,28 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2811 | location.offset = (u64)-1; | 2888 | location.offset = (u64)-1; |
| 2812 | 2889 | ||
| 2813 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | 2890 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); |
| 2814 | if (IS_ERR(new_root)) | 2891 | if (IS_ERR(new_root)) { |
| 2815 | return PTR_ERR(new_root); | 2892 | ret = PTR_ERR(new_root); |
| 2893 | goto out; | ||
| 2894 | } | ||
| 2816 | 2895 | ||
| 2817 | if (btrfs_root_refs(&new_root->root_item) == 0) | 2896 | if (btrfs_root_refs(&new_root->root_item) == 0) { |
| 2818 | return -ENOENT; | 2897 | ret = -ENOENT; |
| 2898 | goto out; | ||
| 2899 | } | ||
| 2819 | 2900 | ||
| 2820 | path = btrfs_alloc_path(); | 2901 | path = btrfs_alloc_path(); |
| 2821 | if (!path) | 2902 | if (!path) { |
| 2822 | return -ENOMEM; | 2903 | ret = -ENOMEM; |
| 2904 | goto out; | ||
| 2905 | } | ||
| 2823 | path->leave_spinning = 1; | 2906 | path->leave_spinning = 1; |
| 2824 | 2907 | ||
| 2825 | trans = btrfs_start_transaction(root, 1); | 2908 | trans = btrfs_start_transaction(root, 1); |
| 2826 | if (IS_ERR(trans)) { | 2909 | if (IS_ERR(trans)) { |
| 2827 | btrfs_free_path(path); | 2910 | btrfs_free_path(path); |
| 2828 | return PTR_ERR(trans); | 2911 | ret = PTR_ERR(trans); |
| 2912 | goto out; | ||
| 2829 | } | 2913 | } |
| 2830 | 2914 | ||
| 2831 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); | 2915 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); |
| @@ -2836,7 +2920,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2836 | btrfs_end_transaction(trans, root); | 2920 | btrfs_end_transaction(trans, root); |
| 2837 | printk(KERN_ERR "Umm, you don't have the default dir item, " | 2921 | printk(KERN_ERR "Umm, you don't have the default dir item, " |
| 2838 | "this isn't going to work\n"); | 2922 | "this isn't going to work\n"); |
| 2839 | return -ENOENT; | 2923 | ret = -ENOENT; |
| 2924 | goto out; | ||
| 2840 | } | 2925 | } |
| 2841 | 2926 | ||
| 2842 | btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); | 2927 | btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); |
| @@ -2846,12 +2931,13 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2846 | 2931 | ||
| 2847 | btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); | 2932 | btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); |
| 2848 | btrfs_end_transaction(trans, root); | 2933 | btrfs_end_transaction(trans, root); |
| 2849 | 2934 | out: | |
| 2850 | return 0; | 2935 | mnt_drop_write_file(file); |
| 2936 | return ret; | ||
| 2851 | } | 2937 | } |
| 2852 | 2938 | ||
| 2853 | static void get_block_group_info(struct list_head *groups_list, | 2939 | void btrfs_get_block_group_info(struct list_head *groups_list, |
| 2854 | struct btrfs_ioctl_space_info *space) | 2940 | struct btrfs_ioctl_space_info *space) |
| 2855 | { | 2941 | { |
| 2856 | struct btrfs_block_group_cache *block_group; | 2942 | struct btrfs_block_group_cache *block_group; |
| 2857 | 2943 | ||
| @@ -2959,8 +3045,8 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
| 2959 | down_read(&info->groups_sem); | 3045 | down_read(&info->groups_sem); |
| 2960 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { | 3046 | for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { |
| 2961 | if (!list_empty(&info->block_groups[c])) { | 3047 | if (!list_empty(&info->block_groups[c])) { |
| 2962 | get_block_group_info(&info->block_groups[c], | 3048 | btrfs_get_block_group_info( |
| 2963 | &space); | 3049 | &info->block_groups[c], &space); |
| 2964 | memcpy(dest, &space, sizeof(space)); | 3050 | memcpy(dest, &space, sizeof(space)); |
| 2965 | dest++; | 3051 | dest++; |
| 2966 | space_args.total_spaces++; | 3052 | space_args.total_spaces++; |
| @@ -3011,32 +3097,38 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
| 3011 | return 0; | 3097 | return 0; |
| 3012 | } | 3098 | } |
| 3013 | 3099 | ||
| 3014 | static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) | 3100 | static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, |
| 3101 | void __user *argp) | ||
| 3015 | { | 3102 | { |
| 3016 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | ||
| 3017 | struct btrfs_trans_handle *trans; | 3103 | struct btrfs_trans_handle *trans; |
| 3018 | u64 transid; | 3104 | u64 transid; |
| 3019 | int ret; | 3105 | int ret; |
| 3020 | 3106 | ||
| 3021 | trans = btrfs_start_transaction(root, 0); | 3107 | trans = btrfs_attach_transaction(root); |
| 3022 | if (IS_ERR(trans)) | 3108 | if (IS_ERR(trans)) { |
| 3023 | return PTR_ERR(trans); | 3109 | if (PTR_ERR(trans) != -ENOENT) |
| 3110 | return PTR_ERR(trans); | ||
| 3111 | |||
| 3112 | /* No running transaction, don't bother */ | ||
| 3113 | transid = root->fs_info->last_trans_committed; | ||
| 3114 | goto out; | ||
| 3115 | } | ||
| 3024 | transid = trans->transid; | 3116 | transid = trans->transid; |
| 3025 | ret = btrfs_commit_transaction_async(trans, root, 0); | 3117 | ret = btrfs_commit_transaction_async(trans, root, 0); |
| 3026 | if (ret) { | 3118 | if (ret) { |
| 3027 | btrfs_end_transaction(trans, root); | 3119 | btrfs_end_transaction(trans, root); |
| 3028 | return ret; | 3120 | return ret; |
| 3029 | } | 3121 | } |
| 3030 | 3122 | out: | |
| 3031 | if (argp) | 3123 | if (argp) |
| 3032 | if (copy_to_user(argp, &transid, sizeof(transid))) | 3124 | if (copy_to_user(argp, &transid, sizeof(transid))) |
| 3033 | return -EFAULT; | 3125 | return -EFAULT; |
| 3034 | return 0; | 3126 | return 0; |
| 3035 | } | 3127 | } |
| 3036 | 3128 | ||
| 3037 | static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) | 3129 | static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, |
| 3130 | void __user *argp) | ||
| 3038 | { | 3131 | { |
| 3039 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | ||
| 3040 | u64 transid; | 3132 | u64 transid; |
| 3041 | 3133 | ||
| 3042 | if (argp) { | 3134 | if (argp) { |
| @@ -3048,10 +3140,11 @@ static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) | |||
| 3048 | return btrfs_wait_for_commit(root, transid); | 3140 | return btrfs_wait_for_commit(root, transid); |
| 3049 | } | 3141 | } |
| 3050 | 3142 | ||
| 3051 | static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) | 3143 | static long btrfs_ioctl_scrub(struct file *file, void __user *arg) |
| 3052 | { | 3144 | { |
| 3053 | int ret; | 3145 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
| 3054 | struct btrfs_ioctl_scrub_args *sa; | 3146 | struct btrfs_ioctl_scrub_args *sa; |
| 3147 | int ret; | ||
| 3055 | 3148 | ||
| 3056 | if (!capable(CAP_SYS_ADMIN)) | 3149 | if (!capable(CAP_SYS_ADMIN)) |
| 3057 | return -EPERM; | 3150 | return -EPERM; |
| @@ -3060,12 +3153,22 @@ static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) | |||
| 3060 | if (IS_ERR(sa)) | 3153 | if (IS_ERR(sa)) |
| 3061 | return PTR_ERR(sa); | 3154 | return PTR_ERR(sa); |
| 3062 | 3155 | ||
| 3063 | ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end, | 3156 | if (!(sa->flags & BTRFS_SCRUB_READONLY)) { |
| 3064 | &sa->progress, sa->flags & BTRFS_SCRUB_READONLY); | 3157 | ret = mnt_want_write_file(file); |
| 3158 | if (ret) | ||
| 3159 | goto out; | ||
| 3160 | } | ||
| 3161 | |||
| 3162 | ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end, | ||
| 3163 | &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, | ||
| 3164 | 0); | ||
| 3065 | 3165 | ||
| 3066 | if (copy_to_user(arg, sa, sizeof(*sa))) | 3166 | if (copy_to_user(arg, sa, sizeof(*sa))) |
| 3067 | ret = -EFAULT; | 3167 | ret = -EFAULT; |
| 3068 | 3168 | ||
| 3169 | if (!(sa->flags & BTRFS_SCRUB_READONLY)) | ||
| 3170 | mnt_drop_write_file(file); | ||
| 3171 | out: | ||
| 3069 | kfree(sa); | 3172 | kfree(sa); |
| 3070 | return ret; | 3173 | return ret; |
| 3071 | } | 3174 | } |
| @@ -3075,7 +3178,7 @@ static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg) | |||
| 3075 | if (!capable(CAP_SYS_ADMIN)) | 3178 | if (!capable(CAP_SYS_ADMIN)) |
| 3076 | return -EPERM; | 3179 | return -EPERM; |
| 3077 | 3180 | ||
| 3078 | return btrfs_scrub_cancel(root); | 3181 | return btrfs_scrub_cancel(root->fs_info); |
| 3079 | } | 3182 | } |
| 3080 | 3183 | ||
| 3081 | static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, | 3184 | static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, |
| @@ -3124,6 +3227,51 @@ static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, | |||
| 3124 | return ret; | 3227 | return ret; |
| 3125 | } | 3228 | } |
| 3126 | 3229 | ||
| 3230 | static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) | ||
| 3231 | { | ||
| 3232 | struct btrfs_ioctl_dev_replace_args *p; | ||
| 3233 | int ret; | ||
| 3234 | |||
| 3235 | if (!capable(CAP_SYS_ADMIN)) | ||
| 3236 | return -EPERM; | ||
| 3237 | |||
| 3238 | p = memdup_user(arg, sizeof(*p)); | ||
| 3239 | if (IS_ERR(p)) | ||
| 3240 | return PTR_ERR(p); | ||
| 3241 | |||
| 3242 | switch (p->cmd) { | ||
| 3243 | case BTRFS_IOCTL_DEV_REPLACE_CMD_START: | ||
| 3244 | if (atomic_xchg( | ||
| 3245 | &root->fs_info->mutually_exclusive_operation_running, | ||
| 3246 | 1)) { | ||
| 3247 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 3248 | ret = -EINPROGRESS; | ||
| 3249 | } else { | ||
| 3250 | ret = btrfs_dev_replace_start(root, p); | ||
| 3251 | atomic_set( | ||
| 3252 | &root->fs_info->mutually_exclusive_operation_running, | ||
| 3253 | 0); | ||
| 3254 | } | ||
| 3255 | break; | ||
| 3256 | case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: | ||
| 3257 | btrfs_dev_replace_status(root->fs_info, p); | ||
| 3258 | ret = 0; | ||
| 3259 | break; | ||
| 3260 | case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL: | ||
| 3261 | ret = btrfs_dev_replace_cancel(root->fs_info, p); | ||
| 3262 | break; | ||
| 3263 | default: | ||
| 3264 | ret = -EINVAL; | ||
| 3265 | break; | ||
| 3266 | } | ||
| 3267 | |||
| 3268 | if (copy_to_user(arg, p, sizeof(*p))) | ||
| 3269 | ret = -EFAULT; | ||
| 3270 | |||
| 3271 | kfree(p); | ||
| 3272 | return ret; | ||
| 3273 | } | ||
| 3274 | |||
| 3127 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | 3275 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) |
| 3128 | { | 3276 | { |
| 3129 | int ret = 0; | 3277 | int ret = 0; |
| @@ -3208,11 +3356,9 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
| 3208 | { | 3356 | { |
| 3209 | int ret = 0; | 3357 | int ret = 0; |
| 3210 | int size; | 3358 | int size; |
| 3211 | u64 extent_item_pos; | ||
| 3212 | struct btrfs_ioctl_logical_ino_args *loi; | 3359 | struct btrfs_ioctl_logical_ino_args *loi; |
| 3213 | struct btrfs_data_container *inodes = NULL; | 3360 | struct btrfs_data_container *inodes = NULL; |
| 3214 | struct btrfs_path *path = NULL; | 3361 | struct btrfs_path *path = NULL; |
| 3215 | struct btrfs_key key; | ||
| 3216 | 3362 | ||
| 3217 | if (!capable(CAP_SYS_ADMIN)) | 3363 | if (!capable(CAP_SYS_ADMIN)) |
| 3218 | return -EPERM; | 3364 | return -EPERM; |
| @@ -3230,7 +3376,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
| 3230 | goto out; | 3376 | goto out; |
| 3231 | } | 3377 | } |
| 3232 | 3378 | ||
| 3233 | size = min_t(u32, loi->size, 4096); | 3379 | size = min_t(u32, loi->size, 64 * 1024); |
| 3234 | inodes = init_data_container(size); | 3380 | inodes = init_data_container(size); |
| 3235 | if (IS_ERR(inodes)) { | 3381 | if (IS_ERR(inodes)) { |
| 3236 | ret = PTR_ERR(inodes); | 3382 | ret = PTR_ERR(inodes); |
| @@ -3238,22 +3384,13 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
| 3238 | goto out; | 3384 | goto out; |
| 3239 | } | 3385 | } |
| 3240 | 3386 | ||
| 3241 | ret = extent_from_logical(root->fs_info, loi->logical, path, &key); | 3387 | ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path, |
| 3242 | btrfs_release_path(path); | 3388 | build_ino_list, inodes); |
| 3243 | 3389 | if (ret == -EINVAL) | |
| 3244 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) | ||
| 3245 | ret = -ENOENT; | 3390 | ret = -ENOENT; |
| 3246 | if (ret < 0) | 3391 | if (ret < 0) |
| 3247 | goto out; | 3392 | goto out; |
| 3248 | 3393 | ||
| 3249 | extent_item_pos = loi->logical - key.objectid; | ||
| 3250 | ret = iterate_extent_inodes(root->fs_info, key.objectid, | ||
| 3251 | extent_item_pos, 0, build_ino_list, | ||
| 3252 | inodes); | ||
| 3253 | |||
| 3254 | if (ret < 0) | ||
| 3255 | goto out; | ||
| 3256 | |||
| 3257 | ret = copy_to_user((void *)(unsigned long)loi->inodes, | 3394 | ret = copy_to_user((void *)(unsigned long)loi->inodes, |
| 3258 | (void *)(unsigned long)inodes, size); | 3395 | (void *)(unsigned long)inodes, size); |
| 3259 | if (ret) | 3396 | if (ret) |
| @@ -3261,7 +3398,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
| 3261 | 3398 | ||
| 3262 | out: | 3399 | out: |
| 3263 | btrfs_free_path(path); | 3400 | btrfs_free_path(path); |
| 3264 | kfree(inodes); | 3401 | vfree(inodes); |
| 3265 | kfree(loi); | 3402 | kfree(loi); |
| 3266 | 3403 | ||
| 3267 | return ret; | 3404 | return ret; |
| @@ -3301,6 +3438,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
| 3301 | struct btrfs_ioctl_balance_args *bargs; | 3438 | struct btrfs_ioctl_balance_args *bargs; |
| 3302 | struct btrfs_balance_control *bctl; | 3439 | struct btrfs_balance_control *bctl; |
| 3303 | int ret; | 3440 | int ret; |
| 3441 | int need_to_clear_lock = 0; | ||
| 3304 | 3442 | ||
| 3305 | if (!capable(CAP_SYS_ADMIN)) | 3443 | if (!capable(CAP_SYS_ADMIN)) |
| 3306 | return -EPERM; | 3444 | return -EPERM; |
| @@ -3336,10 +3474,13 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
| 3336 | bargs = NULL; | 3474 | bargs = NULL; |
| 3337 | } | 3475 | } |
| 3338 | 3476 | ||
| 3339 | if (fs_info->balance_ctl) { | 3477 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
| 3478 | 1)) { | ||
| 3479 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 3340 | ret = -EINPROGRESS; | 3480 | ret = -EINPROGRESS; |
| 3341 | goto out_bargs; | 3481 | goto out_bargs; |
| 3342 | } | 3482 | } |
| 3483 | need_to_clear_lock = 1; | ||
| 3343 | 3484 | ||
| 3344 | bctl = kzalloc(sizeof(*bctl), GFP_NOFS); | 3485 | bctl = kzalloc(sizeof(*bctl), GFP_NOFS); |
| 3345 | if (!bctl) { | 3486 | if (!bctl) { |
| @@ -3373,6 +3514,9 @@ do_balance: | |||
| 3373 | out_bargs: | 3514 | out_bargs: |
| 3374 | kfree(bargs); | 3515 | kfree(bargs); |
| 3375 | out: | 3516 | out: |
| 3517 | if (need_to_clear_lock) | ||
| 3518 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, | ||
| 3519 | 0); | ||
| 3376 | mutex_unlock(&fs_info->balance_mutex); | 3520 | mutex_unlock(&fs_info->balance_mutex); |
| 3377 | mutex_unlock(&fs_info->volume_mutex); | 3521 | mutex_unlock(&fs_info->volume_mutex); |
| 3378 | mnt_drop_write_file(file); | 3522 | mnt_drop_write_file(file); |
| @@ -3427,8 +3571,9 @@ out: | |||
| 3427 | return ret; | 3571 | return ret; |
| 3428 | } | 3572 | } |
| 3429 | 3573 | ||
| 3430 | static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) | 3574 | static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) |
| 3431 | { | 3575 | { |
| 3576 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3432 | struct btrfs_ioctl_quota_ctl_args *sa; | 3577 | struct btrfs_ioctl_quota_ctl_args *sa; |
| 3433 | struct btrfs_trans_handle *trans = NULL; | 3578 | struct btrfs_trans_handle *trans = NULL; |
| 3434 | int ret; | 3579 | int ret; |
| @@ -3437,12 +3582,15 @@ static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) | |||
| 3437 | if (!capable(CAP_SYS_ADMIN)) | 3582 | if (!capable(CAP_SYS_ADMIN)) |
| 3438 | return -EPERM; | 3583 | return -EPERM; |
| 3439 | 3584 | ||
| 3440 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 3585 | ret = mnt_want_write_file(file); |
| 3441 | return -EROFS; | 3586 | if (ret) |
| 3587 | return ret; | ||
| 3442 | 3588 | ||
| 3443 | sa = memdup_user(arg, sizeof(*sa)); | 3589 | sa = memdup_user(arg, sizeof(*sa)); |
| 3444 | if (IS_ERR(sa)) | 3590 | if (IS_ERR(sa)) { |
| 3445 | return PTR_ERR(sa); | 3591 | ret = PTR_ERR(sa); |
| 3592 | goto drop_write; | ||
| 3593 | } | ||
| 3446 | 3594 | ||
| 3447 | if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { | 3595 | if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { |
| 3448 | trans = btrfs_start_transaction(root, 2); | 3596 | trans = btrfs_start_transaction(root, 2); |
| @@ -3475,14 +3623,16 @@ static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) | |||
| 3475 | if (err && !ret) | 3623 | if (err && !ret) |
| 3476 | ret = err; | 3624 | ret = err; |
| 3477 | } | 3625 | } |
| 3478 | |||
| 3479 | out: | 3626 | out: |
| 3480 | kfree(sa); | 3627 | kfree(sa); |
| 3628 | drop_write: | ||
| 3629 | mnt_drop_write_file(file); | ||
| 3481 | return ret; | 3630 | return ret; |
| 3482 | } | 3631 | } |
| 3483 | 3632 | ||
| 3484 | static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) | 3633 | static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) |
| 3485 | { | 3634 | { |
| 3635 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3486 | struct btrfs_ioctl_qgroup_assign_args *sa; | 3636 | struct btrfs_ioctl_qgroup_assign_args *sa; |
| 3487 | struct btrfs_trans_handle *trans; | 3637 | struct btrfs_trans_handle *trans; |
| 3488 | int ret; | 3638 | int ret; |
| @@ -3491,12 +3641,15 @@ static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) | |||
| 3491 | if (!capable(CAP_SYS_ADMIN)) | 3641 | if (!capable(CAP_SYS_ADMIN)) |
| 3492 | return -EPERM; | 3642 | return -EPERM; |
| 3493 | 3643 | ||
| 3494 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 3644 | ret = mnt_want_write_file(file); |
| 3495 | return -EROFS; | 3645 | if (ret) |
| 3646 | return ret; | ||
| 3496 | 3647 | ||
| 3497 | sa = memdup_user(arg, sizeof(*sa)); | 3648 | sa = memdup_user(arg, sizeof(*sa)); |
| 3498 | if (IS_ERR(sa)) | 3649 | if (IS_ERR(sa)) { |
| 3499 | return PTR_ERR(sa); | 3650 | ret = PTR_ERR(sa); |
| 3651 | goto drop_write; | ||
| 3652 | } | ||
| 3500 | 3653 | ||
| 3501 | trans = btrfs_join_transaction(root); | 3654 | trans = btrfs_join_transaction(root); |
| 3502 | if (IS_ERR(trans)) { | 3655 | if (IS_ERR(trans)) { |
| @@ -3519,11 +3672,14 @@ static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) | |||
| 3519 | 3672 | ||
| 3520 | out: | 3673 | out: |
| 3521 | kfree(sa); | 3674 | kfree(sa); |
| 3675 | drop_write: | ||
| 3676 | mnt_drop_write_file(file); | ||
| 3522 | return ret; | 3677 | return ret; |
| 3523 | } | 3678 | } |
| 3524 | 3679 | ||
| 3525 | static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) | 3680 | static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) |
| 3526 | { | 3681 | { |
| 3682 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3527 | struct btrfs_ioctl_qgroup_create_args *sa; | 3683 | struct btrfs_ioctl_qgroup_create_args *sa; |
| 3528 | struct btrfs_trans_handle *trans; | 3684 | struct btrfs_trans_handle *trans; |
| 3529 | int ret; | 3685 | int ret; |
| @@ -3532,12 +3688,15 @@ static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) | |||
| 3532 | if (!capable(CAP_SYS_ADMIN)) | 3688 | if (!capable(CAP_SYS_ADMIN)) |
| 3533 | return -EPERM; | 3689 | return -EPERM; |
| 3534 | 3690 | ||
| 3535 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 3691 | ret = mnt_want_write_file(file); |
| 3536 | return -EROFS; | 3692 | if (ret) |
| 3693 | return ret; | ||
| 3537 | 3694 | ||
| 3538 | sa = memdup_user(arg, sizeof(*sa)); | 3695 | sa = memdup_user(arg, sizeof(*sa)); |
| 3539 | if (IS_ERR(sa)) | 3696 | if (IS_ERR(sa)) { |
| 3540 | return PTR_ERR(sa); | 3697 | ret = PTR_ERR(sa); |
| 3698 | goto drop_write; | ||
| 3699 | } | ||
| 3541 | 3700 | ||
| 3542 | trans = btrfs_join_transaction(root); | 3701 | trans = btrfs_join_transaction(root); |
| 3543 | if (IS_ERR(trans)) { | 3702 | if (IS_ERR(trans)) { |
| @@ -3559,11 +3718,14 @@ static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) | |||
| 3559 | 3718 | ||
| 3560 | out: | 3719 | out: |
| 3561 | kfree(sa); | 3720 | kfree(sa); |
| 3721 | drop_write: | ||
| 3722 | mnt_drop_write_file(file); | ||
| 3562 | return ret; | 3723 | return ret; |
| 3563 | } | 3724 | } |
| 3564 | 3725 | ||
| 3565 | static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) | 3726 | static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) |
| 3566 | { | 3727 | { |
| 3728 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3567 | struct btrfs_ioctl_qgroup_limit_args *sa; | 3729 | struct btrfs_ioctl_qgroup_limit_args *sa; |
| 3568 | struct btrfs_trans_handle *trans; | 3730 | struct btrfs_trans_handle *trans; |
| 3569 | int ret; | 3731 | int ret; |
| @@ -3573,12 +3735,15 @@ static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) | |||
| 3573 | if (!capable(CAP_SYS_ADMIN)) | 3735 | if (!capable(CAP_SYS_ADMIN)) |
| 3574 | return -EPERM; | 3736 | return -EPERM; |
| 3575 | 3737 | ||
| 3576 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 3738 | ret = mnt_want_write_file(file); |
| 3577 | return -EROFS; | 3739 | if (ret) |
| 3740 | return ret; | ||
| 3578 | 3741 | ||
| 3579 | sa = memdup_user(arg, sizeof(*sa)); | 3742 | sa = memdup_user(arg, sizeof(*sa)); |
| 3580 | if (IS_ERR(sa)) | 3743 | if (IS_ERR(sa)) { |
| 3581 | return PTR_ERR(sa); | 3744 | ret = PTR_ERR(sa); |
| 3745 | goto drop_write; | ||
| 3746 | } | ||
| 3582 | 3747 | ||
| 3583 | trans = btrfs_join_transaction(root); | 3748 | trans = btrfs_join_transaction(root); |
| 3584 | if (IS_ERR(trans)) { | 3749 | if (IS_ERR(trans)) { |
| @@ -3601,6 +3766,8 @@ static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) | |||
| 3601 | 3766 | ||
| 3602 | out: | 3767 | out: |
| 3603 | kfree(sa); | 3768 | kfree(sa); |
| 3769 | drop_write: | ||
| 3770 | mnt_drop_write_file(file); | ||
| 3604 | return ret; | 3771 | return ret; |
| 3605 | } | 3772 | } |
| 3606 | 3773 | ||
| @@ -3721,11 +3888,11 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 3721 | case BTRFS_IOC_DEFRAG_RANGE: | 3888 | case BTRFS_IOC_DEFRAG_RANGE: |
| 3722 | return btrfs_ioctl_defrag(file, argp); | 3889 | return btrfs_ioctl_defrag(file, argp); |
| 3723 | case BTRFS_IOC_RESIZE: | 3890 | case BTRFS_IOC_RESIZE: |
| 3724 | return btrfs_ioctl_resize(root, argp); | 3891 | return btrfs_ioctl_resize(file, argp); |
| 3725 | case BTRFS_IOC_ADD_DEV: | 3892 | case BTRFS_IOC_ADD_DEV: |
| 3726 | return btrfs_ioctl_add_dev(root, argp); | 3893 | return btrfs_ioctl_add_dev(root, argp); |
| 3727 | case BTRFS_IOC_RM_DEV: | 3894 | case BTRFS_IOC_RM_DEV: |
| 3728 | return btrfs_ioctl_rm_dev(root, argp); | 3895 | return btrfs_ioctl_rm_dev(file, argp); |
| 3729 | case BTRFS_IOC_FS_INFO: | 3896 | case BTRFS_IOC_FS_INFO: |
| 3730 | return btrfs_ioctl_fs_info(root, argp); | 3897 | return btrfs_ioctl_fs_info(root, argp); |
| 3731 | case BTRFS_IOC_DEV_INFO: | 3898 | case BTRFS_IOC_DEV_INFO: |
| @@ -3754,11 +3921,11 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 3754 | btrfs_sync_fs(file->f_dentry->d_sb, 1); | 3921 | btrfs_sync_fs(file->f_dentry->d_sb, 1); |
| 3755 | return 0; | 3922 | return 0; |
| 3756 | case BTRFS_IOC_START_SYNC: | 3923 | case BTRFS_IOC_START_SYNC: |
| 3757 | return btrfs_ioctl_start_sync(file, argp); | 3924 | return btrfs_ioctl_start_sync(root, argp); |
| 3758 | case BTRFS_IOC_WAIT_SYNC: | 3925 | case BTRFS_IOC_WAIT_SYNC: |
| 3759 | return btrfs_ioctl_wait_sync(file, argp); | 3926 | return btrfs_ioctl_wait_sync(root, argp); |
| 3760 | case BTRFS_IOC_SCRUB: | 3927 | case BTRFS_IOC_SCRUB: |
| 3761 | return btrfs_ioctl_scrub(root, argp); | 3928 | return btrfs_ioctl_scrub(file, argp); |
| 3762 | case BTRFS_IOC_SCRUB_CANCEL: | 3929 | case BTRFS_IOC_SCRUB_CANCEL: |
| 3763 | return btrfs_ioctl_scrub_cancel(root, argp); | 3930 | return btrfs_ioctl_scrub_cancel(root, argp); |
| 3764 | case BTRFS_IOC_SCRUB_PROGRESS: | 3931 | case BTRFS_IOC_SCRUB_PROGRESS: |
| @@ -3776,13 +3943,15 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 3776 | case BTRFS_IOC_GET_DEV_STATS: | 3943 | case BTRFS_IOC_GET_DEV_STATS: |
| 3777 | return btrfs_ioctl_get_dev_stats(root, argp); | 3944 | return btrfs_ioctl_get_dev_stats(root, argp); |
| 3778 | case BTRFS_IOC_QUOTA_CTL: | 3945 | case BTRFS_IOC_QUOTA_CTL: |
| 3779 | return btrfs_ioctl_quota_ctl(root, argp); | 3946 | return btrfs_ioctl_quota_ctl(file, argp); |
| 3780 | case BTRFS_IOC_QGROUP_ASSIGN: | 3947 | case BTRFS_IOC_QGROUP_ASSIGN: |
| 3781 | return btrfs_ioctl_qgroup_assign(root, argp); | 3948 | return btrfs_ioctl_qgroup_assign(file, argp); |
| 3782 | case BTRFS_IOC_QGROUP_CREATE: | 3949 | case BTRFS_IOC_QGROUP_CREATE: |
| 3783 | return btrfs_ioctl_qgroup_create(root, argp); | 3950 | return btrfs_ioctl_qgroup_create(file, argp); |
| 3784 | case BTRFS_IOC_QGROUP_LIMIT: | 3951 | case BTRFS_IOC_QGROUP_LIMIT: |
| 3785 | return btrfs_ioctl_qgroup_limit(root, argp); | 3952 | return btrfs_ioctl_qgroup_limit(file, argp); |
| 3953 | case BTRFS_IOC_DEV_REPLACE: | ||
| 3954 | return btrfs_ioctl_dev_replace(root, argp); | ||
| 3786 | } | 3955 | } |
| 3787 | 3956 | ||
| 3788 | return -ENOTTY; | 3957 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 731e2875ab93..dabca9cc8c2e 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
| @@ -30,6 +30,8 @@ struct btrfs_ioctl_vol_args { | |||
| 30 | char name[BTRFS_PATH_NAME_MAX + 1]; | 30 | char name[BTRFS_PATH_NAME_MAX + 1]; |
| 31 | }; | 31 | }; |
| 32 | 32 | ||
| 33 | #define BTRFS_DEVICE_PATH_NAME_MAX 1024 | ||
| 34 | |||
| 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | 35 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
| 34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | 36 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) |
| 35 | #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) | 37 | #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) |
| @@ -123,7 +125,48 @@ struct btrfs_ioctl_scrub_args { | |||
| 123 | __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; | 125 | __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; |
| 124 | }; | 126 | }; |
| 125 | 127 | ||
| 126 | #define BTRFS_DEVICE_PATH_NAME_MAX 1024 | 128 | #define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 |
| 129 | #define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 | ||
| 130 | struct btrfs_ioctl_dev_replace_start_params { | ||
| 131 | __u64 srcdevid; /* in, if 0, use srcdev_name instead */ | ||
| 132 | __u64 cont_reading_from_srcdev_mode; /* in, see #define | ||
| 133 | * above */ | ||
| 134 | __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ | ||
| 135 | __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ | ||
| 136 | }; | ||
| 137 | |||
| 138 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 | ||
| 139 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 | ||
| 140 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 | ||
| 141 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 | ||
| 142 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 | ||
| 143 | struct btrfs_ioctl_dev_replace_status_params { | ||
| 144 | __u64 replace_state; /* out, see #define above */ | ||
| 145 | __u64 progress_1000; /* out, 0 <= x <= 1000 */ | ||
| 146 | __u64 time_started; /* out, seconds since 1-Jan-1970 */ | ||
| 147 | __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ | ||
| 148 | __u64 num_write_errors; /* out */ | ||
| 149 | __u64 num_uncorrectable_read_errors; /* out */ | ||
| 150 | }; | ||
| 151 | |||
| 152 | #define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 | ||
| 153 | #define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 | ||
| 154 | #define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 | ||
| 155 | #define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 | ||
| 156 | #define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 | ||
| 157 | #define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 | ||
| 158 | struct btrfs_ioctl_dev_replace_args { | ||
| 159 | __u64 cmd; /* in */ | ||
| 160 | __u64 result; /* out */ | ||
| 161 | |||
| 162 | union { | ||
| 163 | struct btrfs_ioctl_dev_replace_start_params start; | ||
| 164 | struct btrfs_ioctl_dev_replace_status_params status; | ||
| 165 | }; /* in/out */ | ||
| 166 | |||
| 167 | __u64 spare[64]; | ||
| 168 | }; | ||
| 169 | |||
| 127 | struct btrfs_ioctl_dev_info_args { | 170 | struct btrfs_ioctl_dev_info_args { |
| 128 | __u64 devid; /* in/out */ | 171 | __u64 devid; /* in/out */ |
| 129 | __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ | 172 | __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ |
| @@ -453,4 +496,7 @@ struct btrfs_ioctl_send_args { | |||
| 453 | struct btrfs_ioctl_qgroup_limit_args) | 496 | struct btrfs_ioctl_qgroup_limit_args) |
| 454 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ | 497 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ |
| 455 | struct btrfs_ioctl_get_dev_stats) | 498 | struct btrfs_ioctl_get_dev_stats) |
| 499 | #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ | ||
| 500 | struct btrfs_ioctl_dev_replace_args) | ||
| 501 | |||
| 456 | #endif | 502 | #endif |
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h new file mode 100644 index 000000000000..b7816cefbd13 --- /dev/null +++ b/fs/btrfs/math.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | |||
| 2 | /* | ||
| 3 | * Copyright (C) 2012 Fujitsu. All rights reserved. | ||
| 4 | * Written by Miao Xie <miaox@cn.fujitsu.com> | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public | ||
| 8 | * License v2 as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, | ||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 13 | * General Public License for more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public | ||
| 16 | * License along with this program; if not, write to the | ||
| 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 18 | * Boston, MA 021110-1307, USA. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #ifndef __BTRFS_MATH_H | ||
| 22 | #define __BTRFS_MATH_H | ||
| 23 | |||
| 24 | #include <asm/div64.h> | ||
| 25 | |||
| 26 | static inline u64 div_factor(u64 num, int factor) | ||
| 27 | { | ||
| 28 | if (factor == 10) | ||
| 29 | return num; | ||
| 30 | num *= factor; | ||
| 31 | do_div(num, 10); | ||
| 32 | return num; | ||
| 33 | } | ||
| 34 | |||
| 35 | static inline u64 div_factor_fine(u64 num, int factor) | ||
| 36 | { | ||
| 37 | if (factor == 100) | ||
| 38 | return num; | ||
| 39 | num *= factor; | ||
| 40 | do_div(num, 100); | ||
| 41 | return num; | ||
| 42 | } | ||
| 43 | |||
| 44 | #endif | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 051c7fe551dd..f10731297040 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | #include "btrfs_inode.h" | 25 | #include "btrfs_inode.h" |
| 26 | #include "extent_io.h" | 26 | #include "extent_io.h" |
| 27 | 27 | ||
| 28 | static struct kmem_cache *btrfs_ordered_extent_cache; | ||
| 29 | |||
| 28 | static u64 entry_end(struct btrfs_ordered_extent *entry) | 30 | static u64 entry_end(struct btrfs_ordered_extent *entry) |
| 29 | { | 31 | { |
| 30 | if (entry->file_offset + entry->len < entry->file_offset) | 32 | if (entry->file_offset + entry->len < entry->file_offset) |
| @@ -187,7 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 187 | struct btrfs_ordered_extent *entry; | 189 | struct btrfs_ordered_extent *entry; |
| 188 | 190 | ||
| 189 | tree = &BTRFS_I(inode)->ordered_tree; | 191 | tree = &BTRFS_I(inode)->ordered_tree; |
| 190 | entry = kzalloc(sizeof(*entry), GFP_NOFS); | 192 | entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS); |
| 191 | if (!entry) | 193 | if (!entry) |
| 192 | return -ENOMEM; | 194 | return -ENOMEM; |
| 193 | 195 | ||
| @@ -209,6 +211,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 209 | init_waitqueue_head(&entry->wait); | 211 | init_waitqueue_head(&entry->wait); |
| 210 | INIT_LIST_HEAD(&entry->list); | 212 | INIT_LIST_HEAD(&entry->list); |
| 211 | INIT_LIST_HEAD(&entry->root_extent_list); | 213 | INIT_LIST_HEAD(&entry->root_extent_list); |
| 214 | INIT_LIST_HEAD(&entry->work_list); | ||
| 215 | init_completion(&entry->completion); | ||
| 212 | 216 | ||
| 213 | trace_btrfs_ordered_extent_add(inode, entry); | 217 | trace_btrfs_ordered_extent_add(inode, entry); |
| 214 | 218 | ||
| @@ -421,7 +425,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
| 421 | list_del(&sum->list); | 425 | list_del(&sum->list); |
| 422 | kfree(sum); | 426 | kfree(sum); |
| 423 | } | 427 | } |
| 424 | kfree(entry); | 428 | kmem_cache_free(btrfs_ordered_extent_cache, entry); |
| 425 | } | 429 | } |
| 426 | } | 430 | } |
| 427 | 431 | ||
| @@ -462,19 +466,28 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
| 462 | wake_up(&entry->wait); | 466 | wake_up(&entry->wait); |
| 463 | } | 467 | } |
| 464 | 468 | ||
| 469 | static void btrfs_run_ordered_extent_work(struct btrfs_work *work) | ||
| 470 | { | ||
| 471 | struct btrfs_ordered_extent *ordered; | ||
| 472 | |||
| 473 | ordered = container_of(work, struct btrfs_ordered_extent, flush_work); | ||
| 474 | btrfs_start_ordered_extent(ordered->inode, ordered, 1); | ||
| 475 | complete(&ordered->completion); | ||
| 476 | } | ||
| 477 | |||
| 465 | /* | 478 | /* |
| 466 | * wait for all the ordered extents in a root. This is done when balancing | 479 | * wait for all the ordered extents in a root. This is done when balancing |
| 467 | * space between drives. | 480 | * space between drives. |
| 468 | */ | 481 | */ |
| 469 | void btrfs_wait_ordered_extents(struct btrfs_root *root, | 482 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) |
| 470 | int nocow_only, int delay_iput) | ||
| 471 | { | 483 | { |
| 472 | struct list_head splice; | 484 | struct list_head splice, works; |
| 473 | struct list_head *cur; | 485 | struct list_head *cur; |
| 474 | struct btrfs_ordered_extent *ordered; | 486 | struct btrfs_ordered_extent *ordered, *next; |
| 475 | struct inode *inode; | 487 | struct inode *inode; |
| 476 | 488 | ||
| 477 | INIT_LIST_HEAD(&splice); | 489 | INIT_LIST_HEAD(&splice); |
| 490 | INIT_LIST_HEAD(&works); | ||
| 478 | 491 | ||
| 479 | spin_lock(&root->fs_info->ordered_extent_lock); | 492 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 480 | list_splice_init(&root->fs_info->ordered_extents, &splice); | 493 | list_splice_init(&root->fs_info->ordered_extents, &splice); |
| @@ -482,15 +495,6 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, | |||
| 482 | cur = splice.next; | 495 | cur = splice.next; |
| 483 | ordered = list_entry(cur, struct btrfs_ordered_extent, | 496 | ordered = list_entry(cur, struct btrfs_ordered_extent, |
| 484 | root_extent_list); | 497 | root_extent_list); |
| 485 | if (nocow_only && | ||
| 486 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && | ||
| 487 | !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
| 488 | list_move(&ordered->root_extent_list, | ||
| 489 | &root->fs_info->ordered_extents); | ||
| 490 | cond_resched_lock(&root->fs_info->ordered_extent_lock); | ||
| 491 | continue; | ||
| 492 | } | ||
| 493 | |||
| 494 | list_del_init(&ordered->root_extent_list); | 498 | list_del_init(&ordered->root_extent_list); |
| 495 | atomic_inc(&ordered->refs); | 499 | atomic_inc(&ordered->refs); |
| 496 | 500 | ||
| @@ -502,19 +506,32 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, | |||
| 502 | spin_unlock(&root->fs_info->ordered_extent_lock); | 506 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 503 | 507 | ||
| 504 | if (inode) { | 508 | if (inode) { |
| 505 | btrfs_start_ordered_extent(inode, ordered, 1); | 509 | ordered->flush_work.func = btrfs_run_ordered_extent_work; |
| 506 | btrfs_put_ordered_extent(ordered); | 510 | list_add_tail(&ordered->work_list, &works); |
| 507 | if (delay_iput) | 511 | btrfs_queue_worker(&root->fs_info->flush_workers, |
| 508 | btrfs_add_delayed_iput(inode); | 512 | &ordered->flush_work); |
| 509 | else | ||
| 510 | iput(inode); | ||
| 511 | } else { | 513 | } else { |
| 512 | btrfs_put_ordered_extent(ordered); | 514 | btrfs_put_ordered_extent(ordered); |
| 513 | } | 515 | } |
| 514 | 516 | ||
| 517 | cond_resched(); | ||
| 515 | spin_lock(&root->fs_info->ordered_extent_lock); | 518 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 516 | } | 519 | } |
| 517 | spin_unlock(&root->fs_info->ordered_extent_lock); | 520 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 521 | |||
| 522 | list_for_each_entry_safe(ordered, next, &works, work_list) { | ||
| 523 | list_del_init(&ordered->work_list); | ||
| 524 | wait_for_completion(&ordered->completion); | ||
| 525 | |||
| 526 | inode = ordered->inode; | ||
| 527 | btrfs_put_ordered_extent(ordered); | ||
| 528 | if (delay_iput) | ||
| 529 | btrfs_add_delayed_iput(inode); | ||
| 530 | else | ||
| 531 | iput(inode); | ||
| 532 | |||
| 533 | cond_resched(); | ||
| 534 | } | ||
| 518 | } | 535 | } |
| 519 | 536 | ||
| 520 | /* | 537 | /* |
| @@ -527,13 +544,17 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, | |||
| 527 | * extra check to make sure the ordered operation list really is empty | 544 | * extra check to make sure the ordered operation list really is empty |
| 528 | * before we return | 545 | * before we return |
| 529 | */ | 546 | */ |
| 530 | void btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | 547 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) |
| 531 | { | 548 | { |
| 532 | struct btrfs_inode *btrfs_inode; | 549 | struct btrfs_inode *btrfs_inode; |
| 533 | struct inode *inode; | 550 | struct inode *inode; |
| 534 | struct list_head splice; | 551 | struct list_head splice; |
| 552 | struct list_head works; | ||
| 553 | struct btrfs_delalloc_work *work, *next; | ||
| 554 | int ret = 0; | ||
| 535 | 555 | ||
| 536 | INIT_LIST_HEAD(&splice); | 556 | INIT_LIST_HEAD(&splice); |
| 557 | INIT_LIST_HEAD(&works); | ||
| 537 | 558 | ||
| 538 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 559 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
| 539 | spin_lock(&root->fs_info->ordered_extent_lock); | 560 | spin_lock(&root->fs_info->ordered_extent_lock); |
| @@ -541,6 +562,7 @@ again: | |||
| 541 | list_splice_init(&root->fs_info->ordered_operations, &splice); | 562 | list_splice_init(&root->fs_info->ordered_operations, &splice); |
| 542 | 563 | ||
| 543 | while (!list_empty(&splice)) { | 564 | while (!list_empty(&splice)) { |
| 565 | |||
| 544 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 566 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
| 545 | ordered_operations); | 567 | ordered_operations); |
| 546 | 568 | ||
| @@ -557,15 +579,26 @@ again: | |||
| 557 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 579 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
| 558 | &root->fs_info->ordered_operations); | 580 | &root->fs_info->ordered_operations); |
| 559 | } | 581 | } |
| 582 | |||
| 583 | if (!inode) | ||
| 584 | continue; | ||
| 560 | spin_unlock(&root->fs_info->ordered_extent_lock); | 585 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 561 | 586 | ||
| 562 | if (inode) { | 587 | work = btrfs_alloc_delalloc_work(inode, wait, 1); |
| 563 | if (wait) | 588 | if (!work) { |
| 564 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 589 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) |
| 565 | else | 590 | list_add_tail(&btrfs_inode->ordered_operations, |
| 566 | filemap_flush(inode->i_mapping); | 591 | &splice); |
| 567 | btrfs_add_delayed_iput(inode); | 592 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 593 | list_splice_tail(&splice, | ||
| 594 | &root->fs_info->ordered_operations); | ||
| 595 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
| 596 | ret = -ENOMEM; | ||
| 597 | goto out; | ||
| 568 | } | 598 | } |
| 599 | list_add_tail(&work->list, &works); | ||
| 600 | btrfs_queue_worker(&root->fs_info->flush_workers, | ||
| 601 | &work->work); | ||
| 569 | 602 | ||
| 570 | cond_resched(); | 603 | cond_resched(); |
| 571 | spin_lock(&root->fs_info->ordered_extent_lock); | 604 | spin_lock(&root->fs_info->ordered_extent_lock); |
| @@ -574,7 +607,13 @@ again: | |||
| 574 | goto again; | 607 | goto again; |
| 575 | 608 | ||
| 576 | spin_unlock(&root->fs_info->ordered_extent_lock); | 609 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 610 | out: | ||
| 611 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 612 | list_del_init(&work->list); | ||
| 613 | btrfs_wait_and_free_delalloc_work(work); | ||
| 614 | } | ||
| 577 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 615 | mutex_unlock(&root->fs_info->ordered_operations_mutex); |
| 616 | return ret; | ||
| 578 | } | 617 | } |
| 579 | 618 | ||
| 580 | /* | 619 | /* |
| @@ -614,7 +653,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 614 | u64 end; | 653 | u64 end; |
| 615 | u64 orig_end; | 654 | u64 orig_end; |
| 616 | struct btrfs_ordered_extent *ordered; | 655 | struct btrfs_ordered_extent *ordered; |
| 617 | int found; | ||
| 618 | 656 | ||
| 619 | if (start + len < start) { | 657 | if (start + len < start) { |
| 620 | orig_end = INT_LIMIT(loff_t); | 658 | orig_end = INT_LIMIT(loff_t); |
| @@ -650,7 +688,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 650 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); | 688 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); |
| 651 | 689 | ||
| 652 | end = orig_end; | 690 | end = orig_end; |
| 653 | found = 0; | ||
| 654 | while (1) { | 691 | while (1) { |
| 655 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | 692 | ordered = btrfs_lookup_first_ordered_extent(inode, end); |
| 656 | if (!ordered) | 693 | if (!ordered) |
| @@ -663,7 +700,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 663 | btrfs_put_ordered_extent(ordered); | 700 | btrfs_put_ordered_extent(ordered); |
| 664 | break; | 701 | break; |
| 665 | } | 702 | } |
| 666 | found++; | ||
| 667 | btrfs_start_ordered_extent(inode, ordered, 1); | 703 | btrfs_start_ordered_extent(inode, ordered, 1); |
| 668 | end = ordered->file_offset; | 704 | end = ordered->file_offset; |
| 669 | btrfs_put_ordered_extent(ordered); | 705 | btrfs_put_ordered_extent(ordered); |
| @@ -775,7 +811,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 775 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 811 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
| 776 | u64 disk_i_size; | 812 | u64 disk_i_size; |
| 777 | u64 new_i_size; | 813 | u64 new_i_size; |
| 778 | u64 i_size_test; | ||
| 779 | u64 i_size = i_size_read(inode); | 814 | u64 i_size = i_size_read(inode); |
| 780 | struct rb_node *node; | 815 | struct rb_node *node; |
| 781 | struct rb_node *prev = NULL; | 816 | struct rb_node *prev = NULL; |
| @@ -835,55 +870,30 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 835 | break; | 870 | break; |
| 836 | if (test->file_offset >= i_size) | 871 | if (test->file_offset >= i_size) |
| 837 | break; | 872 | break; |
| 838 | if (test->file_offset >= disk_i_size) | 873 | if (test->file_offset >= disk_i_size) { |
| 874 | /* | ||
| 875 | * we don't update disk_i_size now, so record this | ||
| 876 | * undealt i_size. Or we will not know the real | ||
| 877 | * i_size. | ||
| 878 | */ | ||
| 879 | if (test->outstanding_isize < offset) | ||
| 880 | test->outstanding_isize = offset; | ||
| 881 | if (ordered && | ||
| 882 | ordered->outstanding_isize > | ||
| 883 | test->outstanding_isize) | ||
| 884 | test->outstanding_isize = | ||
| 885 | ordered->outstanding_isize; | ||
| 839 | goto out; | 886 | goto out; |
| 840 | } | ||
| 841 | new_i_size = min_t(u64, offset, i_size); | ||
| 842 | |||
| 843 | /* | ||
| 844 | * at this point, we know we can safely update i_size to at least | ||
| 845 | * the offset from this ordered extent. But, we need to | ||
| 846 | * walk forward and see if ios from higher up in the file have | ||
| 847 | * finished. | ||
| 848 | */ | ||
| 849 | if (ordered) { | ||
| 850 | node = rb_next(&ordered->rb_node); | ||
| 851 | } else { | ||
| 852 | if (prev) | ||
| 853 | node = rb_next(prev); | ||
| 854 | else | ||
| 855 | node = rb_first(&tree->tree); | ||
| 856 | } | ||
| 857 | |||
| 858 | /* | ||
| 859 | * We are looking for an area between our current extent and the next | ||
| 860 | * ordered extent to update the i_size to. There are 3 cases here | ||
| 861 | * | ||
| 862 | * 1) We don't actually have anything and we can update to i_size. | ||
| 863 | * 2) We have stuff but they already did their i_size update so again we | ||
| 864 | * can just update to i_size. | ||
| 865 | * 3) We have an outstanding ordered extent so the most we can update | ||
| 866 | * our disk_i_size to is the start of the next offset. | ||
| 867 | */ | ||
| 868 | i_size_test = i_size; | ||
| 869 | for (; node; node = rb_next(node)) { | ||
| 870 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 871 | |||
| 872 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
| 873 | continue; | ||
| 874 | if (test->file_offset > offset) { | ||
| 875 | i_size_test = test->file_offset; | ||
| 876 | break; | ||
| 877 | } | 887 | } |
| 878 | } | 888 | } |
| 889 | new_i_size = min_t(u64, offset, i_size); | ||
| 879 | 890 | ||
| 880 | /* | 891 | /* |
| 881 | * i_size_test is the end of a region after this ordered | 892 | * Some ordered extents may completed before the current one, and |
| 882 | * extent where there are no ordered extents, we can safely set | 893 | * we hold the real i_size in ->outstanding_isize. |
| 883 | * disk_i_size to this. | ||
| 884 | */ | 894 | */ |
| 885 | if (i_size_test > offset) | 895 | if (ordered && ordered->outstanding_isize > new_i_size) |
| 886 | new_i_size = min_t(u64, i_size_test, i_size); | 896 | new_i_size = min_t(u64, ordered->outstanding_isize, i_size); |
| 887 | BTRFS_I(inode)->disk_i_size = new_i_size; | 897 | BTRFS_I(inode)->disk_i_size = new_i_size; |
| 888 | ret = 0; | 898 | ret = 0; |
| 889 | out: | 899 | out: |
| @@ -968,15 +978,6 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 968 | if (last_mod < root->fs_info->last_trans_committed) | 978 | if (last_mod < root->fs_info->last_trans_committed) |
| 969 | return; | 979 | return; |
| 970 | 980 | ||
| 971 | /* | ||
| 972 | * the transaction is already committing. Just start the IO and | ||
| 973 | * don't bother with all of this list nonsense | ||
| 974 | */ | ||
| 975 | if (trans && root->fs_info->running_transaction->blocked) { | ||
| 976 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
| 977 | return; | ||
| 978 | } | ||
| 979 | |||
| 980 | spin_lock(&root->fs_info->ordered_extent_lock); | 981 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 981 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | 982 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { |
| 982 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 983 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
| @@ -984,3 +985,21 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 984 | } | 985 | } |
| 985 | spin_unlock(&root->fs_info->ordered_extent_lock); | 986 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 986 | } | 987 | } |
| 988 | |||
| 989 | int __init ordered_data_init(void) | ||
| 990 | { | ||
| 991 | btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", | ||
| 992 | sizeof(struct btrfs_ordered_extent), 0, | ||
| 993 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
| 994 | NULL); | ||
| 995 | if (!btrfs_ordered_extent_cache) | ||
| 996 | return -ENOMEM; | ||
| 997 | |||
| 998 | return 0; | ||
| 999 | } | ||
| 1000 | |||
| 1001 | void ordered_data_exit(void) | ||
| 1002 | { | ||
| 1003 | if (btrfs_ordered_extent_cache) | ||
| 1004 | kmem_cache_destroy(btrfs_ordered_extent_cache); | ||
| 1005 | } | ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index e03c560d2997..f29d4bf5fbe7 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -76,7 +76,7 @@ struct btrfs_ordered_sum { | |||
| 76 | 76 | ||
| 77 | #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ | 77 | #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ |
| 78 | 78 | ||
| 79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent | 79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent |
| 80 | * has done its due diligence in updating | 80 | * has done its due diligence in updating |
| 81 | * the isize. */ | 81 | * the isize. */ |
| 82 | 82 | ||
| @@ -96,6 +96,13 @@ struct btrfs_ordered_extent { | |||
| 96 | /* number of bytes that still need writing */ | 96 | /* number of bytes that still need writing */ |
| 97 | u64 bytes_left; | 97 | u64 bytes_left; |
| 98 | 98 | ||
| 99 | /* | ||
| 100 | * the end of the ordered extent which is behind it but | ||
| 101 | * didn't update disk_i_size. Please see the comment of | ||
| 102 | * btrfs_ordered_update_i_size(); | ||
| 103 | */ | ||
| 104 | u64 outstanding_isize; | ||
| 105 | |||
| 99 | /* flags (described above) */ | 106 | /* flags (described above) */ |
| 100 | unsigned long flags; | 107 | unsigned long flags; |
| 101 | 108 | ||
| @@ -121,8 +128,11 @@ struct btrfs_ordered_extent { | |||
| 121 | struct list_head root_extent_list; | 128 | struct list_head root_extent_list; |
| 122 | 129 | ||
| 123 | struct btrfs_work work; | 130 | struct btrfs_work work; |
| 124 | }; | ||
| 125 | 131 | ||
| 132 | struct completion completion; | ||
| 133 | struct btrfs_work flush_work; | ||
| 134 | struct list_head work_list; | ||
| 135 | }; | ||
| 126 | 136 | ||
| 127 | /* | 137 | /* |
| 128 | * calculates the total size you need to allocate for an ordered sum | 138 | * calculates the total size you need to allocate for an ordered sum |
| @@ -179,10 +189,11 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
| 179 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 189 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
| 180 | struct btrfs_ordered_extent *ordered); | 190 | struct btrfs_ordered_extent *ordered); |
| 181 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 191 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
| 182 | void btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | 192 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); |
| 183 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 193 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
| 184 | struct btrfs_root *root, | 194 | struct btrfs_root *root, |
| 185 | struct inode *inode); | 195 | struct inode *inode); |
| 186 | void btrfs_wait_ordered_extents(struct btrfs_root *root, | 196 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); |
| 187 | int nocow_only, int delay_iput); | 197 | int __init ordered_data_init(void); |
| 198 | void ordered_data_exit(void); | ||
| 188 | #endif | 199 | #endif |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 5e23684887eb..50d95fd190a5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
| @@ -297,6 +297,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
| 297 | case BTRFS_DEV_STATS_KEY: | 297 | case BTRFS_DEV_STATS_KEY: |
| 298 | printk(KERN_INFO "\t\tdevice stats\n"); | 298 | printk(KERN_INFO "\t\tdevice stats\n"); |
| 299 | break; | 299 | break; |
| 300 | case BTRFS_DEV_REPLACE_KEY: | ||
| 301 | printk(KERN_INFO "\t\tdev replace\n"); | ||
| 302 | break; | ||
| 300 | }; | 303 | }; |
| 301 | } | 304 | } |
| 302 | } | 305 | } |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b65015581744..fe9d02c45f8e 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -790,8 +790,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, | |||
| 790 | } | 790 | } |
| 791 | 791 | ||
| 792 | path = btrfs_alloc_path(); | 792 | path = btrfs_alloc_path(); |
| 793 | if (!path) | 793 | if (!path) { |
| 794 | return -ENOMEM; | 794 | ret = -ENOMEM; |
| 795 | goto out_free_root; | ||
| 796 | } | ||
| 795 | 797 | ||
| 796 | key.objectid = 0; | 798 | key.objectid = 0; |
| 797 | key.type = BTRFS_QGROUP_STATUS_KEY; | 799 | key.type = BTRFS_QGROUP_STATUS_KEY; |
| @@ -800,7 +802,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, | |||
| 800 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | 802 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, |
| 801 | sizeof(*ptr)); | 803 | sizeof(*ptr)); |
| 802 | if (ret) | 804 | if (ret) |
| 803 | goto out; | 805 | goto out_free_path; |
| 804 | 806 | ||
| 805 | leaf = path->nodes[0]; | 807 | leaf = path->nodes[0]; |
| 806 | ptr = btrfs_item_ptr(leaf, path->slots[0], | 808 | ptr = btrfs_item_ptr(leaf, path->slots[0], |
| @@ -818,8 +820,15 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans, | |||
| 818 | fs_info->quota_root = quota_root; | 820 | fs_info->quota_root = quota_root; |
| 819 | fs_info->pending_quota_state = 1; | 821 | fs_info->pending_quota_state = 1; |
| 820 | spin_unlock(&fs_info->qgroup_lock); | 822 | spin_unlock(&fs_info->qgroup_lock); |
| 821 | out: | 823 | out_free_path: |
| 822 | btrfs_free_path(path); | 824 | btrfs_free_path(path); |
| 825 | out_free_root: | ||
| 826 | if (ret) { | ||
| 827 | free_extent_buffer(quota_root->node); | ||
| 828 | free_extent_buffer(quota_root->commit_root); | ||
| 829 | kfree(quota_root); | ||
| 830 | } | ||
| 831 | out: | ||
| 823 | return ret; | 832 | return ret; |
| 824 | } | 833 | } |
| 825 | 834 | ||
| @@ -1145,12 +1154,12 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1145 | 1154 | ||
| 1146 | ulist_reinit(tmp); | 1155 | ulist_reinit(tmp); |
| 1147 | /* XXX id not needed */ | 1156 | /* XXX id not needed */ |
| 1148 | ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); | 1157 | ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC); |
| 1149 | ULIST_ITER_INIT(&tmp_uiter); | 1158 | ULIST_ITER_INIT(&tmp_uiter); |
| 1150 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1159 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { |
| 1151 | struct btrfs_qgroup_list *glist; | 1160 | struct btrfs_qgroup_list *glist; |
| 1152 | 1161 | ||
| 1153 | qg = (struct btrfs_qgroup *)tmp_unode->aux; | 1162 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; |
| 1154 | if (qg->refcnt < seq) | 1163 | if (qg->refcnt < seq) |
| 1155 | qg->refcnt = seq + 1; | 1164 | qg->refcnt = seq + 1; |
| 1156 | else | 1165 | else |
| @@ -1158,7 +1167,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1158 | 1167 | ||
| 1159 | list_for_each_entry(glist, &qg->groups, next_group) { | 1168 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1160 | ulist_add(tmp, glist->group->qgroupid, | 1169 | ulist_add(tmp, glist->group->qgroupid, |
| 1161 | (unsigned long)glist->group, | 1170 | (u64)(uintptr_t)glist->group, |
| 1162 | GFP_ATOMIC); | 1171 | GFP_ATOMIC); |
| 1163 | } | 1172 | } |
| 1164 | } | 1173 | } |
| @@ -1168,13 +1177,13 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1168 | * step 2: walk from the new root | 1177 | * step 2: walk from the new root |
| 1169 | */ | 1178 | */ |
| 1170 | ulist_reinit(tmp); | 1179 | ulist_reinit(tmp); |
| 1171 | ulist_add(tmp, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1180 | ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); |
| 1172 | ULIST_ITER_INIT(&uiter); | 1181 | ULIST_ITER_INIT(&uiter); |
| 1173 | while ((unode = ulist_next(tmp, &uiter))) { | 1182 | while ((unode = ulist_next(tmp, &uiter))) { |
| 1174 | struct btrfs_qgroup *qg; | 1183 | struct btrfs_qgroup *qg; |
| 1175 | struct btrfs_qgroup_list *glist; | 1184 | struct btrfs_qgroup_list *glist; |
| 1176 | 1185 | ||
| 1177 | qg = (struct btrfs_qgroup *)unode->aux; | 1186 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| 1178 | if (qg->refcnt < seq) { | 1187 | if (qg->refcnt < seq) { |
| 1179 | /* not visited by step 1 */ | 1188 | /* not visited by step 1 */ |
| 1180 | qg->rfer += sgn * node->num_bytes; | 1189 | qg->rfer += sgn * node->num_bytes; |
| @@ -1190,7 +1199,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1190 | 1199 | ||
| 1191 | list_for_each_entry(glist, &qg->groups, next_group) { | 1200 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1192 | ulist_add(tmp, glist->group->qgroupid, | 1201 | ulist_add(tmp, glist->group->qgroupid, |
| 1193 | (unsigned long)glist->group, GFP_ATOMIC); | 1202 | (uintptr_t)glist->group, GFP_ATOMIC); |
| 1194 | } | 1203 | } |
| 1195 | } | 1204 | } |
| 1196 | 1205 | ||
| @@ -1208,12 +1217,12 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1208 | continue; | 1217 | continue; |
| 1209 | 1218 | ||
| 1210 | ulist_reinit(tmp); | 1219 | ulist_reinit(tmp); |
| 1211 | ulist_add(tmp, qg->qgroupid, (unsigned long)qg, GFP_ATOMIC); | 1220 | ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); |
| 1212 | ULIST_ITER_INIT(&tmp_uiter); | 1221 | ULIST_ITER_INIT(&tmp_uiter); |
| 1213 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { | 1222 | while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { |
| 1214 | struct btrfs_qgroup_list *glist; | 1223 | struct btrfs_qgroup_list *glist; |
| 1215 | 1224 | ||
| 1216 | qg = (struct btrfs_qgroup *)tmp_unode->aux; | 1225 | qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; |
| 1217 | if (qg->tag == seq) | 1226 | if (qg->tag == seq) |
| 1218 | continue; | 1227 | continue; |
| 1219 | 1228 | ||
| @@ -1225,7 +1234,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, | |||
| 1225 | 1234 | ||
| 1226 | list_for_each_entry(glist, &qg->groups, next_group) { | 1235 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1227 | ulist_add(tmp, glist->group->qgroupid, | 1236 | ulist_add(tmp, glist->group->qgroupid, |
| 1228 | (unsigned long)glist->group, | 1237 | (uintptr_t)glist->group, |
| 1229 | GFP_ATOMIC); | 1238 | GFP_ATOMIC); |
| 1230 | } | 1239 | } |
| 1231 | } | 1240 | } |
| @@ -1469,13 +1478,17 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1469 | * be exceeded | 1478 | * be exceeded |
| 1470 | */ | 1479 | */ |
| 1471 | ulist = ulist_alloc(GFP_ATOMIC); | 1480 | ulist = ulist_alloc(GFP_ATOMIC); |
| 1472 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1481 | if (!ulist) { |
| 1482 | ret = -ENOMEM; | ||
| 1483 | goto out; | ||
| 1484 | } | ||
| 1485 | ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); | ||
| 1473 | ULIST_ITER_INIT(&uiter); | 1486 | ULIST_ITER_INIT(&uiter); |
| 1474 | while ((unode = ulist_next(ulist, &uiter))) { | 1487 | while ((unode = ulist_next(ulist, &uiter))) { |
| 1475 | struct btrfs_qgroup *qg; | 1488 | struct btrfs_qgroup *qg; |
| 1476 | struct btrfs_qgroup_list *glist; | 1489 | struct btrfs_qgroup_list *glist; |
| 1477 | 1490 | ||
| 1478 | qg = (struct btrfs_qgroup *)unode->aux; | 1491 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| 1479 | 1492 | ||
| 1480 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && | 1493 | if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && |
| 1481 | qg->reserved + qg->rfer + num_bytes > | 1494 | qg->reserved + qg->rfer + num_bytes > |
| @@ -1489,7 +1502,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1489 | 1502 | ||
| 1490 | list_for_each_entry(glist, &qg->groups, next_group) { | 1503 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1491 | ulist_add(ulist, glist->group->qgroupid, | 1504 | ulist_add(ulist, glist->group->qgroupid, |
| 1492 | (unsigned long)glist->group, GFP_ATOMIC); | 1505 | (uintptr_t)glist->group, GFP_ATOMIC); |
| 1493 | } | 1506 | } |
| 1494 | } | 1507 | } |
| 1495 | if (ret) | 1508 | if (ret) |
| @@ -1502,7 +1515,7 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | |||
| 1502 | while ((unode = ulist_next(ulist, &uiter))) { | 1515 | while ((unode = ulist_next(ulist, &uiter))) { |
| 1503 | struct btrfs_qgroup *qg; | 1516 | struct btrfs_qgroup *qg; |
| 1504 | 1517 | ||
| 1505 | qg = (struct btrfs_qgroup *)unode->aux; | 1518 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| 1506 | 1519 | ||
| 1507 | qg->reserved += num_bytes; | 1520 | qg->reserved += num_bytes; |
| 1508 | } | 1521 | } |
| @@ -1541,19 +1554,23 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) | |||
| 1541 | goto out; | 1554 | goto out; |
| 1542 | 1555 | ||
| 1543 | ulist = ulist_alloc(GFP_ATOMIC); | 1556 | ulist = ulist_alloc(GFP_ATOMIC); |
| 1544 | ulist_add(ulist, qgroup->qgroupid, (unsigned long)qgroup, GFP_ATOMIC); | 1557 | if (!ulist) { |
| 1558 | btrfs_std_error(fs_info, -ENOMEM); | ||
| 1559 | goto out; | ||
| 1560 | } | ||
| 1561 | ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); | ||
| 1545 | ULIST_ITER_INIT(&uiter); | 1562 | ULIST_ITER_INIT(&uiter); |
| 1546 | while ((unode = ulist_next(ulist, &uiter))) { | 1563 | while ((unode = ulist_next(ulist, &uiter))) { |
| 1547 | struct btrfs_qgroup *qg; | 1564 | struct btrfs_qgroup *qg; |
| 1548 | struct btrfs_qgroup_list *glist; | 1565 | struct btrfs_qgroup_list *glist; |
| 1549 | 1566 | ||
| 1550 | qg = (struct btrfs_qgroup *)unode->aux; | 1567 | qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; |
| 1551 | 1568 | ||
| 1552 | qg->reserved -= num_bytes; | 1569 | qg->reserved -= num_bytes; |
| 1553 | 1570 | ||
| 1554 | list_for_each_entry(glist, &qg->groups, next_group) { | 1571 | list_for_each_entry(glist, &qg->groups, next_group) { |
| 1555 | ulist_add(ulist, glist->group->qgroupid, | 1572 | ulist_add(ulist, glist->group->qgroupid, |
| 1556 | (unsigned long)glist->group, GFP_ATOMIC); | 1573 | (uintptr_t)glist->group, GFP_ATOMIC); |
| 1557 | } | 1574 | } |
| 1558 | } | 1575 | } |
| 1559 | 1576 | ||
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index a955669519a2..96b93daa0bbb 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "volumes.h" | 27 | #include "volumes.h" |
| 28 | #include "disk-io.h" | 28 | #include "disk-io.h" |
| 29 | #include "transaction.h" | 29 | #include "transaction.h" |
| 30 | #include "dev-replace.h" | ||
| 30 | 31 | ||
| 31 | #undef DEBUG | 32 | #undef DEBUG |
| 32 | 33 | ||
| @@ -323,7 +324,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 323 | struct reada_extent *re = NULL; | 324 | struct reada_extent *re = NULL; |
| 324 | struct reada_extent *re_exist = NULL; | 325 | struct reada_extent *re_exist = NULL; |
| 325 | struct btrfs_fs_info *fs_info = root->fs_info; | 326 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 326 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
| 327 | struct btrfs_bio *bbio = NULL; | 327 | struct btrfs_bio *bbio = NULL; |
| 328 | struct btrfs_device *dev; | 328 | struct btrfs_device *dev; |
| 329 | struct btrfs_device *prev_dev; | 329 | struct btrfs_device *prev_dev; |
| @@ -332,6 +332,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 332 | int nzones = 0; | 332 | int nzones = 0; |
| 333 | int i; | 333 | int i; |
| 334 | unsigned long index = logical >> PAGE_CACHE_SHIFT; | 334 | unsigned long index = logical >> PAGE_CACHE_SHIFT; |
| 335 | int dev_replace_is_ongoing; | ||
| 335 | 336 | ||
| 336 | spin_lock(&fs_info->reada_lock); | 337 | spin_lock(&fs_info->reada_lock); |
| 337 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 338 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
| @@ -358,7 +359,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 358 | * map block | 359 | * map block |
| 359 | */ | 360 | */ |
| 360 | length = blocksize; | 361 | length = blocksize; |
| 361 | ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &bbio, 0); | 362 | ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, &length, |
| 363 | &bbio, 0); | ||
| 362 | if (ret || !bbio || length < blocksize) | 364 | if (ret || !bbio || length < blocksize) |
| 363 | goto error; | 365 | goto error; |
| 364 | 366 | ||
| @@ -393,6 +395,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 393 | } | 395 | } |
| 394 | 396 | ||
| 395 | /* insert extent in reada_tree + all per-device trees, all or nothing */ | 397 | /* insert extent in reada_tree + all per-device trees, all or nothing */ |
| 398 | btrfs_dev_replace_lock(&fs_info->dev_replace); | ||
| 396 | spin_lock(&fs_info->reada_lock); | 399 | spin_lock(&fs_info->reada_lock); |
| 397 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); | 400 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); |
| 398 | if (ret == -EEXIST) { | 401 | if (ret == -EEXIST) { |
| @@ -400,13 +403,17 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 400 | BUG_ON(!re_exist); | 403 | BUG_ON(!re_exist); |
| 401 | re_exist->refcnt++; | 404 | re_exist->refcnt++; |
| 402 | spin_unlock(&fs_info->reada_lock); | 405 | spin_unlock(&fs_info->reada_lock); |
| 406 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 403 | goto error; | 407 | goto error; |
| 404 | } | 408 | } |
| 405 | if (ret) { | 409 | if (ret) { |
| 406 | spin_unlock(&fs_info->reada_lock); | 410 | spin_unlock(&fs_info->reada_lock); |
| 411 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 407 | goto error; | 412 | goto error; |
| 408 | } | 413 | } |
| 409 | prev_dev = NULL; | 414 | prev_dev = NULL; |
| 415 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( | ||
| 416 | &fs_info->dev_replace); | ||
| 410 | for (i = 0; i < nzones; ++i) { | 417 | for (i = 0; i < nzones; ++i) { |
| 411 | dev = bbio->stripes[i].dev; | 418 | dev = bbio->stripes[i].dev; |
| 412 | if (dev == prev_dev) { | 419 | if (dev == prev_dev) { |
| @@ -419,21 +426,36 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 419 | */ | 426 | */ |
| 420 | continue; | 427 | continue; |
| 421 | } | 428 | } |
| 429 | if (!dev->bdev) { | ||
| 430 | /* cannot read ahead on missing device */ | ||
| 431 | continue; | ||
| 432 | } | ||
| 433 | if (dev_replace_is_ongoing && | ||
| 434 | dev == fs_info->dev_replace.tgtdev) { | ||
| 435 | /* | ||
| 436 | * as this device is selected for reading only as | ||
| 437 | * a last resort, skip it for read ahead. | ||
| 438 | */ | ||
| 439 | continue; | ||
| 440 | } | ||
| 422 | prev_dev = dev; | 441 | prev_dev = dev; |
| 423 | ret = radix_tree_insert(&dev->reada_extents, index, re); | 442 | ret = radix_tree_insert(&dev->reada_extents, index, re); |
| 424 | if (ret) { | 443 | if (ret) { |
| 425 | while (--i >= 0) { | 444 | while (--i >= 0) { |
| 426 | dev = bbio->stripes[i].dev; | 445 | dev = bbio->stripes[i].dev; |
| 427 | BUG_ON(dev == NULL); | 446 | BUG_ON(dev == NULL); |
| 447 | /* ignore whether the entry was inserted */ | ||
| 428 | radix_tree_delete(&dev->reada_extents, index); | 448 | radix_tree_delete(&dev->reada_extents, index); |
| 429 | } | 449 | } |
| 430 | BUG_ON(fs_info == NULL); | 450 | BUG_ON(fs_info == NULL); |
| 431 | radix_tree_delete(&fs_info->reada_tree, index); | 451 | radix_tree_delete(&fs_info->reada_tree, index); |
| 432 | spin_unlock(&fs_info->reada_lock); | 452 | spin_unlock(&fs_info->reada_lock); |
| 453 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 433 | goto error; | 454 | goto error; |
| 434 | } | 455 | } |
| 435 | } | 456 | } |
| 436 | spin_unlock(&fs_info->reada_lock); | 457 | spin_unlock(&fs_info->reada_lock); |
| 458 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 437 | 459 | ||
| 438 | kfree(bbio); | 460 | kfree(bbio); |
| 439 | return re; | 461 | return re; |
| @@ -915,7 +937,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
| 915 | generation = btrfs_header_generation(node); | 937 | generation = btrfs_header_generation(node); |
| 916 | free_extent_buffer(node); | 938 | free_extent_buffer(node); |
| 917 | 939 | ||
| 918 | reada_add_block(rc, start, &max_key, level, generation); | 940 | if (reada_add_block(rc, start, &max_key, level, generation)) { |
| 941 | kfree(rc); | ||
| 942 | return ERR_PTR(-ENOMEM); | ||
| 943 | } | ||
| 919 | 944 | ||
| 920 | reada_start_machine(root->fs_info); | 945 | reada_start_machine(root->fs_info); |
| 921 | 946 | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4da08652004d..300e09ac3659 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -2025,7 +2025,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 2025 | struct btrfs_root_item *root_item; | 2025 | struct btrfs_root_item *root_item; |
| 2026 | struct btrfs_path *path; | 2026 | struct btrfs_path *path; |
| 2027 | struct extent_buffer *leaf; | 2027 | struct extent_buffer *leaf; |
| 2028 | unsigned long nr; | ||
| 2029 | int level; | 2028 | int level; |
| 2030 | int max_level; | 2029 | int max_level; |
| 2031 | int replaced = 0; | 2030 | int replaced = 0; |
| @@ -2074,7 +2073,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 2074 | BUG_ON(IS_ERR(trans)); | 2073 | BUG_ON(IS_ERR(trans)); |
| 2075 | trans->block_rsv = rc->block_rsv; | 2074 | trans->block_rsv = rc->block_rsv; |
| 2076 | 2075 | ||
| 2077 | ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved); | 2076 | ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, |
| 2077 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 2078 | if (ret) { | 2078 | if (ret) { |
| 2079 | BUG_ON(ret != -EAGAIN); | 2079 | BUG_ON(ret != -EAGAIN); |
| 2080 | ret = btrfs_commit_transaction(trans, root); | 2080 | ret = btrfs_commit_transaction(trans, root); |
| @@ -2125,10 +2125,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 2125 | path->slots[level]); | 2125 | path->slots[level]); |
| 2126 | root_item->drop_level = level; | 2126 | root_item->drop_level = level; |
| 2127 | 2127 | ||
| 2128 | nr = trans->blocks_used; | ||
| 2129 | btrfs_end_transaction_throttle(trans, root); | 2128 | btrfs_end_transaction_throttle(trans, root); |
| 2130 | 2129 | ||
| 2131 | btrfs_btree_balance_dirty(root, nr); | 2130 | btrfs_btree_balance_dirty(root); |
| 2132 | 2131 | ||
| 2133 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2132 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
| 2134 | invalidate_extent_cache(root, &key, &next_key); | 2133 | invalidate_extent_cache(root, &key, &next_key); |
| @@ -2155,10 +2154,9 @@ out: | |||
| 2155 | btrfs_update_reloc_root(trans, root); | 2154 | btrfs_update_reloc_root(trans, root); |
| 2156 | } | 2155 | } |
| 2157 | 2156 | ||
| 2158 | nr = trans->blocks_used; | ||
| 2159 | btrfs_end_transaction_throttle(trans, root); | 2157 | btrfs_end_transaction_throttle(trans, root); |
| 2160 | 2158 | ||
| 2161 | btrfs_btree_balance_dirty(root, nr); | 2159 | btrfs_btree_balance_dirty(root); |
| 2162 | 2160 | ||
| 2163 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2161 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
| 2164 | invalidate_extent_cache(root, &key, &next_key); | 2162 | invalidate_extent_cache(root, &key, &next_key); |
| @@ -2184,7 +2182,8 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
| 2184 | again: | 2182 | again: |
| 2185 | if (!err) { | 2183 | if (!err) { |
| 2186 | num_bytes = rc->merging_rsv_size; | 2184 | num_bytes = rc->merging_rsv_size; |
| 2187 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); | 2185 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, |
| 2186 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 2188 | if (ret) | 2187 | if (ret) |
| 2189 | err = ret; | 2188 | err = ret; |
| 2190 | } | 2189 | } |
| @@ -2459,7 +2458,8 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, | |||
| 2459 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; | 2458 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
| 2460 | 2459 | ||
| 2461 | trans->block_rsv = rc->block_rsv; | 2460 | trans->block_rsv = rc->block_rsv; |
| 2462 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); | 2461 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, |
| 2462 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 2463 | if (ret) { | 2463 | if (ret) { |
| 2464 | if (ret == -EAGAIN) | 2464 | if (ret == -EAGAIN) |
| 2465 | rc->commit_transaction = 1; | 2465 | rc->commit_transaction = 1; |
| @@ -3259,7 +3259,6 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, | |||
| 3259 | struct btrfs_path *path; | 3259 | struct btrfs_path *path; |
| 3260 | struct btrfs_root *root = fs_info->tree_root; | 3260 | struct btrfs_root *root = fs_info->tree_root; |
| 3261 | struct btrfs_trans_handle *trans; | 3261 | struct btrfs_trans_handle *trans; |
| 3262 | unsigned long nr; | ||
| 3263 | int ret = 0; | 3262 | int ret = 0; |
| 3264 | 3263 | ||
| 3265 | if (inode) | 3264 | if (inode) |
| @@ -3270,8 +3269,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, | |||
| 3270 | key.offset = 0; | 3269 | key.offset = 0; |
| 3271 | 3270 | ||
| 3272 | inode = btrfs_iget(fs_info->sb, &key, root, NULL); | 3271 | inode = btrfs_iget(fs_info->sb, &key, root, NULL); |
| 3273 | if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) { | 3272 | if (IS_ERR(inode) || is_bad_inode(inode)) { |
| 3274 | if (inode && !IS_ERR(inode)) | 3273 | if (!IS_ERR(inode)) |
| 3275 | iput(inode); | 3274 | iput(inode); |
| 3276 | return -ENOENT; | 3275 | return -ENOENT; |
| 3277 | } | 3276 | } |
| @@ -3293,9 +3292,8 @@ truncate: | |||
| 3293 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); | 3292 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); |
| 3294 | 3293 | ||
| 3295 | btrfs_free_path(path); | 3294 | btrfs_free_path(path); |
| 3296 | nr = trans->blocks_used; | ||
| 3297 | btrfs_end_transaction(trans, root); | 3295 | btrfs_end_transaction(trans, root); |
| 3298 | btrfs_btree_balance_dirty(root, nr); | 3296 | btrfs_btree_balance_dirty(root); |
| 3299 | out: | 3297 | out: |
| 3300 | iput(inode); | 3298 | iput(inode); |
| 3301 | return ret; | 3299 | return ret; |
| @@ -3621,7 +3619,7 @@ next: | |||
| 3621 | 3619 | ||
| 3622 | ret = find_first_extent_bit(&rc->processed_blocks, | 3620 | ret = find_first_extent_bit(&rc->processed_blocks, |
| 3623 | key.objectid, &start, &end, | 3621 | key.objectid, &start, &end, |
| 3624 | EXTENT_DIRTY); | 3622 | EXTENT_DIRTY, NULL); |
| 3625 | 3623 | ||
| 3626 | if (ret == 0 && start <= key.objectid) { | 3624 | if (ret == 0 && start <= key.objectid) { |
| 3627 | btrfs_release_path(path); | 3625 | btrfs_release_path(path); |
| @@ -3674,7 +3672,8 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
| 3674 | struct btrfs_trans_handle *trans; | 3672 | struct btrfs_trans_handle *trans; |
| 3675 | int ret; | 3673 | int ret; |
| 3676 | 3674 | ||
| 3677 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | 3675 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, |
| 3676 | BTRFS_BLOCK_RSV_TEMP); | ||
| 3678 | if (!rc->block_rsv) | 3677 | if (!rc->block_rsv) |
| 3679 | return -ENOMEM; | 3678 | return -ENOMEM; |
| 3680 | 3679 | ||
| @@ -3684,7 +3683,8 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
| 3684 | * is no reservation in transaction handle. | 3683 | * is no reservation in transaction handle. |
| 3685 | */ | 3684 | */ |
| 3686 | ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, | 3685 | ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, |
| 3687 | rc->extent_root->nodesize * 256); | 3686 | rc->extent_root->nodesize * 256, |
| 3687 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 3688 | if (ret) | 3688 | if (ret) |
| 3689 | return ret; | 3689 | return ret; |
| 3690 | 3690 | ||
| @@ -3710,7 +3710,6 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3710 | struct btrfs_trans_handle *trans = NULL; | 3710 | struct btrfs_trans_handle *trans = NULL; |
| 3711 | struct btrfs_path *path; | 3711 | struct btrfs_path *path; |
| 3712 | struct btrfs_extent_item *ei; | 3712 | struct btrfs_extent_item *ei; |
| 3713 | unsigned long nr; | ||
| 3714 | u64 flags; | 3713 | u64 flags; |
| 3715 | u32 item_size; | 3714 | u32 item_size; |
| 3716 | int ret; | 3715 | int ret; |
| @@ -3827,9 +3826,8 @@ restart: | |||
| 3827 | ret = btrfs_commit_transaction(trans, rc->extent_root); | 3826 | ret = btrfs_commit_transaction(trans, rc->extent_root); |
| 3828 | BUG_ON(ret); | 3827 | BUG_ON(ret); |
| 3829 | } else { | 3828 | } else { |
| 3830 | nr = trans->blocks_used; | ||
| 3831 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3829 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
| 3832 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3830 | btrfs_btree_balance_dirty(rc->extent_root); |
| 3833 | } | 3831 | } |
| 3834 | trans = NULL; | 3832 | trans = NULL; |
| 3835 | 3833 | ||
| @@ -3859,9 +3857,8 @@ restart: | |||
| 3859 | GFP_NOFS); | 3857 | GFP_NOFS); |
| 3860 | 3858 | ||
| 3861 | if (trans) { | 3859 | if (trans) { |
| 3862 | nr = trans->blocks_used; | ||
| 3863 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3860 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
| 3864 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3861 | btrfs_btree_balance_dirty(rc->extent_root); |
| 3865 | } | 3862 | } |
| 3866 | 3863 | ||
| 3867 | if (!err) { | 3864 | if (!err) { |
| @@ -3940,7 +3937,6 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3940 | struct btrfs_trans_handle *trans; | 3937 | struct btrfs_trans_handle *trans; |
| 3941 | struct btrfs_root *root; | 3938 | struct btrfs_root *root; |
| 3942 | struct btrfs_key key; | 3939 | struct btrfs_key key; |
| 3943 | unsigned long nr; | ||
| 3944 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | 3940 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; |
| 3945 | int err = 0; | 3941 | int err = 0; |
| 3946 | 3942 | ||
| @@ -3968,9 +3964,8 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3968 | 3964 | ||
| 3969 | err = btrfs_orphan_add(trans, inode); | 3965 | err = btrfs_orphan_add(trans, inode); |
| 3970 | out: | 3966 | out: |
| 3971 | nr = trans->blocks_used; | ||
| 3972 | btrfs_end_transaction(trans, root); | 3967 | btrfs_end_transaction(trans, root); |
| 3973 | btrfs_btree_balance_dirty(root, nr); | 3968 | btrfs_btree_balance_dirty(root); |
| 3974 | if (err) { | 3969 | if (err) { |
| 3975 | if (inode) | 3970 | if (inode) |
| 3976 | iput(inode); | 3971 | iput(inode); |
| @@ -4056,8 +4051,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 4056 | (unsigned long long)rc->block_group->key.objectid, | 4051 | (unsigned long long)rc->block_group->key.objectid, |
| 4057 | (unsigned long long)rc->block_group->flags); | 4052 | (unsigned long long)rc->block_group->flags); |
| 4058 | 4053 | ||
| 4059 | btrfs_start_delalloc_inodes(fs_info->tree_root, 0); | 4054 | ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0); |
| 4060 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 4055 | if (ret < 0) { |
| 4056 | err = ret; | ||
| 4057 | goto out; | ||
| 4058 | } | ||
| 4059 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | ||
| 4061 | 4060 | ||
| 4062 | while (1) { | 4061 | while (1) { |
| 4063 | mutex_lock(&fs_info->cleaner_mutex); | 4062 | mutex_lock(&fs_info->cleaner_mutex); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 10d8e4d88071..668af537a3ea 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -141,8 +141,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 141 | return -ENOMEM; | 141 | return -ENOMEM; |
| 142 | 142 | ||
| 143 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 143 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
| 144 | if (ret < 0) | 144 | if (ret < 0) { |
| 145 | goto out_abort; | 145 | btrfs_abort_transaction(trans, root, ret); |
| 146 | goto out; | ||
| 147 | } | ||
| 146 | 148 | ||
| 147 | if (ret != 0) { | 149 | if (ret != 0) { |
| 148 | btrfs_print_leaf(root, path->nodes[0]); | 150 | btrfs_print_leaf(root, path->nodes[0]); |
| @@ -166,16 +168,23 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 166 | btrfs_release_path(path); | 168 | btrfs_release_path(path); |
| 167 | ret = btrfs_search_slot(trans, root, key, path, | 169 | ret = btrfs_search_slot(trans, root, key, path, |
| 168 | -1, 1); | 170 | -1, 1); |
| 169 | if (ret < 0) | 171 | if (ret < 0) { |
| 170 | goto out_abort; | 172 | btrfs_abort_transaction(trans, root, ret); |
| 173 | goto out; | ||
| 174 | } | ||
| 175 | |||
| 171 | ret = btrfs_del_item(trans, root, path); | 176 | ret = btrfs_del_item(trans, root, path); |
| 172 | if (ret < 0) | 177 | if (ret < 0) { |
| 173 | goto out_abort; | 178 | btrfs_abort_transaction(trans, root, ret); |
| 179 | goto out; | ||
| 180 | } | ||
| 174 | btrfs_release_path(path); | 181 | btrfs_release_path(path); |
| 175 | ret = btrfs_insert_empty_item(trans, root, path, | 182 | ret = btrfs_insert_empty_item(trans, root, path, |
| 176 | key, sizeof(*item)); | 183 | key, sizeof(*item)); |
| 177 | if (ret < 0) | 184 | if (ret < 0) { |
| 178 | goto out_abort; | 185 | btrfs_abort_transaction(trans, root, ret); |
| 186 | goto out; | ||
| 187 | } | ||
| 179 | l = path->nodes[0]; | 188 | l = path->nodes[0]; |
| 180 | slot = path->slots[0]; | 189 | slot = path->slots[0]; |
| 181 | ptr = btrfs_item_ptr_offset(l, slot); | 190 | ptr = btrfs_item_ptr_offset(l, slot); |
| @@ -192,10 +201,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 192 | out: | 201 | out: |
| 193 | btrfs_free_path(path); | 202 | btrfs_free_path(path); |
| 194 | return ret; | 203 | return ret; |
| 195 | |||
| 196 | out_abort: | ||
| 197 | btrfs_abort_transaction(trans, root, ret); | ||
| 198 | goto out; | ||
| 199 | } | 204 | } |
| 200 | 205 | ||
| 201 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 206 | int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| @@ -543,9 +548,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, | |||
| 543 | struct btrfs_root_item *item = &root->root_item; | 548 | struct btrfs_root_item *item = &root->root_item; |
| 544 | struct timespec ct = CURRENT_TIME; | 549 | struct timespec ct = CURRENT_TIME; |
| 545 | 550 | ||
| 546 | spin_lock(&root->root_times_lock); | 551 | spin_lock(&root->root_item_lock); |
| 547 | item->ctransid = cpu_to_le64(trans->transid); | 552 | item->ctransid = cpu_to_le64(trans->transid); |
| 548 | item->ctime.sec = cpu_to_le64(ct.tv_sec); | 553 | item->ctime.sec = cpu_to_le64(ct.tv_sec); |
| 549 | item->ctime.nsec = cpu_to_le32(ct.tv_nsec); | 554 | item->ctime.nsec = cpu_to_le32(ct.tv_nsec); |
| 550 | spin_unlock(&root->root_times_lock); | 555 | spin_unlock(&root->root_item_lock); |
| 551 | } | 556 | } |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b223620cd5a6..bdbb94f245c9 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2011 STRATO. All rights reserved. | 2 | * Copyright (C) 2011, 2012 STRATO. All rights reserved. |
| 3 | * | 3 | * |
| 4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public | 5 | * modify it under the terms of the GNU General Public |
| @@ -25,6 +25,7 @@ | |||
| 25 | #include "transaction.h" | 25 | #include "transaction.h" |
| 26 | #include "backref.h" | 26 | #include "backref.h" |
| 27 | #include "extent_io.h" | 27 | #include "extent_io.h" |
| 28 | #include "dev-replace.h" | ||
| 28 | #include "check-integrity.h" | 29 | #include "check-integrity.h" |
| 29 | #include "rcu-string.h" | 30 | #include "rcu-string.h" |
| 30 | 31 | ||
| @@ -42,10 +43,23 @@ | |||
| 42 | */ | 43 | */ |
| 43 | 44 | ||
| 44 | struct scrub_block; | 45 | struct scrub_block; |
| 45 | struct scrub_dev; | 46 | struct scrub_ctx; |
| 46 | 47 | ||
| 47 | #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ | 48 | /* |
| 48 | #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ | 49 | * the following three values only influence the performance. |
| 50 | * The last one configures the number of parallel and outstanding I/O | ||
| 51 | * operations. The first two values configure an upper limit for the number | ||
| 52 | * of (dynamically allocated) pages that are added to a bio. | ||
| 53 | */ | ||
| 54 | #define SCRUB_PAGES_PER_RD_BIO 32 /* 128k per bio */ | ||
| 55 | #define SCRUB_PAGES_PER_WR_BIO 32 /* 128k per bio */ | ||
| 56 | #define SCRUB_BIOS_PER_SCTX 64 /* 8MB per device in flight */ | ||
| 57 | |||
| 58 | /* | ||
| 59 | * the following value times PAGE_SIZE needs to be large enough to match the | ||
| 60 | * largest node/leaf/sector size that shall be supported. | ||
| 61 | * Values larger than BTRFS_STRIPE_LEN are not supported. | ||
| 62 | */ | ||
| 49 | #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ | 63 | #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ |
| 50 | 64 | ||
| 51 | struct scrub_page { | 65 | struct scrub_page { |
| @@ -56,6 +70,8 @@ struct scrub_page { | |||
| 56 | u64 generation; | 70 | u64 generation; |
| 57 | u64 logical; | 71 | u64 logical; |
| 58 | u64 physical; | 72 | u64 physical; |
| 73 | u64 physical_for_dev_replace; | ||
| 74 | atomic_t ref_count; | ||
| 59 | struct { | 75 | struct { |
| 60 | unsigned int mirror_num:8; | 76 | unsigned int mirror_num:8; |
| 61 | unsigned int have_csum:1; | 77 | unsigned int have_csum:1; |
| @@ -66,23 +82,28 @@ struct scrub_page { | |||
| 66 | 82 | ||
| 67 | struct scrub_bio { | 83 | struct scrub_bio { |
| 68 | int index; | 84 | int index; |
| 69 | struct scrub_dev *sdev; | 85 | struct scrub_ctx *sctx; |
| 86 | struct btrfs_device *dev; | ||
| 70 | struct bio *bio; | 87 | struct bio *bio; |
| 71 | int err; | 88 | int err; |
| 72 | u64 logical; | 89 | u64 logical; |
| 73 | u64 physical; | 90 | u64 physical; |
| 74 | struct scrub_page *pagev[SCRUB_PAGES_PER_BIO]; | 91 | #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO |
| 92 | struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO]; | ||
| 93 | #else | ||
| 94 | struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO]; | ||
| 95 | #endif | ||
| 75 | int page_count; | 96 | int page_count; |
| 76 | int next_free; | 97 | int next_free; |
| 77 | struct btrfs_work work; | 98 | struct btrfs_work work; |
| 78 | }; | 99 | }; |
| 79 | 100 | ||
| 80 | struct scrub_block { | 101 | struct scrub_block { |
| 81 | struct scrub_page pagev[SCRUB_MAX_PAGES_PER_BLOCK]; | 102 | struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK]; |
| 82 | int page_count; | 103 | int page_count; |
| 83 | atomic_t outstanding_pages; | 104 | atomic_t outstanding_pages; |
| 84 | atomic_t ref_count; /* free mem on transition to zero */ | 105 | atomic_t ref_count; /* free mem on transition to zero */ |
| 85 | struct scrub_dev *sdev; | 106 | struct scrub_ctx *sctx; |
| 86 | struct { | 107 | struct { |
| 87 | unsigned int header_error:1; | 108 | unsigned int header_error:1; |
| 88 | unsigned int checksum_error:1; | 109 | unsigned int checksum_error:1; |
| @@ -91,23 +112,35 @@ struct scrub_block { | |||
| 91 | }; | 112 | }; |
| 92 | }; | 113 | }; |
| 93 | 114 | ||
| 94 | struct scrub_dev { | 115 | struct scrub_wr_ctx { |
| 95 | struct scrub_bio *bios[SCRUB_BIOS_PER_DEV]; | 116 | struct scrub_bio *wr_curr_bio; |
| 96 | struct btrfs_device *dev; | 117 | struct btrfs_device *tgtdev; |
| 118 | int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */ | ||
| 119 | atomic_t flush_all_writes; | ||
| 120 | struct mutex wr_lock; | ||
| 121 | }; | ||
| 122 | |||
| 123 | struct scrub_ctx { | ||
| 124 | struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX]; | ||
| 125 | struct btrfs_root *dev_root; | ||
| 97 | int first_free; | 126 | int first_free; |
| 98 | int curr; | 127 | int curr; |
| 99 | atomic_t in_flight; | 128 | atomic_t bios_in_flight; |
| 100 | atomic_t fixup_cnt; | 129 | atomic_t workers_pending; |
| 101 | spinlock_t list_lock; | 130 | spinlock_t list_lock; |
| 102 | wait_queue_head_t list_wait; | 131 | wait_queue_head_t list_wait; |
| 103 | u16 csum_size; | 132 | u16 csum_size; |
| 104 | struct list_head csum_list; | 133 | struct list_head csum_list; |
| 105 | atomic_t cancel_req; | 134 | atomic_t cancel_req; |
| 106 | int readonly; | 135 | int readonly; |
| 107 | int pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */ | 136 | int pages_per_rd_bio; |
| 108 | u32 sectorsize; | 137 | u32 sectorsize; |
| 109 | u32 nodesize; | 138 | u32 nodesize; |
| 110 | u32 leafsize; | 139 | u32 leafsize; |
| 140 | |||
| 141 | int is_dev_replace; | ||
| 142 | struct scrub_wr_ctx wr_ctx; | ||
| 143 | |||
| 111 | /* | 144 | /* |
| 112 | * statistics | 145 | * statistics |
| 113 | */ | 146 | */ |
| @@ -116,13 +149,23 @@ struct scrub_dev { | |||
| 116 | }; | 149 | }; |
| 117 | 150 | ||
| 118 | struct scrub_fixup_nodatasum { | 151 | struct scrub_fixup_nodatasum { |
| 119 | struct scrub_dev *sdev; | 152 | struct scrub_ctx *sctx; |
| 153 | struct btrfs_device *dev; | ||
| 120 | u64 logical; | 154 | u64 logical; |
| 121 | struct btrfs_root *root; | 155 | struct btrfs_root *root; |
| 122 | struct btrfs_work work; | 156 | struct btrfs_work work; |
| 123 | int mirror_num; | 157 | int mirror_num; |
| 124 | }; | 158 | }; |
| 125 | 159 | ||
| 160 | struct scrub_copy_nocow_ctx { | ||
| 161 | struct scrub_ctx *sctx; | ||
| 162 | u64 logical; | ||
| 163 | u64 len; | ||
| 164 | int mirror_num; | ||
| 165 | u64 physical_for_dev_replace; | ||
| 166 | struct btrfs_work work; | ||
| 167 | }; | ||
| 168 | |||
| 126 | struct scrub_warning { | 169 | struct scrub_warning { |
| 127 | struct btrfs_path *path; | 170 | struct btrfs_path *path; |
| 128 | u64 extent_item_size; | 171 | u64 extent_item_size; |
| @@ -137,15 +180,20 @@ struct scrub_warning { | |||
| 137 | }; | 180 | }; |
| 138 | 181 | ||
| 139 | 182 | ||
| 183 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx); | ||
| 184 | static void scrub_pending_bio_dec(struct scrub_ctx *sctx); | ||
| 185 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); | ||
| 186 | static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); | ||
| 140 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); | 187 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); |
| 141 | static int scrub_setup_recheck_block(struct scrub_dev *sdev, | 188 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, |
| 142 | struct btrfs_mapping_tree *map_tree, | 189 | struct btrfs_fs_info *fs_info, |
| 190 | struct scrub_block *original_sblock, | ||
| 143 | u64 length, u64 logical, | 191 | u64 length, u64 logical, |
| 144 | struct scrub_block *sblock); | 192 | struct scrub_block *sblocks_for_recheck); |
| 145 | static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | 193 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
| 146 | struct scrub_block *sblock, int is_metadata, | 194 | struct scrub_block *sblock, int is_metadata, |
| 147 | int have_csum, u8 *csum, u64 generation, | 195 | int have_csum, u8 *csum, u64 generation, |
| 148 | u16 csum_size); | 196 | u16 csum_size); |
| 149 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | 197 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, |
| 150 | struct scrub_block *sblock, | 198 | struct scrub_block *sblock, |
| 151 | int is_metadata, int have_csum, | 199 | int is_metadata, int have_csum, |
| @@ -158,118 +206,221 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, | |||
| 158 | static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | 206 | static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, |
| 159 | struct scrub_block *sblock_good, | 207 | struct scrub_block *sblock_good, |
| 160 | int page_num, int force_write); | 208 | int page_num, int force_write); |
| 209 | static void scrub_write_block_to_dev_replace(struct scrub_block *sblock); | ||
| 210 | static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, | ||
| 211 | int page_num); | ||
| 161 | static int scrub_checksum_data(struct scrub_block *sblock); | 212 | static int scrub_checksum_data(struct scrub_block *sblock); |
| 162 | static int scrub_checksum_tree_block(struct scrub_block *sblock); | 213 | static int scrub_checksum_tree_block(struct scrub_block *sblock); |
| 163 | static int scrub_checksum_super(struct scrub_block *sblock); | 214 | static int scrub_checksum_super(struct scrub_block *sblock); |
| 164 | static void scrub_block_get(struct scrub_block *sblock); | 215 | static void scrub_block_get(struct scrub_block *sblock); |
| 165 | static void scrub_block_put(struct scrub_block *sblock); | 216 | static void scrub_block_put(struct scrub_block *sblock); |
| 166 | static int scrub_add_page_to_bio(struct scrub_dev *sdev, | 217 | static void scrub_page_get(struct scrub_page *spage); |
| 167 | struct scrub_page *spage); | 218 | static void scrub_page_put(struct scrub_page *spage); |
| 168 | static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | 219 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, |
| 169 | u64 physical, u64 flags, u64 gen, int mirror_num, | 220 | struct scrub_page *spage); |
| 170 | u8 *csum, int force); | 221 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
| 222 | u64 physical, struct btrfs_device *dev, u64 flags, | ||
| 223 | u64 gen, int mirror_num, u8 *csum, int force, | ||
| 224 | u64 physical_for_dev_replace); | ||
| 171 | static void scrub_bio_end_io(struct bio *bio, int err); | 225 | static void scrub_bio_end_io(struct bio *bio, int err); |
| 172 | static void scrub_bio_end_io_worker(struct btrfs_work *work); | 226 | static void scrub_bio_end_io_worker(struct btrfs_work *work); |
| 173 | static void scrub_block_complete(struct scrub_block *sblock); | 227 | static void scrub_block_complete(struct scrub_block *sblock); |
| 228 | static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | ||
| 229 | u64 extent_logical, u64 extent_len, | ||
| 230 | u64 *extent_physical, | ||
| 231 | struct btrfs_device **extent_dev, | ||
| 232 | int *extent_mirror_num); | ||
| 233 | static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, | ||
| 234 | struct scrub_wr_ctx *wr_ctx, | ||
| 235 | struct btrfs_fs_info *fs_info, | ||
| 236 | struct btrfs_device *dev, | ||
| 237 | int is_dev_replace); | ||
| 238 | static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx); | ||
| 239 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | ||
| 240 | struct scrub_page *spage); | ||
| 241 | static void scrub_wr_submit(struct scrub_ctx *sctx); | ||
| 242 | static void scrub_wr_bio_end_io(struct bio *bio, int err); | ||
| 243 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); | ||
| 244 | static int write_page_nocow(struct scrub_ctx *sctx, | ||
| 245 | u64 physical_for_dev_replace, struct page *page); | ||
| 246 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, | ||
| 247 | void *ctx); | ||
| 248 | static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | ||
| 249 | int mirror_num, u64 physical_for_dev_replace); | ||
| 250 | static void copy_nocow_pages_worker(struct btrfs_work *work); | ||
| 251 | |||
| 252 | |||
| 253 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx) | ||
| 254 | { | ||
| 255 | atomic_inc(&sctx->bios_in_flight); | ||
| 256 | } | ||
| 174 | 257 | ||
| 258 | static void scrub_pending_bio_dec(struct scrub_ctx *sctx) | ||
| 259 | { | ||
| 260 | atomic_dec(&sctx->bios_in_flight); | ||
| 261 | wake_up(&sctx->list_wait); | ||
| 262 | } | ||
| 175 | 263 | ||
| 176 | static void scrub_free_csums(struct scrub_dev *sdev) | 264 | /* |
| 265 | * used for workers that require transaction commits (i.e., for the | ||
| 266 | * NOCOW case) | ||
| 267 | */ | ||
| 268 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) | ||
| 177 | { | 269 | { |
| 178 | while (!list_empty(&sdev->csum_list)) { | 270 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; |
| 271 | |||
| 272 | /* | ||
| 273 | * increment scrubs_running to prevent cancel requests from | ||
| 274 | * completing as long as a worker is running. we must also | ||
| 275 | * increment scrubs_paused to prevent deadlocking on pause | ||
| 276 | * requests used for transactions commits (as the worker uses a | ||
| 277 | * transaction context). it is safe to regard the worker | ||
| 278 | * as paused for all matters practical. effectively, we only | ||
| 279 | * avoid cancellation requests from completing. | ||
| 280 | */ | ||
| 281 | mutex_lock(&fs_info->scrub_lock); | ||
| 282 | atomic_inc(&fs_info->scrubs_running); | ||
| 283 | atomic_inc(&fs_info->scrubs_paused); | ||
| 284 | mutex_unlock(&fs_info->scrub_lock); | ||
| 285 | atomic_inc(&sctx->workers_pending); | ||
| 286 | } | ||
| 287 | |||
| 288 | /* used for workers that require transaction commits */ | ||
| 289 | static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx) | ||
| 290 | { | ||
| 291 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
| 292 | |||
| 293 | /* | ||
| 294 | * see scrub_pending_trans_workers_inc() why we're pretending | ||
| 295 | * to be paused in the scrub counters | ||
| 296 | */ | ||
| 297 | mutex_lock(&fs_info->scrub_lock); | ||
| 298 | atomic_dec(&fs_info->scrubs_running); | ||
| 299 | atomic_dec(&fs_info->scrubs_paused); | ||
| 300 | mutex_unlock(&fs_info->scrub_lock); | ||
| 301 | atomic_dec(&sctx->workers_pending); | ||
| 302 | wake_up(&fs_info->scrub_pause_wait); | ||
| 303 | wake_up(&sctx->list_wait); | ||
| 304 | } | ||
| 305 | |||
| 306 | static void scrub_free_csums(struct scrub_ctx *sctx) | ||
| 307 | { | ||
| 308 | while (!list_empty(&sctx->csum_list)) { | ||
| 179 | struct btrfs_ordered_sum *sum; | 309 | struct btrfs_ordered_sum *sum; |
| 180 | sum = list_first_entry(&sdev->csum_list, | 310 | sum = list_first_entry(&sctx->csum_list, |
| 181 | struct btrfs_ordered_sum, list); | 311 | struct btrfs_ordered_sum, list); |
| 182 | list_del(&sum->list); | 312 | list_del(&sum->list); |
| 183 | kfree(sum); | 313 | kfree(sum); |
| 184 | } | 314 | } |
| 185 | } | 315 | } |
| 186 | 316 | ||
| 187 | static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) | 317 | static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) |
| 188 | { | 318 | { |
| 189 | int i; | 319 | int i; |
| 190 | 320 | ||
| 191 | if (!sdev) | 321 | if (!sctx) |
| 192 | return; | 322 | return; |
| 193 | 323 | ||
| 324 | scrub_free_wr_ctx(&sctx->wr_ctx); | ||
| 325 | |||
| 194 | /* this can happen when scrub is cancelled */ | 326 | /* this can happen when scrub is cancelled */ |
| 195 | if (sdev->curr != -1) { | 327 | if (sctx->curr != -1) { |
| 196 | struct scrub_bio *sbio = sdev->bios[sdev->curr]; | 328 | struct scrub_bio *sbio = sctx->bios[sctx->curr]; |
| 197 | 329 | ||
| 198 | for (i = 0; i < sbio->page_count; i++) { | 330 | for (i = 0; i < sbio->page_count; i++) { |
| 199 | BUG_ON(!sbio->pagev[i]); | 331 | WARN_ON(!sbio->pagev[i]->page); |
| 200 | BUG_ON(!sbio->pagev[i]->page); | ||
| 201 | scrub_block_put(sbio->pagev[i]->sblock); | 332 | scrub_block_put(sbio->pagev[i]->sblock); |
| 202 | } | 333 | } |
| 203 | bio_put(sbio->bio); | 334 | bio_put(sbio->bio); |
| 204 | } | 335 | } |
| 205 | 336 | ||
| 206 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | 337 | for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { |
| 207 | struct scrub_bio *sbio = sdev->bios[i]; | 338 | struct scrub_bio *sbio = sctx->bios[i]; |
| 208 | 339 | ||
| 209 | if (!sbio) | 340 | if (!sbio) |
| 210 | break; | 341 | break; |
| 211 | kfree(sbio); | 342 | kfree(sbio); |
| 212 | } | 343 | } |
| 213 | 344 | ||
| 214 | scrub_free_csums(sdev); | 345 | scrub_free_csums(sctx); |
| 215 | kfree(sdev); | 346 | kfree(sctx); |
| 216 | } | 347 | } |
| 217 | 348 | ||
| 218 | static noinline_for_stack | 349 | static noinline_for_stack |
| 219 | struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | 350 | struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) |
| 220 | { | 351 | { |
| 221 | struct scrub_dev *sdev; | 352 | struct scrub_ctx *sctx; |
| 222 | int i; | 353 | int i; |
| 223 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | 354 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; |
| 224 | int pages_per_bio; | 355 | int pages_per_rd_bio; |
| 356 | int ret; | ||
| 225 | 357 | ||
| 226 | pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO, | 358 | /* |
| 227 | bio_get_nr_vecs(dev->bdev)); | 359 | * the setting of pages_per_rd_bio is correct for scrub but might |
| 228 | sdev = kzalloc(sizeof(*sdev), GFP_NOFS); | 360 | * be wrong for the dev_replace code where we might read from |
| 229 | if (!sdev) | 361 | * different devices in the initial huge bios. However, that |
| 362 | * code is able to correctly handle the case when adding a page | ||
| 363 | * to a bio fails. | ||
| 364 | */ | ||
| 365 | if (dev->bdev) | ||
| 366 | pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO, | ||
| 367 | bio_get_nr_vecs(dev->bdev)); | ||
| 368 | else | ||
| 369 | pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO; | ||
| 370 | sctx = kzalloc(sizeof(*sctx), GFP_NOFS); | ||
| 371 | if (!sctx) | ||
| 230 | goto nomem; | 372 | goto nomem; |
| 231 | sdev->dev = dev; | 373 | sctx->is_dev_replace = is_dev_replace; |
| 232 | sdev->pages_per_bio = pages_per_bio; | 374 | sctx->pages_per_rd_bio = pages_per_rd_bio; |
| 233 | sdev->curr = -1; | 375 | sctx->curr = -1; |
| 234 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | 376 | sctx->dev_root = dev->dev_root; |
| 377 | for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { | ||
| 235 | struct scrub_bio *sbio; | 378 | struct scrub_bio *sbio; |
| 236 | 379 | ||
| 237 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); | 380 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); |
| 238 | if (!sbio) | 381 | if (!sbio) |
| 239 | goto nomem; | 382 | goto nomem; |
| 240 | sdev->bios[i] = sbio; | 383 | sctx->bios[i] = sbio; |
| 241 | 384 | ||
| 242 | sbio->index = i; | 385 | sbio->index = i; |
| 243 | sbio->sdev = sdev; | 386 | sbio->sctx = sctx; |
| 244 | sbio->page_count = 0; | 387 | sbio->page_count = 0; |
| 245 | sbio->work.func = scrub_bio_end_io_worker; | 388 | sbio->work.func = scrub_bio_end_io_worker; |
| 246 | 389 | ||
| 247 | if (i != SCRUB_BIOS_PER_DEV-1) | 390 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
| 248 | sdev->bios[i]->next_free = i + 1; | 391 | sctx->bios[i]->next_free = i + 1; |
| 249 | else | 392 | else |
| 250 | sdev->bios[i]->next_free = -1; | 393 | sctx->bios[i]->next_free = -1; |
| 251 | } | 394 | } |
| 252 | sdev->first_free = 0; | 395 | sctx->first_free = 0; |
| 253 | sdev->nodesize = dev->dev_root->nodesize; | 396 | sctx->nodesize = dev->dev_root->nodesize; |
| 254 | sdev->leafsize = dev->dev_root->leafsize; | 397 | sctx->leafsize = dev->dev_root->leafsize; |
| 255 | sdev->sectorsize = dev->dev_root->sectorsize; | 398 | sctx->sectorsize = dev->dev_root->sectorsize; |
| 256 | atomic_set(&sdev->in_flight, 0); | 399 | atomic_set(&sctx->bios_in_flight, 0); |
| 257 | atomic_set(&sdev->fixup_cnt, 0); | 400 | atomic_set(&sctx->workers_pending, 0); |
| 258 | atomic_set(&sdev->cancel_req, 0); | 401 | atomic_set(&sctx->cancel_req, 0); |
| 259 | sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy); | 402 | sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy); |
| 260 | INIT_LIST_HEAD(&sdev->csum_list); | 403 | INIT_LIST_HEAD(&sctx->csum_list); |
| 261 | 404 | ||
| 262 | spin_lock_init(&sdev->list_lock); | 405 | spin_lock_init(&sctx->list_lock); |
| 263 | spin_lock_init(&sdev->stat_lock); | 406 | spin_lock_init(&sctx->stat_lock); |
| 264 | init_waitqueue_head(&sdev->list_wait); | 407 | init_waitqueue_head(&sctx->list_wait); |
| 265 | return sdev; | 408 | |
| 409 | ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info, | ||
| 410 | fs_info->dev_replace.tgtdev, is_dev_replace); | ||
| 411 | if (ret) { | ||
| 412 | scrub_free_ctx(sctx); | ||
| 413 | return ERR_PTR(ret); | ||
| 414 | } | ||
| 415 | return sctx; | ||
| 266 | 416 | ||
| 267 | nomem: | 417 | nomem: |
| 268 | scrub_free_dev(sdev); | 418 | scrub_free_ctx(sctx); |
| 269 | return ERR_PTR(-ENOMEM); | 419 | return ERR_PTR(-ENOMEM); |
| 270 | } | 420 | } |
| 271 | 421 | ||
| 272 | static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | 422 | static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, |
| 423 | void *warn_ctx) | ||
| 273 | { | 424 | { |
| 274 | u64 isize; | 425 | u64 isize; |
| 275 | u32 nlink; | 426 | u32 nlink; |
| @@ -277,7 +428,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 277 | int i; | 428 | int i; |
| 278 | struct extent_buffer *eb; | 429 | struct extent_buffer *eb; |
| 279 | struct btrfs_inode_item *inode_item; | 430 | struct btrfs_inode_item *inode_item; |
| 280 | struct scrub_warning *swarn = ctx; | 431 | struct scrub_warning *swarn = warn_ctx; |
| 281 | struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info; | 432 | struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info; |
| 282 | struct inode_fs_paths *ipath = NULL; | 433 | struct inode_fs_paths *ipath = NULL; |
| 283 | struct btrfs_root *local_root; | 434 | struct btrfs_root *local_root; |
| @@ -345,37 +496,42 @@ err: | |||
| 345 | 496 | ||
| 346 | static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | 497 | static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) |
| 347 | { | 498 | { |
| 348 | struct btrfs_device *dev = sblock->sdev->dev; | 499 | struct btrfs_device *dev; |
| 349 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | 500 | struct btrfs_fs_info *fs_info; |
| 350 | struct btrfs_path *path; | 501 | struct btrfs_path *path; |
| 351 | struct btrfs_key found_key; | 502 | struct btrfs_key found_key; |
| 352 | struct extent_buffer *eb; | 503 | struct extent_buffer *eb; |
| 353 | struct btrfs_extent_item *ei; | 504 | struct btrfs_extent_item *ei; |
| 354 | struct scrub_warning swarn; | 505 | struct scrub_warning swarn; |
| 355 | u32 item_size; | 506 | unsigned long ptr = 0; |
| 356 | int ret; | 507 | u64 extent_item_pos; |
| 508 | u64 flags = 0; | ||
| 357 | u64 ref_root; | 509 | u64 ref_root; |
| 510 | u32 item_size; | ||
| 358 | u8 ref_level; | 511 | u8 ref_level; |
| 359 | unsigned long ptr = 0; | ||
| 360 | const int bufsize = 4096; | 512 | const int bufsize = 4096; |
| 361 | u64 extent_item_pos; | 513 | int ret; |
| 514 | |||
| 515 | WARN_ON(sblock->page_count < 1); | ||
| 516 | dev = sblock->pagev[0]->dev; | ||
| 517 | fs_info = sblock->sctx->dev_root->fs_info; | ||
| 362 | 518 | ||
| 363 | path = btrfs_alloc_path(); | 519 | path = btrfs_alloc_path(); |
| 364 | 520 | ||
| 365 | swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); | 521 | swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); |
| 366 | swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); | 522 | swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); |
| 367 | BUG_ON(sblock->page_count < 1); | 523 | swarn.sector = (sblock->pagev[0]->physical) >> 9; |
| 368 | swarn.sector = (sblock->pagev[0].physical) >> 9; | 524 | swarn.logical = sblock->pagev[0]->logical; |
| 369 | swarn.logical = sblock->pagev[0].logical; | ||
| 370 | swarn.errstr = errstr; | 525 | swarn.errstr = errstr; |
| 371 | swarn.dev = dev; | 526 | swarn.dev = NULL; |
| 372 | swarn.msg_bufsize = bufsize; | 527 | swarn.msg_bufsize = bufsize; |
| 373 | swarn.scratch_bufsize = bufsize; | 528 | swarn.scratch_bufsize = bufsize; |
| 374 | 529 | ||
| 375 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) | 530 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) |
| 376 | goto out; | 531 | goto out; |
| 377 | 532 | ||
| 378 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key); | 533 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, |
| 534 | &flags); | ||
| 379 | if (ret < 0) | 535 | if (ret < 0) |
| 380 | goto out; | 536 | goto out; |
| 381 | 537 | ||
| @@ -387,7 +543,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 387 | item_size = btrfs_item_size_nr(eb, path->slots[0]); | 543 | item_size = btrfs_item_size_nr(eb, path->slots[0]); |
| 388 | btrfs_release_path(path); | 544 | btrfs_release_path(path); |
| 389 | 545 | ||
| 390 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 546 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 391 | do { | 547 | do { |
| 392 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, | 548 | ret = tree_backref_for_extent(&ptr, eb, ei, item_size, |
| 393 | &ref_root, &ref_level); | 549 | &ref_root, &ref_level); |
| @@ -403,6 +559,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 403 | } while (ret != 1); | 559 | } while (ret != 1); |
| 404 | } else { | 560 | } else { |
| 405 | swarn.path = path; | 561 | swarn.path = path; |
| 562 | swarn.dev = dev; | ||
| 406 | iterate_extent_inodes(fs_info, found_key.objectid, | 563 | iterate_extent_inodes(fs_info, found_key.objectid, |
| 407 | extent_item_pos, 1, | 564 | extent_item_pos, 1, |
| 408 | scrub_print_warning_inode, &swarn); | 565 | scrub_print_warning_inode, &swarn); |
| @@ -414,11 +571,11 @@ out: | |||
| 414 | kfree(swarn.msg_buf); | 571 | kfree(swarn.msg_buf); |
| 415 | } | 572 | } |
| 416 | 573 | ||
| 417 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) | 574 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) |
| 418 | { | 575 | { |
| 419 | struct page *page = NULL; | 576 | struct page *page = NULL; |
| 420 | unsigned long index; | 577 | unsigned long index; |
| 421 | struct scrub_fixup_nodatasum *fixup = ctx; | 578 | struct scrub_fixup_nodatasum *fixup = fixup_ctx; |
| 422 | int ret; | 579 | int ret; |
| 423 | int corrected = 0; | 580 | int corrected = 0; |
| 424 | struct btrfs_key key; | 581 | struct btrfs_key key; |
| @@ -449,7 +606,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 449 | } | 606 | } |
| 450 | 607 | ||
| 451 | if (PageUptodate(page)) { | 608 | if (PageUptodate(page)) { |
| 452 | struct btrfs_mapping_tree *map_tree; | 609 | struct btrfs_fs_info *fs_info; |
| 453 | if (PageDirty(page)) { | 610 | if (PageDirty(page)) { |
| 454 | /* | 611 | /* |
| 455 | * we need to write the data to the defect sector. the | 612 | * we need to write the data to the defect sector. the |
| @@ -470,8 +627,8 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 470 | ret = -EIO; | 627 | ret = -EIO; |
| 471 | goto out; | 628 | goto out; |
| 472 | } | 629 | } |
| 473 | map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; | 630 | fs_info = BTRFS_I(inode)->root->fs_info; |
| 474 | ret = repair_io_failure(map_tree, offset, PAGE_SIZE, | 631 | ret = repair_io_failure(fs_info, offset, PAGE_SIZE, |
| 475 | fixup->logical, page, | 632 | fixup->logical, page, |
| 476 | fixup->mirror_num); | 633 | fixup->mirror_num); |
| 477 | unlock_page(page); | 634 | unlock_page(page); |
| @@ -528,21 +685,21 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) | |||
| 528 | { | 685 | { |
| 529 | int ret; | 686 | int ret; |
| 530 | struct scrub_fixup_nodatasum *fixup; | 687 | struct scrub_fixup_nodatasum *fixup; |
| 531 | struct scrub_dev *sdev; | 688 | struct scrub_ctx *sctx; |
| 532 | struct btrfs_trans_handle *trans = NULL; | 689 | struct btrfs_trans_handle *trans = NULL; |
| 533 | struct btrfs_fs_info *fs_info; | 690 | struct btrfs_fs_info *fs_info; |
| 534 | struct btrfs_path *path; | 691 | struct btrfs_path *path; |
| 535 | int uncorrectable = 0; | 692 | int uncorrectable = 0; |
| 536 | 693 | ||
| 537 | fixup = container_of(work, struct scrub_fixup_nodatasum, work); | 694 | fixup = container_of(work, struct scrub_fixup_nodatasum, work); |
| 538 | sdev = fixup->sdev; | 695 | sctx = fixup->sctx; |
| 539 | fs_info = fixup->root->fs_info; | 696 | fs_info = fixup->root->fs_info; |
| 540 | 697 | ||
| 541 | path = btrfs_alloc_path(); | 698 | path = btrfs_alloc_path(); |
| 542 | if (!path) { | 699 | if (!path) { |
| 543 | spin_lock(&sdev->stat_lock); | 700 | spin_lock(&sctx->stat_lock); |
| 544 | ++sdev->stat.malloc_errors; | 701 | ++sctx->stat.malloc_errors; |
| 545 | spin_unlock(&sdev->stat_lock); | 702 | spin_unlock(&sctx->stat_lock); |
| 546 | uncorrectable = 1; | 703 | uncorrectable = 1; |
| 547 | goto out; | 704 | goto out; |
| 548 | } | 705 | } |
| @@ -571,35 +728,30 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) | |||
| 571 | } | 728 | } |
| 572 | WARN_ON(ret != 1); | 729 | WARN_ON(ret != 1); |
| 573 | 730 | ||
| 574 | spin_lock(&sdev->stat_lock); | 731 | spin_lock(&sctx->stat_lock); |
| 575 | ++sdev->stat.corrected_errors; | 732 | ++sctx->stat.corrected_errors; |
| 576 | spin_unlock(&sdev->stat_lock); | 733 | spin_unlock(&sctx->stat_lock); |
| 577 | 734 | ||
| 578 | out: | 735 | out: |
| 579 | if (trans && !IS_ERR(trans)) | 736 | if (trans && !IS_ERR(trans)) |
| 580 | btrfs_end_transaction(trans, fixup->root); | 737 | btrfs_end_transaction(trans, fixup->root); |
| 581 | if (uncorrectable) { | 738 | if (uncorrectable) { |
| 582 | spin_lock(&sdev->stat_lock); | 739 | spin_lock(&sctx->stat_lock); |
| 583 | ++sdev->stat.uncorrectable_errors; | 740 | ++sctx->stat.uncorrectable_errors; |
| 584 | spin_unlock(&sdev->stat_lock); | 741 | spin_unlock(&sctx->stat_lock); |
| 585 | 742 | btrfs_dev_replace_stats_inc( | |
| 743 | &sctx->dev_root->fs_info->dev_replace. | ||
| 744 | num_uncorrectable_read_errors); | ||
| 586 | printk_ratelimited_in_rcu(KERN_ERR | 745 | printk_ratelimited_in_rcu(KERN_ERR |
| 587 | "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", | 746 | "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", |
| 588 | (unsigned long long)fixup->logical, | 747 | (unsigned long long)fixup->logical, |
| 589 | rcu_str_deref(sdev->dev->name)); | 748 | rcu_str_deref(fixup->dev->name)); |
| 590 | } | 749 | } |
| 591 | 750 | ||
| 592 | btrfs_free_path(path); | 751 | btrfs_free_path(path); |
| 593 | kfree(fixup); | 752 | kfree(fixup); |
| 594 | 753 | ||
| 595 | /* see caller why we're pretending to be paused in the scrub counters */ | 754 | scrub_pending_trans_workers_dec(sctx); |
| 596 | mutex_lock(&fs_info->scrub_lock); | ||
| 597 | atomic_dec(&fs_info->scrubs_running); | ||
| 598 | atomic_dec(&fs_info->scrubs_paused); | ||
| 599 | mutex_unlock(&fs_info->scrub_lock); | ||
| 600 | atomic_dec(&sdev->fixup_cnt); | ||
| 601 | wake_up(&fs_info->scrub_pause_wait); | ||
| 602 | wake_up(&sdev->list_wait); | ||
| 603 | } | 755 | } |
| 604 | 756 | ||
| 605 | /* | 757 | /* |
| @@ -612,7 +764,8 @@ out: | |||
| 612 | */ | 764 | */ |
| 613 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | 765 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) |
| 614 | { | 766 | { |
| 615 | struct scrub_dev *sdev = sblock_to_check->sdev; | 767 | struct scrub_ctx *sctx = sblock_to_check->sctx; |
| 768 | struct btrfs_device *dev; | ||
| 616 | struct btrfs_fs_info *fs_info; | 769 | struct btrfs_fs_info *fs_info; |
| 617 | u64 length; | 770 | u64 length; |
| 618 | u64 logical; | 771 | u64 logical; |
| @@ -631,16 +784,33 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 631 | DEFAULT_RATELIMIT_BURST); | 784 | DEFAULT_RATELIMIT_BURST); |
| 632 | 785 | ||
| 633 | BUG_ON(sblock_to_check->page_count < 1); | 786 | BUG_ON(sblock_to_check->page_count < 1); |
| 634 | fs_info = sdev->dev->dev_root->fs_info; | 787 | fs_info = sctx->dev_root->fs_info; |
| 788 | if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) { | ||
| 789 | /* | ||
| 790 | * if we find an error in a super block, we just report it. | ||
| 791 | * They will get written with the next transaction commit | ||
| 792 | * anyway | ||
| 793 | */ | ||
| 794 | spin_lock(&sctx->stat_lock); | ||
| 795 | ++sctx->stat.super_errors; | ||
| 796 | spin_unlock(&sctx->stat_lock); | ||
| 797 | return 0; | ||
| 798 | } | ||
| 635 | length = sblock_to_check->page_count * PAGE_SIZE; | 799 | length = sblock_to_check->page_count * PAGE_SIZE; |
| 636 | logical = sblock_to_check->pagev[0].logical; | 800 | logical = sblock_to_check->pagev[0]->logical; |
| 637 | generation = sblock_to_check->pagev[0].generation; | 801 | generation = sblock_to_check->pagev[0]->generation; |
| 638 | BUG_ON(sblock_to_check->pagev[0].mirror_num < 1); | 802 | BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1); |
| 639 | failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1; | 803 | failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1; |
| 640 | is_metadata = !(sblock_to_check->pagev[0].flags & | 804 | is_metadata = !(sblock_to_check->pagev[0]->flags & |
| 641 | BTRFS_EXTENT_FLAG_DATA); | 805 | BTRFS_EXTENT_FLAG_DATA); |
| 642 | have_csum = sblock_to_check->pagev[0].have_csum; | 806 | have_csum = sblock_to_check->pagev[0]->have_csum; |
| 643 | csum = sblock_to_check->pagev[0].csum; | 807 | csum = sblock_to_check->pagev[0]->csum; |
| 808 | dev = sblock_to_check->pagev[0]->dev; | ||
| 809 | |||
| 810 | if (sctx->is_dev_replace && !is_metadata && !have_csum) { | ||
| 811 | sblocks_for_recheck = NULL; | ||
| 812 | goto nodatasum_case; | ||
| 813 | } | ||
| 644 | 814 | ||
| 645 | /* | 815 | /* |
| 646 | * read all mirrors one after the other. This includes to | 816 | * read all mirrors one after the other. This includes to |
| @@ -675,43 +845,32 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 675 | sizeof(*sblocks_for_recheck), | 845 | sizeof(*sblocks_for_recheck), |
| 676 | GFP_NOFS); | 846 | GFP_NOFS); |
| 677 | if (!sblocks_for_recheck) { | 847 | if (!sblocks_for_recheck) { |
| 678 | spin_lock(&sdev->stat_lock); | 848 | spin_lock(&sctx->stat_lock); |
| 679 | sdev->stat.malloc_errors++; | 849 | sctx->stat.malloc_errors++; |
| 680 | sdev->stat.read_errors++; | 850 | sctx->stat.read_errors++; |
| 681 | sdev->stat.uncorrectable_errors++; | 851 | sctx->stat.uncorrectable_errors++; |
| 682 | spin_unlock(&sdev->stat_lock); | 852 | spin_unlock(&sctx->stat_lock); |
| 683 | btrfs_dev_stat_inc_and_print(sdev->dev, | 853 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); |
| 684 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 685 | goto out; | 854 | goto out; |
| 686 | } | 855 | } |
| 687 | 856 | ||
| 688 | /* setup the context, map the logical blocks and alloc the pages */ | 857 | /* setup the context, map the logical blocks and alloc the pages */ |
| 689 | ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length, | 858 | ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length, |
| 690 | logical, sblocks_for_recheck); | 859 | logical, sblocks_for_recheck); |
| 691 | if (ret) { | 860 | if (ret) { |
| 692 | spin_lock(&sdev->stat_lock); | 861 | spin_lock(&sctx->stat_lock); |
| 693 | sdev->stat.read_errors++; | 862 | sctx->stat.read_errors++; |
| 694 | sdev->stat.uncorrectable_errors++; | 863 | sctx->stat.uncorrectable_errors++; |
| 695 | spin_unlock(&sdev->stat_lock); | 864 | spin_unlock(&sctx->stat_lock); |
| 696 | btrfs_dev_stat_inc_and_print(sdev->dev, | 865 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); |
| 697 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 698 | goto out; | 866 | goto out; |
| 699 | } | 867 | } |
| 700 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); | 868 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); |
| 701 | sblock_bad = sblocks_for_recheck + failed_mirror_index; | 869 | sblock_bad = sblocks_for_recheck + failed_mirror_index; |
| 702 | 870 | ||
| 703 | /* build and submit the bios for the failed mirror, check checksums */ | 871 | /* build and submit the bios for the failed mirror, check checksums */ |
| 704 | ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, | 872 | scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, |
| 705 | csum, generation, sdev->csum_size); | 873 | csum, generation, sctx->csum_size); |
| 706 | if (ret) { | ||
| 707 | spin_lock(&sdev->stat_lock); | ||
| 708 | sdev->stat.read_errors++; | ||
| 709 | sdev->stat.uncorrectable_errors++; | ||
| 710 | spin_unlock(&sdev->stat_lock); | ||
| 711 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 712 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 713 | goto out; | ||
| 714 | } | ||
| 715 | 874 | ||
| 716 | if (!sblock_bad->header_error && !sblock_bad->checksum_error && | 875 | if (!sblock_bad->header_error && !sblock_bad->checksum_error && |
| 717 | sblock_bad->no_io_error_seen) { | 876 | sblock_bad->no_io_error_seen) { |
| @@ -723,50 +882,54 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 723 | * different bio (usually one of the two latter cases is | 882 | * different bio (usually one of the two latter cases is |
| 724 | * the cause) | 883 | * the cause) |
| 725 | */ | 884 | */ |
| 726 | spin_lock(&sdev->stat_lock); | 885 | spin_lock(&sctx->stat_lock); |
| 727 | sdev->stat.unverified_errors++; | 886 | sctx->stat.unverified_errors++; |
| 728 | spin_unlock(&sdev->stat_lock); | 887 | spin_unlock(&sctx->stat_lock); |
| 729 | 888 | ||
| 889 | if (sctx->is_dev_replace) | ||
| 890 | scrub_write_block_to_dev_replace(sblock_bad); | ||
| 730 | goto out; | 891 | goto out; |
| 731 | } | 892 | } |
| 732 | 893 | ||
| 733 | if (!sblock_bad->no_io_error_seen) { | 894 | if (!sblock_bad->no_io_error_seen) { |
| 734 | spin_lock(&sdev->stat_lock); | 895 | spin_lock(&sctx->stat_lock); |
| 735 | sdev->stat.read_errors++; | 896 | sctx->stat.read_errors++; |
| 736 | spin_unlock(&sdev->stat_lock); | 897 | spin_unlock(&sctx->stat_lock); |
| 737 | if (__ratelimit(&_rs)) | 898 | if (__ratelimit(&_rs)) |
| 738 | scrub_print_warning("i/o error", sblock_to_check); | 899 | scrub_print_warning("i/o error", sblock_to_check); |
| 739 | btrfs_dev_stat_inc_and_print(sdev->dev, | 900 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); |
| 740 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 741 | } else if (sblock_bad->checksum_error) { | 901 | } else if (sblock_bad->checksum_error) { |
| 742 | spin_lock(&sdev->stat_lock); | 902 | spin_lock(&sctx->stat_lock); |
| 743 | sdev->stat.csum_errors++; | 903 | sctx->stat.csum_errors++; |
| 744 | spin_unlock(&sdev->stat_lock); | 904 | spin_unlock(&sctx->stat_lock); |
| 745 | if (__ratelimit(&_rs)) | 905 | if (__ratelimit(&_rs)) |
| 746 | scrub_print_warning("checksum error", sblock_to_check); | 906 | scrub_print_warning("checksum error", sblock_to_check); |
| 747 | btrfs_dev_stat_inc_and_print(sdev->dev, | 907 | btrfs_dev_stat_inc_and_print(dev, |
| 748 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 908 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
| 749 | } else if (sblock_bad->header_error) { | 909 | } else if (sblock_bad->header_error) { |
| 750 | spin_lock(&sdev->stat_lock); | 910 | spin_lock(&sctx->stat_lock); |
| 751 | sdev->stat.verify_errors++; | 911 | sctx->stat.verify_errors++; |
| 752 | spin_unlock(&sdev->stat_lock); | 912 | spin_unlock(&sctx->stat_lock); |
| 753 | if (__ratelimit(&_rs)) | 913 | if (__ratelimit(&_rs)) |
| 754 | scrub_print_warning("checksum/header error", | 914 | scrub_print_warning("checksum/header error", |
| 755 | sblock_to_check); | 915 | sblock_to_check); |
| 756 | if (sblock_bad->generation_error) | 916 | if (sblock_bad->generation_error) |
| 757 | btrfs_dev_stat_inc_and_print(sdev->dev, | 917 | btrfs_dev_stat_inc_and_print(dev, |
| 758 | BTRFS_DEV_STAT_GENERATION_ERRS); | 918 | BTRFS_DEV_STAT_GENERATION_ERRS); |
| 759 | else | 919 | else |
| 760 | btrfs_dev_stat_inc_and_print(sdev->dev, | 920 | btrfs_dev_stat_inc_and_print(dev, |
| 761 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 921 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
| 762 | } | 922 | } |
| 763 | 923 | ||
| 764 | if (sdev->readonly) | 924 | if (sctx->readonly && !sctx->is_dev_replace) |
| 765 | goto did_not_correct_error; | 925 | goto did_not_correct_error; |
| 766 | 926 | ||
| 767 | if (!is_metadata && !have_csum) { | 927 | if (!is_metadata && !have_csum) { |
| 768 | struct scrub_fixup_nodatasum *fixup_nodatasum; | 928 | struct scrub_fixup_nodatasum *fixup_nodatasum; |
| 769 | 929 | ||
| 930 | nodatasum_case: | ||
| 931 | WARN_ON(sctx->is_dev_replace); | ||
| 932 | |||
| 770 | /* | 933 | /* |
| 771 | * !is_metadata and !have_csum, this means that the data | 934 | * !is_metadata and !have_csum, this means that the data |
| 772 | * might not be COW'ed, that it might be modified | 935 | * might not be COW'ed, that it might be modified |
| @@ -777,24 +940,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 777 | fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS); | 940 | fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS); |
| 778 | if (!fixup_nodatasum) | 941 | if (!fixup_nodatasum) |
| 779 | goto did_not_correct_error; | 942 | goto did_not_correct_error; |
| 780 | fixup_nodatasum->sdev = sdev; | 943 | fixup_nodatasum->sctx = sctx; |
| 944 | fixup_nodatasum->dev = dev; | ||
| 781 | fixup_nodatasum->logical = logical; | 945 | fixup_nodatasum->logical = logical; |
| 782 | fixup_nodatasum->root = fs_info->extent_root; | 946 | fixup_nodatasum->root = fs_info->extent_root; |
| 783 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; | 947 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; |
| 784 | /* | 948 | scrub_pending_trans_workers_inc(sctx); |
| 785 | * increment scrubs_running to prevent cancel requests from | ||
| 786 | * completing as long as a fixup worker is running. we must also | ||
| 787 | * increment scrubs_paused to prevent deadlocking on pause | ||
| 788 | * requests used for transactions commits (as the worker uses a | ||
| 789 | * transaction context). it is safe to regard the fixup worker | ||
| 790 | * as paused for all matters practical. effectively, we only | ||
| 791 | * avoid cancellation requests from completing. | ||
| 792 | */ | ||
| 793 | mutex_lock(&fs_info->scrub_lock); | ||
| 794 | atomic_inc(&fs_info->scrubs_running); | ||
| 795 | atomic_inc(&fs_info->scrubs_paused); | ||
| 796 | mutex_unlock(&fs_info->scrub_lock); | ||
| 797 | atomic_inc(&sdev->fixup_cnt); | ||
| 798 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; | 949 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; |
| 799 | btrfs_queue_worker(&fs_info->scrub_workers, | 950 | btrfs_queue_worker(&fs_info->scrub_workers, |
| 800 | &fixup_nodatasum->work); | 951 | &fixup_nodatasum->work); |
| @@ -803,26 +954,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 803 | 954 | ||
| 804 | /* | 955 | /* |
| 805 | * now build and submit the bios for the other mirrors, check | 956 | * now build and submit the bios for the other mirrors, check |
| 806 | * checksums | 957 | * checksums. |
| 807 | */ | 958 | * First try to pick the mirror which is completely without I/O |
| 808 | for (mirror_index = 0; | ||
| 809 | mirror_index < BTRFS_MAX_MIRRORS && | ||
| 810 | sblocks_for_recheck[mirror_index].page_count > 0; | ||
| 811 | mirror_index++) { | ||
| 812 | if (mirror_index == failed_mirror_index) | ||
| 813 | continue; | ||
| 814 | |||
| 815 | /* build and submit the bios, check checksums */ | ||
| 816 | ret = scrub_recheck_block(fs_info, | ||
| 817 | sblocks_for_recheck + mirror_index, | ||
| 818 | is_metadata, have_csum, csum, | ||
| 819 | generation, sdev->csum_size); | ||
| 820 | if (ret) | ||
| 821 | goto did_not_correct_error; | ||
| 822 | } | ||
| 823 | |||
| 824 | /* | ||
| 825 | * first try to pick the mirror which is completely without I/O | ||
| 826 | * errors and also does not have a checksum error. | 959 | * errors and also does not have a checksum error. |
| 827 | * If one is found, and if a checksum is present, the full block | 960 | * If one is found, and if a checksum is present, the full block |
| 828 | * that is known to contain an error is rewritten. Afterwards | 961 | * that is known to contain an error is rewritten. Afterwards |
| @@ -838,24 +971,93 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 838 | mirror_index < BTRFS_MAX_MIRRORS && | 971 | mirror_index < BTRFS_MAX_MIRRORS && |
| 839 | sblocks_for_recheck[mirror_index].page_count > 0; | 972 | sblocks_for_recheck[mirror_index].page_count > 0; |
| 840 | mirror_index++) { | 973 | mirror_index++) { |
| 841 | struct scrub_block *sblock_other = sblocks_for_recheck + | 974 | struct scrub_block *sblock_other; |
| 842 | mirror_index; | 975 | |
| 976 | if (mirror_index == failed_mirror_index) | ||
| 977 | continue; | ||
| 978 | sblock_other = sblocks_for_recheck + mirror_index; | ||
| 979 | |||
| 980 | /* build and submit the bios, check checksums */ | ||
| 981 | scrub_recheck_block(fs_info, sblock_other, is_metadata, | ||
| 982 | have_csum, csum, generation, | ||
| 983 | sctx->csum_size); | ||
| 843 | 984 | ||
| 844 | if (!sblock_other->header_error && | 985 | if (!sblock_other->header_error && |
| 845 | !sblock_other->checksum_error && | 986 | !sblock_other->checksum_error && |
| 846 | sblock_other->no_io_error_seen) { | 987 | sblock_other->no_io_error_seen) { |
| 847 | int force_write = is_metadata || have_csum; | 988 | if (sctx->is_dev_replace) { |
| 848 | 989 | scrub_write_block_to_dev_replace(sblock_other); | |
| 849 | ret = scrub_repair_block_from_good_copy(sblock_bad, | 990 | } else { |
| 850 | sblock_other, | 991 | int force_write = is_metadata || have_csum; |
| 851 | force_write); | 992 | |
| 993 | ret = scrub_repair_block_from_good_copy( | ||
| 994 | sblock_bad, sblock_other, | ||
| 995 | force_write); | ||
| 996 | } | ||
| 852 | if (0 == ret) | 997 | if (0 == ret) |
| 853 | goto corrected_error; | 998 | goto corrected_error; |
| 854 | } | 999 | } |
| 855 | } | 1000 | } |
| 856 | 1001 | ||
| 857 | /* | 1002 | /* |
| 858 | * in case of I/O errors in the area that is supposed to be | 1003 | * for dev_replace, pick good pages and write to the target device. |
| 1004 | */ | ||
| 1005 | if (sctx->is_dev_replace) { | ||
| 1006 | success = 1; | ||
| 1007 | for (page_num = 0; page_num < sblock_bad->page_count; | ||
| 1008 | page_num++) { | ||
| 1009 | int sub_success; | ||
| 1010 | |||
| 1011 | sub_success = 0; | ||
| 1012 | for (mirror_index = 0; | ||
| 1013 | mirror_index < BTRFS_MAX_MIRRORS && | ||
| 1014 | sblocks_for_recheck[mirror_index].page_count > 0; | ||
| 1015 | mirror_index++) { | ||
| 1016 | struct scrub_block *sblock_other = | ||
| 1017 | sblocks_for_recheck + mirror_index; | ||
| 1018 | struct scrub_page *page_other = | ||
| 1019 | sblock_other->pagev[page_num]; | ||
| 1020 | |||
| 1021 | if (!page_other->io_error) { | ||
| 1022 | ret = scrub_write_page_to_dev_replace( | ||
| 1023 | sblock_other, page_num); | ||
| 1024 | if (ret == 0) { | ||
| 1025 | /* succeeded for this page */ | ||
| 1026 | sub_success = 1; | ||
| 1027 | break; | ||
| 1028 | } else { | ||
| 1029 | btrfs_dev_replace_stats_inc( | ||
| 1030 | &sctx->dev_root-> | ||
| 1031 | fs_info->dev_replace. | ||
| 1032 | num_write_errors); | ||
| 1033 | } | ||
| 1034 | } | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | if (!sub_success) { | ||
| 1038 | /* | ||
| 1039 | * did not find a mirror to fetch the page | ||
| 1040 | * from. scrub_write_page_to_dev_replace() | ||
| 1041 | * handles this case (page->io_error), by | ||
| 1042 | * filling the block with zeros before | ||
| 1043 | * submitting the write request | ||
| 1044 | */ | ||
| 1045 | success = 0; | ||
| 1046 | ret = scrub_write_page_to_dev_replace( | ||
| 1047 | sblock_bad, page_num); | ||
| 1048 | if (ret) | ||
| 1049 | btrfs_dev_replace_stats_inc( | ||
| 1050 | &sctx->dev_root->fs_info-> | ||
| 1051 | dev_replace.num_write_errors); | ||
| 1052 | } | ||
| 1053 | } | ||
| 1054 | |||
| 1055 | goto out; | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | /* | ||
| 1059 | * for regular scrub, repair those pages that are errored. | ||
| 1060 | * In case of I/O errors in the area that is supposed to be | ||
| 859 | * repaired, continue by picking good copies of those pages. | 1061 | * repaired, continue by picking good copies of those pages. |
| 860 | * Select the good pages from mirrors to rewrite bad pages from | 1062 | * Select the good pages from mirrors to rewrite bad pages from |
| 861 | * the area to fix. Afterwards verify the checksum of the block | 1063 | * the area to fix. Afterwards verify the checksum of the block |
| @@ -885,7 +1087,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 885 | 1087 | ||
| 886 | success = 1; | 1088 | success = 1; |
| 887 | for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { | 1089 | for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { |
| 888 | struct scrub_page *page_bad = sblock_bad->pagev + page_num; | 1090 | struct scrub_page *page_bad = sblock_bad->pagev[page_num]; |
| 889 | 1091 | ||
| 890 | if (!page_bad->io_error) | 1092 | if (!page_bad->io_error) |
| 891 | continue; | 1093 | continue; |
| @@ -896,8 +1098,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 896 | mirror_index++) { | 1098 | mirror_index++) { |
| 897 | struct scrub_block *sblock_other = sblocks_for_recheck + | 1099 | struct scrub_block *sblock_other = sblocks_for_recheck + |
| 898 | mirror_index; | 1100 | mirror_index; |
| 899 | struct scrub_page *page_other = sblock_other->pagev + | 1101 | struct scrub_page *page_other = sblock_other->pagev[ |
| 900 | page_num; | 1102 | page_num]; |
| 901 | 1103 | ||
| 902 | if (!page_other->io_error) { | 1104 | if (!page_other->io_error) { |
| 903 | ret = scrub_repair_page_from_good_copy( | 1105 | ret = scrub_repair_page_from_good_copy( |
| @@ -926,10 +1128,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 926 | * is verified, but most likely the data comes out | 1128 | * is verified, but most likely the data comes out |
| 927 | * of the page cache. | 1129 | * of the page cache. |
| 928 | */ | 1130 | */ |
| 929 | ret = scrub_recheck_block(fs_info, sblock_bad, | 1131 | scrub_recheck_block(fs_info, sblock_bad, |
| 930 | is_metadata, have_csum, csum, | 1132 | is_metadata, have_csum, csum, |
| 931 | generation, sdev->csum_size); | 1133 | generation, sctx->csum_size); |
| 932 | if (!ret && !sblock_bad->header_error && | 1134 | if (!sblock_bad->header_error && |
| 933 | !sblock_bad->checksum_error && | 1135 | !sblock_bad->checksum_error && |
| 934 | sblock_bad->no_io_error_seen) | 1136 | sblock_bad->no_io_error_seen) |
| 935 | goto corrected_error; | 1137 | goto corrected_error; |
| @@ -937,23 +1139,23 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 937 | goto did_not_correct_error; | 1139 | goto did_not_correct_error; |
| 938 | } else { | 1140 | } else { |
| 939 | corrected_error: | 1141 | corrected_error: |
| 940 | spin_lock(&sdev->stat_lock); | 1142 | spin_lock(&sctx->stat_lock); |
| 941 | sdev->stat.corrected_errors++; | 1143 | sctx->stat.corrected_errors++; |
| 942 | spin_unlock(&sdev->stat_lock); | 1144 | spin_unlock(&sctx->stat_lock); |
| 943 | printk_ratelimited_in_rcu(KERN_ERR | 1145 | printk_ratelimited_in_rcu(KERN_ERR |
| 944 | "btrfs: fixed up error at logical %llu on dev %s\n", | 1146 | "btrfs: fixed up error at logical %llu on dev %s\n", |
| 945 | (unsigned long long)logical, | 1147 | (unsigned long long)logical, |
| 946 | rcu_str_deref(sdev->dev->name)); | 1148 | rcu_str_deref(dev->name)); |
| 947 | } | 1149 | } |
| 948 | } else { | 1150 | } else { |
| 949 | did_not_correct_error: | 1151 | did_not_correct_error: |
| 950 | spin_lock(&sdev->stat_lock); | 1152 | spin_lock(&sctx->stat_lock); |
| 951 | sdev->stat.uncorrectable_errors++; | 1153 | sctx->stat.uncorrectable_errors++; |
| 952 | spin_unlock(&sdev->stat_lock); | 1154 | spin_unlock(&sctx->stat_lock); |
| 953 | printk_ratelimited_in_rcu(KERN_ERR | 1155 | printk_ratelimited_in_rcu(KERN_ERR |
| 954 | "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", | 1156 | "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", |
| 955 | (unsigned long long)logical, | 1157 | (unsigned long long)logical, |
| 956 | rcu_str_deref(sdev->dev->name)); | 1158 | rcu_str_deref(dev->name)); |
| 957 | } | 1159 | } |
| 958 | 1160 | ||
| 959 | out: | 1161 | out: |
| @@ -964,11 +1166,11 @@ out: | |||
| 964 | mirror_index; | 1166 | mirror_index; |
| 965 | int page_index; | 1167 | int page_index; |
| 966 | 1168 | ||
| 967 | for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO; | 1169 | for (page_index = 0; page_index < sblock->page_count; |
| 968 | page_index++) | 1170 | page_index++) { |
| 969 | if (sblock->pagev[page_index].page) | 1171 | sblock->pagev[page_index]->sblock = NULL; |
| 970 | __free_page( | 1172 | scrub_page_put(sblock->pagev[page_index]); |
| 971 | sblock->pagev[page_index].page); | 1173 | } |
| 972 | } | 1174 | } |
| 973 | kfree(sblocks_for_recheck); | 1175 | kfree(sblocks_for_recheck); |
| 974 | } | 1176 | } |
| @@ -976,8 +1178,9 @@ out: | |||
| 976 | return 0; | 1178 | return 0; |
| 977 | } | 1179 | } |
| 978 | 1180 | ||
| 979 | static int scrub_setup_recheck_block(struct scrub_dev *sdev, | 1181 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, |
| 980 | struct btrfs_mapping_tree *map_tree, | 1182 | struct btrfs_fs_info *fs_info, |
| 1183 | struct scrub_block *original_sblock, | ||
| 981 | u64 length, u64 logical, | 1184 | u64 length, u64 logical, |
| 982 | struct scrub_block *sblocks_for_recheck) | 1185 | struct scrub_block *sblocks_for_recheck) |
| 983 | { | 1186 | { |
| @@ -986,7 +1189,7 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 986 | int ret; | 1189 | int ret; |
| 987 | 1190 | ||
| 988 | /* | 1191 | /* |
| 989 | * note: the three members sdev, ref_count and outstanding_pages | 1192 | * note: the two members ref_count and outstanding_pages |
| 990 | * are not used (and not set) in the blocks that are used for | 1193 | * are not used (and not set) in the blocks that are used for |
| 991 | * the recheck procedure | 1194 | * the recheck procedure |
| 992 | */ | 1195 | */ |
| @@ -1001,14 +1204,14 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 1001 | * with a length of PAGE_SIZE, each returned stripe | 1204 | * with a length of PAGE_SIZE, each returned stripe |
| 1002 | * represents one mirror | 1205 | * represents one mirror |
| 1003 | */ | 1206 | */ |
| 1004 | ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length, | 1207 | ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, |
| 1005 | &bbio, 0); | 1208 | &mapped_length, &bbio, 0); |
| 1006 | if (ret || !bbio || mapped_length < sublen) { | 1209 | if (ret || !bbio || mapped_length < sublen) { |
| 1007 | kfree(bbio); | 1210 | kfree(bbio); |
| 1008 | return -EIO; | 1211 | return -EIO; |
| 1009 | } | 1212 | } |
| 1010 | 1213 | ||
| 1011 | BUG_ON(page_index >= SCRUB_PAGES_PER_BIO); | 1214 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); |
| 1012 | for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; | 1215 | for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; |
| 1013 | mirror_index++) { | 1216 | mirror_index++) { |
| 1014 | struct scrub_block *sblock; | 1217 | struct scrub_block *sblock; |
| @@ -1018,20 +1221,31 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 1018 | continue; | 1221 | continue; |
| 1019 | 1222 | ||
| 1020 | sblock = sblocks_for_recheck + mirror_index; | 1223 | sblock = sblocks_for_recheck + mirror_index; |
| 1021 | page = sblock->pagev + page_index; | 1224 | sblock->sctx = sctx; |
| 1225 | page = kzalloc(sizeof(*page), GFP_NOFS); | ||
| 1226 | if (!page) { | ||
| 1227 | leave_nomem: | ||
| 1228 | spin_lock(&sctx->stat_lock); | ||
| 1229 | sctx->stat.malloc_errors++; | ||
| 1230 | spin_unlock(&sctx->stat_lock); | ||
| 1231 | kfree(bbio); | ||
| 1232 | return -ENOMEM; | ||
| 1233 | } | ||
| 1234 | scrub_page_get(page); | ||
| 1235 | sblock->pagev[page_index] = page; | ||
| 1022 | page->logical = logical; | 1236 | page->logical = logical; |
| 1023 | page->physical = bbio->stripes[mirror_index].physical; | 1237 | page->physical = bbio->stripes[mirror_index].physical; |
| 1238 | BUG_ON(page_index >= original_sblock->page_count); | ||
| 1239 | page->physical_for_dev_replace = | ||
| 1240 | original_sblock->pagev[page_index]-> | ||
| 1241 | physical_for_dev_replace; | ||
| 1024 | /* for missing devices, dev->bdev is NULL */ | 1242 | /* for missing devices, dev->bdev is NULL */ |
| 1025 | page->dev = bbio->stripes[mirror_index].dev; | 1243 | page->dev = bbio->stripes[mirror_index].dev; |
| 1026 | page->mirror_num = mirror_index + 1; | 1244 | page->mirror_num = mirror_index + 1; |
| 1027 | page->page = alloc_page(GFP_NOFS); | ||
| 1028 | if (!page->page) { | ||
| 1029 | spin_lock(&sdev->stat_lock); | ||
| 1030 | sdev->stat.malloc_errors++; | ||
| 1031 | spin_unlock(&sdev->stat_lock); | ||
| 1032 | return -ENOMEM; | ||
| 1033 | } | ||
| 1034 | sblock->page_count++; | 1245 | sblock->page_count++; |
| 1246 | page->page = alloc_page(GFP_NOFS); | ||
| 1247 | if (!page->page) | ||
| 1248 | goto leave_nomem; | ||
| 1035 | } | 1249 | } |
| 1036 | kfree(bbio); | 1250 | kfree(bbio); |
| 1037 | length -= sublen; | 1251 | length -= sublen; |
| @@ -1049,10 +1263,10 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 1049 | * to take those pages that are not errored from all the mirrors so that | 1263 | * to take those pages that are not errored from all the mirrors so that |
| 1050 | * the pages that are errored in the just handled mirror can be repaired. | 1264 | * the pages that are errored in the just handled mirror can be repaired. |
| 1051 | */ | 1265 | */ |
| 1052 | static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | 1266 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
| 1053 | struct scrub_block *sblock, int is_metadata, | 1267 | struct scrub_block *sblock, int is_metadata, |
| 1054 | int have_csum, u8 *csum, u64 generation, | 1268 | int have_csum, u8 *csum, u64 generation, |
| 1055 | u16 csum_size) | 1269 | u16 csum_size) |
| 1056 | { | 1270 | { |
| 1057 | int page_num; | 1271 | int page_num; |
| 1058 | 1272 | ||
| @@ -1062,8 +1276,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1062 | 1276 | ||
| 1063 | for (page_num = 0; page_num < sblock->page_count; page_num++) { | 1277 | for (page_num = 0; page_num < sblock->page_count; page_num++) { |
| 1064 | struct bio *bio; | 1278 | struct bio *bio; |
| 1065 | int ret; | 1279 | struct scrub_page *page = sblock->pagev[page_num]; |
| 1066 | struct scrub_page *page = sblock->pagev + page_num; | ||
| 1067 | DECLARE_COMPLETION_ONSTACK(complete); | 1280 | DECLARE_COMPLETION_ONSTACK(complete); |
| 1068 | 1281 | ||
| 1069 | if (page->dev->bdev == NULL) { | 1282 | if (page->dev->bdev == NULL) { |
| @@ -1072,20 +1285,19 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1072 | continue; | 1285 | continue; |
| 1073 | } | 1286 | } |
| 1074 | 1287 | ||
| 1075 | BUG_ON(!page->page); | 1288 | WARN_ON(!page->page); |
| 1076 | bio = bio_alloc(GFP_NOFS, 1); | 1289 | bio = bio_alloc(GFP_NOFS, 1); |
| 1077 | if (!bio) | 1290 | if (!bio) { |
| 1078 | return -EIO; | 1291 | page->io_error = 1; |
| 1292 | sblock->no_io_error_seen = 0; | ||
| 1293 | continue; | ||
| 1294 | } | ||
| 1079 | bio->bi_bdev = page->dev->bdev; | 1295 | bio->bi_bdev = page->dev->bdev; |
| 1080 | bio->bi_sector = page->physical >> 9; | 1296 | bio->bi_sector = page->physical >> 9; |
| 1081 | bio->bi_end_io = scrub_complete_bio_end_io; | 1297 | bio->bi_end_io = scrub_complete_bio_end_io; |
| 1082 | bio->bi_private = &complete; | 1298 | bio->bi_private = &complete; |
| 1083 | 1299 | ||
| 1084 | ret = bio_add_page(bio, page->page, PAGE_SIZE, 0); | 1300 | bio_add_page(bio, page->page, PAGE_SIZE, 0); |
| 1085 | if (PAGE_SIZE != ret) { | ||
| 1086 | bio_put(bio); | ||
| 1087 | return -EIO; | ||
| 1088 | } | ||
| 1089 | btrfsic_submit_bio(READ, bio); | 1301 | btrfsic_submit_bio(READ, bio); |
| 1090 | 1302 | ||
| 1091 | /* this will also unplug the queue */ | 1303 | /* this will also unplug the queue */ |
| @@ -1102,7 +1314,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1102 | have_csum, csum, generation, | 1314 | have_csum, csum, generation, |
| 1103 | csum_size); | 1315 | csum_size); |
| 1104 | 1316 | ||
| 1105 | return 0; | 1317 | return; |
| 1106 | } | 1318 | } |
| 1107 | 1319 | ||
| 1108 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | 1320 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, |
| @@ -1117,14 +1329,14 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 1117 | struct btrfs_root *root = fs_info->extent_root; | 1329 | struct btrfs_root *root = fs_info->extent_root; |
| 1118 | void *mapped_buffer; | 1330 | void *mapped_buffer; |
| 1119 | 1331 | ||
| 1120 | BUG_ON(!sblock->pagev[0].page); | 1332 | WARN_ON(!sblock->pagev[0]->page); |
| 1121 | if (is_metadata) { | 1333 | if (is_metadata) { |
| 1122 | struct btrfs_header *h; | 1334 | struct btrfs_header *h; |
| 1123 | 1335 | ||
| 1124 | mapped_buffer = kmap_atomic(sblock->pagev[0].page); | 1336 | mapped_buffer = kmap_atomic(sblock->pagev[0]->page); |
| 1125 | h = (struct btrfs_header *)mapped_buffer; | 1337 | h = (struct btrfs_header *)mapped_buffer; |
| 1126 | 1338 | ||
| 1127 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || | 1339 | if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) || |
| 1128 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || | 1340 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || |
| 1129 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1341 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
| 1130 | BTRFS_UUID_SIZE)) { | 1342 | BTRFS_UUID_SIZE)) { |
| @@ -1138,7 +1350,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 1138 | if (!have_csum) | 1350 | if (!have_csum) |
| 1139 | return; | 1351 | return; |
| 1140 | 1352 | ||
| 1141 | mapped_buffer = kmap_atomic(sblock->pagev[0].page); | 1353 | mapped_buffer = kmap_atomic(sblock->pagev[0]->page); |
| 1142 | } | 1354 | } |
| 1143 | 1355 | ||
| 1144 | for (page_num = 0;;) { | 1356 | for (page_num = 0;;) { |
| @@ -1154,9 +1366,9 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 1154 | page_num++; | 1366 | page_num++; |
| 1155 | if (page_num >= sblock->page_count) | 1367 | if (page_num >= sblock->page_count) |
| 1156 | break; | 1368 | break; |
| 1157 | BUG_ON(!sblock->pagev[page_num].page); | 1369 | WARN_ON(!sblock->pagev[page_num]->page); |
| 1158 | 1370 | ||
| 1159 | mapped_buffer = kmap_atomic(sblock->pagev[page_num].page); | 1371 | mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page); |
| 1160 | } | 1372 | } |
| 1161 | 1373 | ||
| 1162 | btrfs_csum_final(crc, calculated_csum); | 1374 | btrfs_csum_final(crc, calculated_csum); |
| @@ -1194,17 +1406,23 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1194 | struct scrub_block *sblock_good, | 1406 | struct scrub_block *sblock_good, |
| 1195 | int page_num, int force_write) | 1407 | int page_num, int force_write) |
| 1196 | { | 1408 | { |
| 1197 | struct scrub_page *page_bad = sblock_bad->pagev + page_num; | 1409 | struct scrub_page *page_bad = sblock_bad->pagev[page_num]; |
| 1198 | struct scrub_page *page_good = sblock_good->pagev + page_num; | 1410 | struct scrub_page *page_good = sblock_good->pagev[page_num]; |
| 1199 | 1411 | ||
| 1200 | BUG_ON(sblock_bad->pagev[page_num].page == NULL); | 1412 | BUG_ON(page_bad->page == NULL); |
| 1201 | BUG_ON(sblock_good->pagev[page_num].page == NULL); | 1413 | BUG_ON(page_good->page == NULL); |
| 1202 | if (force_write || sblock_bad->header_error || | 1414 | if (force_write || sblock_bad->header_error || |
| 1203 | sblock_bad->checksum_error || page_bad->io_error) { | 1415 | sblock_bad->checksum_error || page_bad->io_error) { |
| 1204 | struct bio *bio; | 1416 | struct bio *bio; |
| 1205 | int ret; | 1417 | int ret; |
| 1206 | DECLARE_COMPLETION_ONSTACK(complete); | 1418 | DECLARE_COMPLETION_ONSTACK(complete); |
| 1207 | 1419 | ||
| 1420 | if (!page_bad->dev->bdev) { | ||
| 1421 | printk_ratelimited(KERN_WARNING | ||
| 1422 | "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n"); | ||
| 1423 | return -EIO; | ||
| 1424 | } | ||
| 1425 | |||
| 1208 | bio = bio_alloc(GFP_NOFS, 1); | 1426 | bio = bio_alloc(GFP_NOFS, 1); |
| 1209 | if (!bio) | 1427 | if (!bio) |
| 1210 | return -EIO; | 1428 | return -EIO; |
| @@ -1225,6 +1443,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1225 | if (!bio_flagged(bio, BIO_UPTODATE)) { | 1443 | if (!bio_flagged(bio, BIO_UPTODATE)) { |
| 1226 | btrfs_dev_stat_inc_and_print(page_bad->dev, | 1444 | btrfs_dev_stat_inc_and_print(page_bad->dev, |
| 1227 | BTRFS_DEV_STAT_WRITE_ERRS); | 1445 | BTRFS_DEV_STAT_WRITE_ERRS); |
| 1446 | btrfs_dev_replace_stats_inc( | ||
| 1447 | &sblock_bad->sctx->dev_root->fs_info-> | ||
| 1448 | dev_replace.num_write_errors); | ||
| 1228 | bio_put(bio); | 1449 | bio_put(bio); |
| 1229 | return -EIO; | 1450 | return -EIO; |
| 1230 | } | 1451 | } |
| @@ -1234,13 +1455,174 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1234 | return 0; | 1455 | return 0; |
| 1235 | } | 1456 | } |
| 1236 | 1457 | ||
| 1237 | static void scrub_checksum(struct scrub_block *sblock) | 1458 | static void scrub_write_block_to_dev_replace(struct scrub_block *sblock) |
| 1459 | { | ||
| 1460 | int page_num; | ||
| 1461 | |||
| 1462 | for (page_num = 0; page_num < sblock->page_count; page_num++) { | ||
| 1463 | int ret; | ||
| 1464 | |||
| 1465 | ret = scrub_write_page_to_dev_replace(sblock, page_num); | ||
| 1466 | if (ret) | ||
| 1467 | btrfs_dev_replace_stats_inc( | ||
| 1468 | &sblock->sctx->dev_root->fs_info->dev_replace. | ||
| 1469 | num_write_errors); | ||
| 1470 | } | ||
| 1471 | } | ||
| 1472 | |||
| 1473 | static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, | ||
| 1474 | int page_num) | ||
| 1475 | { | ||
| 1476 | struct scrub_page *spage = sblock->pagev[page_num]; | ||
| 1477 | |||
| 1478 | BUG_ON(spage->page == NULL); | ||
| 1479 | if (spage->io_error) { | ||
| 1480 | void *mapped_buffer = kmap_atomic(spage->page); | ||
| 1481 | |||
| 1482 | memset(mapped_buffer, 0, PAGE_CACHE_SIZE); | ||
| 1483 | flush_dcache_page(spage->page); | ||
| 1484 | kunmap_atomic(mapped_buffer); | ||
| 1485 | } | ||
| 1486 | return scrub_add_page_to_wr_bio(sblock->sctx, spage); | ||
| 1487 | } | ||
| 1488 | |||
| 1489 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | ||
| 1490 | struct scrub_page *spage) | ||
| 1491 | { | ||
| 1492 | struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; | ||
| 1493 | struct scrub_bio *sbio; | ||
| 1494 | int ret; | ||
| 1495 | |||
| 1496 | mutex_lock(&wr_ctx->wr_lock); | ||
| 1497 | again: | ||
| 1498 | if (!wr_ctx->wr_curr_bio) { | ||
| 1499 | wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), | ||
| 1500 | GFP_NOFS); | ||
| 1501 | if (!wr_ctx->wr_curr_bio) { | ||
| 1502 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 1503 | return -ENOMEM; | ||
| 1504 | } | ||
| 1505 | wr_ctx->wr_curr_bio->sctx = sctx; | ||
| 1506 | wr_ctx->wr_curr_bio->page_count = 0; | ||
| 1507 | } | ||
| 1508 | sbio = wr_ctx->wr_curr_bio; | ||
| 1509 | if (sbio->page_count == 0) { | ||
| 1510 | struct bio *bio; | ||
| 1511 | |||
| 1512 | sbio->physical = spage->physical_for_dev_replace; | ||
| 1513 | sbio->logical = spage->logical; | ||
| 1514 | sbio->dev = wr_ctx->tgtdev; | ||
| 1515 | bio = sbio->bio; | ||
| 1516 | if (!bio) { | ||
| 1517 | bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); | ||
| 1518 | if (!bio) { | ||
| 1519 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 1520 | return -ENOMEM; | ||
| 1521 | } | ||
| 1522 | sbio->bio = bio; | ||
| 1523 | } | ||
| 1524 | |||
| 1525 | bio->bi_private = sbio; | ||
| 1526 | bio->bi_end_io = scrub_wr_bio_end_io; | ||
| 1527 | bio->bi_bdev = sbio->dev->bdev; | ||
| 1528 | bio->bi_sector = sbio->physical >> 9; | ||
| 1529 | sbio->err = 0; | ||
| 1530 | } else if (sbio->physical + sbio->page_count * PAGE_SIZE != | ||
| 1531 | spage->physical_for_dev_replace || | ||
| 1532 | sbio->logical + sbio->page_count * PAGE_SIZE != | ||
| 1533 | spage->logical) { | ||
| 1534 | scrub_wr_submit(sctx); | ||
| 1535 | goto again; | ||
| 1536 | } | ||
| 1537 | |||
| 1538 | ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0); | ||
| 1539 | if (ret != PAGE_SIZE) { | ||
| 1540 | if (sbio->page_count < 1) { | ||
| 1541 | bio_put(sbio->bio); | ||
| 1542 | sbio->bio = NULL; | ||
| 1543 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 1544 | return -EIO; | ||
| 1545 | } | ||
| 1546 | scrub_wr_submit(sctx); | ||
| 1547 | goto again; | ||
| 1548 | } | ||
| 1549 | |||
| 1550 | sbio->pagev[sbio->page_count] = spage; | ||
| 1551 | scrub_page_get(spage); | ||
| 1552 | sbio->page_count++; | ||
| 1553 | if (sbio->page_count == wr_ctx->pages_per_wr_bio) | ||
| 1554 | scrub_wr_submit(sctx); | ||
| 1555 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 1556 | |||
| 1557 | return 0; | ||
| 1558 | } | ||
| 1559 | |||
| 1560 | static void scrub_wr_submit(struct scrub_ctx *sctx) | ||
| 1561 | { | ||
| 1562 | struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; | ||
| 1563 | struct scrub_bio *sbio; | ||
| 1564 | |||
| 1565 | if (!wr_ctx->wr_curr_bio) | ||
| 1566 | return; | ||
| 1567 | |||
| 1568 | sbio = wr_ctx->wr_curr_bio; | ||
| 1569 | wr_ctx->wr_curr_bio = NULL; | ||
| 1570 | WARN_ON(!sbio->bio->bi_bdev); | ||
| 1571 | scrub_pending_bio_inc(sctx); | ||
| 1572 | /* process all writes in a single worker thread. Then the block layer | ||
| 1573 | * orders the requests before sending them to the driver which | ||
| 1574 | * doubled the write performance on spinning disks when measured | ||
| 1575 | * with Linux 3.5 */ | ||
| 1576 | btrfsic_submit_bio(WRITE, sbio->bio); | ||
| 1577 | } | ||
| 1578 | |||
| 1579 | static void scrub_wr_bio_end_io(struct bio *bio, int err) | ||
| 1580 | { | ||
| 1581 | struct scrub_bio *sbio = bio->bi_private; | ||
| 1582 | struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; | ||
| 1583 | |||
| 1584 | sbio->err = err; | ||
| 1585 | sbio->bio = bio; | ||
| 1586 | |||
| 1587 | sbio->work.func = scrub_wr_bio_end_io_worker; | ||
| 1588 | btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) | ||
| 1592 | { | ||
| 1593 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); | ||
| 1594 | struct scrub_ctx *sctx = sbio->sctx; | ||
| 1595 | int i; | ||
| 1596 | |||
| 1597 | WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO); | ||
| 1598 | if (sbio->err) { | ||
| 1599 | struct btrfs_dev_replace *dev_replace = | ||
| 1600 | &sbio->sctx->dev_root->fs_info->dev_replace; | ||
| 1601 | |||
| 1602 | for (i = 0; i < sbio->page_count; i++) { | ||
| 1603 | struct scrub_page *spage = sbio->pagev[i]; | ||
| 1604 | |||
| 1605 | spage->io_error = 1; | ||
| 1606 | btrfs_dev_replace_stats_inc(&dev_replace-> | ||
| 1607 | num_write_errors); | ||
| 1608 | } | ||
| 1609 | } | ||
| 1610 | |||
| 1611 | for (i = 0; i < sbio->page_count; i++) | ||
| 1612 | scrub_page_put(sbio->pagev[i]); | ||
| 1613 | |||
| 1614 | bio_put(sbio->bio); | ||
| 1615 | kfree(sbio); | ||
| 1616 | scrub_pending_bio_dec(sctx); | ||
| 1617 | } | ||
| 1618 | |||
| 1619 | static int scrub_checksum(struct scrub_block *sblock) | ||
| 1238 | { | 1620 | { |
| 1239 | u64 flags; | 1621 | u64 flags; |
| 1240 | int ret; | 1622 | int ret; |
| 1241 | 1623 | ||
| 1242 | BUG_ON(sblock->page_count < 1); | 1624 | WARN_ON(sblock->page_count < 1); |
| 1243 | flags = sblock->pagev[0].flags; | 1625 | flags = sblock->pagev[0]->flags; |
| 1244 | ret = 0; | 1626 | ret = 0; |
| 1245 | if (flags & BTRFS_EXTENT_FLAG_DATA) | 1627 | if (flags & BTRFS_EXTENT_FLAG_DATA) |
| 1246 | ret = scrub_checksum_data(sblock); | 1628 | ret = scrub_checksum_data(sblock); |
| @@ -1252,30 +1634,32 @@ static void scrub_checksum(struct scrub_block *sblock) | |||
| 1252 | WARN_ON(1); | 1634 | WARN_ON(1); |
| 1253 | if (ret) | 1635 | if (ret) |
| 1254 | scrub_handle_errored_block(sblock); | 1636 | scrub_handle_errored_block(sblock); |
| 1637 | |||
| 1638 | return ret; | ||
| 1255 | } | 1639 | } |
| 1256 | 1640 | ||
| 1257 | static int scrub_checksum_data(struct scrub_block *sblock) | 1641 | static int scrub_checksum_data(struct scrub_block *sblock) |
| 1258 | { | 1642 | { |
| 1259 | struct scrub_dev *sdev = sblock->sdev; | 1643 | struct scrub_ctx *sctx = sblock->sctx; |
| 1260 | u8 csum[BTRFS_CSUM_SIZE]; | 1644 | u8 csum[BTRFS_CSUM_SIZE]; |
| 1261 | u8 *on_disk_csum; | 1645 | u8 *on_disk_csum; |
| 1262 | struct page *page; | 1646 | struct page *page; |
| 1263 | void *buffer; | 1647 | void *buffer; |
| 1264 | u32 crc = ~(u32)0; | 1648 | u32 crc = ~(u32)0; |
| 1265 | int fail = 0; | 1649 | int fail = 0; |
| 1266 | struct btrfs_root *root = sdev->dev->dev_root; | 1650 | struct btrfs_root *root = sctx->dev_root; |
| 1267 | u64 len; | 1651 | u64 len; |
| 1268 | int index; | 1652 | int index; |
| 1269 | 1653 | ||
| 1270 | BUG_ON(sblock->page_count < 1); | 1654 | BUG_ON(sblock->page_count < 1); |
| 1271 | if (!sblock->pagev[0].have_csum) | 1655 | if (!sblock->pagev[0]->have_csum) |
| 1272 | return 0; | 1656 | return 0; |
| 1273 | 1657 | ||
| 1274 | on_disk_csum = sblock->pagev[0].csum; | 1658 | on_disk_csum = sblock->pagev[0]->csum; |
| 1275 | page = sblock->pagev[0].page; | 1659 | page = sblock->pagev[0]->page; |
| 1276 | buffer = kmap_atomic(page); | 1660 | buffer = kmap_atomic(page); |
| 1277 | 1661 | ||
| 1278 | len = sdev->sectorsize; | 1662 | len = sctx->sectorsize; |
| 1279 | index = 0; | 1663 | index = 0; |
| 1280 | for (;;) { | 1664 | for (;;) { |
| 1281 | u64 l = min_t(u64, len, PAGE_SIZE); | 1665 | u64 l = min_t(u64, len, PAGE_SIZE); |
| @@ -1287,13 +1671,13 @@ static int scrub_checksum_data(struct scrub_block *sblock) | |||
| 1287 | break; | 1671 | break; |
| 1288 | index++; | 1672 | index++; |
| 1289 | BUG_ON(index >= sblock->page_count); | 1673 | BUG_ON(index >= sblock->page_count); |
| 1290 | BUG_ON(!sblock->pagev[index].page); | 1674 | BUG_ON(!sblock->pagev[index]->page); |
| 1291 | page = sblock->pagev[index].page; | 1675 | page = sblock->pagev[index]->page; |
| 1292 | buffer = kmap_atomic(page); | 1676 | buffer = kmap_atomic(page); |
| 1293 | } | 1677 | } |
| 1294 | 1678 | ||
| 1295 | btrfs_csum_final(crc, csum); | 1679 | btrfs_csum_final(crc, csum); |
| 1296 | if (memcmp(csum, on_disk_csum, sdev->csum_size)) | 1680 | if (memcmp(csum, on_disk_csum, sctx->csum_size)) |
| 1297 | fail = 1; | 1681 | fail = 1; |
| 1298 | 1682 | ||
| 1299 | return fail; | 1683 | return fail; |
| @@ -1301,9 +1685,9 @@ static int scrub_checksum_data(struct scrub_block *sblock) | |||
| 1301 | 1685 | ||
| 1302 | static int scrub_checksum_tree_block(struct scrub_block *sblock) | 1686 | static int scrub_checksum_tree_block(struct scrub_block *sblock) |
| 1303 | { | 1687 | { |
| 1304 | struct scrub_dev *sdev = sblock->sdev; | 1688 | struct scrub_ctx *sctx = sblock->sctx; |
| 1305 | struct btrfs_header *h; | 1689 | struct btrfs_header *h; |
| 1306 | struct btrfs_root *root = sdev->dev->dev_root; | 1690 | struct btrfs_root *root = sctx->dev_root; |
| 1307 | struct btrfs_fs_info *fs_info = root->fs_info; | 1691 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 1308 | u8 calculated_csum[BTRFS_CSUM_SIZE]; | 1692 | u8 calculated_csum[BTRFS_CSUM_SIZE]; |
| 1309 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; | 1693 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; |
| @@ -1318,10 +1702,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1318 | int index; | 1702 | int index; |
| 1319 | 1703 | ||
| 1320 | BUG_ON(sblock->page_count < 1); | 1704 | BUG_ON(sblock->page_count < 1); |
| 1321 | page = sblock->pagev[0].page; | 1705 | page = sblock->pagev[0]->page; |
| 1322 | mapped_buffer = kmap_atomic(page); | 1706 | mapped_buffer = kmap_atomic(page); |
| 1323 | h = (struct btrfs_header *)mapped_buffer; | 1707 | h = (struct btrfs_header *)mapped_buffer; |
| 1324 | memcpy(on_disk_csum, h->csum, sdev->csum_size); | 1708 | memcpy(on_disk_csum, h->csum, sctx->csum_size); |
| 1325 | 1709 | ||
| 1326 | /* | 1710 | /* |
| 1327 | * we don't use the getter functions here, as we | 1711 | * we don't use the getter functions here, as we |
| @@ -1329,10 +1713,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1329 | * b) the page is already kmapped | 1713 | * b) the page is already kmapped |
| 1330 | */ | 1714 | */ |
| 1331 | 1715 | ||
| 1332 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr)) | 1716 | if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr)) |
| 1333 | ++fail; | 1717 | ++fail; |
| 1334 | 1718 | ||
| 1335 | if (sblock->pagev[0].generation != le64_to_cpu(h->generation)) | 1719 | if (sblock->pagev[0]->generation != le64_to_cpu(h->generation)) |
| 1336 | ++fail; | 1720 | ++fail; |
| 1337 | 1721 | ||
| 1338 | if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1722 | if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) |
| @@ -1342,8 +1726,8 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1342 | BTRFS_UUID_SIZE)) | 1726 | BTRFS_UUID_SIZE)) |
| 1343 | ++fail; | 1727 | ++fail; |
| 1344 | 1728 | ||
| 1345 | BUG_ON(sdev->nodesize != sdev->leafsize); | 1729 | WARN_ON(sctx->nodesize != sctx->leafsize); |
| 1346 | len = sdev->nodesize - BTRFS_CSUM_SIZE; | 1730 | len = sctx->nodesize - BTRFS_CSUM_SIZE; |
| 1347 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 1731 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
| 1348 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; | 1732 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; |
| 1349 | index = 0; | 1733 | index = 0; |
| @@ -1357,15 +1741,15 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1357 | break; | 1741 | break; |
| 1358 | index++; | 1742 | index++; |
| 1359 | BUG_ON(index >= sblock->page_count); | 1743 | BUG_ON(index >= sblock->page_count); |
| 1360 | BUG_ON(!sblock->pagev[index].page); | 1744 | BUG_ON(!sblock->pagev[index]->page); |
| 1361 | page = sblock->pagev[index].page; | 1745 | page = sblock->pagev[index]->page; |
| 1362 | mapped_buffer = kmap_atomic(page); | 1746 | mapped_buffer = kmap_atomic(page); |
| 1363 | mapped_size = PAGE_SIZE; | 1747 | mapped_size = PAGE_SIZE; |
| 1364 | p = mapped_buffer; | 1748 | p = mapped_buffer; |
| 1365 | } | 1749 | } |
| 1366 | 1750 | ||
| 1367 | btrfs_csum_final(crc, calculated_csum); | 1751 | btrfs_csum_final(crc, calculated_csum); |
| 1368 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) | 1752 | if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) |
| 1369 | ++crc_fail; | 1753 | ++crc_fail; |
| 1370 | 1754 | ||
| 1371 | return fail || crc_fail; | 1755 | return fail || crc_fail; |
| @@ -1374,8 +1758,8 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1374 | static int scrub_checksum_super(struct scrub_block *sblock) | 1758 | static int scrub_checksum_super(struct scrub_block *sblock) |
| 1375 | { | 1759 | { |
| 1376 | struct btrfs_super_block *s; | 1760 | struct btrfs_super_block *s; |
| 1377 | struct scrub_dev *sdev = sblock->sdev; | 1761 | struct scrub_ctx *sctx = sblock->sctx; |
| 1378 | struct btrfs_root *root = sdev->dev->dev_root; | 1762 | struct btrfs_root *root = sctx->dev_root; |
| 1379 | struct btrfs_fs_info *fs_info = root->fs_info; | 1763 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 1380 | u8 calculated_csum[BTRFS_CSUM_SIZE]; | 1764 | u8 calculated_csum[BTRFS_CSUM_SIZE]; |
| 1381 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; | 1765 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; |
| @@ -1390,15 +1774,15 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1390 | int index; | 1774 | int index; |
| 1391 | 1775 | ||
| 1392 | BUG_ON(sblock->page_count < 1); | 1776 | BUG_ON(sblock->page_count < 1); |
| 1393 | page = sblock->pagev[0].page; | 1777 | page = sblock->pagev[0]->page; |
| 1394 | mapped_buffer = kmap_atomic(page); | 1778 | mapped_buffer = kmap_atomic(page); |
| 1395 | s = (struct btrfs_super_block *)mapped_buffer; | 1779 | s = (struct btrfs_super_block *)mapped_buffer; |
| 1396 | memcpy(on_disk_csum, s->csum, sdev->csum_size); | 1780 | memcpy(on_disk_csum, s->csum, sctx->csum_size); |
| 1397 | 1781 | ||
| 1398 | if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) | 1782 | if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr)) |
| 1399 | ++fail_cor; | 1783 | ++fail_cor; |
| 1400 | 1784 | ||
| 1401 | if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) | 1785 | if (sblock->pagev[0]->generation != le64_to_cpu(s->generation)) |
| 1402 | ++fail_gen; | 1786 | ++fail_gen; |
| 1403 | 1787 | ||
| 1404 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1788 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) |
| @@ -1418,15 +1802,15 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1418 | break; | 1802 | break; |
| 1419 | index++; | 1803 | index++; |
| 1420 | BUG_ON(index >= sblock->page_count); | 1804 | BUG_ON(index >= sblock->page_count); |
| 1421 | BUG_ON(!sblock->pagev[index].page); | 1805 | BUG_ON(!sblock->pagev[index]->page); |
| 1422 | page = sblock->pagev[index].page; | 1806 | page = sblock->pagev[index]->page; |
| 1423 | mapped_buffer = kmap_atomic(page); | 1807 | mapped_buffer = kmap_atomic(page); |
| 1424 | mapped_size = PAGE_SIZE; | 1808 | mapped_size = PAGE_SIZE; |
| 1425 | p = mapped_buffer; | 1809 | p = mapped_buffer; |
| 1426 | } | 1810 | } |
| 1427 | 1811 | ||
| 1428 | btrfs_csum_final(crc, calculated_csum); | 1812 | btrfs_csum_final(crc, calculated_csum); |
| 1429 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) | 1813 | if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) |
| 1430 | ++fail_cor; | 1814 | ++fail_cor; |
| 1431 | 1815 | ||
| 1432 | if (fail_cor + fail_gen) { | 1816 | if (fail_cor + fail_gen) { |
| @@ -1435,14 +1819,14 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1435 | * They will get written with the next transaction commit | 1819 | * They will get written with the next transaction commit |
| 1436 | * anyway | 1820 | * anyway |
| 1437 | */ | 1821 | */ |
| 1438 | spin_lock(&sdev->stat_lock); | 1822 | spin_lock(&sctx->stat_lock); |
| 1439 | ++sdev->stat.super_errors; | 1823 | ++sctx->stat.super_errors; |
| 1440 | spin_unlock(&sdev->stat_lock); | 1824 | spin_unlock(&sctx->stat_lock); |
| 1441 | if (fail_cor) | 1825 | if (fail_cor) |
| 1442 | btrfs_dev_stat_inc_and_print(sdev->dev, | 1826 | btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev, |
| 1443 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 1827 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
| 1444 | else | 1828 | else |
| 1445 | btrfs_dev_stat_inc_and_print(sdev->dev, | 1829 | btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev, |
| 1446 | BTRFS_DEV_STAT_GENERATION_ERRS); | 1830 | BTRFS_DEV_STAT_GENERATION_ERRS); |
| 1447 | } | 1831 | } |
| 1448 | 1832 | ||
| @@ -1460,28 +1844,54 @@ static void scrub_block_put(struct scrub_block *sblock) | |||
| 1460 | int i; | 1844 | int i; |
| 1461 | 1845 | ||
| 1462 | for (i = 0; i < sblock->page_count; i++) | 1846 | for (i = 0; i < sblock->page_count; i++) |
| 1463 | if (sblock->pagev[i].page) | 1847 | scrub_page_put(sblock->pagev[i]); |
| 1464 | __free_page(sblock->pagev[i].page); | ||
| 1465 | kfree(sblock); | 1848 | kfree(sblock); |
| 1466 | } | 1849 | } |
| 1467 | } | 1850 | } |
| 1468 | 1851 | ||
| 1469 | static void scrub_submit(struct scrub_dev *sdev) | 1852 | static void scrub_page_get(struct scrub_page *spage) |
| 1853 | { | ||
| 1854 | atomic_inc(&spage->ref_count); | ||
| 1855 | } | ||
| 1856 | |||
| 1857 | static void scrub_page_put(struct scrub_page *spage) | ||
| 1858 | { | ||
| 1859 | if (atomic_dec_and_test(&spage->ref_count)) { | ||
| 1860 | if (spage->page) | ||
| 1861 | __free_page(spage->page); | ||
| 1862 | kfree(spage); | ||
| 1863 | } | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | static void scrub_submit(struct scrub_ctx *sctx) | ||
| 1470 | { | 1867 | { |
| 1471 | struct scrub_bio *sbio; | 1868 | struct scrub_bio *sbio; |
| 1472 | 1869 | ||
| 1473 | if (sdev->curr == -1) | 1870 | if (sctx->curr == -1) |
| 1474 | return; | 1871 | return; |
| 1475 | 1872 | ||
| 1476 | sbio = sdev->bios[sdev->curr]; | 1873 | sbio = sctx->bios[sctx->curr]; |
| 1477 | sdev->curr = -1; | 1874 | sctx->curr = -1; |
| 1478 | atomic_inc(&sdev->in_flight); | 1875 | scrub_pending_bio_inc(sctx); |
| 1479 | 1876 | ||
| 1480 | btrfsic_submit_bio(READ, sbio->bio); | 1877 | if (!sbio->bio->bi_bdev) { |
| 1878 | /* | ||
| 1879 | * this case should not happen. If btrfs_map_block() is | ||
| 1880 | * wrong, it could happen for dev-replace operations on | ||
| 1881 | * missing devices when no mirrors are available, but in | ||
| 1882 | * this case it should already fail the mount. | ||
| 1883 | * This case is handled correctly (but _very_ slowly). | ||
| 1884 | */ | ||
| 1885 | printk_ratelimited(KERN_WARNING | ||
| 1886 | "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n"); | ||
| 1887 | bio_endio(sbio->bio, -EIO); | ||
| 1888 | } else { | ||
| 1889 | btrfsic_submit_bio(READ, sbio->bio); | ||
| 1890 | } | ||
| 1481 | } | 1891 | } |
| 1482 | 1892 | ||
| 1483 | static int scrub_add_page_to_bio(struct scrub_dev *sdev, | 1893 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, |
| 1484 | struct scrub_page *spage) | 1894 | struct scrub_page *spage) |
| 1485 | { | 1895 | { |
| 1486 | struct scrub_block *sblock = spage->sblock; | 1896 | struct scrub_block *sblock = spage->sblock; |
| 1487 | struct scrub_bio *sbio; | 1897 | struct scrub_bio *sbio; |
| @@ -1491,28 +1901,29 @@ again: | |||
| 1491 | /* | 1901 | /* |
| 1492 | * grab a fresh bio or wait for one to become available | 1902 | * grab a fresh bio or wait for one to become available |
| 1493 | */ | 1903 | */ |
| 1494 | while (sdev->curr == -1) { | 1904 | while (sctx->curr == -1) { |
| 1495 | spin_lock(&sdev->list_lock); | 1905 | spin_lock(&sctx->list_lock); |
| 1496 | sdev->curr = sdev->first_free; | 1906 | sctx->curr = sctx->first_free; |
| 1497 | if (sdev->curr != -1) { | 1907 | if (sctx->curr != -1) { |
| 1498 | sdev->first_free = sdev->bios[sdev->curr]->next_free; | 1908 | sctx->first_free = sctx->bios[sctx->curr]->next_free; |
| 1499 | sdev->bios[sdev->curr]->next_free = -1; | 1909 | sctx->bios[sctx->curr]->next_free = -1; |
| 1500 | sdev->bios[sdev->curr]->page_count = 0; | 1910 | sctx->bios[sctx->curr]->page_count = 0; |
| 1501 | spin_unlock(&sdev->list_lock); | 1911 | spin_unlock(&sctx->list_lock); |
| 1502 | } else { | 1912 | } else { |
| 1503 | spin_unlock(&sdev->list_lock); | 1913 | spin_unlock(&sctx->list_lock); |
| 1504 | wait_event(sdev->list_wait, sdev->first_free != -1); | 1914 | wait_event(sctx->list_wait, sctx->first_free != -1); |
| 1505 | } | 1915 | } |
| 1506 | } | 1916 | } |
| 1507 | sbio = sdev->bios[sdev->curr]; | 1917 | sbio = sctx->bios[sctx->curr]; |
| 1508 | if (sbio->page_count == 0) { | 1918 | if (sbio->page_count == 0) { |
| 1509 | struct bio *bio; | 1919 | struct bio *bio; |
| 1510 | 1920 | ||
| 1511 | sbio->physical = spage->physical; | 1921 | sbio->physical = spage->physical; |
| 1512 | sbio->logical = spage->logical; | 1922 | sbio->logical = spage->logical; |
| 1923 | sbio->dev = spage->dev; | ||
| 1513 | bio = sbio->bio; | 1924 | bio = sbio->bio; |
| 1514 | if (!bio) { | 1925 | if (!bio) { |
| 1515 | bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio); | 1926 | bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); |
| 1516 | if (!bio) | 1927 | if (!bio) |
| 1517 | return -ENOMEM; | 1928 | return -ENOMEM; |
| 1518 | sbio->bio = bio; | 1929 | sbio->bio = bio; |
| @@ -1520,14 +1931,15 @@ again: | |||
| 1520 | 1931 | ||
| 1521 | bio->bi_private = sbio; | 1932 | bio->bi_private = sbio; |
| 1522 | bio->bi_end_io = scrub_bio_end_io; | 1933 | bio->bi_end_io = scrub_bio_end_io; |
| 1523 | bio->bi_bdev = sdev->dev->bdev; | 1934 | bio->bi_bdev = sbio->dev->bdev; |
| 1524 | bio->bi_sector = spage->physical >> 9; | 1935 | bio->bi_sector = sbio->physical >> 9; |
| 1525 | sbio->err = 0; | 1936 | sbio->err = 0; |
| 1526 | } else if (sbio->physical + sbio->page_count * PAGE_SIZE != | 1937 | } else if (sbio->physical + sbio->page_count * PAGE_SIZE != |
| 1527 | spage->physical || | 1938 | spage->physical || |
| 1528 | sbio->logical + sbio->page_count * PAGE_SIZE != | 1939 | sbio->logical + sbio->page_count * PAGE_SIZE != |
| 1529 | spage->logical) { | 1940 | spage->logical || |
| 1530 | scrub_submit(sdev); | 1941 | sbio->dev != spage->dev) { |
| 1942 | scrub_submit(sctx); | ||
| 1531 | goto again; | 1943 | goto again; |
| 1532 | } | 1944 | } |
| 1533 | 1945 | ||
| @@ -1539,81 +1951,87 @@ again: | |||
| 1539 | sbio->bio = NULL; | 1951 | sbio->bio = NULL; |
| 1540 | return -EIO; | 1952 | return -EIO; |
| 1541 | } | 1953 | } |
| 1542 | scrub_submit(sdev); | 1954 | scrub_submit(sctx); |
| 1543 | goto again; | 1955 | goto again; |
| 1544 | } | 1956 | } |
| 1545 | 1957 | ||
| 1546 | scrub_block_get(sblock); /* one for the added page */ | 1958 | scrub_block_get(sblock); /* one for the page added to the bio */ |
| 1547 | atomic_inc(&sblock->outstanding_pages); | 1959 | atomic_inc(&sblock->outstanding_pages); |
| 1548 | sbio->page_count++; | 1960 | sbio->page_count++; |
| 1549 | if (sbio->page_count == sdev->pages_per_bio) | 1961 | if (sbio->page_count == sctx->pages_per_rd_bio) |
| 1550 | scrub_submit(sdev); | 1962 | scrub_submit(sctx); |
| 1551 | 1963 | ||
| 1552 | return 0; | 1964 | return 0; |
| 1553 | } | 1965 | } |
| 1554 | 1966 | ||
| 1555 | static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | 1967 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
| 1556 | u64 physical, u64 flags, u64 gen, int mirror_num, | 1968 | u64 physical, struct btrfs_device *dev, u64 flags, |
| 1557 | u8 *csum, int force) | 1969 | u64 gen, int mirror_num, u8 *csum, int force, |
| 1970 | u64 physical_for_dev_replace) | ||
| 1558 | { | 1971 | { |
| 1559 | struct scrub_block *sblock; | 1972 | struct scrub_block *sblock; |
| 1560 | int index; | 1973 | int index; |
| 1561 | 1974 | ||
| 1562 | sblock = kzalloc(sizeof(*sblock), GFP_NOFS); | 1975 | sblock = kzalloc(sizeof(*sblock), GFP_NOFS); |
| 1563 | if (!sblock) { | 1976 | if (!sblock) { |
| 1564 | spin_lock(&sdev->stat_lock); | 1977 | spin_lock(&sctx->stat_lock); |
| 1565 | sdev->stat.malloc_errors++; | 1978 | sctx->stat.malloc_errors++; |
| 1566 | spin_unlock(&sdev->stat_lock); | 1979 | spin_unlock(&sctx->stat_lock); |
| 1567 | return -ENOMEM; | 1980 | return -ENOMEM; |
| 1568 | } | 1981 | } |
| 1569 | 1982 | ||
| 1570 | /* one ref inside this function, plus one for each page later on */ | 1983 | /* one ref inside this function, plus one for each page added to |
| 1984 | * a bio later on */ | ||
| 1571 | atomic_set(&sblock->ref_count, 1); | 1985 | atomic_set(&sblock->ref_count, 1); |
| 1572 | sblock->sdev = sdev; | 1986 | sblock->sctx = sctx; |
| 1573 | sblock->no_io_error_seen = 1; | 1987 | sblock->no_io_error_seen = 1; |
| 1574 | 1988 | ||
| 1575 | for (index = 0; len > 0; index++) { | 1989 | for (index = 0; len > 0; index++) { |
| 1576 | struct scrub_page *spage = sblock->pagev + index; | 1990 | struct scrub_page *spage; |
| 1577 | u64 l = min_t(u64, len, PAGE_SIZE); | 1991 | u64 l = min_t(u64, len, PAGE_SIZE); |
| 1578 | 1992 | ||
| 1579 | BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK); | 1993 | spage = kzalloc(sizeof(*spage), GFP_NOFS); |
| 1580 | spage->page = alloc_page(GFP_NOFS); | 1994 | if (!spage) { |
| 1581 | if (!spage->page) { | 1995 | leave_nomem: |
| 1582 | spin_lock(&sdev->stat_lock); | 1996 | spin_lock(&sctx->stat_lock); |
| 1583 | sdev->stat.malloc_errors++; | 1997 | sctx->stat.malloc_errors++; |
| 1584 | spin_unlock(&sdev->stat_lock); | 1998 | spin_unlock(&sctx->stat_lock); |
| 1585 | while (index > 0) { | 1999 | scrub_block_put(sblock); |
| 1586 | index--; | ||
| 1587 | __free_page(sblock->pagev[index].page); | ||
| 1588 | } | ||
| 1589 | kfree(sblock); | ||
| 1590 | return -ENOMEM; | 2000 | return -ENOMEM; |
| 1591 | } | 2001 | } |
| 2002 | BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK); | ||
| 2003 | scrub_page_get(spage); | ||
| 2004 | sblock->pagev[index] = spage; | ||
| 1592 | spage->sblock = sblock; | 2005 | spage->sblock = sblock; |
| 1593 | spage->dev = sdev->dev; | 2006 | spage->dev = dev; |
| 1594 | spage->flags = flags; | 2007 | spage->flags = flags; |
| 1595 | spage->generation = gen; | 2008 | spage->generation = gen; |
| 1596 | spage->logical = logical; | 2009 | spage->logical = logical; |
| 1597 | spage->physical = physical; | 2010 | spage->physical = physical; |
| 2011 | spage->physical_for_dev_replace = physical_for_dev_replace; | ||
| 1598 | spage->mirror_num = mirror_num; | 2012 | spage->mirror_num = mirror_num; |
| 1599 | if (csum) { | 2013 | if (csum) { |
| 1600 | spage->have_csum = 1; | 2014 | spage->have_csum = 1; |
| 1601 | memcpy(spage->csum, csum, sdev->csum_size); | 2015 | memcpy(spage->csum, csum, sctx->csum_size); |
| 1602 | } else { | 2016 | } else { |
| 1603 | spage->have_csum = 0; | 2017 | spage->have_csum = 0; |
| 1604 | } | 2018 | } |
| 1605 | sblock->page_count++; | 2019 | sblock->page_count++; |
| 2020 | spage->page = alloc_page(GFP_NOFS); | ||
| 2021 | if (!spage->page) | ||
| 2022 | goto leave_nomem; | ||
| 1606 | len -= l; | 2023 | len -= l; |
| 1607 | logical += l; | 2024 | logical += l; |
| 1608 | physical += l; | 2025 | physical += l; |
| 2026 | physical_for_dev_replace += l; | ||
| 1609 | } | 2027 | } |
| 1610 | 2028 | ||
| 1611 | BUG_ON(sblock->page_count == 0); | 2029 | WARN_ON(sblock->page_count == 0); |
| 1612 | for (index = 0; index < sblock->page_count; index++) { | 2030 | for (index = 0; index < sblock->page_count; index++) { |
| 1613 | struct scrub_page *spage = sblock->pagev + index; | 2031 | struct scrub_page *spage = sblock->pagev[index]; |
| 1614 | int ret; | 2032 | int ret; |
| 1615 | 2033 | ||
| 1616 | ret = scrub_add_page_to_bio(sdev, spage); | 2034 | ret = scrub_add_page_to_rd_bio(sctx, spage); |
| 1617 | if (ret) { | 2035 | if (ret) { |
| 1618 | scrub_block_put(sblock); | 2036 | scrub_block_put(sblock); |
| 1619 | return ret; | 2037 | return ret; |
| @@ -1621,7 +2039,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1621 | } | 2039 | } |
| 1622 | 2040 | ||
| 1623 | if (force) | 2041 | if (force) |
| 1624 | scrub_submit(sdev); | 2042 | scrub_submit(sctx); |
| 1625 | 2043 | ||
| 1626 | /* last one frees, either here or in bio completion for last page */ | 2044 | /* last one frees, either here or in bio completion for last page */ |
| 1627 | scrub_block_put(sblock); | 2045 | scrub_block_put(sblock); |
| @@ -1631,8 +2049,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1631 | static void scrub_bio_end_io(struct bio *bio, int err) | 2049 | static void scrub_bio_end_io(struct bio *bio, int err) |
| 1632 | { | 2050 | { |
| 1633 | struct scrub_bio *sbio = bio->bi_private; | 2051 | struct scrub_bio *sbio = bio->bi_private; |
| 1634 | struct scrub_dev *sdev = sbio->sdev; | 2052 | struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; |
| 1635 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
| 1636 | 2053 | ||
| 1637 | sbio->err = err; | 2054 | sbio->err = err; |
| 1638 | sbio->bio = bio; | 2055 | sbio->bio = bio; |
| @@ -1643,10 +2060,10 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
| 1643 | static void scrub_bio_end_io_worker(struct btrfs_work *work) | 2060 | static void scrub_bio_end_io_worker(struct btrfs_work *work) |
| 1644 | { | 2061 | { |
| 1645 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); | 2062 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); |
| 1646 | struct scrub_dev *sdev = sbio->sdev; | 2063 | struct scrub_ctx *sctx = sbio->sctx; |
| 1647 | int i; | 2064 | int i; |
| 1648 | 2065 | ||
| 1649 | BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO); | 2066 | BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO); |
| 1650 | if (sbio->err) { | 2067 | if (sbio->err) { |
| 1651 | for (i = 0; i < sbio->page_count; i++) { | 2068 | for (i = 0; i < sbio->page_count; i++) { |
| 1652 | struct scrub_page *spage = sbio->pagev[i]; | 2069 | struct scrub_page *spage = sbio->pagev[i]; |
| @@ -1666,40 +2083,39 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
| 1666 | scrub_block_put(sblock); | 2083 | scrub_block_put(sblock); |
| 1667 | } | 2084 | } |
| 1668 | 2085 | ||
| 1669 | if (sbio->err) { | 2086 | bio_put(sbio->bio); |
| 1670 | /* what is this good for??? */ | 2087 | sbio->bio = NULL; |
| 1671 | sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); | 2088 | spin_lock(&sctx->list_lock); |
| 1672 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | 2089 | sbio->next_free = sctx->first_free; |
| 1673 | sbio->bio->bi_phys_segments = 0; | 2090 | sctx->first_free = sbio->index; |
| 1674 | sbio->bio->bi_idx = 0; | 2091 | spin_unlock(&sctx->list_lock); |
| 1675 | 2092 | ||
| 1676 | for (i = 0; i < sbio->page_count; i++) { | 2093 | if (sctx->is_dev_replace && |
| 1677 | struct bio_vec *bi; | 2094 | atomic_read(&sctx->wr_ctx.flush_all_writes)) { |
| 1678 | bi = &sbio->bio->bi_io_vec[i]; | 2095 | mutex_lock(&sctx->wr_ctx.wr_lock); |
| 1679 | bi->bv_offset = 0; | 2096 | scrub_wr_submit(sctx); |
| 1680 | bi->bv_len = PAGE_SIZE; | 2097 | mutex_unlock(&sctx->wr_ctx.wr_lock); |
| 1681 | } | ||
| 1682 | } | 2098 | } |
| 1683 | 2099 | ||
| 1684 | bio_put(sbio->bio); | 2100 | scrub_pending_bio_dec(sctx); |
| 1685 | sbio->bio = NULL; | ||
| 1686 | spin_lock(&sdev->list_lock); | ||
| 1687 | sbio->next_free = sdev->first_free; | ||
| 1688 | sdev->first_free = sbio->index; | ||
| 1689 | spin_unlock(&sdev->list_lock); | ||
| 1690 | atomic_dec(&sdev->in_flight); | ||
| 1691 | wake_up(&sdev->list_wait); | ||
| 1692 | } | 2101 | } |
| 1693 | 2102 | ||
| 1694 | static void scrub_block_complete(struct scrub_block *sblock) | 2103 | static void scrub_block_complete(struct scrub_block *sblock) |
| 1695 | { | 2104 | { |
| 1696 | if (!sblock->no_io_error_seen) | 2105 | if (!sblock->no_io_error_seen) { |
| 1697 | scrub_handle_errored_block(sblock); | 2106 | scrub_handle_errored_block(sblock); |
| 1698 | else | 2107 | } else { |
| 1699 | scrub_checksum(sblock); | 2108 | /* |
| 2109 | * if has checksum error, write via repair mechanism in | ||
| 2110 | * dev replace case, otherwise write here in dev replace | ||
| 2111 | * case. | ||
| 2112 | */ | ||
| 2113 | if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace) | ||
| 2114 | scrub_write_block_to_dev_replace(sblock); | ||
| 2115 | } | ||
| 1700 | } | 2116 | } |
| 1701 | 2117 | ||
| 1702 | static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | 2118 | static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, |
| 1703 | u8 *csum) | 2119 | u8 *csum) |
| 1704 | { | 2120 | { |
| 1705 | struct btrfs_ordered_sum *sum = NULL; | 2121 | struct btrfs_ordered_sum *sum = NULL; |
| @@ -1707,15 +2123,15 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1707 | unsigned long i; | 2123 | unsigned long i; |
| 1708 | unsigned long num_sectors; | 2124 | unsigned long num_sectors; |
| 1709 | 2125 | ||
| 1710 | while (!list_empty(&sdev->csum_list)) { | 2126 | while (!list_empty(&sctx->csum_list)) { |
| 1711 | sum = list_first_entry(&sdev->csum_list, | 2127 | sum = list_first_entry(&sctx->csum_list, |
| 1712 | struct btrfs_ordered_sum, list); | 2128 | struct btrfs_ordered_sum, list); |
| 1713 | if (sum->bytenr > logical) | 2129 | if (sum->bytenr > logical) |
| 1714 | return 0; | 2130 | return 0; |
| 1715 | if (sum->bytenr + sum->len > logical) | 2131 | if (sum->bytenr + sum->len > logical) |
| 1716 | break; | 2132 | break; |
| 1717 | 2133 | ||
| 1718 | ++sdev->stat.csum_discards; | 2134 | ++sctx->stat.csum_discards; |
| 1719 | list_del(&sum->list); | 2135 | list_del(&sum->list); |
| 1720 | kfree(sum); | 2136 | kfree(sum); |
| 1721 | sum = NULL; | 2137 | sum = NULL; |
| @@ -1723,10 +2139,10 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1723 | if (!sum) | 2139 | if (!sum) |
| 1724 | return 0; | 2140 | return 0; |
| 1725 | 2141 | ||
| 1726 | num_sectors = sum->len / sdev->sectorsize; | 2142 | num_sectors = sum->len / sctx->sectorsize; |
| 1727 | for (i = 0; i < num_sectors; ++i) { | 2143 | for (i = 0; i < num_sectors; ++i) { |
| 1728 | if (sum->sums[i].bytenr == logical) { | 2144 | if (sum->sums[i].bytenr == logical) { |
| 1729 | memcpy(csum, &sum->sums[i].sum, sdev->csum_size); | 2145 | memcpy(csum, &sum->sums[i].sum, sctx->csum_size); |
| 1730 | ret = 1; | 2146 | ret = 1; |
| 1731 | break; | 2147 | break; |
| 1732 | } | 2148 | } |
| @@ -1739,29 +2155,30 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1739 | } | 2155 | } |
| 1740 | 2156 | ||
| 1741 | /* scrub extent tries to collect up to 64 kB for each bio */ | 2157 | /* scrub extent tries to collect up to 64 kB for each bio */ |
| 1742 | static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, | 2158 | static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, |
| 1743 | u64 physical, u64 flags, u64 gen, int mirror_num) | 2159 | u64 physical, struct btrfs_device *dev, u64 flags, |
| 2160 | u64 gen, int mirror_num, u64 physical_for_dev_replace) | ||
| 1744 | { | 2161 | { |
| 1745 | int ret; | 2162 | int ret; |
| 1746 | u8 csum[BTRFS_CSUM_SIZE]; | 2163 | u8 csum[BTRFS_CSUM_SIZE]; |
| 1747 | u32 blocksize; | 2164 | u32 blocksize; |
| 1748 | 2165 | ||
| 1749 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | 2166 | if (flags & BTRFS_EXTENT_FLAG_DATA) { |
| 1750 | blocksize = sdev->sectorsize; | 2167 | blocksize = sctx->sectorsize; |
| 1751 | spin_lock(&sdev->stat_lock); | 2168 | spin_lock(&sctx->stat_lock); |
| 1752 | sdev->stat.data_extents_scrubbed++; | 2169 | sctx->stat.data_extents_scrubbed++; |
| 1753 | sdev->stat.data_bytes_scrubbed += len; | 2170 | sctx->stat.data_bytes_scrubbed += len; |
| 1754 | spin_unlock(&sdev->stat_lock); | 2171 | spin_unlock(&sctx->stat_lock); |
| 1755 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 2172 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 1756 | BUG_ON(sdev->nodesize != sdev->leafsize); | 2173 | WARN_ON(sctx->nodesize != sctx->leafsize); |
| 1757 | blocksize = sdev->nodesize; | 2174 | blocksize = sctx->nodesize; |
| 1758 | spin_lock(&sdev->stat_lock); | 2175 | spin_lock(&sctx->stat_lock); |
| 1759 | sdev->stat.tree_extents_scrubbed++; | 2176 | sctx->stat.tree_extents_scrubbed++; |
| 1760 | sdev->stat.tree_bytes_scrubbed += len; | 2177 | sctx->stat.tree_bytes_scrubbed += len; |
| 1761 | spin_unlock(&sdev->stat_lock); | 2178 | spin_unlock(&sctx->stat_lock); |
| 1762 | } else { | 2179 | } else { |
| 1763 | blocksize = sdev->sectorsize; | 2180 | blocksize = sctx->sectorsize; |
| 1764 | BUG_ON(1); | 2181 | WARN_ON(1); |
| 1765 | } | 2182 | } |
| 1766 | 2183 | ||
| 1767 | while (len) { | 2184 | while (len) { |
| @@ -1770,26 +2187,38 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1770 | 2187 | ||
| 1771 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | 2188 | if (flags & BTRFS_EXTENT_FLAG_DATA) { |
| 1772 | /* push csums to sbio */ | 2189 | /* push csums to sbio */ |
| 1773 | have_csum = scrub_find_csum(sdev, logical, l, csum); | 2190 | have_csum = scrub_find_csum(sctx, logical, l, csum); |
| 1774 | if (have_csum == 0) | 2191 | if (have_csum == 0) |
| 1775 | ++sdev->stat.no_csum; | 2192 | ++sctx->stat.no_csum; |
| 2193 | if (sctx->is_dev_replace && !have_csum) { | ||
| 2194 | ret = copy_nocow_pages(sctx, logical, l, | ||
| 2195 | mirror_num, | ||
| 2196 | physical_for_dev_replace); | ||
| 2197 | goto behind_scrub_pages; | ||
| 2198 | } | ||
| 1776 | } | 2199 | } |
| 1777 | ret = scrub_pages(sdev, logical, l, physical, flags, gen, | 2200 | ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, |
| 1778 | mirror_num, have_csum ? csum : NULL, 0); | 2201 | mirror_num, have_csum ? csum : NULL, 0, |
| 2202 | physical_for_dev_replace); | ||
| 2203 | behind_scrub_pages: | ||
| 1779 | if (ret) | 2204 | if (ret) |
| 1780 | return ret; | 2205 | return ret; |
| 1781 | len -= l; | 2206 | len -= l; |
| 1782 | logical += l; | 2207 | logical += l; |
| 1783 | physical += l; | 2208 | physical += l; |
| 2209 | physical_for_dev_replace += l; | ||
| 1784 | } | 2210 | } |
| 1785 | return 0; | 2211 | return 0; |
| 1786 | } | 2212 | } |
| 1787 | 2213 | ||
| 1788 | static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | 2214 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, |
| 1789 | struct map_lookup *map, int num, u64 base, u64 length) | 2215 | struct map_lookup *map, |
| 2216 | struct btrfs_device *scrub_dev, | ||
| 2217 | int num, u64 base, u64 length, | ||
| 2218 | int is_dev_replace) | ||
| 1790 | { | 2219 | { |
| 1791 | struct btrfs_path *path; | 2220 | struct btrfs_path *path; |
| 1792 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | 2221 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; |
| 1793 | struct btrfs_root *root = fs_info->extent_root; | 2222 | struct btrfs_root *root = fs_info->extent_root; |
| 1794 | struct btrfs_root *csum_root = fs_info->csum_root; | 2223 | struct btrfs_root *csum_root = fs_info->csum_root; |
| 1795 | struct btrfs_extent_item *extent; | 2224 | struct btrfs_extent_item *extent; |
| @@ -1809,9 +2238,13 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1809 | struct reada_control *reada2; | 2238 | struct reada_control *reada2; |
| 1810 | struct btrfs_key key_start; | 2239 | struct btrfs_key key_start; |
| 1811 | struct btrfs_key key_end; | 2240 | struct btrfs_key key_end; |
| 1812 | |||
| 1813 | u64 increment = map->stripe_len; | 2241 | u64 increment = map->stripe_len; |
| 1814 | u64 offset; | 2242 | u64 offset; |
| 2243 | u64 extent_logical; | ||
| 2244 | u64 extent_physical; | ||
| 2245 | u64 extent_len; | ||
| 2246 | struct btrfs_device *extent_dev; | ||
| 2247 | int extent_mirror_num; | ||
| 1815 | 2248 | ||
| 1816 | nstripes = length; | 2249 | nstripes = length; |
| 1817 | offset = 0; | 2250 | offset = 0; |
| @@ -1855,8 +2288,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1855 | */ | 2288 | */ |
| 1856 | logical = base + offset; | 2289 | logical = base + offset; |
| 1857 | 2290 | ||
| 1858 | wait_event(sdev->list_wait, | 2291 | wait_event(sctx->list_wait, |
| 1859 | atomic_read(&sdev->in_flight) == 0); | 2292 | atomic_read(&sctx->bios_in_flight) == 0); |
| 1860 | atomic_inc(&fs_info->scrubs_paused); | 2293 | atomic_inc(&fs_info->scrubs_paused); |
| 1861 | wake_up(&fs_info->scrub_pause_wait); | 2294 | wake_up(&fs_info->scrub_pause_wait); |
| 1862 | 2295 | ||
| @@ -1910,7 +2343,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1910 | * canceled? | 2343 | * canceled? |
| 1911 | */ | 2344 | */ |
| 1912 | if (atomic_read(&fs_info->scrub_cancel_req) || | 2345 | if (atomic_read(&fs_info->scrub_cancel_req) || |
| 1913 | atomic_read(&sdev->cancel_req)) { | 2346 | atomic_read(&sctx->cancel_req)) { |
| 1914 | ret = -ECANCELED; | 2347 | ret = -ECANCELED; |
| 1915 | goto out; | 2348 | goto out; |
| 1916 | } | 2349 | } |
| @@ -1919,9 +2352,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1919 | */ | 2352 | */ |
| 1920 | if (atomic_read(&fs_info->scrub_pause_req)) { | 2353 | if (atomic_read(&fs_info->scrub_pause_req)) { |
| 1921 | /* push queued extents */ | 2354 | /* push queued extents */ |
| 1922 | scrub_submit(sdev); | 2355 | atomic_set(&sctx->wr_ctx.flush_all_writes, 1); |
| 1923 | wait_event(sdev->list_wait, | 2356 | scrub_submit(sctx); |
| 1924 | atomic_read(&sdev->in_flight) == 0); | 2357 | mutex_lock(&sctx->wr_ctx.wr_lock); |
| 2358 | scrub_wr_submit(sctx); | ||
| 2359 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
| 2360 | wait_event(sctx->list_wait, | ||
| 2361 | atomic_read(&sctx->bios_in_flight) == 0); | ||
| 2362 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | ||
| 1925 | atomic_inc(&fs_info->scrubs_paused); | 2363 | atomic_inc(&fs_info->scrubs_paused); |
| 1926 | wake_up(&fs_info->scrub_pause_wait); | 2364 | wake_up(&fs_info->scrub_pause_wait); |
| 1927 | mutex_lock(&fs_info->scrub_lock); | 2365 | mutex_lock(&fs_info->scrub_lock); |
| @@ -1938,7 +2376,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1938 | 2376 | ||
| 1939 | ret = btrfs_lookup_csums_range(csum_root, logical, | 2377 | ret = btrfs_lookup_csums_range(csum_root, logical, |
| 1940 | logical + map->stripe_len - 1, | 2378 | logical + map->stripe_len - 1, |
| 1941 | &sdev->csum_list, 1); | 2379 | &sctx->csum_list, 1); |
| 1942 | if (ret) | 2380 | if (ret) |
| 1943 | goto out; | 2381 | goto out; |
| 1944 | 2382 | ||
| @@ -2016,9 +2454,20 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 2016 | key.objectid; | 2454 | key.objectid; |
| 2017 | } | 2455 | } |
| 2018 | 2456 | ||
| 2019 | ret = scrub_extent(sdev, key.objectid, key.offset, | 2457 | extent_logical = key.objectid; |
| 2020 | key.objectid - logical + physical, | 2458 | extent_physical = key.objectid - logical + physical; |
| 2021 | flags, generation, mirror_num); | 2459 | extent_len = key.offset; |
| 2460 | extent_dev = scrub_dev; | ||
| 2461 | extent_mirror_num = mirror_num; | ||
| 2462 | if (is_dev_replace) | ||
| 2463 | scrub_remap_extent(fs_info, extent_logical, | ||
| 2464 | extent_len, &extent_physical, | ||
| 2465 | &extent_dev, | ||
| 2466 | &extent_mirror_num); | ||
| 2467 | ret = scrub_extent(sctx, extent_logical, extent_len, | ||
| 2468 | extent_physical, extent_dev, flags, | ||
| 2469 | generation, extent_mirror_num, | ||
| 2470 | key.objectid - logical + physical); | ||
| 2022 | if (ret) | 2471 | if (ret) |
| 2023 | goto out; | 2472 | goto out; |
| 2024 | 2473 | ||
| @@ -2028,29 +2477,34 @@ next: | |||
| 2028 | btrfs_release_path(path); | 2477 | btrfs_release_path(path); |
| 2029 | logical += increment; | 2478 | logical += increment; |
| 2030 | physical += map->stripe_len; | 2479 | physical += map->stripe_len; |
| 2031 | spin_lock(&sdev->stat_lock); | 2480 | spin_lock(&sctx->stat_lock); |
| 2032 | sdev->stat.last_physical = physical; | 2481 | sctx->stat.last_physical = physical; |
| 2033 | spin_unlock(&sdev->stat_lock); | 2482 | spin_unlock(&sctx->stat_lock); |
| 2034 | } | 2483 | } |
| 2484 | out: | ||
| 2035 | /* push queued extents */ | 2485 | /* push queued extents */ |
| 2036 | scrub_submit(sdev); | 2486 | scrub_submit(sctx); |
| 2487 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
| 2488 | scrub_wr_submit(sctx); | ||
| 2489 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
| 2037 | 2490 | ||
| 2038 | out: | ||
| 2039 | blk_finish_plug(&plug); | 2491 | blk_finish_plug(&plug); |
| 2040 | btrfs_free_path(path); | 2492 | btrfs_free_path(path); |
| 2041 | return ret < 0 ? ret : 0; | 2493 | return ret < 0 ? ret : 0; |
| 2042 | } | 2494 | } |
| 2043 | 2495 | ||
| 2044 | static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, | 2496 | static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, |
| 2045 | u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length, | 2497 | struct btrfs_device *scrub_dev, |
| 2046 | u64 dev_offset) | 2498 | u64 chunk_tree, u64 chunk_objectid, |
| 2499 | u64 chunk_offset, u64 length, | ||
| 2500 | u64 dev_offset, int is_dev_replace) | ||
| 2047 | { | 2501 | { |
| 2048 | struct btrfs_mapping_tree *map_tree = | 2502 | struct btrfs_mapping_tree *map_tree = |
| 2049 | &sdev->dev->dev_root->fs_info->mapping_tree; | 2503 | &sctx->dev_root->fs_info->mapping_tree; |
| 2050 | struct map_lookup *map; | 2504 | struct map_lookup *map; |
| 2051 | struct extent_map *em; | 2505 | struct extent_map *em; |
| 2052 | int i; | 2506 | int i; |
| 2053 | int ret = -EINVAL; | 2507 | int ret = 0; |
| 2054 | 2508 | ||
| 2055 | read_lock(&map_tree->map_tree.lock); | 2509 | read_lock(&map_tree->map_tree.lock); |
| 2056 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 2510 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
| @@ -2067,9 +2521,11 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, | |||
| 2067 | goto out; | 2521 | goto out; |
| 2068 | 2522 | ||
| 2069 | for (i = 0; i < map->num_stripes; ++i) { | 2523 | for (i = 0; i < map->num_stripes; ++i) { |
| 2070 | if (map->stripes[i].dev == sdev->dev && | 2524 | if (map->stripes[i].dev->bdev == scrub_dev->bdev && |
| 2071 | map->stripes[i].physical == dev_offset) { | 2525 | map->stripes[i].physical == dev_offset) { |
| 2072 | ret = scrub_stripe(sdev, map, i, chunk_offset, length); | 2526 | ret = scrub_stripe(sctx, map, scrub_dev, i, |
| 2527 | chunk_offset, length, | ||
| 2528 | is_dev_replace); | ||
| 2073 | if (ret) | 2529 | if (ret) |
| 2074 | goto out; | 2530 | goto out; |
| 2075 | } | 2531 | } |
| @@ -2081,11 +2537,13 @@ out: | |||
| 2081 | } | 2537 | } |
| 2082 | 2538 | ||
| 2083 | static noinline_for_stack | 2539 | static noinline_for_stack |
| 2084 | int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | 2540 | int scrub_enumerate_chunks(struct scrub_ctx *sctx, |
| 2541 | struct btrfs_device *scrub_dev, u64 start, u64 end, | ||
| 2542 | int is_dev_replace) | ||
| 2085 | { | 2543 | { |
| 2086 | struct btrfs_dev_extent *dev_extent = NULL; | 2544 | struct btrfs_dev_extent *dev_extent = NULL; |
| 2087 | struct btrfs_path *path; | 2545 | struct btrfs_path *path; |
| 2088 | struct btrfs_root *root = sdev->dev->dev_root; | 2546 | struct btrfs_root *root = sctx->dev_root; |
| 2089 | struct btrfs_fs_info *fs_info = root->fs_info; | 2547 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 2090 | u64 length; | 2548 | u64 length; |
| 2091 | u64 chunk_tree; | 2549 | u64 chunk_tree; |
| @@ -2097,6 +2555,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2097 | struct btrfs_key key; | 2555 | struct btrfs_key key; |
| 2098 | struct btrfs_key found_key; | 2556 | struct btrfs_key found_key; |
| 2099 | struct btrfs_block_group_cache *cache; | 2557 | struct btrfs_block_group_cache *cache; |
| 2558 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 2100 | 2559 | ||
| 2101 | path = btrfs_alloc_path(); | 2560 | path = btrfs_alloc_path(); |
| 2102 | if (!path) | 2561 | if (!path) |
| @@ -2106,11 +2565,10 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2106 | path->search_commit_root = 1; | 2565 | path->search_commit_root = 1; |
| 2107 | path->skip_locking = 1; | 2566 | path->skip_locking = 1; |
| 2108 | 2567 | ||
| 2109 | key.objectid = sdev->dev->devid; | 2568 | key.objectid = scrub_dev->devid; |
| 2110 | key.offset = 0ull; | 2569 | key.offset = 0ull; |
| 2111 | key.type = BTRFS_DEV_EXTENT_KEY; | 2570 | key.type = BTRFS_DEV_EXTENT_KEY; |
| 2112 | 2571 | ||
| 2113 | |||
| 2114 | while (1) { | 2572 | while (1) { |
| 2115 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 2573 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 2116 | if (ret < 0) | 2574 | if (ret < 0) |
| @@ -2129,7 +2587,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2129 | 2587 | ||
| 2130 | btrfs_item_key_to_cpu(l, &found_key, slot); | 2588 | btrfs_item_key_to_cpu(l, &found_key, slot); |
| 2131 | 2589 | ||
| 2132 | if (found_key.objectid != sdev->dev->devid) | 2590 | if (found_key.objectid != scrub_dev->devid) |
| 2133 | break; | 2591 | break; |
| 2134 | 2592 | ||
| 2135 | if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) | 2593 | if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) |
| @@ -2163,11 +2621,62 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2163 | ret = -ENOENT; | 2621 | ret = -ENOENT; |
| 2164 | break; | 2622 | break; |
| 2165 | } | 2623 | } |
| 2166 | ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, | 2624 | dev_replace->cursor_right = found_key.offset + length; |
| 2167 | chunk_offset, length, found_key.offset); | 2625 | dev_replace->cursor_left = found_key.offset; |
| 2626 | dev_replace->item_needs_writeback = 1; | ||
| 2627 | ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid, | ||
| 2628 | chunk_offset, length, found_key.offset, | ||
| 2629 | is_dev_replace); | ||
| 2630 | |||
| 2631 | /* | ||
| 2632 | * flush, submit all pending read and write bios, afterwards | ||
| 2633 | * wait for them. | ||
| 2634 | * Note that in the dev replace case, a read request causes | ||
| 2635 | * write requests that are submitted in the read completion | ||
| 2636 | * worker. Therefore in the current situation, it is required | ||
| 2637 | * that all write requests are flushed, so that all read and | ||
| 2638 | * write requests are really completed when bios_in_flight | ||
| 2639 | * changes to 0. | ||
| 2640 | */ | ||
| 2641 | atomic_set(&sctx->wr_ctx.flush_all_writes, 1); | ||
| 2642 | scrub_submit(sctx); | ||
| 2643 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
| 2644 | scrub_wr_submit(sctx); | ||
| 2645 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
| 2646 | |||
| 2647 | wait_event(sctx->list_wait, | ||
| 2648 | atomic_read(&sctx->bios_in_flight) == 0); | ||
| 2649 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | ||
| 2650 | atomic_inc(&fs_info->scrubs_paused); | ||
| 2651 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2652 | wait_event(sctx->list_wait, | ||
| 2653 | atomic_read(&sctx->workers_pending) == 0); | ||
| 2654 | |||
| 2655 | mutex_lock(&fs_info->scrub_lock); | ||
| 2656 | while (atomic_read(&fs_info->scrub_pause_req)) { | ||
| 2657 | mutex_unlock(&fs_info->scrub_lock); | ||
| 2658 | wait_event(fs_info->scrub_pause_wait, | ||
| 2659 | atomic_read(&fs_info->scrub_pause_req) == 0); | ||
| 2660 | mutex_lock(&fs_info->scrub_lock); | ||
| 2661 | } | ||
| 2662 | atomic_dec(&fs_info->scrubs_paused); | ||
| 2663 | mutex_unlock(&fs_info->scrub_lock); | ||
| 2664 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2665 | |||
| 2666 | dev_replace->cursor_left = dev_replace->cursor_right; | ||
| 2667 | dev_replace->item_needs_writeback = 1; | ||
| 2168 | btrfs_put_block_group(cache); | 2668 | btrfs_put_block_group(cache); |
| 2169 | if (ret) | 2669 | if (ret) |
| 2170 | break; | 2670 | break; |
| 2671 | if (is_dev_replace && | ||
| 2672 | atomic64_read(&dev_replace->num_write_errors) > 0) { | ||
| 2673 | ret = -EIO; | ||
| 2674 | break; | ||
| 2675 | } | ||
| 2676 | if (sctx->stat.malloc_errors > 0) { | ||
| 2677 | ret = -ENOMEM; | ||
| 2678 | break; | ||
| 2679 | } | ||
| 2171 | 2680 | ||
| 2172 | key.offset = found_key.offset + length; | 2681 | key.offset = found_key.offset + length; |
| 2173 | btrfs_release_path(path); | 2682 | btrfs_release_path(path); |
| @@ -2182,14 +2691,14 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2182 | return ret < 0 ? ret : 0; | 2691 | return ret < 0 ? ret : 0; |
| 2183 | } | 2692 | } |
| 2184 | 2693 | ||
| 2185 | static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | 2694 | static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, |
| 2695 | struct btrfs_device *scrub_dev) | ||
| 2186 | { | 2696 | { |
| 2187 | int i; | 2697 | int i; |
| 2188 | u64 bytenr; | 2698 | u64 bytenr; |
| 2189 | u64 gen; | 2699 | u64 gen; |
| 2190 | int ret; | 2700 | int ret; |
| 2191 | struct btrfs_device *device = sdev->dev; | 2701 | struct btrfs_root *root = sctx->dev_root; |
| 2192 | struct btrfs_root *root = device->dev_root; | ||
| 2193 | 2702 | ||
| 2194 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 2703 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
| 2195 | return -EIO; | 2704 | return -EIO; |
| @@ -2198,15 +2707,16 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | |||
| 2198 | 2707 | ||
| 2199 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | 2708 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { |
| 2200 | bytenr = btrfs_sb_offset(i); | 2709 | bytenr = btrfs_sb_offset(i); |
| 2201 | if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) | 2710 | if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes) |
| 2202 | break; | 2711 | break; |
| 2203 | 2712 | ||
| 2204 | ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, | 2713 | ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, |
| 2205 | BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); | 2714 | scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i, |
| 2715 | NULL, 1, bytenr); | ||
| 2206 | if (ret) | 2716 | if (ret) |
| 2207 | return ret; | 2717 | return ret; |
| 2208 | } | 2718 | } |
| 2209 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | 2719 | wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); |
| 2210 | 2720 | ||
| 2211 | return 0; | 2721 | return 0; |
| 2212 | } | 2722 | } |
| @@ -2214,19 +2724,38 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | |||
| 2214 | /* | 2724 | /* |
| 2215 | * get a reference count on fs_info->scrub_workers. start worker if necessary | 2725 | * get a reference count on fs_info->scrub_workers. start worker if necessary |
| 2216 | */ | 2726 | */ |
| 2217 | static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) | 2727 | static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, |
| 2728 | int is_dev_replace) | ||
| 2218 | { | 2729 | { |
| 2219 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2220 | int ret = 0; | 2730 | int ret = 0; |
| 2221 | 2731 | ||
| 2222 | mutex_lock(&fs_info->scrub_lock); | 2732 | mutex_lock(&fs_info->scrub_lock); |
| 2223 | if (fs_info->scrub_workers_refcnt == 0) { | 2733 | if (fs_info->scrub_workers_refcnt == 0) { |
| 2224 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 2734 | if (is_dev_replace) |
| 2225 | fs_info->thread_pool_size, &fs_info->generic_worker); | 2735 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, |
| 2736 | &fs_info->generic_worker); | ||
| 2737 | else | ||
| 2738 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | ||
| 2739 | fs_info->thread_pool_size, | ||
| 2740 | &fs_info->generic_worker); | ||
| 2226 | fs_info->scrub_workers.idle_thresh = 4; | 2741 | fs_info->scrub_workers.idle_thresh = 4; |
| 2227 | ret = btrfs_start_workers(&fs_info->scrub_workers); | 2742 | ret = btrfs_start_workers(&fs_info->scrub_workers); |
| 2228 | if (ret) | 2743 | if (ret) |
| 2229 | goto out; | 2744 | goto out; |
| 2745 | btrfs_init_workers(&fs_info->scrub_wr_completion_workers, | ||
| 2746 | "scrubwrc", | ||
| 2747 | fs_info->thread_pool_size, | ||
| 2748 | &fs_info->generic_worker); | ||
| 2749 | fs_info->scrub_wr_completion_workers.idle_thresh = 2; | ||
| 2750 | ret = btrfs_start_workers( | ||
| 2751 | &fs_info->scrub_wr_completion_workers); | ||
| 2752 | if (ret) | ||
| 2753 | goto out; | ||
| 2754 | btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, | ||
| 2755 | &fs_info->generic_worker); | ||
| 2756 | ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); | ||
| 2757 | if (ret) | ||
| 2758 | goto out; | ||
| 2230 | } | 2759 | } |
| 2231 | ++fs_info->scrub_workers_refcnt; | 2760 | ++fs_info->scrub_workers_refcnt; |
| 2232 | out: | 2761 | out: |
| @@ -2235,40 +2764,41 @@ out: | |||
| 2235 | return ret; | 2764 | return ret; |
| 2236 | } | 2765 | } |
| 2237 | 2766 | ||
| 2238 | static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) | 2767 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
| 2239 | { | 2768 | { |
| 2240 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2241 | |||
| 2242 | mutex_lock(&fs_info->scrub_lock); | 2769 | mutex_lock(&fs_info->scrub_lock); |
| 2243 | if (--fs_info->scrub_workers_refcnt == 0) | 2770 | if (--fs_info->scrub_workers_refcnt == 0) { |
| 2244 | btrfs_stop_workers(&fs_info->scrub_workers); | 2771 | btrfs_stop_workers(&fs_info->scrub_workers); |
| 2772 | btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); | ||
| 2773 | btrfs_stop_workers(&fs_info->scrub_nocow_workers); | ||
| 2774 | } | ||
| 2245 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | 2775 | WARN_ON(fs_info->scrub_workers_refcnt < 0); |
| 2246 | mutex_unlock(&fs_info->scrub_lock); | 2776 | mutex_unlock(&fs_info->scrub_lock); |
| 2247 | } | 2777 | } |
| 2248 | 2778 | ||
| 2249 | 2779 | int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |
| 2250 | int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | 2780 | u64 end, struct btrfs_scrub_progress *progress, |
| 2251 | struct btrfs_scrub_progress *progress, int readonly) | 2781 | int readonly, int is_dev_replace) |
| 2252 | { | 2782 | { |
| 2253 | struct scrub_dev *sdev; | 2783 | struct scrub_ctx *sctx; |
| 2254 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2255 | int ret; | 2784 | int ret; |
| 2256 | struct btrfs_device *dev; | 2785 | struct btrfs_device *dev; |
| 2257 | 2786 | ||
| 2258 | if (btrfs_fs_closing(root->fs_info)) | 2787 | if (btrfs_fs_closing(fs_info)) |
| 2259 | return -EINVAL; | 2788 | return -EINVAL; |
| 2260 | 2789 | ||
| 2261 | /* | 2790 | /* |
| 2262 | * check some assumptions | 2791 | * check some assumptions |
| 2263 | */ | 2792 | */ |
| 2264 | if (root->nodesize != root->leafsize) { | 2793 | if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) { |
| 2265 | printk(KERN_ERR | 2794 | printk(KERN_ERR |
| 2266 | "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n", | 2795 | "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n", |
| 2267 | root->nodesize, root->leafsize); | 2796 | fs_info->chunk_root->nodesize, |
| 2797 | fs_info->chunk_root->leafsize); | ||
| 2268 | return -EINVAL; | 2798 | return -EINVAL; |
| 2269 | } | 2799 | } |
| 2270 | 2800 | ||
| 2271 | if (root->nodesize > BTRFS_STRIPE_LEN) { | 2801 | if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { |
| 2272 | /* | 2802 | /* |
| 2273 | * in this case scrub is unable to calculate the checksum | 2803 | * in this case scrub is unable to calculate the checksum |
| 2274 | * the way scrub is implemented. Do not handle this | 2804 | * the way scrub is implemented. Do not handle this |
| @@ -2276,80 +2806,105 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | |||
| 2276 | */ | 2806 | */ |
| 2277 | printk(KERN_ERR | 2807 | printk(KERN_ERR |
| 2278 | "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n", | 2808 | "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n", |
| 2279 | root->nodesize, BTRFS_STRIPE_LEN); | 2809 | fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN); |
| 2280 | return -EINVAL; | 2810 | return -EINVAL; |
| 2281 | } | 2811 | } |
| 2282 | 2812 | ||
| 2283 | if (root->sectorsize != PAGE_SIZE) { | 2813 | if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { |
| 2284 | /* not supported for data w/o checksums */ | 2814 | /* not supported for data w/o checksums */ |
| 2285 | printk(KERN_ERR | 2815 | printk(KERN_ERR |
| 2286 | "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", | 2816 | "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", |
| 2287 | root->sectorsize, (unsigned long long)PAGE_SIZE); | 2817 | fs_info->chunk_root->sectorsize, |
| 2818 | (unsigned long long)PAGE_SIZE); | ||
| 2819 | return -EINVAL; | ||
| 2820 | } | ||
| 2821 | |||
| 2822 | if (fs_info->chunk_root->nodesize > | ||
| 2823 | PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK || | ||
| 2824 | fs_info->chunk_root->sectorsize > | ||
| 2825 | PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) { | ||
| 2826 | /* | ||
| 2827 | * would exhaust the array bounds of pagev member in | ||
| 2828 | * struct scrub_block | ||
| 2829 | */ | ||
| 2830 | pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n", | ||
| 2831 | fs_info->chunk_root->nodesize, | ||
| 2832 | SCRUB_MAX_PAGES_PER_BLOCK, | ||
| 2833 | fs_info->chunk_root->sectorsize, | ||
| 2834 | SCRUB_MAX_PAGES_PER_BLOCK); | ||
| 2288 | return -EINVAL; | 2835 | return -EINVAL; |
| 2289 | } | 2836 | } |
| 2290 | 2837 | ||
| 2291 | ret = scrub_workers_get(root); | 2838 | ret = scrub_workers_get(fs_info, is_dev_replace); |
| 2292 | if (ret) | 2839 | if (ret) |
| 2293 | return ret; | 2840 | return ret; |
| 2294 | 2841 | ||
| 2295 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2842 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
| 2296 | dev = btrfs_find_device(root, devid, NULL, NULL); | 2843 | dev = btrfs_find_device(fs_info, devid, NULL, NULL); |
| 2297 | if (!dev || dev->missing) { | 2844 | if (!dev || (dev->missing && !is_dev_replace)) { |
| 2298 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2845 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2299 | scrub_workers_put(root); | 2846 | scrub_workers_put(fs_info); |
| 2300 | return -ENODEV; | 2847 | return -ENODEV; |
| 2301 | } | 2848 | } |
| 2302 | mutex_lock(&fs_info->scrub_lock); | 2849 | mutex_lock(&fs_info->scrub_lock); |
| 2303 | 2850 | ||
| 2304 | if (!dev->in_fs_metadata) { | 2851 | if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { |
| 2305 | mutex_unlock(&fs_info->scrub_lock); | 2852 | mutex_unlock(&fs_info->scrub_lock); |
| 2306 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2853 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2307 | scrub_workers_put(root); | 2854 | scrub_workers_put(fs_info); |
| 2308 | return -ENODEV; | 2855 | return -EIO; |
| 2309 | } | 2856 | } |
| 2310 | 2857 | ||
| 2311 | if (dev->scrub_device) { | 2858 | btrfs_dev_replace_lock(&fs_info->dev_replace); |
| 2859 | if (dev->scrub_device || | ||
| 2860 | (!is_dev_replace && | ||
| 2861 | btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { | ||
| 2862 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 2312 | mutex_unlock(&fs_info->scrub_lock); | 2863 | mutex_unlock(&fs_info->scrub_lock); |
| 2313 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2864 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2314 | scrub_workers_put(root); | 2865 | scrub_workers_put(fs_info); |
| 2315 | return -EINPROGRESS; | 2866 | return -EINPROGRESS; |
| 2316 | } | 2867 | } |
| 2317 | sdev = scrub_setup_dev(dev); | 2868 | btrfs_dev_replace_unlock(&fs_info->dev_replace); |
| 2318 | if (IS_ERR(sdev)) { | 2869 | sctx = scrub_setup_ctx(dev, is_dev_replace); |
| 2870 | if (IS_ERR(sctx)) { | ||
| 2319 | mutex_unlock(&fs_info->scrub_lock); | 2871 | mutex_unlock(&fs_info->scrub_lock); |
| 2320 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2872 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2321 | scrub_workers_put(root); | 2873 | scrub_workers_put(fs_info); |
| 2322 | return PTR_ERR(sdev); | 2874 | return PTR_ERR(sctx); |
| 2323 | } | 2875 | } |
| 2324 | sdev->readonly = readonly; | 2876 | sctx->readonly = readonly; |
| 2325 | dev->scrub_device = sdev; | 2877 | dev->scrub_device = sctx; |
| 2326 | 2878 | ||
| 2327 | atomic_inc(&fs_info->scrubs_running); | 2879 | atomic_inc(&fs_info->scrubs_running); |
| 2328 | mutex_unlock(&fs_info->scrub_lock); | 2880 | mutex_unlock(&fs_info->scrub_lock); |
| 2329 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2881 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2330 | 2882 | ||
| 2331 | down_read(&fs_info->scrub_super_lock); | 2883 | if (!is_dev_replace) { |
| 2332 | ret = scrub_supers(sdev); | 2884 | down_read(&fs_info->scrub_super_lock); |
| 2333 | up_read(&fs_info->scrub_super_lock); | 2885 | ret = scrub_supers(sctx, dev); |
| 2886 | up_read(&fs_info->scrub_super_lock); | ||
| 2887 | } | ||
| 2334 | 2888 | ||
| 2335 | if (!ret) | 2889 | if (!ret) |
| 2336 | ret = scrub_enumerate_chunks(sdev, start, end); | 2890 | ret = scrub_enumerate_chunks(sctx, dev, start, end, |
| 2891 | is_dev_replace); | ||
| 2337 | 2892 | ||
| 2338 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | 2893 | wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); |
| 2339 | atomic_dec(&fs_info->scrubs_running); | 2894 | atomic_dec(&fs_info->scrubs_running); |
| 2340 | wake_up(&fs_info->scrub_pause_wait); | 2895 | wake_up(&fs_info->scrub_pause_wait); |
| 2341 | 2896 | ||
| 2342 | wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0); | 2897 | wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0); |
| 2343 | 2898 | ||
| 2344 | if (progress) | 2899 | if (progress) |
| 2345 | memcpy(progress, &sdev->stat, sizeof(*progress)); | 2900 | memcpy(progress, &sctx->stat, sizeof(*progress)); |
| 2346 | 2901 | ||
| 2347 | mutex_lock(&fs_info->scrub_lock); | 2902 | mutex_lock(&fs_info->scrub_lock); |
| 2348 | dev->scrub_device = NULL; | 2903 | dev->scrub_device = NULL; |
| 2349 | mutex_unlock(&fs_info->scrub_lock); | 2904 | mutex_unlock(&fs_info->scrub_lock); |
| 2350 | 2905 | ||
| 2351 | scrub_free_dev(sdev); | 2906 | scrub_free_ctx(sctx); |
| 2352 | scrub_workers_put(root); | 2907 | scrub_workers_put(fs_info); |
| 2353 | 2908 | ||
| 2354 | return ret; | 2909 | return ret; |
| 2355 | } | 2910 | } |
| @@ -2389,9 +2944,8 @@ void btrfs_scrub_continue_super(struct btrfs_root *root) | |||
| 2389 | up_write(&root->fs_info->scrub_super_lock); | 2944 | up_write(&root->fs_info->scrub_super_lock); |
| 2390 | } | 2945 | } |
| 2391 | 2946 | ||
| 2392 | int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) | 2947 | int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) |
| 2393 | { | 2948 | { |
| 2394 | |||
| 2395 | mutex_lock(&fs_info->scrub_lock); | 2949 | mutex_lock(&fs_info->scrub_lock); |
| 2396 | if (!atomic_read(&fs_info->scrubs_running)) { | 2950 | if (!atomic_read(&fs_info->scrubs_running)) { |
| 2397 | mutex_unlock(&fs_info->scrub_lock); | 2951 | mutex_unlock(&fs_info->scrub_lock); |
| @@ -2411,23 +2965,18 @@ int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) | |||
| 2411 | return 0; | 2965 | return 0; |
| 2412 | } | 2966 | } |
| 2413 | 2967 | ||
| 2414 | int btrfs_scrub_cancel(struct btrfs_root *root) | 2968 | int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, |
| 2969 | struct btrfs_device *dev) | ||
| 2415 | { | 2970 | { |
| 2416 | return __btrfs_scrub_cancel(root->fs_info); | 2971 | struct scrub_ctx *sctx; |
| 2417 | } | ||
| 2418 | |||
| 2419 | int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) | ||
| 2420 | { | ||
| 2421 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2422 | struct scrub_dev *sdev; | ||
| 2423 | 2972 | ||
| 2424 | mutex_lock(&fs_info->scrub_lock); | 2973 | mutex_lock(&fs_info->scrub_lock); |
| 2425 | sdev = dev->scrub_device; | 2974 | sctx = dev->scrub_device; |
| 2426 | if (!sdev) { | 2975 | if (!sctx) { |
| 2427 | mutex_unlock(&fs_info->scrub_lock); | 2976 | mutex_unlock(&fs_info->scrub_lock); |
| 2428 | return -ENOTCONN; | 2977 | return -ENOTCONN; |
| 2429 | } | 2978 | } |
| 2430 | atomic_inc(&sdev->cancel_req); | 2979 | atomic_inc(&sctx->cancel_req); |
| 2431 | while (dev->scrub_device) { | 2980 | while (dev->scrub_device) { |
| 2432 | mutex_unlock(&fs_info->scrub_lock); | 2981 | mutex_unlock(&fs_info->scrub_lock); |
| 2433 | wait_event(fs_info->scrub_pause_wait, | 2982 | wait_event(fs_info->scrub_pause_wait, |
| @@ -2450,12 +2999,12 @@ int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) | |||
| 2450 | * does not go away in cancel_dev. FIXME: find a better solution | 2999 | * does not go away in cancel_dev. FIXME: find a better solution |
| 2451 | */ | 3000 | */ |
| 2452 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | 3001 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
| 2453 | dev = btrfs_find_device(root, devid, NULL, NULL); | 3002 | dev = btrfs_find_device(fs_info, devid, NULL, NULL); |
| 2454 | if (!dev) { | 3003 | if (!dev) { |
| 2455 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 3004 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2456 | return -ENODEV; | 3005 | return -ENODEV; |
| 2457 | } | 3006 | } |
| 2458 | ret = btrfs_scrub_cancel_dev(root, dev); | 3007 | ret = btrfs_scrub_cancel_dev(fs_info, dev); |
| 2459 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 3008 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2460 | 3009 | ||
| 2461 | return ret; | 3010 | return ret; |
| @@ -2465,15 +3014,284 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | |||
| 2465 | struct btrfs_scrub_progress *progress) | 3014 | struct btrfs_scrub_progress *progress) |
| 2466 | { | 3015 | { |
| 2467 | struct btrfs_device *dev; | 3016 | struct btrfs_device *dev; |
| 2468 | struct scrub_dev *sdev = NULL; | 3017 | struct scrub_ctx *sctx = NULL; |
| 2469 | 3018 | ||
| 2470 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 3019 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 2471 | dev = btrfs_find_device(root, devid, NULL, NULL); | 3020 | dev = btrfs_find_device(root->fs_info, devid, NULL, NULL); |
| 2472 | if (dev) | 3021 | if (dev) |
| 2473 | sdev = dev->scrub_device; | 3022 | sctx = dev->scrub_device; |
| 2474 | if (sdev) | 3023 | if (sctx) |
| 2475 | memcpy(progress, &sdev->stat, sizeof(*progress)); | 3024 | memcpy(progress, &sctx->stat, sizeof(*progress)); |
| 2476 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 3025 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 2477 | 3026 | ||
| 2478 | return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV; | 3027 | return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV; |
| 3028 | } | ||
| 3029 | |||
| 3030 | static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | ||
| 3031 | u64 extent_logical, u64 extent_len, | ||
| 3032 | u64 *extent_physical, | ||
| 3033 | struct btrfs_device **extent_dev, | ||
| 3034 | int *extent_mirror_num) | ||
| 3035 | { | ||
| 3036 | u64 mapped_length; | ||
| 3037 | struct btrfs_bio *bbio = NULL; | ||
| 3038 | int ret; | ||
| 3039 | |||
| 3040 | mapped_length = extent_len; | ||
| 3041 | ret = btrfs_map_block(fs_info, READ, extent_logical, | ||
| 3042 | &mapped_length, &bbio, 0); | ||
| 3043 | if (ret || !bbio || mapped_length < extent_len || | ||
| 3044 | !bbio->stripes[0].dev->bdev) { | ||
| 3045 | kfree(bbio); | ||
| 3046 | return; | ||
| 3047 | } | ||
| 3048 | |||
| 3049 | *extent_physical = bbio->stripes[0].physical; | ||
| 3050 | *extent_mirror_num = bbio->mirror_num; | ||
| 3051 | *extent_dev = bbio->stripes[0].dev; | ||
| 3052 | kfree(bbio); | ||
| 3053 | } | ||
| 3054 | |||
| 3055 | static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, | ||
| 3056 | struct scrub_wr_ctx *wr_ctx, | ||
| 3057 | struct btrfs_fs_info *fs_info, | ||
| 3058 | struct btrfs_device *dev, | ||
| 3059 | int is_dev_replace) | ||
| 3060 | { | ||
| 3061 | WARN_ON(wr_ctx->wr_curr_bio != NULL); | ||
| 3062 | |||
| 3063 | mutex_init(&wr_ctx->wr_lock); | ||
| 3064 | wr_ctx->wr_curr_bio = NULL; | ||
| 3065 | if (!is_dev_replace) | ||
| 3066 | return 0; | ||
| 3067 | |||
| 3068 | WARN_ON(!dev->bdev); | ||
| 3069 | wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO, | ||
| 3070 | bio_get_nr_vecs(dev->bdev)); | ||
| 3071 | wr_ctx->tgtdev = dev; | ||
| 3072 | atomic_set(&wr_ctx->flush_all_writes, 0); | ||
| 3073 | return 0; | ||
| 3074 | } | ||
| 3075 | |||
| 3076 | static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx) | ||
| 3077 | { | ||
| 3078 | mutex_lock(&wr_ctx->wr_lock); | ||
| 3079 | kfree(wr_ctx->wr_curr_bio); | ||
| 3080 | wr_ctx->wr_curr_bio = NULL; | ||
| 3081 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 3082 | } | ||
| 3083 | |||
| 3084 | static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | ||
| 3085 | int mirror_num, u64 physical_for_dev_replace) | ||
| 3086 | { | ||
| 3087 | struct scrub_copy_nocow_ctx *nocow_ctx; | ||
| 3088 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
| 3089 | |||
| 3090 | nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS); | ||
| 3091 | if (!nocow_ctx) { | ||
| 3092 | spin_lock(&sctx->stat_lock); | ||
| 3093 | sctx->stat.malloc_errors++; | ||
| 3094 | spin_unlock(&sctx->stat_lock); | ||
| 3095 | return -ENOMEM; | ||
| 3096 | } | ||
| 3097 | |||
| 3098 | scrub_pending_trans_workers_inc(sctx); | ||
| 3099 | |||
| 3100 | nocow_ctx->sctx = sctx; | ||
| 3101 | nocow_ctx->logical = logical; | ||
| 3102 | nocow_ctx->len = len; | ||
| 3103 | nocow_ctx->mirror_num = mirror_num; | ||
| 3104 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | ||
| 3105 | nocow_ctx->work.func = copy_nocow_pages_worker; | ||
| 3106 | btrfs_queue_worker(&fs_info->scrub_nocow_workers, | ||
| 3107 | &nocow_ctx->work); | ||
| 3108 | |||
| 3109 | return 0; | ||
| 3110 | } | ||
| 3111 | |||
| 3112 | static void copy_nocow_pages_worker(struct btrfs_work *work) | ||
| 3113 | { | ||
| 3114 | struct scrub_copy_nocow_ctx *nocow_ctx = | ||
| 3115 | container_of(work, struct scrub_copy_nocow_ctx, work); | ||
| 3116 | struct scrub_ctx *sctx = nocow_ctx->sctx; | ||
| 3117 | u64 logical = nocow_ctx->logical; | ||
| 3118 | u64 len = nocow_ctx->len; | ||
| 3119 | int mirror_num = nocow_ctx->mirror_num; | ||
| 3120 | u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | ||
| 3121 | int ret; | ||
| 3122 | struct btrfs_trans_handle *trans = NULL; | ||
| 3123 | struct btrfs_fs_info *fs_info; | ||
| 3124 | struct btrfs_path *path; | ||
| 3125 | struct btrfs_root *root; | ||
| 3126 | int not_written = 0; | ||
| 3127 | |||
| 3128 | fs_info = sctx->dev_root->fs_info; | ||
| 3129 | root = fs_info->extent_root; | ||
| 3130 | |||
| 3131 | path = btrfs_alloc_path(); | ||
| 3132 | if (!path) { | ||
| 3133 | spin_lock(&sctx->stat_lock); | ||
| 3134 | sctx->stat.malloc_errors++; | ||
| 3135 | spin_unlock(&sctx->stat_lock); | ||
| 3136 | not_written = 1; | ||
| 3137 | goto out; | ||
| 3138 | } | ||
| 3139 | |||
| 3140 | trans = btrfs_join_transaction(root); | ||
| 3141 | if (IS_ERR(trans)) { | ||
| 3142 | not_written = 1; | ||
| 3143 | goto out; | ||
| 3144 | } | ||
| 3145 | |||
| 3146 | ret = iterate_inodes_from_logical(logical, fs_info, path, | ||
| 3147 | copy_nocow_pages_for_inode, | ||
| 3148 | nocow_ctx); | ||
| 3149 | if (ret != 0 && ret != -ENOENT) { | ||
| 3150 | pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", | ||
| 3151 | (unsigned long long)logical, | ||
| 3152 | (unsigned long long)physical_for_dev_replace, | ||
| 3153 | (unsigned long long)len, | ||
| 3154 | (unsigned long long)mirror_num, ret); | ||
| 3155 | not_written = 1; | ||
| 3156 | goto out; | ||
| 3157 | } | ||
| 3158 | |||
| 3159 | out: | ||
| 3160 | if (trans && !IS_ERR(trans)) | ||
| 3161 | btrfs_end_transaction(trans, root); | ||
| 3162 | if (not_written) | ||
| 3163 | btrfs_dev_replace_stats_inc(&fs_info->dev_replace. | ||
| 3164 | num_uncorrectable_read_errors); | ||
| 3165 | |||
| 3166 | btrfs_free_path(path); | ||
| 3167 | kfree(nocow_ctx); | ||
| 3168 | |||
| 3169 | scrub_pending_trans_workers_dec(sctx); | ||
| 3170 | } | ||
| 3171 | |||
| 3172 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | ||
| 3173 | { | ||
| 3174 | unsigned long index; | ||
| 3175 | struct scrub_copy_nocow_ctx *nocow_ctx = ctx; | ||
| 3176 | int ret = 0; | ||
| 3177 | struct btrfs_key key; | ||
| 3178 | struct inode *inode = NULL; | ||
| 3179 | struct btrfs_root *local_root; | ||
| 3180 | u64 physical_for_dev_replace; | ||
| 3181 | u64 len; | ||
| 3182 | struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; | ||
| 3183 | |||
| 3184 | key.objectid = root; | ||
| 3185 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 3186 | key.offset = (u64)-1; | ||
| 3187 | local_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
| 3188 | if (IS_ERR(local_root)) | ||
| 3189 | return PTR_ERR(local_root); | ||
| 3190 | |||
| 3191 | key.type = BTRFS_INODE_ITEM_KEY; | ||
| 3192 | key.objectid = inum; | ||
| 3193 | key.offset = 0; | ||
| 3194 | inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); | ||
| 3195 | if (IS_ERR(inode)) | ||
| 3196 | return PTR_ERR(inode); | ||
| 3197 | |||
| 3198 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | ||
| 3199 | len = nocow_ctx->len; | ||
| 3200 | while (len >= PAGE_CACHE_SIZE) { | ||
| 3201 | struct page *page = NULL; | ||
| 3202 | int ret_sub; | ||
| 3203 | |||
| 3204 | index = offset >> PAGE_CACHE_SHIFT; | ||
| 3205 | |||
| 3206 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
| 3207 | if (!page) { | ||
| 3208 | pr_err("find_or_create_page() failed\n"); | ||
| 3209 | ret = -ENOMEM; | ||
| 3210 | goto next_page; | ||
| 3211 | } | ||
| 3212 | |||
| 3213 | if (PageUptodate(page)) { | ||
| 3214 | if (PageDirty(page)) | ||
| 3215 | goto next_page; | ||
| 3216 | } else { | ||
| 3217 | ClearPageError(page); | ||
| 3218 | ret_sub = extent_read_full_page(&BTRFS_I(inode)-> | ||
| 3219 | io_tree, | ||
| 3220 | page, btrfs_get_extent, | ||
| 3221 | nocow_ctx->mirror_num); | ||
| 3222 | if (ret_sub) { | ||
| 3223 | ret = ret_sub; | ||
| 3224 | goto next_page; | ||
| 3225 | } | ||
| 3226 | wait_on_page_locked(page); | ||
| 3227 | if (!PageUptodate(page)) { | ||
| 3228 | ret = -EIO; | ||
| 3229 | goto next_page; | ||
| 3230 | } | ||
| 3231 | } | ||
| 3232 | ret_sub = write_page_nocow(nocow_ctx->sctx, | ||
| 3233 | physical_for_dev_replace, page); | ||
| 3234 | if (ret_sub) { | ||
| 3235 | ret = ret_sub; | ||
| 3236 | goto next_page; | ||
| 3237 | } | ||
| 3238 | |||
| 3239 | next_page: | ||
| 3240 | if (page) { | ||
| 3241 | unlock_page(page); | ||
| 3242 | put_page(page); | ||
| 3243 | } | ||
| 3244 | offset += PAGE_CACHE_SIZE; | ||
| 3245 | physical_for_dev_replace += PAGE_CACHE_SIZE; | ||
| 3246 | len -= PAGE_CACHE_SIZE; | ||
| 3247 | } | ||
| 3248 | |||
| 3249 | if (inode) | ||
| 3250 | iput(inode); | ||
| 3251 | return ret; | ||
| 3252 | } | ||
| 3253 | |||
| 3254 | static int write_page_nocow(struct scrub_ctx *sctx, | ||
| 3255 | u64 physical_for_dev_replace, struct page *page) | ||
| 3256 | { | ||
| 3257 | struct bio *bio; | ||
| 3258 | struct btrfs_device *dev; | ||
| 3259 | int ret; | ||
| 3260 | DECLARE_COMPLETION_ONSTACK(compl); | ||
| 3261 | |||
| 3262 | dev = sctx->wr_ctx.tgtdev; | ||
| 3263 | if (!dev) | ||
| 3264 | return -EIO; | ||
| 3265 | if (!dev->bdev) { | ||
| 3266 | printk_ratelimited(KERN_WARNING | ||
| 3267 | "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); | ||
| 3268 | return -EIO; | ||
| 3269 | } | ||
| 3270 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 3271 | if (!bio) { | ||
| 3272 | spin_lock(&sctx->stat_lock); | ||
| 3273 | sctx->stat.malloc_errors++; | ||
| 3274 | spin_unlock(&sctx->stat_lock); | ||
| 3275 | return -ENOMEM; | ||
| 3276 | } | ||
| 3277 | bio->bi_private = &compl; | ||
| 3278 | bio->bi_end_io = scrub_complete_bio_end_io; | ||
| 3279 | bio->bi_size = 0; | ||
| 3280 | bio->bi_sector = physical_for_dev_replace >> 9; | ||
| 3281 | bio->bi_bdev = dev->bdev; | ||
| 3282 | ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); | ||
| 3283 | if (ret != PAGE_CACHE_SIZE) { | ||
| 3284 | leave_with_eio: | ||
| 3285 | bio_put(bio); | ||
| 3286 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); | ||
| 3287 | return -EIO; | ||
| 3288 | } | ||
| 3289 | btrfsic_submit_bio(WRITE_SYNC, bio); | ||
| 3290 | wait_for_completion(&compl); | ||
| 3291 | |||
| 3292 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
| 3293 | goto leave_with_eio; | ||
| 3294 | |||
| 3295 | bio_put(bio); | ||
| 3296 | return 0; | ||
| 2479 | } | 3297 | } |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fb5ffe95f869..54454542ad40 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -107,7 +107,6 @@ struct send_ctx { | |||
| 107 | int cur_inode_new; | 107 | int cur_inode_new; |
| 108 | int cur_inode_new_gen; | 108 | int cur_inode_new_gen; |
| 109 | int cur_inode_deleted; | 109 | int cur_inode_deleted; |
| 110 | int cur_inode_first_ref_orphan; | ||
| 111 | u64 cur_inode_size; | 110 | u64 cur_inode_size; |
| 112 | u64 cur_inode_mode; | 111 | u64 cur_inode_mode; |
| 113 | 112 | ||
| @@ -126,7 +125,15 @@ struct send_ctx { | |||
| 126 | 125 | ||
| 127 | struct name_cache_entry { | 126 | struct name_cache_entry { |
| 128 | struct list_head list; | 127 | struct list_head list; |
| 129 | struct list_head use_list; | 128 | /* |
| 129 | * radix_tree has only 32bit entries but we need to handle 64bit inums. | ||
| 130 | * We use the lower 32bit of the 64bit inum to store it in the tree. If | ||
| 131 | * more then one inum would fall into the same entry, we use radix_list | ||
| 132 | * to store the additional entries. radix_list is also used to store | ||
| 133 | * entries where two entries have the same inum but different | ||
| 134 | * generations. | ||
| 135 | */ | ||
| 136 | struct list_head radix_list; | ||
| 130 | u64 ino; | 137 | u64 ino; |
| 131 | u64 gen; | 138 | u64 gen; |
| 132 | u64 parent_ino; | 139 | u64 parent_ino; |
| @@ -328,6 +335,7 @@ out: | |||
| 328 | return ret; | 335 | return ret; |
| 329 | } | 336 | } |
| 330 | 337 | ||
| 338 | #if 0 | ||
| 331 | static void fs_path_remove(struct fs_path *p) | 339 | static void fs_path_remove(struct fs_path *p) |
| 332 | { | 340 | { |
| 333 | BUG_ON(p->reversed); | 341 | BUG_ON(p->reversed); |
| @@ -335,6 +343,7 @@ static void fs_path_remove(struct fs_path *p) | |||
| 335 | p->end--; | 343 | p->end--; |
| 336 | *p->end = 0; | 344 | *p->end = 0; |
| 337 | } | 345 | } |
| 346 | #endif | ||
| 338 | 347 | ||
| 339 | static int fs_path_copy(struct fs_path *p, struct fs_path *from) | 348 | static int fs_path_copy(struct fs_path *p, struct fs_path *from) |
| 340 | { | 349 | { |
| @@ -377,7 +386,7 @@ static struct btrfs_path *alloc_path_for_send(void) | |||
| 377 | return path; | 386 | return path; |
| 378 | } | 387 | } |
| 379 | 388 | ||
| 380 | static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) | 389 | int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) |
| 381 | { | 390 | { |
| 382 | int ret; | 391 | int ret; |
| 383 | mm_segment_t old_fs; | 392 | mm_segment_t old_fs; |
| @@ -387,8 +396,7 @@ static int write_buf(struct send_ctx *sctx, const void *buf, u32 len) | |||
| 387 | set_fs(KERNEL_DS); | 396 | set_fs(KERNEL_DS); |
| 388 | 397 | ||
| 389 | while (pos < len) { | 398 | while (pos < len) { |
| 390 | ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos, | 399 | ret = vfs_write(filp, (char *)buf + pos, len - pos, off); |
| 391 | &sctx->send_off); | ||
| 392 | /* TODO handle that correctly */ | 400 | /* TODO handle that correctly */ |
| 393 | /*if (ret == -ERESTARTSYS) { | 401 | /*if (ret == -ERESTARTSYS) { |
| 394 | continue; | 402 | continue; |
| @@ -544,7 +552,8 @@ static int send_header(struct send_ctx *sctx) | |||
| 544 | strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); | 552 | strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC); |
| 545 | hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); | 553 | hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION); |
| 546 | 554 | ||
| 547 | return write_buf(sctx, &hdr, sizeof(hdr)); | 555 | return write_buf(sctx->send_filp, &hdr, sizeof(hdr), |
| 556 | &sctx->send_off); | ||
| 548 | } | 557 | } |
| 549 | 558 | ||
| 550 | /* | 559 | /* |
| @@ -581,7 +590,8 @@ static int send_cmd(struct send_ctx *sctx) | |||
| 581 | crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); | 590 | crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); |
| 582 | hdr->crc = cpu_to_le32(crc); | 591 | hdr->crc = cpu_to_le32(crc); |
| 583 | 592 | ||
| 584 | ret = write_buf(sctx, sctx->send_buf, sctx->send_size); | 593 | ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, |
| 594 | &sctx->send_off); | ||
| 585 | 595 | ||
| 586 | sctx->total_send_size += sctx->send_size; | 596 | sctx->total_send_size += sctx->send_size; |
| 587 | sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; | 597 | sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size; |
| @@ -687,7 +697,8 @@ out: | |||
| 687 | */ | 697 | */ |
| 688 | static int get_inode_info(struct btrfs_root *root, | 698 | static int get_inode_info(struct btrfs_root *root, |
| 689 | u64 ino, u64 *size, u64 *gen, | 699 | u64 ino, u64 *size, u64 *gen, |
| 690 | u64 *mode, u64 *uid, u64 *gid) | 700 | u64 *mode, u64 *uid, u64 *gid, |
| 701 | u64 *rdev) | ||
| 691 | { | 702 | { |
| 692 | int ret; | 703 | int ret; |
| 693 | struct btrfs_inode_item *ii; | 704 | struct btrfs_inode_item *ii; |
| @@ -721,6 +732,8 @@ static int get_inode_info(struct btrfs_root *root, | |||
| 721 | *uid = btrfs_inode_uid(path->nodes[0], ii); | 732 | *uid = btrfs_inode_uid(path->nodes[0], ii); |
| 722 | if (gid) | 733 | if (gid) |
| 723 | *gid = btrfs_inode_gid(path->nodes[0], ii); | 734 | *gid = btrfs_inode_gid(path->nodes[0], ii); |
| 735 | if (rdev) | ||
| 736 | *rdev = btrfs_inode_rdev(path->nodes[0], ii); | ||
| 724 | 737 | ||
| 725 | out: | 738 | out: |
| 726 | btrfs_free_path(path); | 739 | btrfs_free_path(path); |
| @@ -732,31 +745,36 @@ typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index, | |||
| 732 | void *ctx); | 745 | void *ctx); |
| 733 | 746 | ||
| 734 | /* | 747 | /* |
| 735 | * Helper function to iterate the entries in ONE btrfs_inode_ref. | 748 | * Helper function to iterate the entries in ONE btrfs_inode_ref or |
| 749 | * btrfs_inode_extref. | ||
| 736 | * The iterate callback may return a non zero value to stop iteration. This can | 750 | * The iterate callback may return a non zero value to stop iteration. This can |
| 737 | * be a negative value for error codes or 1 to simply stop it. | 751 | * be a negative value for error codes or 1 to simply stop it. |
| 738 | * | 752 | * |
| 739 | * path must point to the INODE_REF when called. | 753 | * path must point to the INODE_REF or INODE_EXTREF when called. |
| 740 | */ | 754 | */ |
| 741 | static int iterate_inode_ref(struct send_ctx *sctx, | 755 | static int iterate_inode_ref(struct send_ctx *sctx, |
| 742 | struct btrfs_root *root, struct btrfs_path *path, | 756 | struct btrfs_root *root, struct btrfs_path *path, |
| 743 | struct btrfs_key *found_key, int resolve, | 757 | struct btrfs_key *found_key, int resolve, |
| 744 | iterate_inode_ref_t iterate, void *ctx) | 758 | iterate_inode_ref_t iterate, void *ctx) |
| 745 | { | 759 | { |
| 746 | struct extent_buffer *eb; | 760 | struct extent_buffer *eb = path->nodes[0]; |
| 747 | struct btrfs_item *item; | 761 | struct btrfs_item *item; |
| 748 | struct btrfs_inode_ref *iref; | 762 | struct btrfs_inode_ref *iref; |
| 763 | struct btrfs_inode_extref *extref; | ||
| 749 | struct btrfs_path *tmp_path; | 764 | struct btrfs_path *tmp_path; |
| 750 | struct fs_path *p; | 765 | struct fs_path *p; |
| 751 | u32 cur; | 766 | u32 cur = 0; |
| 752 | u32 len; | ||
| 753 | u32 total; | 767 | u32 total; |
| 754 | int slot; | 768 | int slot = path->slots[0]; |
| 755 | u32 name_len; | 769 | u32 name_len; |
| 756 | char *start; | 770 | char *start; |
| 757 | int ret = 0; | 771 | int ret = 0; |
| 758 | int num; | 772 | int num = 0; |
| 759 | int index; | 773 | int index; |
| 774 | u64 dir; | ||
| 775 | unsigned long name_off; | ||
| 776 | unsigned long elem_size; | ||
| 777 | unsigned long ptr; | ||
| 760 | 778 | ||
| 761 | p = fs_path_alloc_reversed(sctx); | 779 | p = fs_path_alloc_reversed(sctx); |
| 762 | if (!p) | 780 | if (!p) |
| @@ -768,24 +786,40 @@ static int iterate_inode_ref(struct send_ctx *sctx, | |||
| 768 | return -ENOMEM; | 786 | return -ENOMEM; |
| 769 | } | 787 | } |
| 770 | 788 | ||
| 771 | eb = path->nodes[0]; | ||
| 772 | slot = path->slots[0]; | ||
| 773 | item = btrfs_item_nr(eb, slot); | ||
| 774 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); | ||
| 775 | cur = 0; | ||
| 776 | len = 0; | ||
| 777 | total = btrfs_item_size(eb, item); | ||
| 778 | 789 | ||
| 779 | num = 0; | 790 | if (found_key->type == BTRFS_INODE_REF_KEY) { |
| 791 | ptr = (unsigned long)btrfs_item_ptr(eb, slot, | ||
| 792 | struct btrfs_inode_ref); | ||
| 793 | item = btrfs_item_nr(eb, slot); | ||
| 794 | total = btrfs_item_size(eb, item); | ||
| 795 | elem_size = sizeof(*iref); | ||
| 796 | } else { | ||
| 797 | ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 798 | total = btrfs_item_size_nr(eb, slot); | ||
| 799 | elem_size = sizeof(*extref); | ||
| 800 | } | ||
| 801 | |||
| 780 | while (cur < total) { | 802 | while (cur < total) { |
| 781 | fs_path_reset(p); | 803 | fs_path_reset(p); |
| 782 | 804 | ||
| 783 | name_len = btrfs_inode_ref_name_len(eb, iref); | 805 | if (found_key->type == BTRFS_INODE_REF_KEY) { |
| 784 | index = btrfs_inode_ref_index(eb, iref); | 806 | iref = (struct btrfs_inode_ref *)(ptr + cur); |
| 807 | name_len = btrfs_inode_ref_name_len(eb, iref); | ||
| 808 | name_off = (unsigned long)(iref + 1); | ||
| 809 | index = btrfs_inode_ref_index(eb, iref); | ||
| 810 | dir = found_key->offset; | ||
| 811 | } else { | ||
| 812 | extref = (struct btrfs_inode_extref *)(ptr + cur); | ||
| 813 | name_len = btrfs_inode_extref_name_len(eb, extref); | ||
| 814 | name_off = (unsigned long)&extref->name; | ||
| 815 | index = btrfs_inode_extref_index(eb, extref); | ||
| 816 | dir = btrfs_inode_extref_parent(eb, extref); | ||
| 817 | } | ||
| 818 | |||
| 785 | if (resolve) { | 819 | if (resolve) { |
| 786 | start = btrfs_iref_to_path(root, tmp_path, iref, eb, | 820 | start = btrfs_ref_to_path(root, tmp_path, name_len, |
| 787 | found_key->offset, p->buf, | 821 | name_off, eb, dir, |
| 788 | p->buf_len); | 822 | p->buf, p->buf_len); |
| 789 | if (IS_ERR(start)) { | 823 | if (IS_ERR(start)) { |
| 790 | ret = PTR_ERR(start); | 824 | ret = PTR_ERR(start); |
| 791 | goto out; | 825 | goto out; |
| @@ -796,9 +830,10 @@ static int iterate_inode_ref(struct send_ctx *sctx, | |||
| 796 | p->buf_len + p->buf - start); | 830 | p->buf_len + p->buf - start); |
| 797 | if (ret < 0) | 831 | if (ret < 0) |
| 798 | goto out; | 832 | goto out; |
| 799 | start = btrfs_iref_to_path(root, tmp_path, iref, | 833 | start = btrfs_ref_to_path(root, tmp_path, |
| 800 | eb, found_key->offset, p->buf, | 834 | name_len, name_off, |
| 801 | p->buf_len); | 835 | eb, dir, |
| 836 | p->buf, p->buf_len); | ||
| 802 | if (IS_ERR(start)) { | 837 | if (IS_ERR(start)) { |
| 803 | ret = PTR_ERR(start); | 838 | ret = PTR_ERR(start); |
| 804 | goto out; | 839 | goto out; |
| @@ -807,21 +842,16 @@ static int iterate_inode_ref(struct send_ctx *sctx, | |||
| 807 | } | 842 | } |
| 808 | p->start = start; | 843 | p->start = start; |
| 809 | } else { | 844 | } else { |
| 810 | ret = fs_path_add_from_extent_buffer(p, eb, | 845 | ret = fs_path_add_from_extent_buffer(p, eb, name_off, |
| 811 | (unsigned long)(iref + 1), name_len); | 846 | name_len); |
| 812 | if (ret < 0) | 847 | if (ret < 0) |
| 813 | goto out; | 848 | goto out; |
| 814 | } | 849 | } |
| 815 | 850 | ||
| 816 | 851 | cur += elem_size + name_len; | |
| 817 | len = sizeof(*iref) + name_len; | 852 | ret = iterate(num, dir, index, p, ctx); |
| 818 | iref = (struct btrfs_inode_ref *)((char *)iref + len); | ||
| 819 | cur += len; | ||
| 820 | |||
| 821 | ret = iterate(num, found_key->offset, index, p, ctx); | ||
| 822 | if (ret) | 853 | if (ret) |
| 823 | goto out; | 854 | goto out; |
| 824 | |||
| 825 | num++; | 855 | num++; |
| 826 | } | 856 | } |
| 827 | 857 | ||
| @@ -852,7 +882,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
| 852 | struct extent_buffer *eb; | 882 | struct extent_buffer *eb; |
| 853 | struct btrfs_item *item; | 883 | struct btrfs_item *item; |
| 854 | struct btrfs_dir_item *di; | 884 | struct btrfs_dir_item *di; |
| 855 | struct btrfs_path *tmp_path = NULL; | ||
| 856 | struct btrfs_key di_key; | 885 | struct btrfs_key di_key; |
| 857 | char *buf = NULL; | 886 | char *buf = NULL; |
| 858 | char *buf2 = NULL; | 887 | char *buf2 = NULL; |
| @@ -874,12 +903,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
| 874 | goto out; | 903 | goto out; |
| 875 | } | 904 | } |
| 876 | 905 | ||
| 877 | tmp_path = alloc_path_for_send(); | ||
| 878 | if (!tmp_path) { | ||
| 879 | ret = -ENOMEM; | ||
| 880 | goto out; | ||
| 881 | } | ||
| 882 | |||
| 883 | eb = path->nodes[0]; | 906 | eb = path->nodes[0]; |
| 884 | slot = path->slots[0]; | 907 | slot = path->slots[0]; |
| 885 | item = btrfs_item_nr(eb, slot); | 908 | item = btrfs_item_nr(eb, slot); |
| @@ -941,7 +964,6 @@ static int iterate_dir_item(struct send_ctx *sctx, | |||
| 941 | } | 964 | } |
| 942 | 965 | ||
| 943 | out: | 966 | out: |
| 944 | btrfs_free_path(tmp_path); | ||
| 945 | if (buf_virtual) | 967 | if (buf_virtual) |
| 946 | vfree(buf); | 968 | vfree(buf); |
| 947 | else | 969 | else |
| @@ -993,7 +1015,8 @@ static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root, | |||
| 993 | } | 1015 | } |
| 994 | btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); | 1016 | btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); |
| 995 | if (found_key.objectid != ino || | 1017 | if (found_key.objectid != ino || |
| 996 | found_key.type != BTRFS_INODE_REF_KEY) { | 1018 | (found_key.type != BTRFS_INODE_REF_KEY && |
| 1019 | found_key.type != BTRFS_INODE_EXTREF_KEY)) { | ||
| 997 | ret = -ENOENT; | 1020 | ret = -ENOENT; |
| 998 | goto out; | 1021 | goto out; |
| 999 | } | 1022 | } |
| @@ -1026,12 +1049,12 @@ struct backref_ctx { | |||
| 1026 | u64 extent_len; | 1049 | u64 extent_len; |
| 1027 | 1050 | ||
| 1028 | /* Just to check for bugs in backref resolving */ | 1051 | /* Just to check for bugs in backref resolving */ |
| 1029 | int found_in_send_root; | 1052 | int found_itself; |
| 1030 | }; | 1053 | }; |
| 1031 | 1054 | ||
| 1032 | static int __clone_root_cmp_bsearch(const void *key, const void *elt) | 1055 | static int __clone_root_cmp_bsearch(const void *key, const void *elt) |
| 1033 | { | 1056 | { |
| 1034 | u64 root = (u64)key; | 1057 | u64 root = (u64)(uintptr_t)key; |
| 1035 | struct clone_root *cr = (struct clone_root *)elt; | 1058 | struct clone_root *cr = (struct clone_root *)elt; |
| 1036 | 1059 | ||
| 1037 | if (root < cr->root->objectid) | 1060 | if (root < cr->root->objectid) |
| @@ -1055,6 +1078,7 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2) | |||
| 1055 | 1078 | ||
| 1056 | /* | 1079 | /* |
| 1057 | * Called for every backref that is found for the current extent. | 1080 | * Called for every backref that is found for the current extent. |
| 1081 | * Results are collected in sctx->clone_roots->ino/offset/found_refs | ||
| 1058 | */ | 1082 | */ |
| 1059 | static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | 1083 | static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) |
| 1060 | { | 1084 | { |
| @@ -1064,7 +1088,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1064 | u64 i_size; | 1088 | u64 i_size; |
| 1065 | 1089 | ||
| 1066 | /* First check if the root is in the list of accepted clone sources */ | 1090 | /* First check if the root is in the list of accepted clone sources */ |
| 1067 | found = bsearch((void *)root, bctx->sctx->clone_roots, | 1091 | found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, |
| 1068 | bctx->sctx->clone_roots_cnt, | 1092 | bctx->sctx->clone_roots_cnt, |
| 1069 | sizeof(struct clone_root), | 1093 | sizeof(struct clone_root), |
| 1070 | __clone_root_cmp_bsearch); | 1094 | __clone_root_cmp_bsearch); |
| @@ -1074,14 +1098,15 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1074 | if (found->root == bctx->sctx->send_root && | 1098 | if (found->root == bctx->sctx->send_root && |
| 1075 | ino == bctx->cur_objectid && | 1099 | ino == bctx->cur_objectid && |
| 1076 | offset == bctx->cur_offset) { | 1100 | offset == bctx->cur_offset) { |
| 1077 | bctx->found_in_send_root = 1; | 1101 | bctx->found_itself = 1; |
| 1078 | } | 1102 | } |
| 1079 | 1103 | ||
| 1080 | /* | 1104 | /* |
| 1081 | * There are inodes that have extents that lie behind it's i_size. Don't | 1105 | * There are inodes that have extents that lie behind its i_size. Don't |
| 1082 | * accept clones from these extents. | 1106 | * accept clones from these extents. |
| 1083 | */ | 1107 | */ |
| 1084 | ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL); | 1108 | ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL, |
| 1109 | NULL); | ||
| 1085 | if (ret < 0) | 1110 | if (ret < 0) |
| 1086 | return ret; | 1111 | return ret; |
| 1087 | 1112 | ||
| @@ -1101,16 +1126,12 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1101 | */ | 1126 | */ |
| 1102 | if (ino >= bctx->cur_objectid) | 1127 | if (ino >= bctx->cur_objectid) |
| 1103 | return 0; | 1128 | return 0; |
| 1104 | /*if (ino > ctx->cur_objectid) | 1129 | #if 0 |
| 1130 | if (ino > bctx->cur_objectid) | ||
| 1105 | return 0; | 1131 | return 0; |
| 1106 | if (offset + ctx->extent_len > ctx->cur_offset) | 1132 | if (offset + bctx->extent_len > bctx->cur_offset) |
| 1107 | return 0;*/ | 1133 | return 0; |
| 1108 | 1134 | #endif | |
| 1109 | bctx->found++; | ||
| 1110 | found->found_refs++; | ||
| 1111 | found->ino = ino; | ||
| 1112 | found->offset = offset; | ||
| 1113 | return 0; | ||
| 1114 | } | 1135 | } |
| 1115 | 1136 | ||
| 1116 | bctx->found++; | 1137 | bctx->found++; |
| @@ -1130,6 +1151,12 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
| 1130 | } | 1151 | } |
| 1131 | 1152 | ||
| 1132 | /* | 1153 | /* |
| 1154 | * Given an inode, offset and extent item, it finds a good clone for a clone | ||
| 1155 | * instruction. Returns -ENOENT when none could be found. The function makes | ||
| 1156 | * sure that the returned clone is usable at the point where sending is at the | ||
| 1157 | * moment. This means, that no clones are accepted which lie behind the current | ||
| 1158 | * inode+offset. | ||
| 1159 | * | ||
| 1133 | * path must point to the extent item when called. | 1160 | * path must point to the extent item when called. |
| 1134 | */ | 1161 | */ |
| 1135 | static int find_extent_clone(struct send_ctx *sctx, | 1162 | static int find_extent_clone(struct send_ctx *sctx, |
| @@ -1141,20 +1168,29 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1141 | int ret; | 1168 | int ret; |
| 1142 | int extent_type; | 1169 | int extent_type; |
| 1143 | u64 logical; | 1170 | u64 logical; |
| 1171 | u64 disk_byte; | ||
| 1144 | u64 num_bytes; | 1172 | u64 num_bytes; |
| 1145 | u64 extent_item_pos; | 1173 | u64 extent_item_pos; |
| 1174 | u64 flags = 0; | ||
| 1146 | struct btrfs_file_extent_item *fi; | 1175 | struct btrfs_file_extent_item *fi; |
| 1147 | struct extent_buffer *eb = path->nodes[0]; | 1176 | struct extent_buffer *eb = path->nodes[0]; |
| 1148 | struct backref_ctx backref_ctx; | 1177 | struct backref_ctx *backref_ctx = NULL; |
| 1149 | struct clone_root *cur_clone_root; | 1178 | struct clone_root *cur_clone_root; |
| 1150 | struct btrfs_key found_key; | 1179 | struct btrfs_key found_key; |
| 1151 | struct btrfs_path *tmp_path; | 1180 | struct btrfs_path *tmp_path; |
| 1181 | int compressed; | ||
| 1152 | u32 i; | 1182 | u32 i; |
| 1153 | 1183 | ||
| 1154 | tmp_path = alloc_path_for_send(); | 1184 | tmp_path = alloc_path_for_send(); |
| 1155 | if (!tmp_path) | 1185 | if (!tmp_path) |
| 1156 | return -ENOMEM; | 1186 | return -ENOMEM; |
| 1157 | 1187 | ||
| 1188 | backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); | ||
| 1189 | if (!backref_ctx) { | ||
| 1190 | ret = -ENOMEM; | ||
| 1191 | goto out; | ||
| 1192 | } | ||
| 1193 | |||
| 1158 | if (data_offset >= ino_size) { | 1194 | if (data_offset >= ino_size) { |
| 1159 | /* | 1195 | /* |
| 1160 | * There may be extents that lie behind the file's size. | 1196 | * There may be extents that lie behind the file's size. |
| @@ -1172,22 +1208,23 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1172 | ret = -ENOENT; | 1208 | ret = -ENOENT; |
| 1173 | goto out; | 1209 | goto out; |
| 1174 | } | 1210 | } |
| 1211 | compressed = btrfs_file_extent_compression(eb, fi); | ||
| 1175 | 1212 | ||
| 1176 | num_bytes = btrfs_file_extent_num_bytes(eb, fi); | 1213 | num_bytes = btrfs_file_extent_num_bytes(eb, fi); |
| 1177 | logical = btrfs_file_extent_disk_bytenr(eb, fi); | 1214 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); |
| 1178 | if (logical == 0) { | 1215 | if (disk_byte == 0) { |
| 1179 | ret = -ENOENT; | 1216 | ret = -ENOENT; |
| 1180 | goto out; | 1217 | goto out; |
| 1181 | } | 1218 | } |
| 1182 | logical += btrfs_file_extent_offset(eb, fi); | 1219 | logical = disk_byte + btrfs_file_extent_offset(eb, fi); |
| 1183 | 1220 | ||
| 1184 | ret = extent_from_logical(sctx->send_root->fs_info, | 1221 | ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, |
| 1185 | logical, tmp_path, &found_key); | 1222 | &found_key, &flags); |
| 1186 | btrfs_release_path(tmp_path); | 1223 | btrfs_release_path(tmp_path); |
| 1187 | 1224 | ||
| 1188 | if (ret < 0) | 1225 | if (ret < 0) |
| 1189 | goto out; | 1226 | goto out; |
| 1190 | if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 1227 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 1191 | ret = -EIO; | 1228 | ret = -EIO; |
| 1192 | goto out; | 1229 | goto out; |
| 1193 | } | 1230 | } |
| @@ -1202,12 +1239,12 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1202 | cur_clone_root->found_refs = 0; | 1239 | cur_clone_root->found_refs = 0; |
| 1203 | } | 1240 | } |
| 1204 | 1241 | ||
| 1205 | backref_ctx.sctx = sctx; | 1242 | backref_ctx->sctx = sctx; |
| 1206 | backref_ctx.found = 0; | 1243 | backref_ctx->found = 0; |
| 1207 | backref_ctx.cur_objectid = ino; | 1244 | backref_ctx->cur_objectid = ino; |
| 1208 | backref_ctx.cur_offset = data_offset; | 1245 | backref_ctx->cur_offset = data_offset; |
| 1209 | backref_ctx.found_in_send_root = 0; | 1246 | backref_ctx->found_itself = 0; |
| 1210 | backref_ctx.extent_len = num_bytes; | 1247 | backref_ctx->extent_len = num_bytes; |
| 1211 | 1248 | ||
| 1212 | /* | 1249 | /* |
| 1213 | * The last extent of a file may be too large due to page alignment. | 1250 | * The last extent of a file may be too large due to page alignment. |
| @@ -1215,25 +1252,31 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
| 1215 | * __iterate_backrefs work. | 1252 | * __iterate_backrefs work. |
| 1216 | */ | 1253 | */ |
| 1217 | if (data_offset + num_bytes >= ino_size) | 1254 | if (data_offset + num_bytes >= ino_size) |
| 1218 | backref_ctx.extent_len = ino_size - data_offset; | 1255 | backref_ctx->extent_len = ino_size - data_offset; |
| 1219 | 1256 | ||
| 1220 | /* | 1257 | /* |
| 1221 | * Now collect all backrefs. | 1258 | * Now collect all backrefs. |
| 1222 | */ | 1259 | */ |
| 1260 | if (compressed == BTRFS_COMPRESS_NONE) | ||
| 1261 | extent_item_pos = logical - found_key.objectid; | ||
| 1262 | else | ||
| 1263 | extent_item_pos = 0; | ||
| 1264 | |||
| 1223 | extent_item_pos = logical - found_key.objectid; | 1265 | extent_item_pos = logical - found_key.objectid; |
| 1224 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | 1266 | ret = iterate_extent_inodes(sctx->send_root->fs_info, |
| 1225 | found_key.objectid, extent_item_pos, 1, | 1267 | found_key.objectid, extent_item_pos, 1, |
| 1226 | __iterate_backrefs, &backref_ctx); | 1268 | __iterate_backrefs, backref_ctx); |
| 1269 | |||
| 1227 | if (ret < 0) | 1270 | if (ret < 0) |
| 1228 | goto out; | 1271 | goto out; |
| 1229 | 1272 | ||
| 1230 | if (!backref_ctx.found_in_send_root) { | 1273 | if (!backref_ctx->found_itself) { |
| 1231 | /* found a bug in backref code? */ | 1274 | /* found a bug in backref code? */ |
| 1232 | ret = -EIO; | 1275 | ret = -EIO; |
| 1233 | printk(KERN_ERR "btrfs: ERROR did not find backref in " | 1276 | printk(KERN_ERR "btrfs: ERROR did not find backref in " |
| 1234 | "send_root. inode=%llu, offset=%llu, " | 1277 | "send_root. inode=%llu, offset=%llu, " |
| 1235 | "logical=%llu\n", | 1278 | "disk_byte=%llu found extent=%llu\n", |
| 1236 | ino, data_offset, logical); | 1279 | ino, data_offset, disk_byte, found_key.objectid); |
| 1237 | goto out; | 1280 | goto out; |
| 1238 | } | 1281 | } |
| 1239 | 1282 | ||
| @@ -1242,7 +1285,7 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
| 1242 | "num_bytes=%llu, logical=%llu\n", | 1285 | "num_bytes=%llu, logical=%llu\n", |
| 1243 | data_offset, ino, num_bytes, logical); | 1286 | data_offset, ino, num_bytes, logical); |
| 1244 | 1287 | ||
| 1245 | if (!backref_ctx.found) | 1288 | if (!backref_ctx->found) |
| 1246 | verbose_printk("btrfs: no clones found\n"); | 1289 | verbose_printk("btrfs: no clones found\n"); |
| 1247 | 1290 | ||
| 1248 | cur_clone_root = NULL; | 1291 | cur_clone_root = NULL; |
| @@ -1253,7 +1296,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
| 1253 | else if (sctx->clone_roots[i].root == sctx->send_root) | 1296 | else if (sctx->clone_roots[i].root == sctx->send_root) |
| 1254 | /* prefer clones from send_root over others */ | 1297 | /* prefer clones from send_root over others */ |
| 1255 | cur_clone_root = sctx->clone_roots + i; | 1298 | cur_clone_root = sctx->clone_roots + i; |
| 1256 | break; | ||
| 1257 | } | 1299 | } |
| 1258 | 1300 | ||
| 1259 | } | 1301 | } |
| @@ -1267,6 +1309,7 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " | |||
| 1267 | 1309 | ||
| 1268 | out: | 1310 | out: |
| 1269 | btrfs_free_path(tmp_path); | 1311 | btrfs_free_path(tmp_path); |
| 1312 | kfree(backref_ctx); | ||
| 1270 | return ret; | 1313 | return ret; |
| 1271 | } | 1314 | } |
| 1272 | 1315 | ||
| @@ -1307,8 +1350,6 @@ static int read_symlink(struct send_ctx *sctx, | |||
| 1307 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); | 1350 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); |
| 1308 | 1351 | ||
| 1309 | ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); | 1352 | ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); |
| 1310 | if (ret < 0) | ||
| 1311 | goto out; | ||
| 1312 | 1353 | ||
| 1313 | out: | 1354 | out: |
| 1314 | btrfs_free_path(path); | 1355 | btrfs_free_path(path); |
| @@ -1404,7 +1445,7 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 1404 | u64 right_gen; | 1445 | u64 right_gen; |
| 1405 | 1446 | ||
| 1406 | ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, | 1447 | ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL, |
| 1407 | NULL); | 1448 | NULL, NULL); |
| 1408 | if (ret < 0 && ret != -ENOENT) | 1449 | if (ret < 0 && ret != -ENOENT) |
| 1409 | goto out; | 1450 | goto out; |
| 1410 | left_ret = ret; | 1451 | left_ret = ret; |
| @@ -1413,16 +1454,16 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 1413 | right_ret = -ENOENT; | 1454 | right_ret = -ENOENT; |
| 1414 | } else { | 1455 | } else { |
| 1415 | ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, | 1456 | ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen, |
| 1416 | NULL, NULL, NULL); | 1457 | NULL, NULL, NULL, NULL); |
| 1417 | if (ret < 0 && ret != -ENOENT) | 1458 | if (ret < 0 && ret != -ENOENT) |
| 1418 | goto out; | 1459 | goto out; |
| 1419 | right_ret = ret; | 1460 | right_ret = ret; |
| 1420 | } | 1461 | } |
| 1421 | 1462 | ||
| 1422 | if (!left_ret && !right_ret) { | 1463 | if (!left_ret && !right_ret) { |
| 1423 | if (left_gen == gen && right_gen == gen) | 1464 | if (left_gen == gen && right_gen == gen) { |
| 1424 | ret = inode_state_no_change; | 1465 | ret = inode_state_no_change; |
| 1425 | else if (left_gen == gen) { | 1466 | } else if (left_gen == gen) { |
| 1426 | if (ino < sctx->send_progress) | 1467 | if (ino < sctx->send_progress) |
| 1427 | ret = inode_state_did_create; | 1468 | ret = inode_state_did_create; |
| 1428 | else | 1469 | else |
| @@ -1516,6 +1557,10 @@ out: | |||
| 1516 | return ret; | 1557 | return ret; |
| 1517 | } | 1558 | } |
| 1518 | 1559 | ||
| 1560 | /* | ||
| 1561 | * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir, | ||
| 1562 | * generation of the parent dir and the name of the dir entry. | ||
| 1563 | */ | ||
| 1519 | static int get_first_ref(struct send_ctx *sctx, | 1564 | static int get_first_ref(struct send_ctx *sctx, |
| 1520 | struct btrfs_root *root, u64 ino, | 1565 | struct btrfs_root *root, u64 ino, |
| 1521 | u64 *dir, u64 *dir_gen, struct fs_path *name) | 1566 | u64 *dir, u64 *dir_gen, struct fs_path *name) |
| @@ -1524,8 +1569,8 @@ static int get_first_ref(struct send_ctx *sctx, | |||
| 1524 | struct btrfs_key key; | 1569 | struct btrfs_key key; |
| 1525 | struct btrfs_key found_key; | 1570 | struct btrfs_key found_key; |
| 1526 | struct btrfs_path *path; | 1571 | struct btrfs_path *path; |
| 1527 | struct btrfs_inode_ref *iref; | ||
| 1528 | int len; | 1572 | int len; |
| 1573 | u64 parent_dir; | ||
| 1529 | 1574 | ||
| 1530 | path = alloc_path_for_send(); | 1575 | path = alloc_path_for_send(); |
| 1531 | if (!path) | 1576 | if (!path) |
| @@ -1541,27 +1586,41 @@ static int get_first_ref(struct send_ctx *sctx, | |||
| 1541 | if (!ret) | 1586 | if (!ret) |
| 1542 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 1587 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, |
| 1543 | path->slots[0]); | 1588 | path->slots[0]); |
| 1544 | if (ret || found_key.objectid != key.objectid || | 1589 | if (ret || found_key.objectid != ino || |
| 1545 | found_key.type != key.type) { | 1590 | (found_key.type != BTRFS_INODE_REF_KEY && |
| 1591 | found_key.type != BTRFS_INODE_EXTREF_KEY)) { | ||
| 1546 | ret = -ENOENT; | 1592 | ret = -ENOENT; |
| 1547 | goto out; | 1593 | goto out; |
| 1548 | } | 1594 | } |
| 1549 | 1595 | ||
| 1550 | iref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 1596 | if (key.type == BTRFS_INODE_REF_KEY) { |
| 1551 | struct btrfs_inode_ref); | 1597 | struct btrfs_inode_ref *iref; |
| 1552 | len = btrfs_inode_ref_name_len(path->nodes[0], iref); | 1598 | iref = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 1553 | ret = fs_path_add_from_extent_buffer(name, path->nodes[0], | 1599 | struct btrfs_inode_ref); |
| 1554 | (unsigned long)(iref + 1), len); | 1600 | len = btrfs_inode_ref_name_len(path->nodes[0], iref); |
| 1601 | ret = fs_path_add_from_extent_buffer(name, path->nodes[0], | ||
| 1602 | (unsigned long)(iref + 1), | ||
| 1603 | len); | ||
| 1604 | parent_dir = found_key.offset; | ||
| 1605 | } else { | ||
| 1606 | struct btrfs_inode_extref *extref; | ||
| 1607 | extref = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 1608 | struct btrfs_inode_extref); | ||
| 1609 | len = btrfs_inode_extref_name_len(path->nodes[0], extref); | ||
| 1610 | ret = fs_path_add_from_extent_buffer(name, path->nodes[0], | ||
| 1611 | (unsigned long)&extref->name, len); | ||
| 1612 | parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref); | ||
| 1613 | } | ||
| 1555 | if (ret < 0) | 1614 | if (ret < 0) |
| 1556 | goto out; | 1615 | goto out; |
| 1557 | btrfs_release_path(path); | 1616 | btrfs_release_path(path); |
| 1558 | 1617 | ||
| 1559 | ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, | 1618 | ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL, |
| 1560 | NULL); | 1619 | NULL, NULL); |
| 1561 | if (ret < 0) | 1620 | if (ret < 0) |
| 1562 | goto out; | 1621 | goto out; |
| 1563 | 1622 | ||
| 1564 | *dir = found_key.offset; | 1623 | *dir = parent_dir; |
| 1565 | 1624 | ||
| 1566 | out: | 1625 | out: |
| 1567 | btrfs_free_path(path); | 1626 | btrfs_free_path(path); |
| @@ -1586,22 +1645,28 @@ static int is_first_ref(struct send_ctx *sctx, | |||
| 1586 | if (ret < 0) | 1645 | if (ret < 0) |
| 1587 | goto out; | 1646 | goto out; |
| 1588 | 1647 | ||
| 1589 | if (name_len != fs_path_len(tmp_name)) { | 1648 | if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) { |
| 1590 | ret = 0; | 1649 | ret = 0; |
| 1591 | goto out; | 1650 | goto out; |
| 1592 | } | 1651 | } |
| 1593 | 1652 | ||
| 1594 | ret = memcmp(tmp_name->start, name, name_len); | 1653 | ret = !memcmp(tmp_name->start, name, name_len); |
| 1595 | if (ret) | ||
| 1596 | ret = 0; | ||
| 1597 | else | ||
| 1598 | ret = 1; | ||
| 1599 | 1654 | ||
| 1600 | out: | 1655 | out: |
| 1601 | fs_path_free(sctx, tmp_name); | 1656 | fs_path_free(sctx, tmp_name); |
| 1602 | return ret; | 1657 | return ret; |
| 1603 | } | 1658 | } |
| 1604 | 1659 | ||
| 1660 | /* | ||
| 1661 | * Used by process_recorded_refs to determine if a new ref would overwrite an | ||
| 1662 | * already existing ref. In case it detects an overwrite, it returns the | ||
| 1663 | * inode/gen in who_ino/who_gen. | ||
| 1664 | * When an overwrite is detected, process_recorded_refs does proper orphanizing | ||
| 1665 | * to make sure later references to the overwritten inode are possible. | ||
| 1666 | * Orphanizing is however only required for the first ref of an inode. | ||
| 1667 | * process_recorded_refs does an additional is_first_ref check to see if | ||
| 1668 | * orphanizing is really required. | ||
| 1669 | */ | ||
| 1605 | static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | 1670 | static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, |
| 1606 | const char *name, int name_len, | 1671 | const char *name, int name_len, |
| 1607 | u64 *who_ino, u64 *who_gen) | 1672 | u64 *who_ino, u64 *who_gen) |
| @@ -1626,9 +1691,14 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, | |||
| 1626 | goto out; | 1691 | goto out; |
| 1627 | } | 1692 | } |
| 1628 | 1693 | ||
| 1694 | /* | ||
| 1695 | * Check if the overwritten ref was already processed. If yes, the ref | ||
| 1696 | * was already unlinked/moved, so we can safely assume that we will not | ||
| 1697 | * overwrite anything at this point in time. | ||
| 1698 | */ | ||
| 1629 | if (other_inode > sctx->send_progress) { | 1699 | if (other_inode > sctx->send_progress) { |
| 1630 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, | 1700 | ret = get_inode_info(sctx->parent_root, other_inode, NULL, |
| 1631 | who_gen, NULL, NULL, NULL); | 1701 | who_gen, NULL, NULL, NULL, NULL); |
| 1632 | if (ret < 0) | 1702 | if (ret < 0) |
| 1633 | goto out; | 1703 | goto out; |
| 1634 | 1704 | ||
| @@ -1642,6 +1712,13 @@ out: | |||
| 1642 | return ret; | 1712 | return ret; |
| 1643 | } | 1713 | } |
| 1644 | 1714 | ||
| 1715 | /* | ||
| 1716 | * Checks if the ref was overwritten by an already processed inode. This is | ||
| 1717 | * used by __get_cur_name_and_parent to find out if the ref was orphanized and | ||
| 1718 | * thus the orphan name needs be used. | ||
| 1719 | * process_recorded_refs also uses it to avoid unlinking of refs that were | ||
| 1720 | * overwritten. | ||
| 1721 | */ | ||
| 1645 | static int did_overwrite_ref(struct send_ctx *sctx, | 1722 | static int did_overwrite_ref(struct send_ctx *sctx, |
| 1646 | u64 dir, u64 dir_gen, | 1723 | u64 dir, u64 dir_gen, |
| 1647 | u64 ino, u64 ino_gen, | 1724 | u64 ino, u64 ino_gen, |
| @@ -1671,7 +1748,7 @@ static int did_overwrite_ref(struct send_ctx *sctx, | |||
| 1671 | } | 1748 | } |
| 1672 | 1749 | ||
| 1673 | ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, | 1750 | ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL, |
| 1674 | NULL); | 1751 | NULL, NULL); |
| 1675 | if (ret < 0) | 1752 | if (ret < 0) |
| 1676 | goto out; | 1753 | goto out; |
| 1677 | 1754 | ||
| @@ -1690,6 +1767,11 @@ out: | |||
| 1690 | return ret; | 1767 | return ret; |
| 1691 | } | 1768 | } |
| 1692 | 1769 | ||
| 1770 | /* | ||
| 1771 | * Same as did_overwrite_ref, but also checks if it is the first ref of an inode | ||
| 1772 | * that got overwritten. This is used by process_recorded_refs to determine | ||
| 1773 | * if it has to use the path as returned by get_cur_path or the orphan name. | ||
| 1774 | */ | ||
| 1693 | static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | 1775 | static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) |
| 1694 | { | 1776 | { |
| 1695 | int ret = 0; | 1777 | int ret = 0; |
| @@ -1710,39 +1792,40 @@ static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) | |||
| 1710 | 1792 | ||
| 1711 | ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, | 1793 | ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, |
| 1712 | name->start, fs_path_len(name)); | 1794 | name->start, fs_path_len(name)); |
| 1713 | if (ret < 0) | ||
| 1714 | goto out; | ||
| 1715 | 1795 | ||
| 1716 | out: | 1796 | out: |
| 1717 | fs_path_free(sctx, name); | 1797 | fs_path_free(sctx, name); |
| 1718 | return ret; | 1798 | return ret; |
| 1719 | } | 1799 | } |
| 1720 | 1800 | ||
| 1801 | /* | ||
| 1802 | * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit, | ||
| 1803 | * so we need to do some special handling in case we have clashes. This function | ||
| 1804 | * takes care of this with the help of name_cache_entry::radix_list. | ||
| 1805 | * In case of error, nce is kfreed. | ||
| 1806 | */ | ||
| 1721 | static int name_cache_insert(struct send_ctx *sctx, | 1807 | static int name_cache_insert(struct send_ctx *sctx, |
| 1722 | struct name_cache_entry *nce) | 1808 | struct name_cache_entry *nce) |
| 1723 | { | 1809 | { |
| 1724 | int ret = 0; | 1810 | int ret = 0; |
| 1725 | struct name_cache_entry **ncea; | 1811 | struct list_head *nce_head; |
| 1726 | 1812 | ||
| 1727 | ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); | 1813 | nce_head = radix_tree_lookup(&sctx->name_cache, |
| 1728 | if (ncea) { | 1814 | (unsigned long)nce->ino); |
| 1729 | if (!ncea[0]) | 1815 | if (!nce_head) { |
| 1730 | ncea[0] = nce; | 1816 | nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); |
| 1731 | else if (!ncea[1]) | 1817 | if (!nce_head) |
| 1732 | ncea[1] = nce; | ||
| 1733 | else | ||
| 1734 | BUG(); | ||
| 1735 | } else { | ||
| 1736 | ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS); | ||
| 1737 | if (!ncea) | ||
| 1738 | return -ENOMEM; | 1818 | return -ENOMEM; |
| 1819 | INIT_LIST_HEAD(nce_head); | ||
| 1739 | 1820 | ||
| 1740 | ncea[0] = nce; | 1821 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); |
| 1741 | ncea[1] = NULL; | 1822 | if (ret < 0) { |
| 1742 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea); | 1823 | kfree(nce_head); |
| 1743 | if (ret < 0) | 1824 | kfree(nce); |
| 1744 | return ret; | 1825 | return ret; |
| 1826 | } | ||
| 1745 | } | 1827 | } |
| 1828 | list_add_tail(&nce->radix_list, nce_head); | ||
| 1746 | list_add_tail(&nce->list, &sctx->name_cache_list); | 1829 | list_add_tail(&nce->list, &sctx->name_cache_list); |
| 1747 | sctx->name_cache_size++; | 1830 | sctx->name_cache_size++; |
| 1748 | 1831 | ||
| @@ -1752,50 +1835,52 @@ static int name_cache_insert(struct send_ctx *sctx, | |||
| 1752 | static void name_cache_delete(struct send_ctx *sctx, | 1835 | static void name_cache_delete(struct send_ctx *sctx, |
| 1753 | struct name_cache_entry *nce) | 1836 | struct name_cache_entry *nce) |
| 1754 | { | 1837 | { |
| 1755 | struct name_cache_entry **ncea; | 1838 | struct list_head *nce_head; |
| 1756 | |||
| 1757 | ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); | ||
| 1758 | BUG_ON(!ncea); | ||
| 1759 | |||
| 1760 | if (ncea[0] == nce) | ||
| 1761 | ncea[0] = NULL; | ||
| 1762 | else if (ncea[1] == nce) | ||
| 1763 | ncea[1] = NULL; | ||
| 1764 | else | ||
| 1765 | BUG(); | ||
| 1766 | 1839 | ||
| 1767 | if (!ncea[0] && !ncea[1]) { | 1840 | nce_head = radix_tree_lookup(&sctx->name_cache, |
| 1768 | radix_tree_delete(&sctx->name_cache, nce->ino); | 1841 | (unsigned long)nce->ino); |
| 1769 | kfree(ncea); | 1842 | BUG_ON(!nce_head); |
| 1770 | } | ||
| 1771 | 1843 | ||
| 1844 | list_del(&nce->radix_list); | ||
| 1772 | list_del(&nce->list); | 1845 | list_del(&nce->list); |
| 1773 | |||
| 1774 | sctx->name_cache_size--; | 1846 | sctx->name_cache_size--; |
| 1847 | |||
| 1848 | if (list_empty(nce_head)) { | ||
| 1849 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); | ||
| 1850 | kfree(nce_head); | ||
| 1851 | } | ||
| 1775 | } | 1852 | } |
| 1776 | 1853 | ||
| 1777 | static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, | 1854 | static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, |
| 1778 | u64 ino, u64 gen) | 1855 | u64 ino, u64 gen) |
| 1779 | { | 1856 | { |
| 1780 | struct name_cache_entry **ncea; | 1857 | struct list_head *nce_head; |
| 1858 | struct name_cache_entry *cur; | ||
| 1781 | 1859 | ||
| 1782 | ncea = radix_tree_lookup(&sctx->name_cache, ino); | 1860 | nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino); |
| 1783 | if (!ncea) | 1861 | if (!nce_head) |
| 1784 | return NULL; | 1862 | return NULL; |
| 1785 | 1863 | ||
| 1786 | if (ncea[0] && ncea[0]->gen == gen) | 1864 | list_for_each_entry(cur, nce_head, radix_list) { |
| 1787 | return ncea[0]; | 1865 | if (cur->ino == ino && cur->gen == gen) |
| 1788 | else if (ncea[1] && ncea[1]->gen == gen) | 1866 | return cur; |
| 1789 | return ncea[1]; | 1867 | } |
| 1790 | return NULL; | 1868 | return NULL; |
| 1791 | } | 1869 | } |
| 1792 | 1870 | ||
| 1871 | /* | ||
| 1872 | * Removes the entry from the list and adds it back to the end. This marks the | ||
| 1873 | * entry as recently used so that name_cache_clean_unused does not remove it. | ||
| 1874 | */ | ||
| 1793 | static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) | 1875 | static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce) |
| 1794 | { | 1876 | { |
| 1795 | list_del(&nce->list); | 1877 | list_del(&nce->list); |
| 1796 | list_add_tail(&nce->list, &sctx->name_cache_list); | 1878 | list_add_tail(&nce->list, &sctx->name_cache_list); |
| 1797 | } | 1879 | } |
| 1798 | 1880 | ||
| 1881 | /* | ||
| 1882 | * Remove some entries from the beginning of name_cache_list. | ||
| 1883 | */ | ||
| 1799 | static void name_cache_clean_unused(struct send_ctx *sctx) | 1884 | static void name_cache_clean_unused(struct send_ctx *sctx) |
| 1800 | { | 1885 | { |
| 1801 | struct name_cache_entry *nce; | 1886 | struct name_cache_entry *nce; |
| @@ -1814,13 +1899,23 @@ static void name_cache_clean_unused(struct send_ctx *sctx) | |||
| 1814 | static void name_cache_free(struct send_ctx *sctx) | 1899 | static void name_cache_free(struct send_ctx *sctx) |
| 1815 | { | 1900 | { |
| 1816 | struct name_cache_entry *nce; | 1901 | struct name_cache_entry *nce; |
| 1817 | struct name_cache_entry *tmp; | ||
| 1818 | 1902 | ||
| 1819 | list_for_each_entry_safe(nce, tmp, &sctx->name_cache_list, list) { | 1903 | while (!list_empty(&sctx->name_cache_list)) { |
| 1904 | nce = list_entry(sctx->name_cache_list.next, | ||
| 1905 | struct name_cache_entry, list); | ||
| 1820 | name_cache_delete(sctx, nce); | 1906 | name_cache_delete(sctx, nce); |
| 1907 | kfree(nce); | ||
| 1821 | } | 1908 | } |
| 1822 | } | 1909 | } |
| 1823 | 1910 | ||
| 1911 | /* | ||
| 1912 | * Used by get_cur_path for each ref up to the root. | ||
| 1913 | * Returns 0 if it succeeded. | ||
| 1914 | * Returns 1 if the inode is not existent or got overwritten. In that case, the | ||
| 1915 | * name is an orphan name. This instructs get_cur_path to stop iterating. If 1 | ||
| 1916 | * is returned, parent_ino/parent_gen are not guaranteed to be valid. | ||
| 1917 | * Returns <0 in case of error. | ||
| 1918 | */ | ||
| 1824 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | 1919 | static int __get_cur_name_and_parent(struct send_ctx *sctx, |
| 1825 | u64 ino, u64 gen, | 1920 | u64 ino, u64 gen, |
| 1826 | u64 *parent_ino, | 1921 | u64 *parent_ino, |
| @@ -1832,6 +1927,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1832 | struct btrfs_path *path = NULL; | 1927 | struct btrfs_path *path = NULL; |
| 1833 | struct name_cache_entry *nce = NULL; | 1928 | struct name_cache_entry *nce = NULL; |
| 1834 | 1929 | ||
| 1930 | /* | ||
| 1931 | * First check if we already did a call to this function with the same | ||
| 1932 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes | ||
| 1933 | * return the cached result. | ||
| 1934 | */ | ||
| 1835 | nce = name_cache_search(sctx, ino, gen); | 1935 | nce = name_cache_search(sctx, ino, gen); |
| 1836 | if (nce) { | 1936 | if (nce) { |
| 1837 | if (ino < sctx->send_progress && nce->need_later_update) { | 1937 | if (ino < sctx->send_progress && nce->need_later_update) { |
| @@ -1854,6 +1954,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1854 | if (!path) | 1954 | if (!path) |
| 1855 | return -ENOMEM; | 1955 | return -ENOMEM; |
| 1856 | 1956 | ||
| 1957 | /* | ||
| 1958 | * If the inode is not existent yet, add the orphan name and return 1. | ||
| 1959 | * This should only happen for the parent dir that we determine in | ||
| 1960 | * __record_new_ref | ||
| 1961 | */ | ||
| 1857 | ret = is_inode_existent(sctx, ino, gen); | 1962 | ret = is_inode_existent(sctx, ino, gen); |
| 1858 | if (ret < 0) | 1963 | if (ret < 0) |
| 1859 | goto out; | 1964 | goto out; |
| @@ -1866,6 +1971,10 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1866 | goto out_cache; | 1971 | goto out_cache; |
| 1867 | } | 1972 | } |
| 1868 | 1973 | ||
| 1974 | /* | ||
| 1975 | * Depending on whether the inode was already processed or not, use | ||
| 1976 | * send_root or parent_root for ref lookup. | ||
| 1977 | */ | ||
| 1869 | if (ino < sctx->send_progress) | 1978 | if (ino < sctx->send_progress) |
| 1870 | ret = get_first_ref(sctx, sctx->send_root, ino, | 1979 | ret = get_first_ref(sctx, sctx->send_root, ino, |
| 1871 | parent_ino, parent_gen, dest); | 1980 | parent_ino, parent_gen, dest); |
| @@ -1875,6 +1984,10 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1875 | if (ret < 0) | 1984 | if (ret < 0) |
| 1876 | goto out; | 1985 | goto out; |
| 1877 | 1986 | ||
| 1987 | /* | ||
| 1988 | * Check if the ref was overwritten by an inode's ref that was processed | ||
| 1989 | * earlier. If yes, treat as orphan and return 1. | ||
| 1990 | */ | ||
| 1878 | ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, | 1991 | ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, |
| 1879 | dest->start, dest->end - dest->start); | 1992 | dest->start, dest->end - dest->start); |
| 1880 | if (ret < 0) | 1993 | if (ret < 0) |
| @@ -1888,6 +2001,9 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
| 1888 | } | 2001 | } |
| 1889 | 2002 | ||
| 1890 | out_cache: | 2003 | out_cache: |
| 2004 | /* | ||
| 2005 | * Store the result of the lookup in the name cache. | ||
| 2006 | */ | ||
| 1891 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); | 2007 | nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); |
| 1892 | if (!nce) { | 2008 | if (!nce) { |
| 1893 | ret = -ENOMEM; | 2009 | ret = -ENOMEM; |
| @@ -1901,7 +2017,6 @@ out_cache: | |||
| 1901 | nce->name_len = fs_path_len(dest); | 2017 | nce->name_len = fs_path_len(dest); |
| 1902 | nce->ret = ret; | 2018 | nce->ret = ret; |
| 1903 | strcpy(nce->name, dest->start); | 2019 | strcpy(nce->name, dest->start); |
| 1904 | memset(&nce->use_list, 0, sizeof(nce->use_list)); | ||
| 1905 | 2020 | ||
| 1906 | if (ino < sctx->send_progress) | 2021 | if (ino < sctx->send_progress) |
| 1907 | nce->need_later_update = 0; | 2022 | nce->need_later_update = 0; |
| @@ -2107,9 +2222,6 @@ static int send_subvol_begin(struct send_ctx *sctx) | |||
| 2107 | read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); | 2222 | read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); |
| 2108 | btrfs_release_path(path); | 2223 | btrfs_release_path(path); |
| 2109 | 2224 | ||
| 2110 | if (ret < 0) | ||
| 2111 | goto out; | ||
| 2112 | |||
| 2113 | if (parent_root) { | 2225 | if (parent_root) { |
| 2114 | ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); | 2226 | ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); |
| 2115 | if (ret < 0) | 2227 | if (ret < 0) |
| @@ -2276,7 +2388,7 @@ verbose_printk("btrfs: send_utimes %llu\n", ino); | |||
| 2276 | btrfs_inode_mtime(ii)); | 2388 | btrfs_inode_mtime(ii)); |
| 2277 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, | 2389 | TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, |
| 2278 | btrfs_inode_ctime(ii)); | 2390 | btrfs_inode_ctime(ii)); |
| 2279 | /* TODO otime? */ | 2391 | /* TODO Add otime support when the otime patches get into upstream */ |
| 2280 | 2392 | ||
| 2281 | ret = send_cmd(sctx); | 2393 | ret = send_cmd(sctx); |
| 2282 | 2394 | ||
| @@ -2292,39 +2404,39 @@ out: | |||
| 2292 | * a valid path yet because we did not process the refs yet. So, the inode | 2404 | * a valid path yet because we did not process the refs yet. So, the inode |
| 2293 | * is created as orphan. | 2405 | * is created as orphan. |
| 2294 | */ | 2406 | */ |
| 2295 | static int send_create_inode(struct send_ctx *sctx, struct btrfs_path *path, | 2407 | static int send_create_inode(struct send_ctx *sctx, u64 ino) |
| 2296 | struct btrfs_key *key) | ||
| 2297 | { | 2408 | { |
| 2298 | int ret = 0; | 2409 | int ret = 0; |
| 2299 | struct extent_buffer *eb = path->nodes[0]; | ||
| 2300 | struct btrfs_inode_item *ii; | ||
| 2301 | struct fs_path *p; | 2410 | struct fs_path *p; |
| 2302 | int slot = path->slots[0]; | ||
| 2303 | int cmd; | 2411 | int cmd; |
| 2412 | u64 gen; | ||
| 2304 | u64 mode; | 2413 | u64 mode; |
| 2414 | u64 rdev; | ||
| 2305 | 2415 | ||
| 2306 | verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); | 2416 | verbose_printk("btrfs: send_create_inode %llu\n", ino); |
| 2307 | 2417 | ||
| 2308 | p = fs_path_alloc(sctx); | 2418 | p = fs_path_alloc(sctx); |
| 2309 | if (!p) | 2419 | if (!p) |
| 2310 | return -ENOMEM; | 2420 | return -ENOMEM; |
| 2311 | 2421 | ||
| 2312 | ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); | 2422 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, |
| 2313 | mode = btrfs_inode_mode(eb, ii); | 2423 | NULL, &rdev); |
| 2424 | if (ret < 0) | ||
| 2425 | goto out; | ||
| 2314 | 2426 | ||
| 2315 | if (S_ISREG(mode)) | 2427 | if (S_ISREG(mode)) { |
| 2316 | cmd = BTRFS_SEND_C_MKFILE; | 2428 | cmd = BTRFS_SEND_C_MKFILE; |
| 2317 | else if (S_ISDIR(mode)) | 2429 | } else if (S_ISDIR(mode)) { |
| 2318 | cmd = BTRFS_SEND_C_MKDIR; | 2430 | cmd = BTRFS_SEND_C_MKDIR; |
| 2319 | else if (S_ISLNK(mode)) | 2431 | } else if (S_ISLNK(mode)) { |
| 2320 | cmd = BTRFS_SEND_C_SYMLINK; | 2432 | cmd = BTRFS_SEND_C_SYMLINK; |
| 2321 | else if (S_ISCHR(mode) || S_ISBLK(mode)) | 2433 | } else if (S_ISCHR(mode) || S_ISBLK(mode)) { |
| 2322 | cmd = BTRFS_SEND_C_MKNOD; | 2434 | cmd = BTRFS_SEND_C_MKNOD; |
| 2323 | else if (S_ISFIFO(mode)) | 2435 | } else if (S_ISFIFO(mode)) { |
| 2324 | cmd = BTRFS_SEND_C_MKFIFO; | 2436 | cmd = BTRFS_SEND_C_MKFIFO; |
| 2325 | else if (S_ISSOCK(mode)) | 2437 | } else if (S_ISSOCK(mode)) { |
| 2326 | cmd = BTRFS_SEND_C_MKSOCK; | 2438 | cmd = BTRFS_SEND_C_MKSOCK; |
| 2327 | else { | 2439 | } else { |
| 2328 | printk(KERN_WARNING "btrfs: unexpected inode type %o", | 2440 | printk(KERN_WARNING "btrfs: unexpected inode type %o", |
| 2329 | (int)(mode & S_IFMT)); | 2441 | (int)(mode & S_IFMT)); |
| 2330 | ret = -ENOTSUPP; | 2442 | ret = -ENOTSUPP; |
| @@ -2335,22 +2447,23 @@ verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); | |||
| 2335 | if (ret < 0) | 2447 | if (ret < 0) |
| 2336 | goto out; | 2448 | goto out; |
| 2337 | 2449 | ||
| 2338 | ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | 2450 | ret = gen_unique_name(sctx, ino, gen, p); |
| 2339 | if (ret < 0) | 2451 | if (ret < 0) |
| 2340 | goto out; | 2452 | goto out; |
| 2341 | 2453 | ||
| 2342 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 2454 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
| 2343 | TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, sctx->cur_ino); | 2455 | TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); |
| 2344 | 2456 | ||
| 2345 | if (S_ISLNK(mode)) { | 2457 | if (S_ISLNK(mode)) { |
| 2346 | fs_path_reset(p); | 2458 | fs_path_reset(p); |
| 2347 | ret = read_symlink(sctx, sctx->send_root, sctx->cur_ino, p); | 2459 | ret = read_symlink(sctx, sctx->send_root, ino, p); |
| 2348 | if (ret < 0) | 2460 | if (ret < 0) |
| 2349 | goto out; | 2461 | goto out; |
| 2350 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); | 2462 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); |
| 2351 | } else if (S_ISCHR(mode) || S_ISBLK(mode) || | 2463 | } else if (S_ISCHR(mode) || S_ISBLK(mode) || |
| 2352 | S_ISFIFO(mode) || S_ISSOCK(mode)) { | 2464 | S_ISFIFO(mode) || S_ISSOCK(mode)) { |
| 2353 | TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, btrfs_inode_rdev(eb, ii)); | 2465 | TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); |
| 2466 | TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); | ||
| 2354 | } | 2467 | } |
| 2355 | 2468 | ||
| 2356 | ret = send_cmd(sctx); | 2469 | ret = send_cmd(sctx); |
| @@ -2364,6 +2477,92 @@ out: | |||
| 2364 | return ret; | 2477 | return ret; |
| 2365 | } | 2478 | } |
| 2366 | 2479 | ||
| 2480 | /* | ||
| 2481 | * We need some special handling for inodes that get processed before the parent | ||
| 2482 | * directory got created. See process_recorded_refs for details. | ||
| 2483 | * This function does the check if we already created the dir out of order. | ||
| 2484 | */ | ||
| 2485 | static int did_create_dir(struct send_ctx *sctx, u64 dir) | ||
| 2486 | { | ||
| 2487 | int ret = 0; | ||
| 2488 | struct btrfs_path *path = NULL; | ||
| 2489 | struct btrfs_key key; | ||
| 2490 | struct btrfs_key found_key; | ||
| 2491 | struct btrfs_key di_key; | ||
| 2492 | struct extent_buffer *eb; | ||
| 2493 | struct btrfs_dir_item *di; | ||
| 2494 | int slot; | ||
| 2495 | |||
| 2496 | path = alloc_path_for_send(); | ||
| 2497 | if (!path) { | ||
| 2498 | ret = -ENOMEM; | ||
| 2499 | goto out; | ||
| 2500 | } | ||
| 2501 | |||
| 2502 | key.objectid = dir; | ||
| 2503 | key.type = BTRFS_DIR_INDEX_KEY; | ||
| 2504 | key.offset = 0; | ||
| 2505 | while (1) { | ||
| 2506 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | ||
| 2507 | 1, 0); | ||
| 2508 | if (ret < 0) | ||
| 2509 | goto out; | ||
| 2510 | if (!ret) { | ||
| 2511 | eb = path->nodes[0]; | ||
| 2512 | slot = path->slots[0]; | ||
| 2513 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 2514 | } | ||
| 2515 | if (ret || found_key.objectid != key.objectid || | ||
| 2516 | found_key.type != key.type) { | ||
| 2517 | ret = 0; | ||
| 2518 | goto out; | ||
| 2519 | } | ||
| 2520 | |||
| 2521 | di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); | ||
| 2522 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | ||
| 2523 | |||
| 2524 | if (di_key.objectid < sctx->send_progress) { | ||
| 2525 | ret = 1; | ||
| 2526 | goto out; | ||
| 2527 | } | ||
| 2528 | |||
| 2529 | key.offset = found_key.offset + 1; | ||
| 2530 | btrfs_release_path(path); | ||
| 2531 | } | ||
| 2532 | |||
| 2533 | out: | ||
| 2534 | btrfs_free_path(path); | ||
| 2535 | return ret; | ||
| 2536 | } | ||
| 2537 | |||
| 2538 | /* | ||
| 2539 | * Only creates the inode if it is: | ||
| 2540 | * 1. Not a directory | ||
| 2541 | * 2. Or a directory which was not created already due to out of order | ||
| 2542 | * directories. See did_create_dir and process_recorded_refs for details. | ||
| 2543 | */ | ||
| 2544 | static int send_create_inode_if_needed(struct send_ctx *sctx) | ||
| 2545 | { | ||
| 2546 | int ret; | ||
| 2547 | |||
| 2548 | if (S_ISDIR(sctx->cur_inode_mode)) { | ||
| 2549 | ret = did_create_dir(sctx, sctx->cur_ino); | ||
| 2550 | if (ret < 0) | ||
| 2551 | goto out; | ||
| 2552 | if (ret) { | ||
| 2553 | ret = 0; | ||
| 2554 | goto out; | ||
| 2555 | } | ||
| 2556 | } | ||
| 2557 | |||
| 2558 | ret = send_create_inode(sctx, sctx->cur_ino); | ||
| 2559 | if (ret < 0) | ||
| 2560 | goto out; | ||
| 2561 | |||
| 2562 | out: | ||
| 2563 | return ret; | ||
| 2564 | } | ||
| 2565 | |||
| 2367 | struct recorded_ref { | 2566 | struct recorded_ref { |
| 2368 | struct list_head list; | 2567 | struct list_head list; |
| 2369 | char *dir_path; | 2568 | char *dir_path; |
| @@ -2416,13 +2615,13 @@ static int record_ref(struct list_head *head, u64 dir, | |||
| 2416 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) | 2615 | static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) |
| 2417 | { | 2616 | { |
| 2418 | struct recorded_ref *cur; | 2617 | struct recorded_ref *cur; |
| 2419 | struct recorded_ref *tmp; | ||
| 2420 | 2618 | ||
| 2421 | list_for_each_entry_safe(cur, tmp, head, list) { | 2619 | while (!list_empty(head)) { |
| 2620 | cur = list_entry(head->next, struct recorded_ref, list); | ||
| 2422 | fs_path_free(sctx, cur->full_path); | 2621 | fs_path_free(sctx, cur->full_path); |
| 2622 | list_del(&cur->list); | ||
| 2423 | kfree(cur); | 2623 | kfree(cur); |
| 2424 | } | 2624 | } |
| 2425 | INIT_LIST_HEAD(head); | ||
| 2426 | } | 2625 | } |
| 2427 | 2626 | ||
| 2428 | static void free_recorded_refs(struct send_ctx *sctx) | 2627 | static void free_recorded_refs(struct send_ctx *sctx) |
| @@ -2432,7 +2631,7 @@ static void free_recorded_refs(struct send_ctx *sctx) | |||
| 2432 | } | 2631 | } |
| 2433 | 2632 | ||
| 2434 | /* | 2633 | /* |
| 2435 | * Renames/moves a file/dir to it's orphan name. Used when the first | 2634 | * Renames/moves a file/dir to its orphan name. Used when the first |
| 2436 | * ref of an unprocessed inode gets overwritten and for all non empty | 2635 | * ref of an unprocessed inode gets overwritten and for all non empty |
| 2437 | * directories. | 2636 | * directories. |
| 2438 | */ | 2637 | */ |
| @@ -2472,6 +2671,12 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | |||
| 2472 | struct btrfs_key loc; | 2671 | struct btrfs_key loc; |
| 2473 | struct btrfs_dir_item *di; | 2672 | struct btrfs_dir_item *di; |
| 2474 | 2673 | ||
| 2674 | /* | ||
| 2675 | * Don't try to rmdir the top/root subvolume dir. | ||
| 2676 | */ | ||
| 2677 | if (dir == BTRFS_FIRST_FREE_OBJECTID) | ||
| 2678 | return 0; | ||
| 2679 | |||
| 2475 | path = alloc_path_for_send(); | 2680 | path = alloc_path_for_send(); |
| 2476 | if (!path) | 2681 | if (!path) |
| 2477 | return -ENOMEM; | 2682 | return -ENOMEM; |
| @@ -2513,160 +2718,6 @@ out: | |||
| 2513 | return ret; | 2718 | return ret; |
| 2514 | } | 2719 | } |
| 2515 | 2720 | ||
| 2516 | struct finish_unordered_dir_ctx { | ||
| 2517 | struct send_ctx *sctx; | ||
| 2518 | struct fs_path *cur_path; | ||
| 2519 | struct fs_path *dir_path; | ||
| 2520 | u64 dir_ino; | ||
| 2521 | int need_delete; | ||
| 2522 | int delete_pass; | ||
| 2523 | }; | ||
| 2524 | |||
| 2525 | int __finish_unordered_dir(int num, struct btrfs_key *di_key, | ||
| 2526 | const char *name, int name_len, | ||
| 2527 | const char *data, int data_len, | ||
| 2528 | u8 type, void *ctx) | ||
| 2529 | { | ||
| 2530 | int ret = 0; | ||
| 2531 | struct finish_unordered_dir_ctx *fctx = ctx; | ||
| 2532 | struct send_ctx *sctx = fctx->sctx; | ||
| 2533 | u64 di_gen; | ||
| 2534 | u64 di_mode; | ||
| 2535 | int is_orphan = 0; | ||
| 2536 | |||
| 2537 | if (di_key->objectid >= fctx->dir_ino) | ||
| 2538 | goto out; | ||
| 2539 | |||
| 2540 | fs_path_reset(fctx->cur_path); | ||
| 2541 | |||
| 2542 | ret = get_inode_info(sctx->send_root, di_key->objectid, | ||
| 2543 | NULL, &di_gen, &di_mode, NULL, NULL); | ||
| 2544 | if (ret < 0) | ||
| 2545 | goto out; | ||
| 2546 | |||
| 2547 | ret = is_first_ref(sctx, sctx->send_root, di_key->objectid, | ||
| 2548 | fctx->dir_ino, name, name_len); | ||
| 2549 | if (ret < 0) | ||
| 2550 | goto out; | ||
| 2551 | if (ret) { | ||
| 2552 | is_orphan = 1; | ||
| 2553 | ret = gen_unique_name(sctx, di_key->objectid, di_gen, | ||
| 2554 | fctx->cur_path); | ||
| 2555 | } else { | ||
| 2556 | ret = get_cur_path(sctx, di_key->objectid, di_gen, | ||
| 2557 | fctx->cur_path); | ||
| 2558 | } | ||
| 2559 | if (ret < 0) | ||
| 2560 | goto out; | ||
| 2561 | |||
| 2562 | ret = fs_path_add(fctx->dir_path, name, name_len); | ||
| 2563 | if (ret < 0) | ||
| 2564 | goto out; | ||
| 2565 | |||
| 2566 | if (!fctx->delete_pass) { | ||
| 2567 | if (S_ISDIR(di_mode)) { | ||
| 2568 | ret = send_rename(sctx, fctx->cur_path, | ||
| 2569 | fctx->dir_path); | ||
| 2570 | } else { | ||
| 2571 | ret = send_link(sctx, fctx->dir_path, | ||
| 2572 | fctx->cur_path); | ||
| 2573 | if (is_orphan) | ||
| 2574 | fctx->need_delete = 1; | ||
| 2575 | } | ||
| 2576 | } else if (!S_ISDIR(di_mode)) { | ||
| 2577 | ret = send_unlink(sctx, fctx->cur_path); | ||
| 2578 | } else { | ||
| 2579 | ret = 0; | ||
| 2580 | } | ||
| 2581 | |||
| 2582 | fs_path_remove(fctx->dir_path); | ||
| 2583 | |||
| 2584 | out: | ||
| 2585 | return ret; | ||
| 2586 | } | ||
| 2587 | |||
| 2588 | /* | ||
| 2589 | * Go through all dir items and see if we find refs which could not be created | ||
| 2590 | * in the past because the dir did not exist at that time. | ||
| 2591 | */ | ||
| 2592 | static int finish_outoforder_dir(struct send_ctx *sctx, u64 dir, u64 dir_gen) | ||
| 2593 | { | ||
| 2594 | int ret = 0; | ||
| 2595 | struct btrfs_path *path = NULL; | ||
| 2596 | struct btrfs_key key; | ||
| 2597 | struct btrfs_key found_key; | ||
| 2598 | struct extent_buffer *eb; | ||
| 2599 | struct finish_unordered_dir_ctx fctx; | ||
| 2600 | int slot; | ||
| 2601 | |||
| 2602 | path = alloc_path_for_send(); | ||
| 2603 | if (!path) { | ||
| 2604 | ret = -ENOMEM; | ||
| 2605 | goto out; | ||
| 2606 | } | ||
| 2607 | |||
| 2608 | memset(&fctx, 0, sizeof(fctx)); | ||
| 2609 | fctx.sctx = sctx; | ||
| 2610 | fctx.cur_path = fs_path_alloc(sctx); | ||
| 2611 | fctx.dir_path = fs_path_alloc(sctx); | ||
| 2612 | if (!fctx.cur_path || !fctx.dir_path) { | ||
| 2613 | ret = -ENOMEM; | ||
| 2614 | goto out; | ||
| 2615 | } | ||
| 2616 | fctx.dir_ino = dir; | ||
| 2617 | |||
| 2618 | ret = get_cur_path(sctx, dir, dir_gen, fctx.dir_path); | ||
| 2619 | if (ret < 0) | ||
| 2620 | goto out; | ||
| 2621 | |||
| 2622 | /* | ||
| 2623 | * We do two passes. The first links in the new refs and the second | ||
| 2624 | * deletes orphans if required. Deletion of orphans is not required for | ||
| 2625 | * directory inodes, as we always have only one ref and use rename | ||
| 2626 | * instead of link for those. | ||
| 2627 | */ | ||
| 2628 | |||
| 2629 | again: | ||
| 2630 | key.objectid = dir; | ||
| 2631 | key.type = BTRFS_DIR_ITEM_KEY; | ||
| 2632 | key.offset = 0; | ||
| 2633 | while (1) { | ||
| 2634 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | ||
| 2635 | 1, 0); | ||
| 2636 | if (ret < 0) | ||
| 2637 | goto out; | ||
| 2638 | eb = path->nodes[0]; | ||
| 2639 | slot = path->slots[0]; | ||
| 2640 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 2641 | |||
| 2642 | if (found_key.objectid != key.objectid || | ||
| 2643 | found_key.type != key.type) { | ||
| 2644 | btrfs_release_path(path); | ||
| 2645 | break; | ||
| 2646 | } | ||
| 2647 | |||
| 2648 | ret = iterate_dir_item(sctx, sctx->send_root, path, | ||
| 2649 | &found_key, __finish_unordered_dir, | ||
| 2650 | &fctx); | ||
| 2651 | if (ret < 0) | ||
| 2652 | goto out; | ||
| 2653 | |||
| 2654 | key.offset = found_key.offset + 1; | ||
| 2655 | btrfs_release_path(path); | ||
| 2656 | } | ||
| 2657 | |||
| 2658 | if (!fctx.delete_pass && fctx.need_delete) { | ||
| 2659 | fctx.delete_pass = 1; | ||
| 2660 | goto again; | ||
| 2661 | } | ||
| 2662 | |||
| 2663 | out: | ||
| 2664 | btrfs_free_path(path); | ||
| 2665 | fs_path_free(sctx, fctx.cur_path); | ||
| 2666 | fs_path_free(sctx, fctx.dir_path); | ||
| 2667 | return ret; | ||
| 2668 | } | ||
| 2669 | |||
| 2670 | /* | 2721 | /* |
| 2671 | * This does all the move/link/unlink/rmdir magic. | 2722 | * This does all the move/link/unlink/rmdir magic. |
| 2672 | */ | 2723 | */ |
| @@ -2674,6 +2725,7 @@ static int process_recorded_refs(struct send_ctx *sctx) | |||
| 2674 | { | 2725 | { |
| 2675 | int ret = 0; | 2726 | int ret = 0; |
| 2676 | struct recorded_ref *cur; | 2727 | struct recorded_ref *cur; |
| 2728 | struct recorded_ref *cur2; | ||
| 2677 | struct ulist *check_dirs = NULL; | 2729 | struct ulist *check_dirs = NULL; |
| 2678 | struct ulist_iterator uit; | 2730 | struct ulist_iterator uit; |
| 2679 | struct ulist_node *un; | 2731 | struct ulist_node *un; |
| @@ -2685,6 +2737,12 @@ static int process_recorded_refs(struct send_ctx *sctx) | |||
| 2685 | 2737 | ||
| 2686 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 2738 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
| 2687 | 2739 | ||
| 2740 | /* | ||
| 2741 | * This should never happen as the root dir always has the same ref | ||
| 2742 | * which is always '..' | ||
| 2743 | */ | ||
| 2744 | BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); | ||
| 2745 | |||
| 2688 | valid_path = fs_path_alloc(sctx); | 2746 | valid_path = fs_path_alloc(sctx); |
| 2689 | if (!valid_path) { | 2747 | if (!valid_path) { |
| 2690 | ret = -ENOMEM; | 2748 | ret = -ENOMEM; |
| @@ -2731,6 +2789,46 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2731 | 2789 | ||
| 2732 | list_for_each_entry(cur, &sctx->new_refs, list) { | 2790 | list_for_each_entry(cur, &sctx->new_refs, list) { |
| 2733 | /* | 2791 | /* |
| 2792 | * We may have refs where the parent directory does not exist | ||
| 2793 | * yet. This happens if the parent directories inum is higher | ||
| 2794 | * the the current inum. To handle this case, we create the | ||
| 2795 | * parent directory out of order. But we need to check if this | ||
| 2796 | * did already happen before due to other refs in the same dir. | ||
| 2797 | */ | ||
| 2798 | ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); | ||
| 2799 | if (ret < 0) | ||
| 2800 | goto out; | ||
| 2801 | if (ret == inode_state_will_create) { | ||
| 2802 | ret = 0; | ||
| 2803 | /* | ||
| 2804 | * First check if any of the current inodes refs did | ||
| 2805 | * already create the dir. | ||
| 2806 | */ | ||
| 2807 | list_for_each_entry(cur2, &sctx->new_refs, list) { | ||
| 2808 | if (cur == cur2) | ||
| 2809 | break; | ||
| 2810 | if (cur2->dir == cur->dir) { | ||
| 2811 | ret = 1; | ||
| 2812 | break; | ||
| 2813 | } | ||
| 2814 | } | ||
| 2815 | |||
| 2816 | /* | ||
| 2817 | * If that did not happen, check if a previous inode | ||
| 2818 | * did already create the dir. | ||
| 2819 | */ | ||
| 2820 | if (!ret) | ||
| 2821 | ret = did_create_dir(sctx, cur->dir); | ||
| 2822 | if (ret < 0) | ||
| 2823 | goto out; | ||
| 2824 | if (!ret) { | ||
| 2825 | ret = send_create_inode(sctx, cur->dir); | ||
| 2826 | if (ret < 0) | ||
| 2827 | goto out; | ||
| 2828 | } | ||
| 2829 | } | ||
| 2830 | |||
| 2831 | /* | ||
| 2734 | * Check if this new ref would overwrite the first ref of | 2832 | * Check if this new ref would overwrite the first ref of |
| 2735 | * another unprocessed inode. If yes, orphanize the | 2833 | * another unprocessed inode. If yes, orphanize the |
| 2736 | * overwritten inode. If we find an overwritten ref that is | 2834 | * overwritten inode. If we find an overwritten ref that is |
| @@ -2764,7 +2862,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2764 | * inode, move it and update valid_path. If not, link or move | 2862 | * inode, move it and update valid_path. If not, link or move |
| 2765 | * it depending on the inode mode. | 2863 | * it depending on the inode mode. |
| 2766 | */ | 2864 | */ |
| 2767 | if (is_orphan && !sctx->cur_inode_first_ref_orphan) { | 2865 | if (is_orphan) { |
| 2768 | ret = send_rename(sctx, valid_path, cur->full_path); | 2866 | ret = send_rename(sctx, valid_path, cur->full_path); |
| 2769 | if (ret < 0) | 2867 | if (ret < 0) |
| 2770 | goto out; | 2868 | goto out; |
| @@ -2827,6 +2925,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2827 | if (ret < 0) | 2925 | if (ret < 0) |
| 2828 | goto out; | 2926 | goto out; |
| 2829 | } | 2927 | } |
| 2928 | } else if (S_ISDIR(sctx->cur_inode_mode) && | ||
| 2929 | !list_empty(&sctx->deleted_refs)) { | ||
| 2930 | /* | ||
| 2931 | * We have a moved dir. Add the old parent to check_dirs | ||
| 2932 | */ | ||
| 2933 | cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, | ||
| 2934 | list); | ||
| 2935 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, | ||
| 2936 | GFP_NOFS); | ||
| 2937 | if (ret < 0) | ||
| 2938 | goto out; | ||
| 2830 | } else if (!S_ISDIR(sctx->cur_inode_mode)) { | 2939 | } else if (!S_ISDIR(sctx->cur_inode_mode)) { |
| 2831 | /* | 2940 | /* |
| 2832 | * We have a non dir inode. Go through all deleted refs and | 2941 | * We have a non dir inode. Go through all deleted refs and |
| @@ -2840,35 +2949,9 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2840 | if (ret < 0) | 2949 | if (ret < 0) |
| 2841 | goto out; | 2950 | goto out; |
| 2842 | if (!ret) { | 2951 | if (!ret) { |
| 2843 | /* | 2952 | ret = send_unlink(sctx, cur->full_path); |
| 2844 | * In case the inode was moved to a directory | 2953 | if (ret < 0) |
| 2845 | * that was not created yet (see | 2954 | goto out; |
| 2846 | * __record_new_ref), we can not unlink the ref | ||
| 2847 | * as it will be needed later when the parent | ||
| 2848 | * directory is created, so that we can move in | ||
| 2849 | * the inode to the new dir. | ||
| 2850 | */ | ||
| 2851 | if (!is_orphan && | ||
| 2852 | sctx->cur_inode_first_ref_orphan) { | ||
| 2853 | ret = orphanize_inode(sctx, | ||
| 2854 | sctx->cur_ino, | ||
| 2855 | sctx->cur_inode_gen, | ||
| 2856 | cur->full_path); | ||
| 2857 | if (ret < 0) | ||
| 2858 | goto out; | ||
| 2859 | ret = gen_unique_name(sctx, | ||
| 2860 | sctx->cur_ino, | ||
| 2861 | sctx->cur_inode_gen, | ||
| 2862 | valid_path); | ||
| 2863 | if (ret < 0) | ||
| 2864 | goto out; | ||
| 2865 | is_orphan = 1; | ||
| 2866 | |||
| 2867 | } else { | ||
| 2868 | ret = send_unlink(sctx, cur->full_path); | ||
| 2869 | if (ret < 0) | ||
| 2870 | goto out; | ||
| 2871 | } | ||
| 2872 | } | 2955 | } |
| 2873 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, | 2956 | ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, |
| 2874 | GFP_NOFS); | 2957 | GFP_NOFS); |
| @@ -2880,12 +2963,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2880 | * If the inode is still orphan, unlink the orphan. This may | 2963 | * If the inode is still orphan, unlink the orphan. This may |
| 2881 | * happen when a previous inode did overwrite the first ref | 2964 | * happen when a previous inode did overwrite the first ref |
| 2882 | * of this inode and no new refs were added for the current | 2965 | * of this inode and no new refs were added for the current |
| 2883 | * inode. | 2966 | * inode. Unlinking does not mean that the inode is deleted in |
| 2884 | * We can however not delete the orphan in case the inode relies | 2967 | * all cases. There may still be links to this inode in other |
| 2885 | * in a directory that was not created yet (see | 2968 | * places. |
| 2886 | * __record_new_ref) | ||
| 2887 | */ | 2969 | */ |
| 2888 | if (is_orphan && !sctx->cur_inode_first_ref_orphan) { | 2970 | if (is_orphan) { |
| 2889 | ret = send_unlink(sctx, valid_path); | 2971 | ret = send_unlink(sctx, valid_path); |
| 2890 | if (ret < 0) | 2972 | if (ret < 0) |
| 2891 | goto out; | 2973 | goto out; |
| @@ -2900,6 +2982,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2900 | */ | 2982 | */ |
| 2901 | ULIST_ITER_INIT(&uit); | 2983 | ULIST_ITER_INIT(&uit); |
| 2902 | while ((un = ulist_next(check_dirs, &uit))) { | 2984 | while ((un = ulist_next(check_dirs, &uit))) { |
| 2985 | /* | ||
| 2986 | * In case we had refs into dirs that were not processed yet, | ||
| 2987 | * we don't need to do the utime and rmdir logic for these dirs. | ||
| 2988 | * The dir will be processed later. | ||
| 2989 | */ | ||
| 2903 | if (un->val > sctx->cur_ino) | 2990 | if (un->val > sctx->cur_ino) |
| 2904 | continue; | 2991 | continue; |
| 2905 | 2992 | ||
| @@ -2929,25 +3016,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
| 2929 | } | 3016 | } |
| 2930 | } | 3017 | } |
| 2931 | 3018 | ||
| 2932 | /* | ||
| 2933 | * Current inode is now at it's new position, so we must increase | ||
| 2934 | * send_progress | ||
| 2935 | */ | ||
| 2936 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 2937 | |||
| 2938 | /* | ||
| 2939 | * We may have a directory here that has pending refs which could not | ||
| 2940 | * be created before (because the dir did not exist before, see | ||
| 2941 | * __record_new_ref). finish_outoforder_dir will link/move the pending | ||
| 2942 | * refs. | ||
| 2943 | */ | ||
| 2944 | if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_new) { | ||
| 2945 | ret = finish_outoforder_dir(sctx, sctx->cur_ino, | ||
| 2946 | sctx->cur_inode_gen); | ||
| 2947 | if (ret < 0) | ||
| 2948 | goto out; | ||
| 2949 | } | ||
| 2950 | |||
| 2951 | ret = 0; | 3019 | ret = 0; |
| 2952 | 3020 | ||
| 2953 | out: | 3021 | out: |
| @@ -2971,34 +3039,9 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
| 2971 | return -ENOMEM; | 3039 | return -ENOMEM; |
| 2972 | 3040 | ||
| 2973 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | 3041 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, |
| 2974 | NULL); | 3042 | NULL, NULL); |
| 2975 | if (ret < 0) | ||
| 2976 | goto out; | ||
| 2977 | |||
| 2978 | /* | ||
| 2979 | * The parent may be non-existent at this point in time. This happens | ||
| 2980 | * if the ino of the parent dir is higher then the current ino. In this | ||
| 2981 | * case, we can not process this ref until the parent dir is finally | ||
| 2982 | * created. If we reach the parent dir later, process_recorded_refs | ||
| 2983 | * will go through all dir items and process the refs that could not be | ||
| 2984 | * processed before. In case this is the first ref, we set | ||
| 2985 | * cur_inode_first_ref_orphan to 1 to inform process_recorded_refs to | ||
| 2986 | * keep an orphan of the inode so that it later can be used for | ||
| 2987 | * link/move | ||
| 2988 | */ | ||
| 2989 | ret = is_inode_existent(sctx, dir, gen); | ||
| 2990 | if (ret < 0) | 3043 | if (ret < 0) |
| 2991 | goto out; | 3044 | goto out; |
| 2992 | if (!ret) { | ||
| 2993 | ret = is_first_ref(sctx, sctx->send_root, sctx->cur_ino, dir, | ||
| 2994 | name->start, fs_path_len(name)); | ||
| 2995 | if (ret < 0) | ||
| 2996 | goto out; | ||
| 2997 | if (ret) | ||
| 2998 | sctx->cur_inode_first_ref_orphan = 1; | ||
| 2999 | ret = 0; | ||
| 3000 | goto out; | ||
| 3001 | } | ||
| 3002 | 3045 | ||
| 3003 | ret = get_cur_path(sctx, dir, gen, p); | 3046 | ret = get_cur_path(sctx, dir, gen, p); |
| 3004 | if (ret < 0) | 3047 | if (ret < 0) |
| @@ -3029,7 +3072,7 @@ static int __record_deleted_ref(int num, u64 dir, int index, | |||
| 3029 | return -ENOMEM; | 3072 | return -ENOMEM; |
| 3030 | 3073 | ||
| 3031 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | 3074 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, |
| 3032 | NULL); | 3075 | NULL, NULL); |
| 3033 | if (ret < 0) | 3076 | if (ret < 0) |
| 3034 | goto out; | 3077 | goto out; |
| 3035 | 3078 | ||
| @@ -3206,33 +3249,29 @@ static int process_all_refs(struct send_ctx *sctx, | |||
| 3206 | key.offset = 0; | 3249 | key.offset = 0; |
| 3207 | while (1) { | 3250 | while (1) { |
| 3208 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 3251 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); |
| 3209 | if (ret < 0) { | 3252 | if (ret < 0) |
| 3210 | btrfs_release_path(path); | ||
| 3211 | goto out; | 3253 | goto out; |
| 3212 | } | 3254 | if (ret) |
| 3213 | if (ret) { | ||
| 3214 | btrfs_release_path(path); | ||
| 3215 | break; | 3255 | break; |
| 3216 | } | ||
| 3217 | 3256 | ||
| 3218 | eb = path->nodes[0]; | 3257 | eb = path->nodes[0]; |
| 3219 | slot = path->slots[0]; | 3258 | slot = path->slots[0]; |
| 3220 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 3259 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
| 3221 | 3260 | ||
| 3222 | if (found_key.objectid != key.objectid || | 3261 | if (found_key.objectid != key.objectid || |
| 3223 | found_key.type != key.type) { | 3262 | (found_key.type != BTRFS_INODE_REF_KEY && |
| 3224 | btrfs_release_path(path); | 3263 | found_key.type != BTRFS_INODE_EXTREF_KEY)) |
| 3225 | break; | 3264 | break; |
| 3226 | } | ||
| 3227 | 3265 | ||
| 3228 | ret = iterate_inode_ref(sctx, sctx->parent_root, path, | 3266 | ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb, |
| 3229 | &found_key, 0, cb, sctx); | 3267 | sctx); |
| 3230 | btrfs_release_path(path); | 3268 | btrfs_release_path(path); |
| 3231 | if (ret < 0) | 3269 | if (ret < 0) |
| 3232 | goto out; | 3270 | goto out; |
| 3233 | 3271 | ||
| 3234 | key.offset = found_key.offset + 1; | 3272 | key.offset = found_key.offset + 1; |
| 3235 | } | 3273 | } |
| 3274 | btrfs_release_path(path); | ||
| 3236 | 3275 | ||
| 3237 | ret = process_recorded_refs(sctx); | 3276 | ret = process_recorded_refs(sctx); |
| 3238 | 3277 | ||
| @@ -3555,7 +3594,7 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) | |||
| 3555 | int ret = 0; | 3594 | int ret = 0; |
| 3556 | struct fs_path *p; | 3595 | struct fs_path *p; |
| 3557 | loff_t pos = offset; | 3596 | loff_t pos = offset; |
| 3558 | int readed = 0; | 3597 | int num_read = 0; |
| 3559 | mm_segment_t old_fs; | 3598 | mm_segment_t old_fs; |
| 3560 | 3599 | ||
| 3561 | p = fs_path_alloc(sctx); | 3600 | p = fs_path_alloc(sctx); |
| @@ -3580,8 +3619,8 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | |||
| 3580 | ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); | 3619 | ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); |
| 3581 | if (ret < 0) | 3620 | if (ret < 0) |
| 3582 | goto out; | 3621 | goto out; |
| 3583 | readed = ret; | 3622 | num_read = ret; |
| 3584 | if (!readed) | 3623 | if (!num_read) |
| 3585 | goto out; | 3624 | goto out; |
| 3586 | 3625 | ||
| 3587 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); | 3626 | ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); |
| @@ -3594,7 +3633,7 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); | |||
| 3594 | 3633 | ||
| 3595 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 3634 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
| 3596 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); | 3635 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); |
| 3597 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed); | 3636 | TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); |
| 3598 | 3637 | ||
| 3599 | ret = send_cmd(sctx); | 3638 | ret = send_cmd(sctx); |
| 3600 | 3639 | ||
| @@ -3604,7 +3643,7 @@ out: | |||
| 3604 | set_fs(old_fs); | 3643 | set_fs(old_fs); |
| 3605 | if (ret < 0) | 3644 | if (ret < 0) |
| 3606 | return ret; | 3645 | return ret; |
| 3607 | return readed; | 3646 | return num_read; |
| 3608 | } | 3647 | } |
| 3609 | 3648 | ||
| 3610 | /* | 3649 | /* |
| @@ -3615,7 +3654,6 @@ static int send_clone(struct send_ctx *sctx, | |||
| 3615 | struct clone_root *clone_root) | 3654 | struct clone_root *clone_root) |
| 3616 | { | 3655 | { |
| 3617 | int ret = 0; | 3656 | int ret = 0; |
| 3618 | struct btrfs_root *clone_root2 = clone_root->root; | ||
| 3619 | struct fs_path *p; | 3657 | struct fs_path *p; |
| 3620 | u64 gen; | 3658 | u64 gen; |
| 3621 | 3659 | ||
| @@ -3640,22 +3678,23 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " | |||
| 3640 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); | 3678 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); |
| 3641 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | 3679 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); |
| 3642 | 3680 | ||
| 3643 | if (clone_root2 == sctx->send_root) { | 3681 | if (clone_root->root == sctx->send_root) { |
| 3644 | ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, | 3682 | ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, |
| 3645 | &gen, NULL, NULL, NULL); | 3683 | &gen, NULL, NULL, NULL, NULL); |
| 3646 | if (ret < 0) | 3684 | if (ret < 0) |
| 3647 | goto out; | 3685 | goto out; |
| 3648 | ret = get_cur_path(sctx, clone_root->ino, gen, p); | 3686 | ret = get_cur_path(sctx, clone_root->ino, gen, p); |
| 3649 | } else { | 3687 | } else { |
| 3650 | ret = get_inode_path(sctx, clone_root2, clone_root->ino, p); | 3688 | ret = get_inode_path(sctx, clone_root->root, |
| 3689 | clone_root->ino, p); | ||
| 3651 | } | 3690 | } |
| 3652 | if (ret < 0) | 3691 | if (ret < 0) |
| 3653 | goto out; | 3692 | goto out; |
| 3654 | 3693 | ||
| 3655 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, | 3694 | TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, |
| 3656 | clone_root2->root_item.uuid); | 3695 | clone_root->root->root_item.uuid); |
| 3657 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, | 3696 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, |
| 3658 | clone_root2->root_item.ctransid); | 3697 | clone_root->root->root_item.ctransid); |
| 3659 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); | 3698 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); |
| 3660 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, | 3699 | TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, |
| 3661 | clone_root->offset); | 3700 | clone_root->offset); |
| @@ -3684,10 +3723,17 @@ static int send_write_or_clone(struct send_ctx *sctx, | |||
| 3684 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3723 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 3685 | struct btrfs_file_extent_item); | 3724 | struct btrfs_file_extent_item); |
| 3686 | type = btrfs_file_extent_type(path->nodes[0], ei); | 3725 | type = btrfs_file_extent_type(path->nodes[0], ei); |
| 3687 | if (type == BTRFS_FILE_EXTENT_INLINE) | 3726 | if (type == BTRFS_FILE_EXTENT_INLINE) { |
| 3688 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); | 3727 | len = btrfs_file_extent_inline_len(path->nodes[0], ei); |
| 3689 | else | 3728 | /* |
| 3729 | * it is possible the inline item won't cover the whole page, | ||
| 3730 | * but there may be items after this page. Make | ||
| 3731 | * sure to send the whole thing | ||
| 3732 | */ | ||
| 3733 | len = PAGE_CACHE_ALIGN(len); | ||
| 3734 | } else { | ||
| 3690 | len = btrfs_file_extent_num_bytes(path->nodes[0], ei); | 3735 | len = btrfs_file_extent_num_bytes(path->nodes[0], ei); |
| 3736 | } | ||
| 3691 | 3737 | ||
| 3692 | if (offset + len > sctx->cur_inode_size) | 3738 | if (offset + len > sctx->cur_inode_size) |
| 3693 | len = sctx->cur_inode_size - offset; | 3739 | len = sctx->cur_inode_size - offset; |
| @@ -3735,6 +3781,8 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3735 | u64 left_offset_fixed; | 3781 | u64 left_offset_fixed; |
| 3736 | u64 left_len; | 3782 | u64 left_len; |
| 3737 | u64 right_len; | 3783 | u64 right_len; |
| 3784 | u64 left_gen; | ||
| 3785 | u64 right_gen; | ||
| 3738 | u8 left_type; | 3786 | u8 left_type; |
| 3739 | u8 right_type; | 3787 | u8 right_type; |
| 3740 | 3788 | ||
| @@ -3744,17 +3792,17 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3744 | 3792 | ||
| 3745 | eb = left_path->nodes[0]; | 3793 | eb = left_path->nodes[0]; |
| 3746 | slot = left_path->slots[0]; | 3794 | slot = left_path->slots[0]; |
| 3747 | |||
| 3748 | ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | 3795 | ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); |
| 3749 | left_type = btrfs_file_extent_type(eb, ei); | 3796 | left_type = btrfs_file_extent_type(eb, ei); |
| 3750 | left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | ||
| 3751 | left_len = btrfs_file_extent_num_bytes(eb, ei); | ||
| 3752 | left_offset = btrfs_file_extent_offset(eb, ei); | ||
| 3753 | 3797 | ||
| 3754 | if (left_type != BTRFS_FILE_EXTENT_REG) { | 3798 | if (left_type != BTRFS_FILE_EXTENT_REG) { |
| 3755 | ret = 0; | 3799 | ret = 0; |
| 3756 | goto out; | 3800 | goto out; |
| 3757 | } | 3801 | } |
| 3802 | left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | ||
| 3803 | left_len = btrfs_file_extent_num_bytes(eb, ei); | ||
| 3804 | left_offset = btrfs_file_extent_offset(eb, ei); | ||
| 3805 | left_gen = btrfs_file_extent_generation(eb, ei); | ||
| 3758 | 3806 | ||
| 3759 | /* | 3807 | /* |
| 3760 | * Following comments will refer to these graphics. L is the left | 3808 | * Following comments will refer to these graphics. L is the left |
| @@ -3810,6 +3858,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3810 | right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); | 3858 | right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); |
| 3811 | right_len = btrfs_file_extent_num_bytes(eb, ei); | 3859 | right_len = btrfs_file_extent_num_bytes(eb, ei); |
| 3812 | right_offset = btrfs_file_extent_offset(eb, ei); | 3860 | right_offset = btrfs_file_extent_offset(eb, ei); |
| 3861 | right_gen = btrfs_file_extent_generation(eb, ei); | ||
| 3813 | 3862 | ||
| 3814 | if (right_type != BTRFS_FILE_EXTENT_REG) { | 3863 | if (right_type != BTRFS_FILE_EXTENT_REG) { |
| 3815 | ret = 0; | 3864 | ret = 0; |
| @@ -3820,7 +3869,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3820 | * Are we at extent 8? If yes, we know the extent is changed. | 3869 | * Are we at extent 8? If yes, we know the extent is changed. |
| 3821 | * This may only happen on the first iteration. | 3870 | * This may only happen on the first iteration. |
| 3822 | */ | 3871 | */ |
| 3823 | if (found_key.offset + right_len < ekey->offset) { | 3872 | if (found_key.offset + right_len <= ekey->offset) { |
| 3824 | ret = 0; | 3873 | ret = 0; |
| 3825 | goto out; | 3874 | goto out; |
| 3826 | } | 3875 | } |
| @@ -3837,8 +3886,9 @@ static int is_extent_unchanged(struct send_ctx *sctx, | |||
| 3837 | /* | 3886 | /* |
| 3838 | * Check if we have the same extent. | 3887 | * Check if we have the same extent. |
| 3839 | */ | 3888 | */ |
| 3840 | if (left_disknr + left_offset_fixed != | 3889 | if (left_disknr != right_disknr || |
| 3841 | right_disknr + right_offset) { | 3890 | left_offset_fixed != right_offset || |
| 3891 | left_gen != right_gen) { | ||
| 3842 | ret = 0; | 3892 | ret = 0; |
| 3843 | goto out; | 3893 | goto out; |
| 3844 | } | 3894 | } |
| @@ -3971,12 +4021,21 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) | |||
| 3971 | if (sctx->cur_ino == 0) | 4021 | if (sctx->cur_ino == 0) |
| 3972 | goto out; | 4022 | goto out; |
| 3973 | if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && | 4023 | if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && |
| 3974 | sctx->cmp_key->type <= BTRFS_INODE_REF_KEY) | 4024 | sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY) |
| 3975 | goto out; | 4025 | goto out; |
| 3976 | if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) | 4026 | if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) |
| 3977 | goto out; | 4027 | goto out; |
| 3978 | 4028 | ||
| 3979 | ret = process_recorded_refs(sctx); | 4029 | ret = process_recorded_refs(sctx); |
| 4030 | if (ret < 0) | ||
| 4031 | goto out; | ||
| 4032 | |||
| 4033 | /* | ||
| 4034 | * We have processed the refs and thus need to advance send_progress. | ||
| 4035 | * Now, calls to get_cur_xxx will take the updated refs of the current | ||
| 4036 | * inode into account. | ||
| 4037 | */ | ||
| 4038 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 3980 | 4039 | ||
| 3981 | out: | 4040 | out: |
| 3982 | return ret; | 4041 | return ret; |
| @@ -4004,26 +4063,25 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
| 4004 | goto out; | 4063 | goto out; |
| 4005 | 4064 | ||
| 4006 | ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, | 4065 | ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, |
| 4007 | &left_mode, &left_uid, &left_gid); | 4066 | &left_mode, &left_uid, &left_gid, NULL); |
| 4008 | if (ret < 0) | 4067 | if (ret < 0) |
| 4009 | goto out; | 4068 | goto out; |
| 4010 | 4069 | ||
| 4011 | if (!S_ISLNK(sctx->cur_inode_mode)) { | 4070 | if (!sctx->parent_root || sctx->cur_inode_new) { |
| 4012 | if (!sctx->parent_root || sctx->cur_inode_new) { | 4071 | need_chown = 1; |
| 4072 | if (!S_ISLNK(sctx->cur_inode_mode)) | ||
| 4013 | need_chmod = 1; | 4073 | need_chmod = 1; |
| 4014 | need_chown = 1; | 4074 | } else { |
| 4015 | } else { | 4075 | ret = get_inode_info(sctx->parent_root, sctx->cur_ino, |
| 4016 | ret = get_inode_info(sctx->parent_root, sctx->cur_ino, | 4076 | NULL, NULL, &right_mode, &right_uid, |
| 4017 | NULL, NULL, &right_mode, &right_uid, | 4077 | &right_gid, NULL); |
| 4018 | &right_gid); | 4078 | if (ret < 0) |
| 4019 | if (ret < 0) | 4079 | goto out; |
| 4020 | goto out; | ||
| 4021 | 4080 | ||
| 4022 | if (left_uid != right_uid || left_gid != right_gid) | 4081 | if (left_uid != right_uid || left_gid != right_gid) |
| 4023 | need_chown = 1; | 4082 | need_chown = 1; |
| 4024 | if (left_mode != right_mode) | 4083 | if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode) |
| 4025 | need_chmod = 1; | 4084 | need_chmod = 1; |
| 4026 | } | ||
| 4027 | } | 4085 | } |
| 4028 | 4086 | ||
| 4029 | if (S_ISREG(sctx->cur_inode_mode)) { | 4087 | if (S_ISREG(sctx->cur_inode_mode)) { |
| @@ -4074,7 +4132,12 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4074 | 4132 | ||
| 4075 | sctx->cur_ino = key->objectid; | 4133 | sctx->cur_ino = key->objectid; |
| 4076 | sctx->cur_inode_new_gen = 0; | 4134 | sctx->cur_inode_new_gen = 0; |
| 4077 | sctx->cur_inode_first_ref_orphan = 0; | 4135 | |
| 4136 | /* | ||
| 4137 | * Set send_progress to current inode. This will tell all get_cur_xxx | ||
| 4138 | * functions that the current inode's refs are not updated yet. Later, | ||
| 4139 | * when process_recorded_refs is finished, it is set to cur_ino + 1. | ||
| 4140 | */ | ||
| 4078 | sctx->send_progress = sctx->cur_ino; | 4141 | sctx->send_progress = sctx->cur_ino; |
| 4079 | 4142 | ||
| 4080 | if (result == BTRFS_COMPARE_TREE_NEW || | 4143 | if (result == BTRFS_COMPARE_TREE_NEW || |
| @@ -4098,7 +4161,14 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4098 | 4161 | ||
| 4099 | right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], | 4162 | right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], |
| 4100 | right_ii); | 4163 | right_ii); |
| 4101 | if (left_gen != right_gen) | 4164 | |
| 4165 | /* | ||
| 4166 | * The cur_ino = root dir case is special here. We can't treat | ||
| 4167 | * the inode as deleted+reused because it would generate a | ||
| 4168 | * stream that tries to delete/mkdir the root dir. | ||
| 4169 | */ | ||
| 4170 | if (left_gen != right_gen && | ||
| 4171 | sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
| 4102 | sctx->cur_inode_new_gen = 1; | 4172 | sctx->cur_inode_new_gen = 1; |
| 4103 | } | 4173 | } |
| 4104 | 4174 | ||
| @@ -4111,8 +4181,7 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4111 | sctx->cur_inode_mode = btrfs_inode_mode( | 4181 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4112 | sctx->left_path->nodes[0], left_ii); | 4182 | sctx->left_path->nodes[0], left_ii); |
| 4113 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | 4183 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) |
| 4114 | ret = send_create_inode(sctx, sctx->left_path, | 4184 | ret = send_create_inode_if_needed(sctx); |
| 4115 | sctx->cmp_key); | ||
| 4116 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | 4185 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { |
| 4117 | sctx->cur_inode_gen = right_gen; | 4186 | sctx->cur_inode_gen = right_gen; |
| 4118 | sctx->cur_inode_new = 0; | 4187 | sctx->cur_inode_new = 0; |
| @@ -4122,7 +4191,17 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4122 | sctx->cur_inode_mode = btrfs_inode_mode( | 4191 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4123 | sctx->right_path->nodes[0], right_ii); | 4192 | sctx->right_path->nodes[0], right_ii); |
| 4124 | } else if (result == BTRFS_COMPARE_TREE_CHANGED) { | 4193 | } else if (result == BTRFS_COMPARE_TREE_CHANGED) { |
| 4194 | /* | ||
| 4195 | * We need to do some special handling in case the inode was | ||
| 4196 | * reported as changed with a changed generation number. This | ||
| 4197 | * means that the original inode was deleted and new inode | ||
| 4198 | * reused the same inum. So we have to treat the old inode as | ||
| 4199 | * deleted and the new one as new. | ||
| 4200 | */ | ||
| 4125 | if (sctx->cur_inode_new_gen) { | 4201 | if (sctx->cur_inode_new_gen) { |
| 4202 | /* | ||
| 4203 | * First, process the inode as if it was deleted. | ||
| 4204 | */ | ||
| 4126 | sctx->cur_inode_gen = right_gen; | 4205 | sctx->cur_inode_gen = right_gen; |
| 4127 | sctx->cur_inode_new = 0; | 4206 | sctx->cur_inode_new = 0; |
| 4128 | sctx->cur_inode_deleted = 1; | 4207 | sctx->cur_inode_deleted = 1; |
| @@ -4135,6 +4214,9 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4135 | if (ret < 0) | 4214 | if (ret < 0) |
| 4136 | goto out; | 4215 | goto out; |
| 4137 | 4216 | ||
| 4217 | /* | ||
| 4218 | * Now process the inode as if it was new. | ||
| 4219 | */ | ||
| 4138 | sctx->cur_inode_gen = left_gen; | 4220 | sctx->cur_inode_gen = left_gen; |
| 4139 | sctx->cur_inode_new = 1; | 4221 | sctx->cur_inode_new = 1; |
| 4140 | sctx->cur_inode_deleted = 0; | 4222 | sctx->cur_inode_deleted = 0; |
| @@ -4142,14 +4224,23 @@ static int changed_inode(struct send_ctx *sctx, | |||
| 4142 | sctx->left_path->nodes[0], left_ii); | 4224 | sctx->left_path->nodes[0], left_ii); |
| 4143 | sctx->cur_inode_mode = btrfs_inode_mode( | 4225 | sctx->cur_inode_mode = btrfs_inode_mode( |
| 4144 | sctx->left_path->nodes[0], left_ii); | 4226 | sctx->left_path->nodes[0], left_ii); |
| 4145 | ret = send_create_inode(sctx, sctx->left_path, | 4227 | ret = send_create_inode_if_needed(sctx); |
| 4146 | sctx->cmp_key); | ||
| 4147 | if (ret < 0) | 4228 | if (ret < 0) |
| 4148 | goto out; | 4229 | goto out; |
| 4149 | 4230 | ||
| 4150 | ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); | 4231 | ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); |
| 4151 | if (ret < 0) | 4232 | if (ret < 0) |
| 4152 | goto out; | 4233 | goto out; |
| 4234 | /* | ||
| 4235 | * Advance send_progress now as we did not get into | ||
| 4236 | * process_recorded_refs_if_needed in the new_gen case. | ||
| 4237 | */ | ||
| 4238 | sctx->send_progress = sctx->cur_ino + 1; | ||
| 4239 | |||
| 4240 | /* | ||
| 4241 | * Now process all extents and xattrs of the inode as if | ||
| 4242 | * they were all new. | ||
| 4243 | */ | ||
| 4153 | ret = process_all_extents(sctx); | 4244 | ret = process_all_extents(sctx); |
| 4154 | if (ret < 0) | 4245 | if (ret < 0) |
| 4155 | goto out; | 4246 | goto out; |
| @@ -4172,6 +4263,16 @@ out: | |||
| 4172 | return ret; | 4263 | return ret; |
| 4173 | } | 4264 | } |
| 4174 | 4265 | ||
| 4266 | /* | ||
| 4267 | * We have to process new refs before deleted refs, but compare_trees gives us | ||
| 4268 | * the new and deleted refs mixed. To fix this, we record the new/deleted refs | ||
| 4269 | * first and later process them in process_recorded_refs. | ||
| 4270 | * For the cur_inode_new_gen case, we skip recording completely because | ||
| 4271 | * changed_inode did already initiate processing of refs. The reason for this is | ||
| 4272 | * that in this case, compare_tree actually compares the refs of 2 different | ||
| 4273 | * inodes. To fix this, process_all_refs is used in changed_inode to handle all | ||
| 4274 | * refs of the right tree as deleted and all refs of the left tree as new. | ||
| 4275 | */ | ||
| 4175 | static int changed_ref(struct send_ctx *sctx, | 4276 | static int changed_ref(struct send_ctx *sctx, |
| 4176 | enum btrfs_compare_tree_result result) | 4277 | enum btrfs_compare_tree_result result) |
| 4177 | { | 4278 | { |
| @@ -4192,6 +4293,11 @@ static int changed_ref(struct send_ctx *sctx, | |||
| 4192 | return ret; | 4293 | return ret; |
| 4193 | } | 4294 | } |
| 4194 | 4295 | ||
| 4296 | /* | ||
| 4297 | * Process new/deleted/changed xattrs. We skip processing in the | ||
| 4298 | * cur_inode_new_gen case because changed_inode did already initiate processing | ||
| 4299 | * of xattrs. The reason is the same as in changed_ref | ||
| 4300 | */ | ||
| 4195 | static int changed_xattr(struct send_ctx *sctx, | 4301 | static int changed_xattr(struct send_ctx *sctx, |
| 4196 | enum btrfs_compare_tree_result result) | 4302 | enum btrfs_compare_tree_result result) |
| 4197 | { | 4303 | { |
| @@ -4211,6 +4317,11 @@ static int changed_xattr(struct send_ctx *sctx, | |||
| 4211 | return ret; | 4317 | return ret; |
| 4212 | } | 4318 | } |
| 4213 | 4319 | ||
| 4320 | /* | ||
| 4321 | * Process new/deleted/changed extents. We skip processing in the | ||
| 4322 | * cur_inode_new_gen case because changed_inode did already initiate processing | ||
| 4323 | * of extents. The reason is the same as in changed_ref | ||
| 4324 | */ | ||
| 4214 | static int changed_extent(struct send_ctx *sctx, | 4325 | static int changed_extent(struct send_ctx *sctx, |
| 4215 | enum btrfs_compare_tree_result result) | 4326 | enum btrfs_compare_tree_result result) |
| 4216 | { | 4327 | { |
| @@ -4227,7 +4338,10 @@ static int changed_extent(struct send_ctx *sctx, | |||
| 4227 | return ret; | 4338 | return ret; |
| 4228 | } | 4339 | } |
| 4229 | 4340 | ||
| 4230 | 4341 | /* | |
| 4342 | * Updates compare related fields in sctx and simply forwards to the actual | ||
| 4343 | * changed_xxx functions. | ||
| 4344 | */ | ||
| 4231 | static int changed_cb(struct btrfs_root *left_root, | 4345 | static int changed_cb(struct btrfs_root *left_root, |
| 4232 | struct btrfs_root *right_root, | 4346 | struct btrfs_root *right_root, |
| 4233 | struct btrfs_path *left_path, | 4347 | struct btrfs_path *left_path, |
| @@ -4247,9 +4361,15 @@ static int changed_cb(struct btrfs_root *left_root, | |||
| 4247 | if (ret < 0) | 4361 | if (ret < 0) |
| 4248 | goto out; | 4362 | goto out; |
| 4249 | 4363 | ||
| 4364 | /* Ignore non-FS objects */ | ||
| 4365 | if (key->objectid == BTRFS_FREE_INO_OBJECTID || | ||
| 4366 | key->objectid == BTRFS_FREE_SPACE_OBJECTID) | ||
| 4367 | goto out; | ||
| 4368 | |||
| 4250 | if (key->type == BTRFS_INODE_ITEM_KEY) | 4369 | if (key->type == BTRFS_INODE_ITEM_KEY) |
| 4251 | ret = changed_inode(sctx, result); | 4370 | ret = changed_inode(sctx, result); |
| 4252 | else if (key->type == BTRFS_INODE_REF_KEY) | 4371 | else if (key->type == BTRFS_INODE_REF_KEY || |
| 4372 | key->type == BTRFS_INODE_EXTREF_KEY) | ||
| 4253 | ret = changed_ref(sctx, result); | 4373 | ret = changed_ref(sctx, result); |
| 4254 | else if (key->type == BTRFS_XATTR_ITEM_KEY) | 4374 | else if (key->type == BTRFS_XATTR_ITEM_KEY) |
| 4255 | ret = changed_xattr(sctx, result); | 4375 | ret = changed_xattr(sctx, result); |
| @@ -4277,9 +4397,9 @@ static int full_send_tree(struct send_ctx *sctx) | |||
| 4277 | if (!path) | 4397 | if (!path) |
| 4278 | return -ENOMEM; | 4398 | return -ENOMEM; |
| 4279 | 4399 | ||
| 4280 | spin_lock(&send_root->root_times_lock); | 4400 | spin_lock(&send_root->root_item_lock); |
| 4281 | start_ctransid = btrfs_root_ctransid(&send_root->root_item); | 4401 | start_ctransid = btrfs_root_ctransid(&send_root->root_item); |
| 4282 | spin_unlock(&send_root->root_times_lock); | 4402 | spin_unlock(&send_root->root_item_lock); |
| 4283 | 4403 | ||
| 4284 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | 4404 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; |
| 4285 | key.type = BTRFS_INODE_ITEM_KEY; | 4405 | key.type = BTRFS_INODE_ITEM_KEY; |
| @@ -4299,11 +4419,12 @@ join_trans: | |||
| 4299 | } | 4419 | } |
| 4300 | 4420 | ||
| 4301 | /* | 4421 | /* |
| 4302 | * Make sure the tree has not changed | 4422 | * Make sure the tree has not changed after re-joining. We detect this |
| 4423 | * by comparing start_ctransid and ctransid. They should always match. | ||
| 4303 | */ | 4424 | */ |
| 4304 | spin_lock(&send_root->root_times_lock); | 4425 | spin_lock(&send_root->root_item_lock); |
| 4305 | ctransid = btrfs_root_ctransid(&send_root->root_item); | 4426 | ctransid = btrfs_root_ctransid(&send_root->root_item); |
| 4306 | spin_unlock(&send_root->root_times_lock); | 4427 | spin_unlock(&send_root->root_item_lock); |
| 4307 | 4428 | ||
| 4308 | if (ctransid != start_ctransid) { | 4429 | if (ctransid != start_ctransid) { |
| 4309 | WARN(1, KERN_WARNING "btrfs: the root that you're trying to " | 4430 | WARN(1, KERN_WARNING "btrfs: the root that you're trying to " |
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 9934e948e57f..1bf4f32fd4ef 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h | |||
| @@ -130,4 +130,5 @@ enum { | |||
| 130 | 130 | ||
| 131 | #ifdef __KERNEL__ | 131 | #ifdef __KERNEL__ |
| 132 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); | 132 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); |
| 133 | int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off); | ||
| 133 | #endif | 134 | #endif |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 83d6f9f9c220..99545df1b86c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -55,6 +55,7 @@ | |||
| 55 | #include "export.h" | 55 | #include "export.h" |
| 56 | #include "compression.h" | 56 | #include "compression.h" |
| 57 | #include "rcu-string.h" | 57 | #include "rcu-string.h" |
| 58 | #include "dev-replace.h" | ||
| 58 | 59 | ||
| 59 | #define CREATE_TRACE_POINTS | 60 | #define CREATE_TRACE_POINTS |
| 60 | #include <trace/events/btrfs.h> | 61 | #include <trace/events/btrfs.h> |
| @@ -116,7 +117,16 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | |||
| 116 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 117 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
| 117 | sb->s_flags |= MS_RDONLY; | 118 | sb->s_flags |= MS_RDONLY; |
| 118 | printk(KERN_INFO "btrfs is forced readonly\n"); | 119 | printk(KERN_INFO "btrfs is forced readonly\n"); |
| 119 | __btrfs_scrub_cancel(fs_info); | 120 | /* |
| 121 | * Note that a running device replace operation is not | ||
| 122 | * canceled here although there is no way to update | ||
| 123 | * the progress. It would add the risk of a deadlock, | ||
| 124 | * therefore the canceling is ommited. The only penalty | ||
| 125 | * is that some I/O remains active until the procedure | ||
| 126 | * completes. The next time when the filesystem is | ||
| 127 | * mounted writeable again, the device replace | ||
| 128 | * operation continues. | ||
| 129 | */ | ||
| 120 | // WARN_ON(1); | 130 | // WARN_ON(1); |
| 121 | } | 131 | } |
| 122 | } | 132 | } |
| @@ -243,12 +253,18 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
| 243 | struct btrfs_root *root, const char *function, | 253 | struct btrfs_root *root, const char *function, |
| 244 | unsigned int line, int errno) | 254 | unsigned int line, int errno) |
| 245 | { | 255 | { |
| 246 | WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted"); | 256 | WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted\n"); |
| 247 | trans->aborted = errno; | 257 | trans->aborted = errno; |
| 248 | /* Nothing used. The other threads that have joined this | 258 | /* Nothing used. The other threads that have joined this |
| 249 | * transaction may be able to continue. */ | 259 | * transaction may be able to continue. */ |
| 250 | if (!trans->blocks_used) { | 260 | if (!trans->blocks_used) { |
| 251 | btrfs_printk(root->fs_info, "Aborting unused transaction.\n"); | 261 | char nbuf[16]; |
| 262 | const char *errstr; | ||
| 263 | |||
| 264 | errstr = btrfs_decode_error(root->fs_info, errno, nbuf); | ||
| 265 | btrfs_printk(root->fs_info, | ||
| 266 | "%s:%d: Aborting unused transaction(%s).\n", | ||
| 267 | function, line, errstr); | ||
| 252 | return; | 268 | return; |
| 253 | } | 269 | } |
| 254 | trans->transaction->aborted = errno; | 270 | trans->transaction->aborted = errno; |
| @@ -407,7 +423,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 407 | btrfs_set_opt(info->mount_opt, NODATASUM); | 423 | btrfs_set_opt(info->mount_opt, NODATASUM); |
| 408 | break; | 424 | break; |
| 409 | case Opt_nodatacow: | 425 | case Opt_nodatacow: |
| 410 | printk(KERN_INFO "btrfs: setting nodatacow\n"); | 426 | if (!btrfs_test_opt(root, COMPRESS) || |
| 427 | !btrfs_test_opt(root, FORCE_COMPRESS)) { | ||
| 428 | printk(KERN_INFO "btrfs: setting nodatacow, compression disabled\n"); | ||
| 429 | } else { | ||
| 430 | printk(KERN_INFO "btrfs: setting nodatacow\n"); | ||
| 431 | } | ||
| 432 | info->compress_type = BTRFS_COMPRESS_NONE; | ||
| 433 | btrfs_clear_opt(info->mount_opt, COMPRESS); | ||
| 434 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); | ||
| 411 | btrfs_set_opt(info->mount_opt, NODATACOW); | 435 | btrfs_set_opt(info->mount_opt, NODATACOW); |
| 412 | btrfs_set_opt(info->mount_opt, NODATASUM); | 436 | btrfs_set_opt(info->mount_opt, NODATASUM); |
| 413 | break; | 437 | break; |
| @@ -422,10 +446,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 422 | compress_type = "zlib"; | 446 | compress_type = "zlib"; |
| 423 | info->compress_type = BTRFS_COMPRESS_ZLIB; | 447 | info->compress_type = BTRFS_COMPRESS_ZLIB; |
| 424 | btrfs_set_opt(info->mount_opt, COMPRESS); | 448 | btrfs_set_opt(info->mount_opt, COMPRESS); |
| 449 | btrfs_clear_opt(info->mount_opt, NODATACOW); | ||
| 450 | btrfs_clear_opt(info->mount_opt, NODATASUM); | ||
| 425 | } else if (strcmp(args[0].from, "lzo") == 0) { | 451 | } else if (strcmp(args[0].from, "lzo") == 0) { |
| 426 | compress_type = "lzo"; | 452 | compress_type = "lzo"; |
| 427 | info->compress_type = BTRFS_COMPRESS_LZO; | 453 | info->compress_type = BTRFS_COMPRESS_LZO; |
| 428 | btrfs_set_opt(info->mount_opt, COMPRESS); | 454 | btrfs_set_opt(info->mount_opt, COMPRESS); |
| 455 | btrfs_clear_opt(info->mount_opt, NODATACOW); | ||
| 456 | btrfs_clear_opt(info->mount_opt, NODATASUM); | ||
| 429 | btrfs_set_fs_incompat(info, COMPRESS_LZO); | 457 | btrfs_set_fs_incompat(info, COMPRESS_LZO); |
| 430 | } else if (strncmp(args[0].from, "no", 2) == 0) { | 458 | } else if (strncmp(args[0].from, "no", 2) == 0) { |
| 431 | compress_type = "no"; | 459 | compress_type = "no"; |
| @@ -543,11 +571,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 543 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); | 571 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); |
| 544 | break; | 572 | break; |
| 545 | case Opt_defrag: | 573 | case Opt_defrag: |
| 546 | printk(KERN_INFO "btrfs: enabling auto defrag"); | 574 | printk(KERN_INFO "btrfs: enabling auto defrag\n"); |
| 547 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); | 575 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); |
| 548 | break; | 576 | break; |
| 549 | case Opt_recovery: | 577 | case Opt_recovery: |
| 550 | printk(KERN_INFO "btrfs: enabling auto recovery"); | 578 | printk(KERN_INFO "btrfs: enabling auto recovery\n"); |
| 551 | btrfs_set_opt(info->mount_opt, RECOVERY); | 579 | btrfs_set_opt(info->mount_opt, RECOVERY); |
| 552 | break; | 580 | break; |
| 553 | case Opt_skip_balance: | 581 | case Opt_skip_balance: |
| @@ -846,18 +874,15 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
| 846 | return 0; | 874 | return 0; |
| 847 | } | 875 | } |
| 848 | 876 | ||
| 849 | btrfs_wait_ordered_extents(root, 0, 0); | 877 | btrfs_wait_ordered_extents(root, 0); |
| 850 | 878 | ||
| 851 | spin_lock(&fs_info->trans_lock); | 879 | trans = btrfs_attach_transaction(root); |
| 852 | if (!fs_info->running_transaction) { | 880 | if (IS_ERR(trans)) { |
| 853 | spin_unlock(&fs_info->trans_lock); | 881 | /* no transaction, don't bother */ |
| 854 | return 0; | 882 | if (PTR_ERR(trans) == -ENOENT) |
| 855 | } | 883 | return 0; |
| 856 | spin_unlock(&fs_info->trans_lock); | ||
| 857 | |||
| 858 | trans = btrfs_join_transaction(root); | ||
| 859 | if (IS_ERR(trans)) | ||
| 860 | return PTR_ERR(trans); | 884 | return PTR_ERR(trans); |
| 885 | } | ||
| 861 | return btrfs_commit_transaction(trans, root); | 886 | return btrfs_commit_transaction(trans, root); |
| 862 | } | 887 | } |
| 863 | 888 | ||
| @@ -1171,7 +1196,8 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | |||
| 1171 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | 1196 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); |
| 1172 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | 1197 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); |
| 1173 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | 1198 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); |
| 1174 | btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size); | 1199 | btrfs_set_max_workers(&fs_info->scrub_wr_completion_workers, |
| 1200 | new_pool_size); | ||
| 1175 | } | 1201 | } |
| 1176 | 1202 | ||
| 1177 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) | 1203 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) |
| @@ -1200,8 +1226,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1200 | return 0; | 1226 | return 0; |
| 1201 | 1227 | ||
| 1202 | if (*flags & MS_RDONLY) { | 1228 | if (*flags & MS_RDONLY) { |
| 1229 | /* | ||
| 1230 | * this also happens on 'umount -rf' or on shutdown, when | ||
| 1231 | * the filesystem is busy. | ||
| 1232 | */ | ||
| 1203 | sb->s_flags |= MS_RDONLY; | 1233 | sb->s_flags |= MS_RDONLY; |
| 1204 | 1234 | ||
| 1235 | btrfs_dev_replace_suspend_for_unmount(fs_info); | ||
| 1236 | btrfs_scrub_cancel(fs_info); | ||
| 1237 | |||
| 1205 | ret = btrfs_commit_super(root); | 1238 | ret = btrfs_commit_super(root); |
| 1206 | if (ret) | 1239 | if (ret) |
| 1207 | goto restore; | 1240 | goto restore; |
| @@ -1211,6 +1244,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1211 | goto restore; | 1244 | goto restore; |
| 1212 | } | 1245 | } |
| 1213 | 1246 | ||
| 1247 | if (fs_info->fs_devices->missing_devices > | ||
| 1248 | fs_info->num_tolerated_disk_barrier_failures && | ||
| 1249 | !(*flags & MS_RDONLY)) { | ||
| 1250 | printk(KERN_WARNING | ||
| 1251 | "Btrfs: too many missing devices, writeable remount is not allowed\n"); | ||
| 1252 | ret = -EACCES; | ||
| 1253 | goto restore; | ||
| 1254 | } | ||
| 1255 | |||
| 1214 | if (btrfs_super_log_root(fs_info->super_copy) != 0) { | 1256 | if (btrfs_super_log_root(fs_info->super_copy) != 0) { |
| 1215 | ret = -EINVAL; | 1257 | ret = -EINVAL; |
| 1216 | goto restore; | 1258 | goto restore; |
| @@ -1229,6 +1271,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1229 | if (ret) | 1271 | if (ret) |
| 1230 | goto restore; | 1272 | goto restore; |
| 1231 | 1273 | ||
| 1274 | ret = btrfs_resume_dev_replace_async(fs_info); | ||
| 1275 | if (ret) { | ||
| 1276 | pr_warn("btrfs: failed to resume dev_replace\n"); | ||
| 1277 | goto restore; | ||
| 1278 | } | ||
| 1232 | sb->s_flags &= ~MS_RDONLY; | 1279 | sb->s_flags &= ~MS_RDONLY; |
| 1233 | } | 1280 | } |
| 1234 | 1281 | ||
| @@ -1321,7 +1368,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
| 1321 | min_stripe_size = BTRFS_STRIPE_LEN; | 1368 | min_stripe_size = BTRFS_STRIPE_LEN; |
| 1322 | 1369 | ||
| 1323 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | 1370 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
| 1324 | if (!device->in_fs_metadata || !device->bdev) | 1371 | if (!device->in_fs_metadata || !device->bdev || |
| 1372 | device->is_tgtdev_for_dev_replace) | ||
| 1325 | continue; | 1373 | continue; |
| 1326 | 1374 | ||
| 1327 | avail_space = device->total_bytes - device->bytes_used; | 1375 | avail_space = device->total_bytes - device->bytes_used; |
| @@ -1508,17 +1556,21 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
| 1508 | 1556 | ||
| 1509 | static int btrfs_freeze(struct super_block *sb) | 1557 | static int btrfs_freeze(struct super_block *sb) |
| 1510 | { | 1558 | { |
| 1511 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 1559 | struct btrfs_trans_handle *trans; |
| 1512 | mutex_lock(&fs_info->transaction_kthread_mutex); | 1560 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; |
| 1513 | mutex_lock(&fs_info->cleaner_mutex); | 1561 | |
| 1514 | return 0; | 1562 | trans = btrfs_attach_transaction(root); |
| 1563 | if (IS_ERR(trans)) { | ||
| 1564 | /* no transaction, don't bother */ | ||
| 1565 | if (PTR_ERR(trans) == -ENOENT) | ||
| 1566 | return 0; | ||
| 1567 | return PTR_ERR(trans); | ||
| 1568 | } | ||
| 1569 | return btrfs_commit_transaction(trans, root); | ||
| 1515 | } | 1570 | } |
| 1516 | 1571 | ||
| 1517 | static int btrfs_unfreeze(struct super_block *sb) | 1572 | static int btrfs_unfreeze(struct super_block *sb) |
| 1518 | { | 1573 | { |
| 1519 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
| 1520 | mutex_unlock(&fs_info->cleaner_mutex); | ||
| 1521 | mutex_unlock(&fs_info->transaction_kthread_mutex); | ||
| 1522 | return 0; | 1574 | return 0; |
| 1523 | } | 1575 | } |
| 1524 | 1576 | ||
| @@ -1595,7 +1647,7 @@ static int btrfs_interface_init(void) | |||
| 1595 | static void btrfs_interface_exit(void) | 1647 | static void btrfs_interface_exit(void) |
| 1596 | { | 1648 | { |
| 1597 | if (misc_deregister(&btrfs_misc) < 0) | 1649 | if (misc_deregister(&btrfs_misc) < 0) |
| 1598 | printk(KERN_INFO "misc_deregister failed for control device"); | 1650 | printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); |
| 1599 | } | 1651 | } |
| 1600 | 1652 | ||
| 1601 | static int __init init_btrfs_fs(void) | 1653 | static int __init init_btrfs_fs(void) |
| @@ -1620,14 +1672,22 @@ static int __init init_btrfs_fs(void) | |||
| 1620 | if (err) | 1672 | if (err) |
| 1621 | goto free_extent_io; | 1673 | goto free_extent_io; |
| 1622 | 1674 | ||
| 1623 | err = btrfs_delayed_inode_init(); | 1675 | err = ordered_data_init(); |
| 1624 | if (err) | 1676 | if (err) |
| 1625 | goto free_extent_map; | 1677 | goto free_extent_map; |
| 1626 | 1678 | ||
| 1627 | err = btrfs_interface_init(); | 1679 | err = btrfs_delayed_inode_init(); |
| 1680 | if (err) | ||
| 1681 | goto free_ordered_data; | ||
| 1682 | |||
| 1683 | err = btrfs_auto_defrag_init(); | ||
| 1628 | if (err) | 1684 | if (err) |
| 1629 | goto free_delayed_inode; | 1685 | goto free_delayed_inode; |
| 1630 | 1686 | ||
| 1687 | err = btrfs_interface_init(); | ||
| 1688 | if (err) | ||
| 1689 | goto free_auto_defrag; | ||
| 1690 | |||
| 1631 | err = register_filesystem(&btrfs_fs_type); | 1691 | err = register_filesystem(&btrfs_fs_type); |
| 1632 | if (err) | 1692 | if (err) |
| 1633 | goto unregister_ioctl; | 1693 | goto unregister_ioctl; |
| @@ -1639,8 +1699,12 @@ static int __init init_btrfs_fs(void) | |||
| 1639 | 1699 | ||
| 1640 | unregister_ioctl: | 1700 | unregister_ioctl: |
| 1641 | btrfs_interface_exit(); | 1701 | btrfs_interface_exit(); |
| 1702 | free_auto_defrag: | ||
| 1703 | btrfs_auto_defrag_exit(); | ||
| 1642 | free_delayed_inode: | 1704 | free_delayed_inode: |
| 1643 | btrfs_delayed_inode_exit(); | 1705 | btrfs_delayed_inode_exit(); |
| 1706 | free_ordered_data: | ||
| 1707 | ordered_data_exit(); | ||
| 1644 | free_extent_map: | 1708 | free_extent_map: |
| 1645 | extent_map_exit(); | 1709 | extent_map_exit(); |
| 1646 | free_extent_io: | 1710 | free_extent_io: |
| @@ -1656,7 +1720,9 @@ free_compress: | |||
| 1656 | static void __exit exit_btrfs_fs(void) | 1720 | static void __exit exit_btrfs_fs(void) |
| 1657 | { | 1721 | { |
| 1658 | btrfs_destroy_cachep(); | 1722 | btrfs_destroy_cachep(); |
| 1723 | btrfs_auto_defrag_exit(); | ||
| 1659 | btrfs_delayed_inode_exit(); | 1724 | btrfs_delayed_inode_exit(); |
| 1725 | ordered_data_exit(); | ||
| 1660 | extent_map_exit(); | 1726 | extent_map_exit(); |
| 1661 | extent_io_exit(); | 1727 | extent_io_exit(); |
| 1662 | btrfs_interface_exit(); | 1728 | btrfs_interface_exit(); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 27c26004e050..87fac9a21ea5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include "tree-log.h" | 30 | #include "tree-log.h" |
| 31 | #include "inode-map.h" | 31 | #include "inode-map.h" |
| 32 | #include "volumes.h" | 32 | #include "volumes.h" |
| 33 | #include "dev-replace.h" | ||
| 33 | 34 | ||
| 34 | #define BTRFS_ROOT_TRANS_TAG 0 | 35 | #define BTRFS_ROOT_TRANS_TAG 0 |
| 35 | 36 | ||
| @@ -53,7 +54,7 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
| 53 | /* | 54 | /* |
| 54 | * either allocate a new transaction or hop into the existing one | 55 | * either allocate a new transaction or hop into the existing one |
| 55 | */ | 56 | */ |
| 56 | static noinline int join_transaction(struct btrfs_root *root, int nofail) | 57 | static noinline int join_transaction(struct btrfs_root *root, int type) |
| 57 | { | 58 | { |
| 58 | struct btrfs_transaction *cur_trans; | 59 | struct btrfs_transaction *cur_trans; |
| 59 | struct btrfs_fs_info *fs_info = root->fs_info; | 60 | struct btrfs_fs_info *fs_info = root->fs_info; |
| @@ -67,7 +68,13 @@ loop: | |||
| 67 | } | 68 | } |
| 68 | 69 | ||
| 69 | if (fs_info->trans_no_join) { | 70 | if (fs_info->trans_no_join) { |
| 70 | if (!nofail) { | 71 | /* |
| 72 | * If we are JOIN_NOLOCK we're already committing a current | ||
| 73 | * transaction, we just need a handle to deal with something | ||
| 74 | * when committing the transaction, such as inode cache and | ||
| 75 | * space cache. It is a special case. | ||
| 76 | */ | ||
| 77 | if (type != TRANS_JOIN_NOLOCK) { | ||
| 71 | spin_unlock(&fs_info->trans_lock); | 78 | spin_unlock(&fs_info->trans_lock); |
| 72 | return -EBUSY; | 79 | return -EBUSY; |
| 73 | } | 80 | } |
| @@ -87,6 +94,13 @@ loop: | |||
| 87 | } | 94 | } |
| 88 | spin_unlock(&fs_info->trans_lock); | 95 | spin_unlock(&fs_info->trans_lock); |
| 89 | 96 | ||
| 97 | /* | ||
| 98 | * If we are ATTACH, we just want to catch the current transaction, | ||
| 99 | * and commit it. If there is no transaction, just return ENOENT. | ||
| 100 | */ | ||
| 101 | if (type == TRANS_ATTACH) | ||
| 102 | return -ENOENT; | ||
| 103 | |||
| 90 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 104 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
| 91 | if (!cur_trans) | 105 | if (!cur_trans) |
| 92 | return -ENOMEM; | 106 | return -ENOMEM; |
| @@ -132,16 +146,12 @@ loop: | |||
| 132 | * the log must never go across transaction boundaries. | 146 | * the log must never go across transaction boundaries. |
| 133 | */ | 147 | */ |
| 134 | smp_mb(); | 148 | smp_mb(); |
| 135 | if (!list_empty(&fs_info->tree_mod_seq_list)) { | 149 | if (!list_empty(&fs_info->tree_mod_seq_list)) |
| 136 | printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when " | 150 | WARN(1, KERN_ERR "btrfs: tree_mod_seq_list not empty when " |
| 137 | "creating a fresh transaction\n"); | 151 | "creating a fresh transaction\n"); |
| 138 | WARN_ON(1); | 152 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) |
| 139 | } | 153 | WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " |
| 140 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) { | ||
| 141 | printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when " | ||
| 142 | "creating a fresh transaction\n"); | 154 | "creating a fresh transaction\n"); |
| 143 | WARN_ON(1); | ||
| 144 | } | ||
| 145 | atomic_set(&fs_info->tree_mod_seq, 0); | 155 | atomic_set(&fs_info->tree_mod_seq, 0); |
| 146 | 156 | ||
| 147 | spin_lock_init(&cur_trans->commit_lock); | 157 | spin_lock_init(&cur_trans->commit_lock); |
| @@ -267,13 +277,6 @@ static void wait_current_trans(struct btrfs_root *root) | |||
| 267 | } | 277 | } |
| 268 | } | 278 | } |
| 269 | 279 | ||
| 270 | enum btrfs_trans_type { | ||
| 271 | TRANS_START, | ||
| 272 | TRANS_JOIN, | ||
| 273 | TRANS_USERSPACE, | ||
| 274 | TRANS_JOIN_NOLOCK, | ||
| 275 | }; | ||
| 276 | |||
| 277 | static int may_wait_transaction(struct btrfs_root *root, int type) | 280 | static int may_wait_transaction(struct btrfs_root *root, int type) |
| 278 | { | 281 | { |
| 279 | if (root->fs_info->log_root_recovering) | 282 | if (root->fs_info->log_root_recovering) |
| @@ -289,8 +292,9 @@ static int may_wait_transaction(struct btrfs_root *root, int type) | |||
| 289 | return 0; | 292 | return 0; |
| 290 | } | 293 | } |
| 291 | 294 | ||
| 292 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 295 | static struct btrfs_trans_handle * |
| 293 | u64 num_items, int type) | 296 | start_transaction(struct btrfs_root *root, u64 num_items, int type, |
| 297 | enum btrfs_reserve_flush_enum flush) | ||
| 294 | { | 298 | { |
| 295 | struct btrfs_trans_handle *h; | 299 | struct btrfs_trans_handle *h; |
| 296 | struct btrfs_transaction *cur_trans; | 300 | struct btrfs_transaction *cur_trans; |
| @@ -305,6 +309,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 305 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); | 309 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); |
| 306 | h = current->journal_info; | 310 | h = current->journal_info; |
| 307 | h->use_count++; | 311 | h->use_count++; |
| 312 | WARN_ON(h->use_count > 2); | ||
| 308 | h->orig_rsv = h->block_rsv; | 313 | h->orig_rsv = h->block_rsv; |
| 309 | h->block_rsv = NULL; | 314 | h->block_rsv = NULL; |
| 310 | goto got_it; | 315 | goto got_it; |
| @@ -326,7 +331,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 326 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | 331 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
| 327 | ret = btrfs_block_rsv_add(root, | 332 | ret = btrfs_block_rsv_add(root, |
| 328 | &root->fs_info->trans_block_rsv, | 333 | &root->fs_info->trans_block_rsv, |
| 329 | num_bytes); | 334 | num_bytes, flush); |
| 330 | if (ret) | 335 | if (ret) |
| 331 | return ERR_PTR(ret); | 336 | return ERR_PTR(ret); |
| 332 | } | 337 | } |
| @@ -335,19 +340,34 @@ again: | |||
| 335 | if (!h) | 340 | if (!h) |
| 336 | return ERR_PTR(-ENOMEM); | 341 | return ERR_PTR(-ENOMEM); |
| 337 | 342 | ||
| 338 | sb_start_intwrite(root->fs_info->sb); | 343 | /* |
| 344 | * If we are JOIN_NOLOCK we're already committing a transaction and | ||
| 345 | * waiting on this guy, so we don't need to do the sb_start_intwrite | ||
| 346 | * because we're already holding a ref. We need this because we could | ||
| 347 | * have raced in and did an fsync() on a file which can kick a commit | ||
| 348 | * and then we deadlock with somebody doing a freeze. | ||
| 349 | * | ||
| 350 | * If we are ATTACH, it means we just want to catch the current | ||
| 351 | * transaction and commit it, so we needn't do sb_start_intwrite(). | ||
| 352 | */ | ||
| 353 | if (type < TRANS_JOIN_NOLOCK) | ||
| 354 | sb_start_intwrite(root->fs_info->sb); | ||
| 339 | 355 | ||
| 340 | if (may_wait_transaction(root, type)) | 356 | if (may_wait_transaction(root, type)) |
| 341 | wait_current_trans(root); | 357 | wait_current_trans(root); |
| 342 | 358 | ||
| 343 | do { | 359 | do { |
| 344 | ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); | 360 | ret = join_transaction(root, type); |
| 345 | if (ret == -EBUSY) | 361 | if (ret == -EBUSY) |
| 346 | wait_current_trans(root); | 362 | wait_current_trans(root); |
| 347 | } while (ret == -EBUSY); | 363 | } while (ret == -EBUSY); |
| 348 | 364 | ||
| 349 | if (ret < 0) { | 365 | if (ret < 0) { |
| 350 | sb_end_intwrite(root->fs_info->sb); | 366 | /* We must get the transaction if we are JOIN_NOLOCK. */ |
| 367 | BUG_ON(type == TRANS_JOIN_NOLOCK); | ||
| 368 | |||
| 369 | if (type < TRANS_JOIN_NOLOCK) | ||
| 370 | sb_end_intwrite(root->fs_info->sb); | ||
| 351 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 371 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
| 352 | return ERR_PTR(ret); | 372 | return ERR_PTR(ret); |
| 353 | } | 373 | } |
| @@ -367,7 +387,9 @@ again: | |||
| 367 | h->aborted = 0; | 387 | h->aborted = 0; |
| 368 | h->qgroup_reserved = qgroup_reserved; | 388 | h->qgroup_reserved = qgroup_reserved; |
| 369 | h->delayed_ref_elem.seq = 0; | 389 | h->delayed_ref_elem.seq = 0; |
| 390 | h->type = type; | ||
| 370 | INIT_LIST_HEAD(&h->qgroup_ref_list); | 391 | INIT_LIST_HEAD(&h->qgroup_ref_list); |
| 392 | INIT_LIST_HEAD(&h->new_bgs); | ||
| 371 | 393 | ||
| 372 | smp_mb(); | 394 | smp_mb(); |
| 373 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 395 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
| @@ -393,21 +415,35 @@ got_it: | |||
| 393 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 415 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 394 | int num_items) | 416 | int num_items) |
| 395 | { | 417 | { |
| 396 | return start_transaction(root, num_items, TRANS_START); | 418 | return start_transaction(root, num_items, TRANS_START, |
| 419 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 420 | } | ||
| 421 | |||
| 422 | struct btrfs_trans_handle *btrfs_start_transaction_lflush( | ||
| 423 | struct btrfs_root *root, int num_items) | ||
| 424 | { | ||
| 425 | return start_transaction(root, num_items, TRANS_START, | ||
| 426 | BTRFS_RESERVE_FLUSH_LIMIT); | ||
| 397 | } | 427 | } |
| 428 | |||
| 398 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) | 429 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) |
| 399 | { | 430 | { |
| 400 | return start_transaction(root, 0, TRANS_JOIN); | 431 | return start_transaction(root, 0, TRANS_JOIN, 0); |
| 401 | } | 432 | } |
| 402 | 433 | ||
| 403 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) | 434 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) |
| 404 | { | 435 | { |
| 405 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); | 436 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0); |
| 406 | } | 437 | } |
| 407 | 438 | ||
| 408 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) | 439 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) |
| 409 | { | 440 | { |
| 410 | return start_transaction(root, 0, TRANS_USERSPACE); | 441 | return start_transaction(root, 0, TRANS_USERSPACE, 0); |
| 442 | } | ||
| 443 | |||
| 444 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) | ||
| 445 | { | ||
| 446 | return start_transaction(root, 0, TRANS_ATTACH, 0); | ||
| 411 | } | 447 | } |
| 412 | 448 | ||
| 413 | /* wait for a transaction commit to be fully complete */ | 449 | /* wait for a transaction commit to be fully complete */ |
| @@ -420,28 +456,31 @@ static noinline void wait_for_commit(struct btrfs_root *root, | |||
| 420 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | 456 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) |
| 421 | { | 457 | { |
| 422 | struct btrfs_transaction *cur_trans = NULL, *t; | 458 | struct btrfs_transaction *cur_trans = NULL, *t; |
| 423 | int ret; | 459 | int ret = 0; |
| 424 | 460 | ||
| 425 | ret = 0; | ||
| 426 | if (transid) { | 461 | if (transid) { |
| 427 | if (transid <= root->fs_info->last_trans_committed) | 462 | if (transid <= root->fs_info->last_trans_committed) |
| 428 | goto out; | 463 | goto out; |
| 429 | 464 | ||
| 465 | ret = -EINVAL; | ||
| 430 | /* find specified transaction */ | 466 | /* find specified transaction */ |
| 431 | spin_lock(&root->fs_info->trans_lock); | 467 | spin_lock(&root->fs_info->trans_lock); |
| 432 | list_for_each_entry(t, &root->fs_info->trans_list, list) { | 468 | list_for_each_entry(t, &root->fs_info->trans_list, list) { |
| 433 | if (t->transid == transid) { | 469 | if (t->transid == transid) { |
| 434 | cur_trans = t; | 470 | cur_trans = t; |
| 435 | atomic_inc(&cur_trans->use_count); | 471 | atomic_inc(&cur_trans->use_count); |
| 472 | ret = 0; | ||
| 436 | break; | 473 | break; |
| 437 | } | 474 | } |
| 438 | if (t->transid > transid) | 475 | if (t->transid > transid) { |
| 476 | ret = 0; | ||
| 439 | break; | 477 | break; |
| 478 | } | ||
| 440 | } | 479 | } |
| 441 | spin_unlock(&root->fs_info->trans_lock); | 480 | spin_unlock(&root->fs_info->trans_lock); |
| 442 | ret = -EINVAL; | 481 | /* The specified transaction doesn't exist */ |
| 443 | if (!cur_trans) | 482 | if (!cur_trans) |
| 444 | goto out; /* bad transid */ | 483 | goto out; |
| 445 | } else { | 484 | } else { |
| 446 | /* find newest transaction that is committing | committed */ | 485 | /* find newest transaction that is committing | committed */ |
| 447 | spin_lock(&root->fs_info->trans_lock); | 486 | spin_lock(&root->fs_info->trans_lock); |
| @@ -461,9 +500,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
| 461 | } | 500 | } |
| 462 | 501 | ||
| 463 | wait_for_commit(root, cur_trans); | 502 | wait_for_commit(root, cur_trans); |
| 464 | |||
| 465 | put_transaction(cur_trans); | 503 | put_transaction(cur_trans); |
| 466 | ret = 0; | ||
| 467 | out: | 504 | out: |
| 468 | return ret; | 505 | return ret; |
| 469 | } | 506 | } |
| @@ -506,11 +543,12 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
| 506 | } | 543 | } |
| 507 | 544 | ||
| 508 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 545 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| 509 | struct btrfs_root *root, int throttle, int lock) | 546 | struct btrfs_root *root, int throttle) |
| 510 | { | 547 | { |
| 511 | struct btrfs_transaction *cur_trans = trans->transaction; | 548 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 512 | struct btrfs_fs_info *info = root->fs_info; | 549 | struct btrfs_fs_info *info = root->fs_info; |
| 513 | int count = 0; | 550 | int count = 0; |
| 551 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | ||
| 514 | int err = 0; | 552 | int err = 0; |
| 515 | 553 | ||
| 516 | if (--trans->use_count) { | 554 | if (--trans->use_count) { |
| @@ -536,6 +574,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 536 | trans->qgroup_reserved = 0; | 574 | trans->qgroup_reserved = 0; |
| 537 | } | 575 | } |
| 538 | 576 | ||
| 577 | if (!list_empty(&trans->new_bgs)) | ||
| 578 | btrfs_create_pending_block_groups(trans, root); | ||
| 579 | |||
| 539 | while (count < 2) { | 580 | while (count < 2) { |
| 540 | unsigned long cur = trans->delayed_ref_updates; | 581 | unsigned long cur = trans->delayed_ref_updates; |
| 541 | trans->delayed_ref_updates = 0; | 582 | trans->delayed_ref_updates = 0; |
| @@ -551,7 +592,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 551 | btrfs_trans_release_metadata(trans, root); | 592 | btrfs_trans_release_metadata(trans, root); |
| 552 | trans->block_rsv = NULL; | 593 | trans->block_rsv = NULL; |
| 553 | 594 | ||
| 554 | sb_end_intwrite(root->fs_info->sb); | 595 | if (!list_empty(&trans->new_bgs)) |
| 596 | btrfs_create_pending_block_groups(trans, root); | ||
| 555 | 597 | ||
| 556 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && | 598 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
| 557 | should_end_transaction(trans, root)) { | 599 | should_end_transaction(trans, root)) { |
| @@ -573,6 +615,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 573 | } | 615 | } |
| 574 | } | 616 | } |
| 575 | 617 | ||
| 618 | if (trans->type < TRANS_JOIN_NOLOCK) | ||
| 619 | sb_end_intwrite(root->fs_info->sb); | ||
| 620 | |||
| 576 | WARN_ON(cur_trans != info->running_transaction); | 621 | WARN_ON(cur_trans != info->running_transaction); |
| 577 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); | 622 | WARN_ON(atomic_read(&cur_trans->num_writers) < 1); |
| 578 | atomic_dec(&cur_trans->num_writers); | 623 | atomic_dec(&cur_trans->num_writers); |
| @@ -604,7 +649,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 604 | { | 649 | { |
| 605 | int ret; | 650 | int ret; |
| 606 | 651 | ||
| 607 | ret = __btrfs_end_transaction(trans, root, 0, 1); | 652 | ret = __btrfs_end_transaction(trans, root, 0); |
| 608 | if (ret) | 653 | if (ret) |
| 609 | return ret; | 654 | return ret; |
| 610 | return 0; | 655 | return 0; |
| @@ -615,18 +660,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | |||
| 615 | { | 660 | { |
| 616 | int ret; | 661 | int ret; |
| 617 | 662 | ||
| 618 | ret = __btrfs_end_transaction(trans, root, 1, 1); | 663 | ret = __btrfs_end_transaction(trans, root, 1); |
| 619 | if (ret) | ||
| 620 | return ret; | ||
| 621 | return 0; | ||
| 622 | } | ||
| 623 | |||
| 624 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
| 625 | struct btrfs_root *root) | ||
| 626 | { | ||
| 627 | int ret; | ||
| 628 | |||
| 629 | ret = __btrfs_end_transaction(trans, root, 0, 0); | ||
| 630 | if (ret) | 664 | if (ret) |
| 631 | return ret; | 665 | return ret; |
| 632 | return 0; | 666 | return 0; |
| @@ -635,7 +669,7 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | |||
| 635 | int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, | 669 | int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, |
| 636 | struct btrfs_root *root) | 670 | struct btrfs_root *root) |
| 637 | { | 671 | { |
| 638 | return __btrfs_end_transaction(trans, root, 1, 1); | 672 | return __btrfs_end_transaction(trans, root, 1); |
| 639 | } | 673 | } |
| 640 | 674 | ||
| 641 | /* | 675 | /* |
| @@ -649,13 +683,15 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
| 649 | int err = 0; | 683 | int err = 0; |
| 650 | int werr = 0; | 684 | int werr = 0; |
| 651 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; | 685 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
| 686 | struct extent_state *cached_state = NULL; | ||
| 652 | u64 start = 0; | 687 | u64 start = 0; |
| 653 | u64 end; | 688 | u64 end; |
| 654 | 689 | ||
| 655 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 690 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
| 656 | mark)) { | 691 | mark, &cached_state)) { |
| 657 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark, | 692 | convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, |
| 658 | GFP_NOFS); | 693 | mark, &cached_state, GFP_NOFS); |
| 694 | cached_state = NULL; | ||
| 659 | err = filemap_fdatawrite_range(mapping, start, end); | 695 | err = filemap_fdatawrite_range(mapping, start, end); |
| 660 | if (err) | 696 | if (err) |
| 661 | werr = err; | 697 | werr = err; |
| @@ -679,12 +715,14 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
| 679 | int err = 0; | 715 | int err = 0; |
| 680 | int werr = 0; | 716 | int werr = 0; |
| 681 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; | 717 | struct address_space *mapping = root->fs_info->btree_inode->i_mapping; |
| 718 | struct extent_state *cached_state = NULL; | ||
| 682 | u64 start = 0; | 719 | u64 start = 0; |
| 683 | u64 end; | 720 | u64 end; |
| 684 | 721 | ||
| 685 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 722 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
| 686 | EXTENT_NEED_WAIT)) { | 723 | EXTENT_NEED_WAIT, &cached_state)) { |
| 687 | clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS); | 724 | clear_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, |
| 725 | 0, 0, &cached_state, GFP_NOFS); | ||
| 688 | err = filemap_fdatawait_range(mapping, start, end); | 726 | err = filemap_fdatawait_range(mapping, start, end); |
| 689 | if (err) | 727 | if (err) |
| 690 | werr = err; | 728 | werr = err; |
| @@ -809,7 +847,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 809 | return ret; | 847 | return ret; |
| 810 | 848 | ||
| 811 | ret = btrfs_run_dev_stats(trans, root->fs_info); | 849 | ret = btrfs_run_dev_stats(trans, root->fs_info); |
| 812 | BUG_ON(ret); | 850 | WARN_ON(ret); |
| 851 | ret = btrfs_run_dev_replace(trans, root->fs_info); | ||
| 852 | WARN_ON(ret); | ||
| 813 | 853 | ||
| 814 | ret = btrfs_run_qgroups(trans, root->fs_info); | 854 | ret = btrfs_run_qgroups(trans, root->fs_info); |
| 815 | BUG_ON(ret); | 855 | BUG_ON(ret); |
| @@ -832,6 +872,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 832 | switch_commit_root(fs_info->extent_root); | 872 | switch_commit_root(fs_info->extent_root); |
| 833 | up_write(&fs_info->extent_commit_sem); | 873 | up_write(&fs_info->extent_commit_sem); |
| 834 | 874 | ||
| 875 | btrfs_after_dev_replace_commit(fs_info); | ||
| 876 | |||
| 835 | return 0; | 877 | return 0; |
| 836 | } | 878 | } |
| 837 | 879 | ||
| @@ -916,7 +958,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
| 916 | struct btrfs_fs_info *info = root->fs_info; | 958 | struct btrfs_fs_info *info = root->fs_info; |
| 917 | struct btrfs_trans_handle *trans; | 959 | struct btrfs_trans_handle *trans; |
| 918 | int ret; | 960 | int ret; |
| 919 | unsigned long nr; | ||
| 920 | 961 | ||
| 921 | if (xchg(&root->defrag_running, 1)) | 962 | if (xchg(&root->defrag_running, 1)) |
| 922 | return 0; | 963 | return 0; |
| @@ -928,9 +969,8 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
| 928 | 969 | ||
| 929 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 970 | ret = btrfs_defrag_leaves(trans, root, cacheonly); |
| 930 | 971 | ||
| 931 | nr = trans->blocks_used; | ||
| 932 | btrfs_end_transaction(trans, root); | 972 | btrfs_end_transaction(trans, root); |
| 933 | btrfs_btree_balance_dirty(info->tree_root, nr); | 973 | btrfs_btree_balance_dirty(info->tree_root); |
| 934 | cond_resched(); | 974 | cond_resched(); |
| 935 | 975 | ||
| 936 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) | 976 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) |
| @@ -955,6 +995,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 955 | struct btrfs_root *parent_root; | 995 | struct btrfs_root *parent_root; |
| 956 | struct btrfs_block_rsv *rsv; | 996 | struct btrfs_block_rsv *rsv; |
| 957 | struct inode *parent_inode; | 997 | struct inode *parent_inode; |
| 998 | struct btrfs_path *path; | ||
| 999 | struct btrfs_dir_item *dir_item; | ||
| 958 | struct dentry *parent; | 1000 | struct dentry *parent; |
| 959 | struct dentry *dentry; | 1001 | struct dentry *dentry; |
| 960 | struct extent_buffer *tmp; | 1002 | struct extent_buffer *tmp; |
| @@ -967,43 +1009,48 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 967 | u64 root_flags; | 1009 | u64 root_flags; |
| 968 | uuid_le new_uuid; | 1010 | uuid_le new_uuid; |
| 969 | 1011 | ||
| 970 | rsv = trans->block_rsv; | 1012 | path = btrfs_alloc_path(); |
| 1013 | if (!path) { | ||
| 1014 | ret = pending->error = -ENOMEM; | ||
| 1015 | goto path_alloc_fail; | ||
| 1016 | } | ||
| 971 | 1017 | ||
| 972 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 1018 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
| 973 | if (!new_root_item) { | 1019 | if (!new_root_item) { |
| 974 | ret = pending->error = -ENOMEM; | 1020 | ret = pending->error = -ENOMEM; |
| 975 | goto fail; | 1021 | goto root_item_alloc_fail; |
| 976 | } | 1022 | } |
| 977 | 1023 | ||
| 978 | ret = btrfs_find_free_objectid(tree_root, &objectid); | 1024 | ret = btrfs_find_free_objectid(tree_root, &objectid); |
| 979 | if (ret) { | 1025 | if (ret) { |
| 980 | pending->error = ret; | 1026 | pending->error = ret; |
| 981 | goto fail; | 1027 | goto no_free_objectid; |
| 982 | } | 1028 | } |
| 983 | 1029 | ||
| 984 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | 1030 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); |
| 985 | 1031 | ||
| 986 | if (to_reserve > 0) { | 1032 | if (to_reserve > 0) { |
| 987 | ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv, | 1033 | ret = btrfs_block_rsv_add(root, &pending->block_rsv, |
| 988 | to_reserve); | 1034 | to_reserve, |
| 1035 | BTRFS_RESERVE_NO_FLUSH); | ||
| 989 | if (ret) { | 1036 | if (ret) { |
| 990 | pending->error = ret; | 1037 | pending->error = ret; |
| 991 | goto fail; | 1038 | goto no_free_objectid; |
| 992 | } | 1039 | } |
| 993 | } | 1040 | } |
| 994 | 1041 | ||
| 995 | ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, | 1042 | ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, |
| 996 | objectid, pending->inherit); | 1043 | objectid, pending->inherit); |
| 997 | kfree(pending->inherit); | ||
| 998 | if (ret) { | 1044 | if (ret) { |
| 999 | pending->error = ret; | 1045 | pending->error = ret; |
| 1000 | goto fail; | 1046 | goto no_free_objectid; |
| 1001 | } | 1047 | } |
| 1002 | 1048 | ||
| 1003 | key.objectid = objectid; | 1049 | key.objectid = objectid; |
| 1004 | key.offset = (u64)-1; | 1050 | key.offset = (u64)-1; |
| 1005 | key.type = BTRFS_ROOT_ITEM_KEY; | 1051 | key.type = BTRFS_ROOT_ITEM_KEY; |
| 1006 | 1052 | ||
| 1053 | rsv = trans->block_rsv; | ||
| 1007 | trans->block_rsv = &pending->block_rsv; | 1054 | trans->block_rsv = &pending->block_rsv; |
| 1008 | 1055 | ||
| 1009 | dentry = pending->dentry; | 1056 | dentry = pending->dentry; |
| @@ -1017,24 +1064,21 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1017 | */ | 1064 | */ |
| 1018 | ret = btrfs_set_inode_index(parent_inode, &index); | 1065 | ret = btrfs_set_inode_index(parent_inode, &index); |
| 1019 | BUG_ON(ret); /* -ENOMEM */ | 1066 | BUG_ON(ret); /* -ENOMEM */ |
| 1020 | ret = btrfs_insert_dir_item(trans, parent_root, | 1067 | |
| 1021 | dentry->d_name.name, dentry->d_name.len, | 1068 | /* check if there is a file/dir which has the same name. */ |
| 1022 | parent_inode, &key, | 1069 | dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, |
| 1023 | BTRFS_FT_DIR, index); | 1070 | btrfs_ino(parent_inode), |
| 1024 | if (ret == -EEXIST) { | 1071 | dentry->d_name.name, |
| 1072 | dentry->d_name.len, 0); | ||
| 1073 | if (dir_item != NULL && !IS_ERR(dir_item)) { | ||
| 1025 | pending->error = -EEXIST; | 1074 | pending->error = -EEXIST; |
| 1026 | dput(parent); | ||
| 1027 | goto fail; | 1075 | goto fail; |
| 1028 | } else if (ret) { | 1076 | } else if (IS_ERR(dir_item)) { |
| 1029 | goto abort_trans_dput; | 1077 | ret = PTR_ERR(dir_item); |
| 1078 | btrfs_abort_transaction(trans, root, ret); | ||
| 1079 | goto fail; | ||
| 1030 | } | 1080 | } |
| 1031 | 1081 | btrfs_release_path(path); | |
| 1032 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
| 1033 | dentry->d_name.len * 2); | ||
| 1034 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | ||
| 1035 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
| 1036 | if (ret) | ||
| 1037 | goto abort_trans_dput; | ||
| 1038 | 1082 | ||
| 1039 | /* | 1083 | /* |
| 1040 | * pull in the delayed directory update | 1084 | * pull in the delayed directory update |
| @@ -1043,8 +1087,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1043 | * snapshot | 1087 | * snapshot |
| 1044 | */ | 1088 | */ |
| 1045 | ret = btrfs_run_delayed_items(trans, root); | 1089 | ret = btrfs_run_delayed_items(trans, root); |
| 1046 | if (ret) { /* Transaction aborted */ | 1090 | if (ret) { /* Transaction aborted */ |
| 1047 | dput(parent); | 1091 | btrfs_abort_transaction(trans, root, ret); |
| 1048 | goto fail; | 1092 | goto fail; |
| 1049 | } | 1093 | } |
| 1050 | 1094 | ||
| @@ -1079,7 +1123,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1079 | if (ret) { | 1123 | if (ret) { |
| 1080 | btrfs_tree_unlock(old); | 1124 | btrfs_tree_unlock(old); |
| 1081 | free_extent_buffer(old); | 1125 | free_extent_buffer(old); |
| 1082 | goto abort_trans_dput; | 1126 | btrfs_abort_transaction(trans, root, ret); |
| 1127 | goto fail; | ||
| 1083 | } | 1128 | } |
| 1084 | 1129 | ||
| 1085 | btrfs_set_lock_blocking(old); | 1130 | btrfs_set_lock_blocking(old); |
| @@ -1088,8 +1133,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1088 | /* clean up in any case */ | 1133 | /* clean up in any case */ |
| 1089 | btrfs_tree_unlock(old); | 1134 | btrfs_tree_unlock(old); |
| 1090 | free_extent_buffer(old); | 1135 | free_extent_buffer(old); |
| 1091 | if (ret) | 1136 | if (ret) { |
| 1092 | goto abort_trans_dput; | 1137 | btrfs_abort_transaction(trans, root, ret); |
| 1138 | goto fail; | ||
| 1139 | } | ||
| 1093 | 1140 | ||
| 1094 | /* see comments in should_cow_block() */ | 1141 | /* see comments in should_cow_block() */ |
| 1095 | root->force_cow = 1; | 1142 | root->force_cow = 1; |
| @@ -1101,8 +1148,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1101 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); | 1148 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); |
| 1102 | btrfs_tree_unlock(tmp); | 1149 | btrfs_tree_unlock(tmp); |
| 1103 | free_extent_buffer(tmp); | 1150 | free_extent_buffer(tmp); |
| 1104 | if (ret) | 1151 | if (ret) { |
| 1105 | goto abort_trans_dput; | 1152 | btrfs_abort_transaction(trans, root, ret); |
| 1153 | goto fail; | ||
| 1154 | } | ||
| 1106 | 1155 | ||
| 1107 | /* | 1156 | /* |
| 1108 | * insert root back/forward references | 1157 | * insert root back/forward references |
| @@ -1111,32 +1160,58 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1111 | parent_root->root_key.objectid, | 1160 | parent_root->root_key.objectid, |
| 1112 | btrfs_ino(parent_inode), index, | 1161 | btrfs_ino(parent_inode), index, |
| 1113 | dentry->d_name.name, dentry->d_name.len); | 1162 | dentry->d_name.name, dentry->d_name.len); |
| 1114 | dput(parent); | 1163 | if (ret) { |
| 1115 | if (ret) | 1164 | btrfs_abort_transaction(trans, root, ret); |
| 1116 | goto fail; | 1165 | goto fail; |
| 1166 | } | ||
| 1117 | 1167 | ||
| 1118 | key.offset = (u64)-1; | 1168 | key.offset = (u64)-1; |
| 1119 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | 1169 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); |
| 1120 | if (IS_ERR(pending->snap)) { | 1170 | if (IS_ERR(pending->snap)) { |
| 1121 | ret = PTR_ERR(pending->snap); | 1171 | ret = PTR_ERR(pending->snap); |
| 1122 | goto abort_trans; | 1172 | btrfs_abort_transaction(trans, root, ret); |
| 1173 | goto fail; | ||
| 1123 | } | 1174 | } |
| 1124 | 1175 | ||
| 1125 | ret = btrfs_reloc_post_snapshot(trans, pending); | 1176 | ret = btrfs_reloc_post_snapshot(trans, pending); |
| 1177 | if (ret) { | ||
| 1178 | btrfs_abort_transaction(trans, root, ret); | ||
| 1179 | goto fail; | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
| 1183 | if (ret) { | ||
| 1184 | btrfs_abort_transaction(trans, root, ret); | ||
| 1185 | goto fail; | ||
| 1186 | } | ||
| 1187 | |||
| 1188 | ret = btrfs_insert_dir_item(trans, parent_root, | ||
| 1189 | dentry->d_name.name, dentry->d_name.len, | ||
| 1190 | parent_inode, &key, | ||
| 1191 | BTRFS_FT_DIR, index); | ||
| 1192 | /* We have check then name at the beginning, so it is impossible. */ | ||
| 1193 | BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); | ||
| 1194 | if (ret) { | ||
| 1195 | btrfs_abort_transaction(trans, root, ret); | ||
| 1196 | goto fail; | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
| 1200 | dentry->d_name.len * 2); | ||
| 1201 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | ||
| 1202 | ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); | ||
| 1126 | if (ret) | 1203 | if (ret) |
| 1127 | goto abort_trans; | 1204 | btrfs_abort_transaction(trans, root, ret); |
| 1128 | ret = 0; | ||
| 1129 | fail: | 1205 | fail: |
| 1130 | kfree(new_root_item); | 1206 | dput(parent); |
| 1131 | trans->block_rsv = rsv; | 1207 | trans->block_rsv = rsv; |
| 1208 | no_free_objectid: | ||
| 1209 | kfree(new_root_item); | ||
| 1210 | root_item_alloc_fail: | ||
| 1211 | btrfs_free_path(path); | ||
| 1212 | path_alloc_fail: | ||
| 1132 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); | 1213 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
| 1133 | return ret; | 1214 | return ret; |
| 1134 | |||
| 1135 | abort_trans_dput: | ||
| 1136 | dput(parent); | ||
| 1137 | abort_trans: | ||
| 1138 | btrfs_abort_transaction(trans, root, ret); | ||
| 1139 | goto fail; | ||
| 1140 | } | 1215 | } |
| 1141 | 1216 | ||
| 1142 | /* | 1217 | /* |
| @@ -1229,6 +1304,17 @@ static void do_async_commit(struct work_struct *work) | |||
| 1229 | struct btrfs_async_commit *ac = | 1304 | struct btrfs_async_commit *ac = |
| 1230 | container_of(work, struct btrfs_async_commit, work.work); | 1305 | container_of(work, struct btrfs_async_commit, work.work); |
| 1231 | 1306 | ||
| 1307 | /* | ||
| 1308 | * We've got freeze protection passed with the transaction. | ||
| 1309 | * Tell lockdep about it. | ||
| 1310 | */ | ||
| 1311 | if (ac->newtrans->type < TRANS_JOIN_NOLOCK) | ||
| 1312 | rwsem_acquire_read( | ||
| 1313 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
| 1314 | 0, 1, _THIS_IP_); | ||
| 1315 | |||
| 1316 | current->journal_info = ac->newtrans; | ||
| 1317 | |||
| 1232 | btrfs_commit_transaction(ac->newtrans, ac->root); | 1318 | btrfs_commit_transaction(ac->newtrans, ac->root); |
| 1233 | kfree(ac); | 1319 | kfree(ac); |
| 1234 | } | 1320 | } |
| @@ -1258,6 +1344,16 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
| 1258 | atomic_inc(&cur_trans->use_count); | 1344 | atomic_inc(&cur_trans->use_count); |
| 1259 | 1345 | ||
| 1260 | btrfs_end_transaction(trans, root); | 1346 | btrfs_end_transaction(trans, root); |
| 1347 | |||
| 1348 | /* | ||
| 1349 | * Tell lockdep we've released the freeze rwsem, since the | ||
| 1350 | * async commit thread will be the one to unlock it. | ||
| 1351 | */ | ||
| 1352 | if (trans->type < TRANS_JOIN_NOLOCK) | ||
| 1353 | rwsem_release( | ||
| 1354 | &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
| 1355 | 1, _THIS_IP_); | ||
| 1356 | |||
| 1261 | schedule_delayed_work(&ac->work, 0); | 1357 | schedule_delayed_work(&ac->work, 0); |
| 1262 | 1358 | ||
| 1263 | /* wait for transaction to start and unblock */ | 1359 | /* wait for transaction to start and unblock */ |
| @@ -1306,6 +1402,48 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
| 1306 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1402 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1307 | } | 1403 | } |
| 1308 | 1404 | ||
| 1405 | static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | ||
| 1406 | struct btrfs_root *root) | ||
| 1407 | { | ||
| 1408 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
| 1409 | int snap_pending = 0; | ||
| 1410 | int ret; | ||
| 1411 | |||
| 1412 | if (!flush_on_commit) { | ||
| 1413 | spin_lock(&root->fs_info->trans_lock); | ||
| 1414 | if (!list_empty(&trans->transaction->pending_snapshots)) | ||
| 1415 | snap_pending = 1; | ||
| 1416 | spin_unlock(&root->fs_info->trans_lock); | ||
| 1417 | } | ||
| 1418 | |||
| 1419 | if (flush_on_commit || snap_pending) { | ||
| 1420 | btrfs_start_delalloc_inodes(root, 1); | ||
| 1421 | btrfs_wait_ordered_extents(root, 1); | ||
| 1422 | } | ||
| 1423 | |||
| 1424 | ret = btrfs_run_delayed_items(trans, root); | ||
| 1425 | if (ret) | ||
| 1426 | return ret; | ||
| 1427 | |||
| 1428 | /* | ||
| 1429 | * running the delayed items may have added new refs. account | ||
| 1430 | * them now so that they hinder processing of more delayed refs | ||
| 1431 | * as little as possible. | ||
| 1432 | */ | ||
| 1433 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
| 1434 | |||
| 1435 | /* | ||
| 1436 | * rename don't use btrfs_join_transaction, so, once we | ||
| 1437 | * set the transaction to blocked above, we aren't going | ||
| 1438 | * to get any new ordered operations. We can safely run | ||
| 1439 | * it here and no for sure that nothing new will be added | ||
| 1440 | * to the list | ||
| 1441 | */ | ||
| 1442 | btrfs_run_ordered_operations(root, 1); | ||
| 1443 | |||
| 1444 | return 0; | ||
| 1445 | } | ||
| 1446 | |||
| 1309 | /* | 1447 | /* |
| 1310 | * btrfs_transaction state sequence: | 1448 | * btrfs_transaction state sequence: |
| 1311 | * in_commit = 0, blocked = 0 (initial) | 1449 | * in_commit = 0, blocked = 0 (initial) |
| @@ -1320,15 +1458,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1320 | struct btrfs_transaction *cur_trans = trans->transaction; | 1458 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 1321 | struct btrfs_transaction *prev_trans = NULL; | 1459 | struct btrfs_transaction *prev_trans = NULL; |
| 1322 | DEFINE_WAIT(wait); | 1460 | DEFINE_WAIT(wait); |
| 1323 | int ret = -EIO; | 1461 | int ret; |
| 1324 | int should_grow = 0; | 1462 | int should_grow = 0; |
| 1325 | unsigned long now = get_seconds(); | 1463 | unsigned long now = get_seconds(); |
| 1326 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
| 1327 | 1464 | ||
| 1328 | btrfs_run_ordered_operations(root, 0); | 1465 | ret = btrfs_run_ordered_operations(root, 0); |
| 1466 | if (ret) { | ||
| 1467 | btrfs_abort_transaction(trans, root, ret); | ||
| 1468 | goto cleanup_transaction; | ||
| 1469 | } | ||
| 1329 | 1470 | ||
| 1330 | if (cur_trans->aborted) | 1471 | if (cur_trans->aborted) { |
| 1472 | ret = cur_trans->aborted; | ||
| 1331 | goto cleanup_transaction; | 1473 | goto cleanup_transaction; |
| 1474 | } | ||
| 1332 | 1475 | ||
| 1333 | /* make a pass through all the delayed refs we have so far | 1476 | /* make a pass through all the delayed refs we have so far |
| 1334 | * any runnings procs may add more while we are here | 1477 | * any runnings procs may add more while we are here |
| @@ -1348,6 +1491,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1348 | */ | 1491 | */ |
| 1349 | cur_trans->delayed_refs.flushing = 1; | 1492 | cur_trans->delayed_refs.flushing = 1; |
| 1350 | 1493 | ||
| 1494 | if (!list_empty(&trans->new_bgs)) | ||
| 1495 | btrfs_create_pending_block_groups(trans, root); | ||
| 1496 | |||
| 1351 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1497 | ret = btrfs_run_delayed_refs(trans, root, 0); |
| 1352 | if (ret) | 1498 | if (ret) |
| 1353 | goto cleanup_transaction; | 1499 | goto cleanup_transaction; |
| @@ -1393,39 +1539,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1393 | should_grow = 1; | 1539 | should_grow = 1; |
| 1394 | 1540 | ||
| 1395 | do { | 1541 | do { |
| 1396 | int snap_pending = 0; | ||
| 1397 | |||
| 1398 | joined = cur_trans->num_joined; | 1542 | joined = cur_trans->num_joined; |
| 1399 | if (!list_empty(&trans->transaction->pending_snapshots)) | ||
| 1400 | snap_pending = 1; | ||
| 1401 | 1543 | ||
| 1402 | WARN_ON(cur_trans != trans->transaction); | 1544 | WARN_ON(cur_trans != trans->transaction); |
| 1403 | 1545 | ||
| 1404 | if (flush_on_commit || snap_pending) { | 1546 | ret = btrfs_flush_all_pending_stuffs(trans, root); |
| 1405 | btrfs_start_delalloc_inodes(root, 1); | ||
| 1406 | btrfs_wait_ordered_extents(root, 0, 1); | ||
| 1407 | } | ||
| 1408 | |||
| 1409 | ret = btrfs_run_delayed_items(trans, root); | ||
| 1410 | if (ret) | 1547 | if (ret) |
| 1411 | goto cleanup_transaction; | 1548 | goto cleanup_transaction; |
| 1412 | 1549 | ||
| 1413 | /* | ||
| 1414 | * running the delayed items may have added new refs. account | ||
| 1415 | * them now so that they hinder processing of more delayed refs | ||
| 1416 | * as little as possible. | ||
| 1417 | */ | ||
| 1418 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
| 1419 | |||
| 1420 | /* | ||
| 1421 | * rename don't use btrfs_join_transaction, so, once we | ||
| 1422 | * set the transaction to blocked above, we aren't going | ||
| 1423 | * to get any new ordered operations. We can safely run | ||
| 1424 | * it here and no for sure that nothing new will be added | ||
| 1425 | * to the list | ||
| 1426 | */ | ||
| 1427 | btrfs_run_ordered_operations(root, 1); | ||
| 1428 | |||
| 1429 | prepare_to_wait(&cur_trans->writer_wait, &wait, | 1550 | prepare_to_wait(&cur_trans->writer_wait, &wait, |
| 1430 | TASK_UNINTERRUPTIBLE); | 1551 | TASK_UNINTERRUPTIBLE); |
| 1431 | 1552 | ||
| @@ -1438,6 +1559,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1438 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1559 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
| 1439 | (should_grow && cur_trans->num_joined != joined)); | 1560 | (should_grow && cur_trans->num_joined != joined)); |
| 1440 | 1561 | ||
| 1562 | ret = btrfs_flush_all_pending_stuffs(trans, root); | ||
| 1563 | if (ret) | ||
| 1564 | goto cleanup_transaction; | ||
| 1565 | |||
| 1441 | /* | 1566 | /* |
| 1442 | * Ok now we need to make sure to block out any other joins while we | 1567 | * Ok now we need to make sure to block out any other joins while we |
| 1443 | * commit the transaction. We could have started a join before setting | 1568 | * commit the transaction. We could have started a join before setting |
| @@ -1456,13 +1581,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1456 | */ | 1581 | */ |
| 1457 | mutex_lock(&root->fs_info->reloc_mutex); | 1582 | mutex_lock(&root->fs_info->reloc_mutex); |
| 1458 | 1583 | ||
| 1459 | ret = btrfs_run_delayed_items(trans, root); | 1584 | /* |
| 1585 | * We needn't worry about the delayed items because we will | ||
| 1586 | * deal with them in create_pending_snapshot(), which is the | ||
| 1587 | * core function of the snapshot creation. | ||
| 1588 | */ | ||
| 1589 | ret = create_pending_snapshots(trans, root->fs_info); | ||
| 1460 | if (ret) { | 1590 | if (ret) { |
| 1461 | mutex_unlock(&root->fs_info->reloc_mutex); | 1591 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1462 | goto cleanup_transaction; | 1592 | goto cleanup_transaction; |
| 1463 | } | 1593 | } |
| 1464 | 1594 | ||
| 1465 | ret = create_pending_snapshots(trans, root->fs_info); | 1595 | /* |
| 1596 | * We insert the dir indexes of the snapshots and update the inode | ||
| 1597 | * of the snapshots' parents after the snapshot creation, so there | ||
| 1598 | * are some delayed items which are not dealt with. Now deal with | ||
| 1599 | * them. | ||
| 1600 | * | ||
| 1601 | * We needn't worry that this operation will corrupt the snapshots, | ||
| 1602 | * because all the tree which are snapshoted will be forced to COW | ||
| 1603 | * the nodes and leaves. | ||
| 1604 | */ | ||
| 1605 | ret = btrfs_run_delayed_items(trans, root); | ||
| 1466 | if (ret) { | 1606 | if (ret) { |
| 1467 | mutex_unlock(&root->fs_info->reloc_mutex); | 1607 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 1468 | goto cleanup_transaction; | 1608 | goto cleanup_transaction; |
| @@ -1584,7 +1724,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1584 | put_transaction(cur_trans); | 1724 | put_transaction(cur_trans); |
| 1585 | put_transaction(cur_trans); | 1725 | put_transaction(cur_trans); |
| 1586 | 1726 | ||
| 1587 | sb_end_intwrite(root->fs_info->sb); | 1727 | if (trans->type < TRANS_JOIN_NOLOCK) |
| 1728 | sb_end_intwrite(root->fs_info->sb); | ||
| 1588 | 1729 | ||
| 1589 | trace_btrfs_transaction_commit(root); | 1730 | trace_btrfs_transaction_commit(root); |
| 1590 | 1731 | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e8b8416c688b..0e8aa1e6c287 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -47,6 +47,14 @@ struct btrfs_transaction { | |||
| 47 | int aborted; | 47 | int aborted; |
| 48 | }; | 48 | }; |
| 49 | 49 | ||
| 50 | enum btrfs_trans_type { | ||
| 51 | TRANS_START, | ||
| 52 | TRANS_JOIN, | ||
| 53 | TRANS_USERSPACE, | ||
| 54 | TRANS_JOIN_NOLOCK, | ||
| 55 | TRANS_ATTACH, | ||
| 56 | }; | ||
| 57 | |||
| 50 | struct btrfs_trans_handle { | 58 | struct btrfs_trans_handle { |
| 51 | u64 transid; | 59 | u64 transid; |
| 52 | u64 bytes_reserved; | 60 | u64 bytes_reserved; |
| @@ -58,8 +66,9 @@ struct btrfs_trans_handle { | |||
| 58 | struct btrfs_transaction *transaction; | 66 | struct btrfs_transaction *transaction; |
| 59 | struct btrfs_block_rsv *block_rsv; | 67 | struct btrfs_block_rsv *block_rsv; |
| 60 | struct btrfs_block_rsv *orig_rsv; | 68 | struct btrfs_block_rsv *orig_rsv; |
| 61 | int aborted; | 69 | short aborted; |
| 62 | int adding_csums; | 70 | short adding_csums; |
| 71 | enum btrfs_trans_type type; | ||
| 63 | /* | 72 | /* |
| 64 | * this root is only needed to validate that the root passed to | 73 | * this root is only needed to validate that the root passed to |
| 65 | * start_transaction is the same as the one passed to end_transaction. | 74 | * start_transaction is the same as the one passed to end_transaction. |
| @@ -68,6 +77,7 @@ struct btrfs_trans_handle { | |||
| 68 | struct btrfs_root *root; | 77 | struct btrfs_root *root; |
| 69 | struct seq_list delayed_ref_elem; | 78 | struct seq_list delayed_ref_elem; |
| 70 | struct list_head qgroup_ref_list; | 79 | struct list_head qgroup_ref_list; |
| 80 | struct list_head new_bgs; | ||
| 71 | }; | 81 | }; |
| 72 | 82 | ||
| 73 | struct btrfs_pending_snapshot { | 83 | struct btrfs_pending_snapshot { |
| @@ -88,16 +98,18 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
| 88 | { | 98 | { |
| 89 | BTRFS_I(inode)->last_trans = trans->transaction->transid; | 99 | BTRFS_I(inode)->last_trans = trans->transaction->transid; |
| 90 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | 100 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
| 101 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; | ||
| 91 | } | 102 | } |
| 92 | 103 | ||
| 93 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 104 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| 94 | struct btrfs_root *root); | 105 | struct btrfs_root *root); |
| 95 | int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | ||
| 96 | struct btrfs_root *root); | ||
| 97 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 106 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 98 | int num_items); | 107 | int num_items); |
| 108 | struct btrfs_trans_handle *btrfs_start_transaction_lflush( | ||
| 109 | struct btrfs_root *root, int num_items); | ||
| 99 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); | 110 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); |
| 100 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); | 111 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); |
| 112 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); | ||
| 101 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); | 113 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); |
| 102 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | 114 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); |
| 103 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 115 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c86670f4f285..83186c7e45d4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -18,13 +18,16 @@ | |||
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
| 21 | #include <linux/list_sort.h> | ||
| 21 | #include "ctree.h" | 22 | #include "ctree.h" |
| 22 | #include "transaction.h" | 23 | #include "transaction.h" |
| 23 | #include "disk-io.h" | 24 | #include "disk-io.h" |
| 24 | #include "locking.h" | 25 | #include "locking.h" |
| 25 | #include "print-tree.h" | 26 | #include "print-tree.h" |
| 27 | #include "backref.h" | ||
| 26 | #include "compat.h" | 28 | #include "compat.h" |
| 27 | #include "tree-log.h" | 29 | #include "tree-log.h" |
| 30 | #include "hash.h" | ||
| 28 | 31 | ||
| 29 | /* magic values for the inode_only field in btrfs_log_inode: | 32 | /* magic values for the inode_only field in btrfs_log_inode: |
| 30 | * | 33 | * |
| @@ -146,7 +149,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 146 | root->log_multiple_pids = true; | 149 | root->log_multiple_pids = true; |
| 147 | } | 150 | } |
| 148 | 151 | ||
| 149 | root->log_batch++; | 152 | atomic_inc(&root->log_batch); |
| 150 | atomic_inc(&root->log_writers); | 153 | atomic_inc(&root->log_writers); |
| 151 | mutex_unlock(&root->log_mutex); | 154 | mutex_unlock(&root->log_mutex); |
| 152 | return 0; | 155 | return 0; |
| @@ -165,7 +168,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 165 | err = ret; | 168 | err = ret; |
| 166 | } | 169 | } |
| 167 | mutex_unlock(&root->fs_info->tree_log_mutex); | 170 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 168 | root->log_batch++; | 171 | atomic_inc(&root->log_batch); |
| 169 | atomic_inc(&root->log_writers); | 172 | atomic_inc(&root->log_writers); |
| 170 | mutex_unlock(&root->log_mutex); | 173 | mutex_unlock(&root->log_mutex); |
| 171 | return err; | 174 | return err; |
| @@ -484,7 +487,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 484 | int found_type; | 487 | int found_type; |
| 485 | u64 mask = root->sectorsize - 1; | 488 | u64 mask = root->sectorsize - 1; |
| 486 | u64 extent_end; | 489 | u64 extent_end; |
| 487 | u64 alloc_hint; | ||
| 488 | u64 start = key->offset; | 490 | u64 start = key->offset; |
| 489 | u64 saved_nbytes; | 491 | u64 saved_nbytes; |
| 490 | struct btrfs_file_extent_item *item; | 492 | struct btrfs_file_extent_item *item; |
| @@ -550,8 +552,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 550 | 552 | ||
| 551 | saved_nbytes = inode_get_bytes(inode); | 553 | saved_nbytes = inode_get_bytes(inode); |
| 552 | /* drop any overlapping extents */ | 554 | /* drop any overlapping extents */ |
| 553 | ret = btrfs_drop_extents(trans, inode, start, extent_end, | 555 | ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); |
| 554 | &alloc_hint, 1); | ||
| 555 | BUG_ON(ret); | 556 | BUG_ON(ret); |
| 556 | 557 | ||
| 557 | if (found_type == BTRFS_FILE_EXTENT_REG || | 558 | if (found_type == BTRFS_FILE_EXTENT_REG || |
| @@ -744,6 +745,7 @@ out: | |||
| 744 | */ | 745 | */ |
| 745 | static noinline int backref_in_log(struct btrfs_root *log, | 746 | static noinline int backref_in_log(struct btrfs_root *log, |
| 746 | struct btrfs_key *key, | 747 | struct btrfs_key *key, |
| 748 | u64 ref_objectid, | ||
| 747 | char *name, int namelen) | 749 | char *name, int namelen) |
| 748 | { | 750 | { |
| 749 | struct btrfs_path *path; | 751 | struct btrfs_path *path; |
| @@ -764,8 +766,17 @@ static noinline int backref_in_log(struct btrfs_root *log, | |||
| 764 | if (ret != 0) | 766 | if (ret != 0) |
| 765 | goto out; | 767 | goto out; |
| 766 | 768 | ||
| 767 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
| 768 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); | 769 | ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); |
| 770 | |||
| 771 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
| 772 | if (btrfs_find_name_in_ext_backref(path, ref_objectid, | ||
| 773 | name, namelen, NULL)) | ||
| 774 | match = 1; | ||
| 775 | |||
| 776 | goto out; | ||
| 777 | } | ||
| 778 | |||
| 779 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); | ||
| 769 | ptr_end = ptr + item_size; | 780 | ptr_end = ptr + item_size; |
| 770 | while (ptr < ptr_end) { | 781 | while (ptr < ptr_end) { |
| 771 | ref = (struct btrfs_inode_ref *)ptr; | 782 | ref = (struct btrfs_inode_ref *)ptr; |
| @@ -786,91 +797,42 @@ out: | |||
| 786 | return match; | 797 | return match; |
| 787 | } | 798 | } |
| 788 | 799 | ||
| 789 | 800 | static inline int __add_inode_ref(struct btrfs_trans_handle *trans, | |
| 790 | /* | ||
| 791 | * replay one inode back reference item found in the log tree. | ||
| 792 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
| 793 | * root is the destination we are replaying into, and path is for temp | ||
| 794 | * use by this function. (it should be released on return). | ||
| 795 | */ | ||
| 796 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
| 797 | struct btrfs_root *root, | 801 | struct btrfs_root *root, |
| 798 | struct btrfs_root *log, | ||
| 799 | struct btrfs_path *path, | 802 | struct btrfs_path *path, |
| 800 | struct extent_buffer *eb, int slot, | 803 | struct btrfs_root *log_root, |
| 801 | struct btrfs_key *key) | 804 | struct inode *dir, struct inode *inode, |
| 805 | struct extent_buffer *eb, | ||
| 806 | u64 inode_objectid, u64 parent_objectid, | ||
| 807 | u64 ref_index, char *name, int namelen, | ||
| 808 | int *search_done) | ||
| 802 | { | 809 | { |
| 803 | struct btrfs_inode_ref *ref; | ||
| 804 | struct btrfs_dir_item *di; | ||
| 805 | struct inode *dir; | ||
| 806 | struct inode *inode; | ||
| 807 | unsigned long ref_ptr; | ||
| 808 | unsigned long ref_end; | ||
| 809 | char *name; | ||
| 810 | int namelen; | ||
| 811 | int ret; | 810 | int ret; |
| 812 | int search_done = 0; | 811 | char *victim_name; |
| 813 | 812 | int victim_name_len; | |
| 814 | /* | 813 | struct extent_buffer *leaf; |
| 815 | * it is possible that we didn't log all the parent directories | 814 | struct btrfs_dir_item *di; |
| 816 | * for a given inode. If we don't find the dir, just don't | 815 | struct btrfs_key search_key; |
| 817 | * copy the back ref in. The link count fixup code will take | 816 | struct btrfs_inode_extref *extref; |
| 818 | * care of the rest | ||
| 819 | */ | ||
| 820 | dir = read_one_inode(root, key->offset); | ||
| 821 | if (!dir) | ||
| 822 | return -ENOENT; | ||
| 823 | |||
| 824 | inode = read_one_inode(root, key->objectid); | ||
| 825 | if (!inode) { | ||
| 826 | iput(dir); | ||
| 827 | return -EIO; | ||
| 828 | } | ||
| 829 | |||
| 830 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 831 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
| 832 | 817 | ||
| 833 | again: | 818 | again: |
| 834 | ref = (struct btrfs_inode_ref *)ref_ptr; | 819 | /* Search old style refs */ |
| 835 | 820 | search_key.objectid = inode_objectid; | |
| 836 | namelen = btrfs_inode_ref_name_len(eb, ref); | 821 | search_key.type = BTRFS_INODE_REF_KEY; |
| 837 | name = kmalloc(namelen, GFP_NOFS); | 822 | search_key.offset = parent_objectid; |
| 838 | BUG_ON(!name); | 823 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); |
| 839 | |||
| 840 | read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); | ||
| 841 | |||
| 842 | /* if we already have a perfect match, we're done */ | ||
| 843 | if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
| 844 | btrfs_inode_ref_index(eb, ref), | ||
| 845 | name, namelen)) { | ||
| 846 | goto out; | ||
| 847 | } | ||
| 848 | |||
| 849 | /* | ||
| 850 | * look for a conflicting back reference in the metadata. | ||
| 851 | * if we find one we have to unlink that name of the file | ||
| 852 | * before we add our new link. Later on, we overwrite any | ||
| 853 | * existing back reference, and we don't want to create | ||
| 854 | * dangling pointers in the directory. | ||
| 855 | */ | ||
| 856 | |||
| 857 | if (search_done) | ||
| 858 | goto insert; | ||
| 859 | |||
| 860 | ret = btrfs_search_slot(NULL, root, key, path, 0, 0); | ||
| 861 | if (ret == 0) { | 824 | if (ret == 0) { |
| 862 | char *victim_name; | ||
| 863 | int victim_name_len; | ||
| 864 | struct btrfs_inode_ref *victim_ref; | 825 | struct btrfs_inode_ref *victim_ref; |
| 865 | unsigned long ptr; | 826 | unsigned long ptr; |
| 866 | unsigned long ptr_end; | 827 | unsigned long ptr_end; |
| 867 | struct extent_buffer *leaf = path->nodes[0]; | 828 | |
| 829 | leaf = path->nodes[0]; | ||
| 868 | 830 | ||
| 869 | /* are we trying to overwrite a back ref for the root directory | 831 | /* are we trying to overwrite a back ref for the root directory |
| 870 | * if so, just jump out, we're done | 832 | * if so, just jump out, we're done |
| 871 | */ | 833 | */ |
| 872 | if (key->objectid == key->offset) | 834 | if (search_key.objectid == search_key.offset) |
| 873 | goto out_nowrite; | 835 | return 1; |
| 874 | 836 | ||
| 875 | /* check all the names in this back reference to see | 837 | /* check all the names in this back reference to see |
| 876 | * if they are in the log. if so, we allow them to stay | 838 | * if they are in the log. if so, we allow them to stay |
| @@ -889,7 +851,9 @@ again: | |||
| 889 | (unsigned long)(victim_ref + 1), | 851 | (unsigned long)(victim_ref + 1), |
| 890 | victim_name_len); | 852 | victim_name_len); |
| 891 | 853 | ||
| 892 | if (!backref_in_log(log, key, victim_name, | 854 | if (!backref_in_log(log_root, &search_key, |
| 855 | parent_objectid, | ||
| 856 | victim_name, | ||
| 893 | victim_name_len)) { | 857 | victim_name_len)) { |
| 894 | btrfs_inc_nlink(inode); | 858 | btrfs_inc_nlink(inode); |
| 895 | btrfs_release_path(path); | 859 | btrfs_release_path(path); |
| @@ -897,9 +861,14 @@ again: | |||
| 897 | ret = btrfs_unlink_inode(trans, root, dir, | 861 | ret = btrfs_unlink_inode(trans, root, dir, |
| 898 | inode, victim_name, | 862 | inode, victim_name, |
| 899 | victim_name_len); | 863 | victim_name_len); |
| 864 | BUG_ON(ret); | ||
| 900 | btrfs_run_delayed_items(trans, root); | 865 | btrfs_run_delayed_items(trans, root); |
| 866 | kfree(victim_name); | ||
| 867 | *search_done = 1; | ||
| 868 | goto again; | ||
| 901 | } | 869 | } |
| 902 | kfree(victim_name); | 870 | kfree(victim_name); |
| 871 | |||
| 903 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; | 872 | ptr = (unsigned long)(victim_ref + 1) + victim_name_len; |
| 904 | } | 873 | } |
| 905 | BUG_ON(ret); | 874 | BUG_ON(ret); |
| @@ -908,14 +877,78 @@ again: | |||
| 908 | * NOTE: we have searched root tree and checked the | 877 | * NOTE: we have searched root tree and checked the |
| 909 | * coresponding ref, it does not need to check again. | 878 | * coresponding ref, it does not need to check again. |
| 910 | */ | 879 | */ |
| 911 | search_done = 1; | 880 | *search_done = 1; |
| 881 | } | ||
| 882 | btrfs_release_path(path); | ||
| 883 | |||
| 884 | /* Same search but for extended refs */ | ||
| 885 | extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen, | ||
| 886 | inode_objectid, parent_objectid, 0, | ||
| 887 | 0); | ||
| 888 | if (!IS_ERR_OR_NULL(extref)) { | ||
| 889 | u32 item_size; | ||
| 890 | u32 cur_offset = 0; | ||
| 891 | unsigned long base; | ||
| 892 | struct inode *victim_parent; | ||
| 893 | |||
| 894 | leaf = path->nodes[0]; | ||
| 895 | |||
| 896 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 897 | base = btrfs_item_ptr_offset(leaf, path->slots[0]); | ||
| 898 | |||
| 899 | while (cur_offset < item_size) { | ||
| 900 | extref = (struct btrfs_inode_extref *)base + cur_offset; | ||
| 901 | |||
| 902 | victim_name_len = btrfs_inode_extref_name_len(leaf, extref); | ||
| 903 | |||
| 904 | if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) | ||
| 905 | goto next; | ||
| 906 | |||
| 907 | victim_name = kmalloc(victim_name_len, GFP_NOFS); | ||
| 908 | read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, | ||
| 909 | victim_name_len); | ||
| 910 | |||
| 911 | search_key.objectid = inode_objectid; | ||
| 912 | search_key.type = BTRFS_INODE_EXTREF_KEY; | ||
| 913 | search_key.offset = btrfs_extref_hash(parent_objectid, | ||
| 914 | victim_name, | ||
| 915 | victim_name_len); | ||
| 916 | ret = 0; | ||
| 917 | if (!backref_in_log(log_root, &search_key, | ||
| 918 | parent_objectid, victim_name, | ||
| 919 | victim_name_len)) { | ||
| 920 | ret = -ENOENT; | ||
| 921 | victim_parent = read_one_inode(root, | ||
| 922 | parent_objectid); | ||
| 923 | if (victim_parent) { | ||
| 924 | btrfs_inc_nlink(inode); | ||
| 925 | btrfs_release_path(path); | ||
| 926 | |||
| 927 | ret = btrfs_unlink_inode(trans, root, | ||
| 928 | victim_parent, | ||
| 929 | inode, | ||
| 930 | victim_name, | ||
| 931 | victim_name_len); | ||
| 932 | btrfs_run_delayed_items(trans, root); | ||
| 933 | } | ||
| 934 | BUG_ON(ret); | ||
| 935 | iput(victim_parent); | ||
| 936 | kfree(victim_name); | ||
| 937 | *search_done = 1; | ||
| 938 | goto again; | ||
| 939 | } | ||
| 940 | kfree(victim_name); | ||
| 941 | BUG_ON(ret); | ||
| 942 | next: | ||
| 943 | cur_offset += victim_name_len + sizeof(*extref); | ||
| 944 | } | ||
| 945 | *search_done = 1; | ||
| 912 | } | 946 | } |
| 913 | btrfs_release_path(path); | 947 | btrfs_release_path(path); |
| 914 | 948 | ||
| 915 | /* look for a conflicting sequence number */ | 949 | /* look for a conflicting sequence number */ |
| 916 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), | 950 | di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), |
| 917 | btrfs_inode_ref_index(eb, ref), | 951 | ref_index, name, namelen, 0); |
| 918 | name, namelen, 0); | ||
| 919 | if (di && !IS_ERR(di)) { | 952 | if (di && !IS_ERR(di)) { |
| 920 | ret = drop_one_dir_item(trans, root, path, dir, di); | 953 | ret = drop_one_dir_item(trans, root, path, dir, di); |
| 921 | BUG_ON(ret); | 954 | BUG_ON(ret); |
| @@ -931,25 +964,173 @@ again: | |||
| 931 | } | 964 | } |
| 932 | btrfs_release_path(path); | 965 | btrfs_release_path(path); |
| 933 | 966 | ||
| 934 | insert: | 967 | return 0; |
| 935 | /* insert our name */ | 968 | } |
| 936 | ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, | ||
| 937 | btrfs_inode_ref_index(eb, ref)); | ||
| 938 | BUG_ON(ret); | ||
| 939 | 969 | ||
| 940 | btrfs_update_inode(trans, root, inode); | 970 | static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, |
| 971 | u32 *namelen, char **name, u64 *index, | ||
| 972 | u64 *parent_objectid) | ||
| 973 | { | ||
| 974 | struct btrfs_inode_extref *extref; | ||
| 941 | 975 | ||
| 942 | out: | 976 | extref = (struct btrfs_inode_extref *)ref_ptr; |
| 943 | ref_ptr = (unsigned long)(ref + 1) + namelen; | 977 | |
| 944 | kfree(name); | 978 | *namelen = btrfs_inode_extref_name_len(eb, extref); |
| 945 | if (ref_ptr < ref_end) | 979 | *name = kmalloc(*namelen, GFP_NOFS); |
| 946 | goto again; | 980 | if (*name == NULL) |
| 981 | return -ENOMEM; | ||
| 982 | |||
| 983 | read_extent_buffer(eb, *name, (unsigned long)&extref->name, | ||
| 984 | *namelen); | ||
| 985 | |||
| 986 | *index = btrfs_inode_extref_index(eb, extref); | ||
| 987 | if (parent_objectid) | ||
| 988 | *parent_objectid = btrfs_inode_extref_parent(eb, extref); | ||
| 989 | |||
| 990 | return 0; | ||
| 991 | } | ||
| 992 | |||
| 993 | static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, | ||
| 994 | u32 *namelen, char **name, u64 *index) | ||
| 995 | { | ||
| 996 | struct btrfs_inode_ref *ref; | ||
| 997 | |||
| 998 | ref = (struct btrfs_inode_ref *)ref_ptr; | ||
| 999 | |||
| 1000 | *namelen = btrfs_inode_ref_name_len(eb, ref); | ||
| 1001 | *name = kmalloc(*namelen, GFP_NOFS); | ||
| 1002 | if (*name == NULL) | ||
| 1003 | return -ENOMEM; | ||
| 1004 | |||
| 1005 | read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen); | ||
| 1006 | |||
| 1007 | *index = btrfs_inode_ref_index(eb, ref); | ||
| 1008 | |||
| 1009 | return 0; | ||
| 1010 | } | ||
| 1011 | |||
| 1012 | /* | ||
| 1013 | * replay one inode back reference item found in the log tree. | ||
| 1014 | * eb, slot and key refer to the buffer and key found in the log tree. | ||
| 1015 | * root is the destination we are replaying into, and path is for temp | ||
| 1016 | * use by this function. (it should be released on return). | ||
| 1017 | */ | ||
| 1018 | static noinline int add_inode_ref(struct btrfs_trans_handle *trans, | ||
| 1019 | struct btrfs_root *root, | ||
| 1020 | struct btrfs_root *log, | ||
| 1021 | struct btrfs_path *path, | ||
| 1022 | struct extent_buffer *eb, int slot, | ||
| 1023 | struct btrfs_key *key) | ||
| 1024 | { | ||
| 1025 | struct inode *dir; | ||
| 1026 | struct inode *inode; | ||
| 1027 | unsigned long ref_ptr; | ||
| 1028 | unsigned long ref_end; | ||
| 1029 | char *name; | ||
| 1030 | int namelen; | ||
| 1031 | int ret; | ||
| 1032 | int search_done = 0; | ||
| 1033 | int log_ref_ver = 0; | ||
| 1034 | u64 parent_objectid; | ||
| 1035 | u64 inode_objectid; | ||
| 1036 | u64 ref_index = 0; | ||
| 1037 | int ref_struct_size; | ||
| 1038 | |||
| 1039 | ref_ptr = btrfs_item_ptr_offset(eb, slot); | ||
| 1040 | ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); | ||
| 1041 | |||
| 1042 | if (key->type == BTRFS_INODE_EXTREF_KEY) { | ||
| 1043 | struct btrfs_inode_extref *r; | ||
| 1044 | |||
| 1045 | ref_struct_size = sizeof(struct btrfs_inode_extref); | ||
| 1046 | log_ref_ver = 1; | ||
| 1047 | r = (struct btrfs_inode_extref *)ref_ptr; | ||
| 1048 | parent_objectid = btrfs_inode_extref_parent(eb, r); | ||
| 1049 | } else { | ||
| 1050 | ref_struct_size = sizeof(struct btrfs_inode_ref); | ||
| 1051 | parent_objectid = key->offset; | ||
| 1052 | } | ||
| 1053 | inode_objectid = key->objectid; | ||
| 1054 | |||
| 1055 | /* | ||
| 1056 | * it is possible that we didn't log all the parent directories | ||
| 1057 | * for a given inode. If we don't find the dir, just don't | ||
| 1058 | * copy the back ref in. The link count fixup code will take | ||
| 1059 | * care of the rest | ||
| 1060 | */ | ||
| 1061 | dir = read_one_inode(root, parent_objectid); | ||
| 1062 | if (!dir) | ||
| 1063 | return -ENOENT; | ||
| 1064 | |||
| 1065 | inode = read_one_inode(root, inode_objectid); | ||
| 1066 | if (!inode) { | ||
| 1067 | iput(dir); | ||
| 1068 | return -EIO; | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | while (ref_ptr < ref_end) { | ||
| 1072 | if (log_ref_ver) { | ||
| 1073 | ret = extref_get_fields(eb, ref_ptr, &namelen, &name, | ||
| 1074 | &ref_index, &parent_objectid); | ||
| 1075 | /* | ||
| 1076 | * parent object can change from one array | ||
| 1077 | * item to another. | ||
| 1078 | */ | ||
| 1079 | if (!dir) | ||
| 1080 | dir = read_one_inode(root, parent_objectid); | ||
| 1081 | if (!dir) | ||
| 1082 | return -ENOENT; | ||
| 1083 | } else { | ||
| 1084 | ret = ref_get_fields(eb, ref_ptr, &namelen, &name, | ||
| 1085 | &ref_index); | ||
| 1086 | } | ||
| 1087 | if (ret) | ||
| 1088 | return ret; | ||
| 1089 | |||
| 1090 | /* if we already have a perfect match, we're done */ | ||
| 1091 | if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), | ||
| 1092 | ref_index, name, namelen)) { | ||
| 1093 | /* | ||
| 1094 | * look for a conflicting back reference in the | ||
| 1095 | * metadata. if we find one we have to unlink that name | ||
| 1096 | * of the file before we add our new link. Later on, we | ||
| 1097 | * overwrite any existing back reference, and we don't | ||
| 1098 | * want to create dangling pointers in the directory. | ||
| 1099 | */ | ||
| 1100 | |||
| 1101 | if (!search_done) { | ||
| 1102 | ret = __add_inode_ref(trans, root, path, log, | ||
| 1103 | dir, inode, eb, | ||
| 1104 | inode_objectid, | ||
| 1105 | parent_objectid, | ||
| 1106 | ref_index, name, namelen, | ||
| 1107 | &search_done); | ||
| 1108 | if (ret == 1) | ||
| 1109 | goto out; | ||
| 1110 | BUG_ON(ret); | ||
| 1111 | } | ||
| 1112 | |||
| 1113 | /* insert our name */ | ||
| 1114 | ret = btrfs_add_link(trans, dir, inode, name, namelen, | ||
| 1115 | 0, ref_index); | ||
| 1116 | BUG_ON(ret); | ||
| 1117 | |||
| 1118 | btrfs_update_inode(trans, root, inode); | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; | ||
| 1122 | kfree(name); | ||
| 1123 | if (log_ref_ver) { | ||
| 1124 | iput(dir); | ||
| 1125 | dir = NULL; | ||
| 1126 | } | ||
| 1127 | } | ||
| 947 | 1128 | ||
| 948 | /* finally write the back reference in the inode */ | 1129 | /* finally write the back reference in the inode */ |
| 949 | ret = overwrite_item(trans, root, path, eb, slot, key); | 1130 | ret = overwrite_item(trans, root, path, eb, slot, key); |
| 950 | BUG_ON(ret); | 1131 | BUG_ON(ret); |
| 951 | 1132 | ||
| 952 | out_nowrite: | 1133 | out: |
| 953 | btrfs_release_path(path); | 1134 | btrfs_release_path(path); |
| 954 | iput(dir); | 1135 | iput(dir); |
| 955 | iput(inode); | 1136 | iput(inode); |
| @@ -966,25 +1147,55 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans, | |||
| 966 | return ret; | 1147 | return ret; |
| 967 | } | 1148 | } |
| 968 | 1149 | ||
| 1150 | static int count_inode_extrefs(struct btrfs_root *root, | ||
| 1151 | struct inode *inode, struct btrfs_path *path) | ||
| 1152 | { | ||
| 1153 | int ret = 0; | ||
| 1154 | int name_len; | ||
| 1155 | unsigned int nlink = 0; | ||
| 1156 | u32 item_size; | ||
| 1157 | u32 cur_offset = 0; | ||
| 1158 | u64 inode_objectid = btrfs_ino(inode); | ||
| 1159 | u64 offset = 0; | ||
| 1160 | unsigned long ptr; | ||
| 1161 | struct btrfs_inode_extref *extref; | ||
| 1162 | struct extent_buffer *leaf; | ||
| 969 | 1163 | ||
| 970 | /* | 1164 | while (1) { |
| 971 | * There are a few corners where the link count of the file can't | 1165 | ret = btrfs_find_one_extref(root, inode_objectid, offset, path, |
| 972 | * be properly maintained during replay. So, instead of adding | 1166 | &extref, &offset); |
| 973 | * lots of complexity to the log code, we just scan the backrefs | 1167 | if (ret) |
| 974 | * for any file that has been through replay. | 1168 | break; |
| 975 | * | 1169 | |
| 976 | * The scan will update the link count on the inode to reflect the | 1170 | leaf = path->nodes[0]; |
| 977 | * number of back refs found. If it goes down to zero, the iput | 1171 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
| 978 | * will free the inode. | 1172 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); |
| 979 | */ | 1173 | |
| 980 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | 1174 | while (cur_offset < item_size) { |
| 981 | struct btrfs_root *root, | 1175 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); |
| 982 | struct inode *inode) | 1176 | name_len = btrfs_inode_extref_name_len(leaf, extref); |
| 1177 | |||
| 1178 | nlink++; | ||
| 1179 | |||
| 1180 | cur_offset += name_len + sizeof(*extref); | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | offset++; | ||
| 1184 | btrfs_release_path(path); | ||
| 1185 | } | ||
| 1186 | btrfs_release_path(path); | ||
| 1187 | |||
| 1188 | if (ret < 0) | ||
| 1189 | return ret; | ||
| 1190 | return nlink; | ||
| 1191 | } | ||
| 1192 | |||
| 1193 | static int count_inode_refs(struct btrfs_root *root, | ||
| 1194 | struct inode *inode, struct btrfs_path *path) | ||
| 983 | { | 1195 | { |
| 984 | struct btrfs_path *path; | ||
| 985 | int ret; | 1196 | int ret; |
| 986 | struct btrfs_key key; | 1197 | struct btrfs_key key; |
| 987 | u64 nlink = 0; | 1198 | unsigned int nlink = 0; |
| 988 | unsigned long ptr; | 1199 | unsigned long ptr; |
| 989 | unsigned long ptr_end; | 1200 | unsigned long ptr_end; |
| 990 | int name_len; | 1201 | int name_len; |
| @@ -994,10 +1205,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 994 | key.type = BTRFS_INODE_REF_KEY; | 1205 | key.type = BTRFS_INODE_REF_KEY; |
| 995 | key.offset = (u64)-1; | 1206 | key.offset = (u64)-1; |
| 996 | 1207 | ||
| 997 | path = btrfs_alloc_path(); | ||
| 998 | if (!path) | ||
| 999 | return -ENOMEM; | ||
| 1000 | |||
| 1001 | while (1) { | 1208 | while (1) { |
| 1002 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 1209 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 1003 | if (ret < 0) | 1210 | if (ret < 0) |
| @@ -1031,6 +1238,50 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 1031 | btrfs_release_path(path); | 1238 | btrfs_release_path(path); |
| 1032 | } | 1239 | } |
| 1033 | btrfs_release_path(path); | 1240 | btrfs_release_path(path); |
| 1241 | |||
| 1242 | return nlink; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | /* | ||
| 1246 | * There are a few corners where the link count of the file can't | ||
| 1247 | * be properly maintained during replay. So, instead of adding | ||
| 1248 | * lots of complexity to the log code, we just scan the backrefs | ||
| 1249 | * for any file that has been through replay. | ||
| 1250 | * | ||
| 1251 | * The scan will update the link count on the inode to reflect the | ||
| 1252 | * number of back refs found. If it goes down to zero, the iput | ||
| 1253 | * will free the inode. | ||
| 1254 | */ | ||
| 1255 | static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | ||
| 1256 | struct btrfs_root *root, | ||
| 1257 | struct inode *inode) | ||
| 1258 | { | ||
| 1259 | struct btrfs_path *path; | ||
| 1260 | int ret; | ||
| 1261 | u64 nlink = 0; | ||
| 1262 | u64 ino = btrfs_ino(inode); | ||
| 1263 | |||
| 1264 | path = btrfs_alloc_path(); | ||
| 1265 | if (!path) | ||
| 1266 | return -ENOMEM; | ||
| 1267 | |||
| 1268 | ret = count_inode_refs(root, inode, path); | ||
| 1269 | if (ret < 0) | ||
| 1270 | goto out; | ||
| 1271 | |||
| 1272 | nlink = ret; | ||
| 1273 | |||
| 1274 | ret = count_inode_extrefs(root, inode, path); | ||
| 1275 | if (ret == -ENOENT) | ||
| 1276 | ret = 0; | ||
| 1277 | |||
| 1278 | if (ret < 0) | ||
| 1279 | goto out; | ||
| 1280 | |||
| 1281 | nlink += ret; | ||
| 1282 | |||
| 1283 | ret = 0; | ||
| 1284 | |||
| 1034 | if (nlink != inode->i_nlink) { | 1285 | if (nlink != inode->i_nlink) { |
| 1035 | set_nlink(inode, nlink); | 1286 | set_nlink(inode, nlink); |
| 1036 | btrfs_update_inode(trans, root, inode); | 1287 | btrfs_update_inode(trans, root, inode); |
| @@ -1046,9 +1297,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
| 1046 | ret = insert_orphan_item(trans, root, ino); | 1297 | ret = insert_orphan_item(trans, root, ino); |
| 1047 | BUG_ON(ret); | 1298 | BUG_ON(ret); |
| 1048 | } | 1299 | } |
| 1049 | btrfs_free_path(path); | ||
| 1050 | 1300 | ||
| 1051 | return 0; | 1301 | out: |
| 1302 | btrfs_free_path(path); | ||
| 1303 | return ret; | ||
| 1052 | } | 1304 | } |
| 1053 | 1305 | ||
| 1054 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | 1306 | static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, |
| @@ -1695,6 +1947,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
| 1695 | ret = add_inode_ref(wc->trans, root, log, path, | 1947 | ret = add_inode_ref(wc->trans, root, log, path, |
| 1696 | eb, i, &key); | 1948 | eb, i, &key); |
| 1697 | BUG_ON(ret && ret != -ENOENT); | 1949 | BUG_ON(ret && ret != -ENOENT); |
| 1950 | } else if (key.type == BTRFS_INODE_EXTREF_KEY) { | ||
| 1951 | ret = add_inode_ref(wc->trans, root, log, path, | ||
| 1952 | eb, i, &key); | ||
| 1953 | BUG_ON(ret && ret != -ENOENT); | ||
| 1698 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { | 1954 | } else if (key.type == BTRFS_EXTENT_DATA_KEY) { |
| 1699 | ret = replay_one_extent(wc->trans, root, path, | 1955 | ret = replay_one_extent(wc->trans, root, path, |
| 1700 | eb, i, &key); | 1956 | eb, i, &key); |
| @@ -2037,7 +2293,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2037 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2293 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
| 2038 | wait_log_commit(trans, root, root->log_transid - 1); | 2294 | wait_log_commit(trans, root, root->log_transid - 1); |
| 2039 | while (1) { | 2295 | while (1) { |
| 2040 | unsigned long batch = root->log_batch; | 2296 | int batch = atomic_read(&root->log_batch); |
| 2041 | /* when we're on an ssd, just kick the log commit out */ | 2297 | /* when we're on an ssd, just kick the log commit out */ |
| 2042 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { | 2298 | if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { |
| 2043 | mutex_unlock(&root->log_mutex); | 2299 | mutex_unlock(&root->log_mutex); |
| @@ -2045,7 +2301,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2045 | mutex_lock(&root->log_mutex); | 2301 | mutex_lock(&root->log_mutex); |
| 2046 | } | 2302 | } |
| 2047 | wait_for_writer(trans, root); | 2303 | wait_for_writer(trans, root); |
| 2048 | if (batch == root->log_batch) | 2304 | if (batch == atomic_read(&root->log_batch)) |
| 2049 | break; | 2305 | break; |
| 2050 | } | 2306 | } |
| 2051 | 2307 | ||
| @@ -2074,7 +2330,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2074 | 2330 | ||
| 2075 | btrfs_set_root_node(&log->root_item, log->node); | 2331 | btrfs_set_root_node(&log->root_item, log->node); |
| 2076 | 2332 | ||
| 2077 | root->log_batch = 0; | ||
| 2078 | root->log_transid++; | 2333 | root->log_transid++; |
| 2079 | log->log_transid = root->log_transid; | 2334 | log->log_transid = root->log_transid; |
| 2080 | root->log_start_pid = 0; | 2335 | root->log_start_pid = 0; |
| @@ -2087,7 +2342,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2087 | mutex_unlock(&root->log_mutex); | 2342 | mutex_unlock(&root->log_mutex); |
| 2088 | 2343 | ||
| 2089 | mutex_lock(&log_root_tree->log_mutex); | 2344 | mutex_lock(&log_root_tree->log_mutex); |
| 2090 | log_root_tree->log_batch++; | 2345 | atomic_inc(&log_root_tree->log_batch); |
| 2091 | atomic_inc(&log_root_tree->log_writers); | 2346 | atomic_inc(&log_root_tree->log_writers); |
| 2092 | mutex_unlock(&log_root_tree->log_mutex); | 2347 | mutex_unlock(&log_root_tree->log_mutex); |
| 2093 | 2348 | ||
| @@ -2157,7 +2412,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2157 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, | 2412 | btrfs_set_super_log_root_level(root->fs_info->super_for_commit, |
| 2158 | btrfs_header_level(log_root_tree->node)); | 2413 | btrfs_header_level(log_root_tree->node)); |
| 2159 | 2414 | ||
| 2160 | log_root_tree->log_batch = 0; | ||
| 2161 | log_root_tree->log_transid++; | 2415 | log_root_tree->log_transid++; |
| 2162 | smp_mb(); | 2416 | smp_mb(); |
| 2163 | 2417 | ||
| @@ -2171,9 +2425,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2171 | * in and cause problems either. | 2425 | * in and cause problems either. |
| 2172 | */ | 2426 | */ |
| 2173 | btrfs_scrub_pause_super(root); | 2427 | btrfs_scrub_pause_super(root); |
| 2174 | write_ctree_super(trans, root->fs_info->tree_root, 1); | 2428 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
| 2175 | btrfs_scrub_continue_super(root); | 2429 | btrfs_scrub_continue_super(root); |
| 2176 | ret = 0; | 2430 | if (ret) { |
| 2431 | btrfs_abort_transaction(trans, root, ret); | ||
| 2432 | goto out_wake_log_root; | ||
| 2433 | } | ||
| 2177 | 2434 | ||
| 2178 | mutex_lock(&root->log_mutex); | 2435 | mutex_lock(&root->log_mutex); |
| 2179 | if (root->last_log_commit < log_transid) | 2436 | if (root->last_log_commit < log_transid) |
| @@ -2209,7 +2466,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans, | |||
| 2209 | 2466 | ||
| 2210 | while (1) { | 2467 | while (1) { |
| 2211 | ret = find_first_extent_bit(&log->dirty_log_pages, | 2468 | ret = find_first_extent_bit(&log->dirty_log_pages, |
| 2212 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | 2469 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW, |
| 2470 | NULL); | ||
| 2213 | if (ret) | 2471 | if (ret) |
| 2214 | break; | 2472 | break; |
| 2215 | 2473 | ||
| @@ -2646,6 +2904,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2646 | int ret; | 2904 | int ret; |
| 2647 | struct btrfs_key key; | 2905 | struct btrfs_key key; |
| 2648 | struct btrfs_key found_key; | 2906 | struct btrfs_key found_key; |
| 2907 | int start_slot; | ||
| 2649 | 2908 | ||
| 2650 | key.objectid = objectid; | 2909 | key.objectid = objectid; |
| 2651 | key.type = max_key_type; | 2910 | key.type = max_key_type; |
| @@ -2667,8 +2926,18 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2667 | if (found_key.objectid != objectid) | 2926 | if (found_key.objectid != objectid) |
| 2668 | break; | 2927 | break; |
| 2669 | 2928 | ||
| 2670 | ret = btrfs_del_item(trans, log, path); | 2929 | found_key.offset = 0; |
| 2671 | if (ret) | 2930 | found_key.type = 0; |
| 2931 | ret = btrfs_bin_search(path->nodes[0], &found_key, 0, | ||
| 2932 | &start_slot); | ||
| 2933 | |||
| 2934 | ret = btrfs_del_items(trans, log, path, start_slot, | ||
| 2935 | path->slots[0] - start_slot + 1); | ||
| 2936 | /* | ||
| 2937 | * If start slot isn't 0 then we don't need to re-search, we've | ||
| 2938 | * found the last guy with the objectid in this tree. | ||
| 2939 | */ | ||
| 2940 | if (ret || start_slot != 0) | ||
| 2672 | break; | 2941 | break; |
| 2673 | btrfs_release_path(path); | 2942 | btrfs_release_path(path); |
| 2674 | } | 2943 | } |
| @@ -2678,14 +2947,89 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2678 | return ret; | 2947 | return ret; |
| 2679 | } | 2948 | } |
| 2680 | 2949 | ||
| 2950 | static void fill_inode_item(struct btrfs_trans_handle *trans, | ||
| 2951 | struct extent_buffer *leaf, | ||
| 2952 | struct btrfs_inode_item *item, | ||
| 2953 | struct inode *inode, int log_inode_only) | ||
| 2954 | { | ||
| 2955 | struct btrfs_map_token token; | ||
| 2956 | |||
| 2957 | btrfs_init_map_token(&token); | ||
| 2958 | |||
| 2959 | if (log_inode_only) { | ||
| 2960 | /* set the generation to zero so the recover code | ||
| 2961 | * can tell the difference between an logging | ||
| 2962 | * just to say 'this inode exists' and a logging | ||
| 2963 | * to say 'update this inode with these values' | ||
| 2964 | */ | ||
| 2965 | btrfs_set_token_inode_generation(leaf, item, 0, &token); | ||
| 2966 | btrfs_set_token_inode_size(leaf, item, 0, &token); | ||
| 2967 | } else { | ||
| 2968 | btrfs_set_token_inode_generation(leaf, item, | ||
| 2969 | BTRFS_I(inode)->generation, | ||
| 2970 | &token); | ||
| 2971 | btrfs_set_token_inode_size(leaf, item, inode->i_size, &token); | ||
| 2972 | } | ||
| 2973 | |||
| 2974 | btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token); | ||
| 2975 | btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token); | ||
| 2976 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); | ||
| 2977 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); | ||
| 2978 | |||
| 2979 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), | ||
| 2980 | inode->i_atime.tv_sec, &token); | ||
| 2981 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), | ||
| 2982 | inode->i_atime.tv_nsec, &token); | ||
| 2983 | |||
| 2984 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), | ||
| 2985 | inode->i_mtime.tv_sec, &token); | ||
| 2986 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), | ||
| 2987 | inode->i_mtime.tv_nsec, &token); | ||
| 2988 | |||
| 2989 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), | ||
| 2990 | inode->i_ctime.tv_sec, &token); | ||
| 2991 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), | ||
| 2992 | inode->i_ctime.tv_nsec, &token); | ||
| 2993 | |||
| 2994 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), | ||
| 2995 | &token); | ||
| 2996 | |||
| 2997 | btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); | ||
| 2998 | btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); | ||
| 2999 | btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); | ||
| 3000 | btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); | ||
| 3001 | btrfs_set_token_inode_block_group(leaf, item, 0, &token); | ||
| 3002 | } | ||
| 3003 | |||
| 3004 | static int log_inode_item(struct btrfs_trans_handle *trans, | ||
| 3005 | struct btrfs_root *log, struct btrfs_path *path, | ||
| 3006 | struct inode *inode) | ||
| 3007 | { | ||
| 3008 | struct btrfs_inode_item *inode_item; | ||
| 3009 | struct btrfs_key key; | ||
| 3010 | int ret; | ||
| 3011 | |||
| 3012 | memcpy(&key, &BTRFS_I(inode)->location, sizeof(key)); | ||
| 3013 | ret = btrfs_insert_empty_item(trans, log, path, &key, | ||
| 3014 | sizeof(*inode_item)); | ||
| 3015 | if (ret && ret != -EEXIST) | ||
| 3016 | return ret; | ||
| 3017 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 3018 | struct btrfs_inode_item); | ||
| 3019 | fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); | ||
| 3020 | btrfs_release_path(path); | ||
| 3021 | return 0; | ||
| 3022 | } | ||
| 3023 | |||
| 2681 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 3024 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
| 2682 | struct btrfs_root *log, | 3025 | struct inode *inode, |
| 2683 | struct btrfs_path *dst_path, | 3026 | struct btrfs_path *dst_path, |
| 2684 | struct extent_buffer *src, | 3027 | struct extent_buffer *src, |
| 2685 | int start_slot, int nr, int inode_only) | 3028 | int start_slot, int nr, int inode_only) |
| 2686 | { | 3029 | { |
| 2687 | unsigned long src_offset; | 3030 | unsigned long src_offset; |
| 2688 | unsigned long dst_offset; | 3031 | unsigned long dst_offset; |
| 3032 | struct btrfs_root *log = BTRFS_I(inode)->root->log_root; | ||
| 2689 | struct btrfs_file_extent_item *extent; | 3033 | struct btrfs_file_extent_item *extent; |
| 2690 | struct btrfs_inode_item *inode_item; | 3034 | struct btrfs_inode_item *inode_item; |
| 2691 | int ret; | 3035 | int ret; |
| @@ -2694,6 +3038,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2694 | char *ins_data; | 3038 | char *ins_data; |
| 2695 | int i; | 3039 | int i; |
| 2696 | struct list_head ordered_sums; | 3040 | struct list_head ordered_sums; |
| 3041 | int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
| 2697 | 3042 | ||
| 2698 | INIT_LIST_HEAD(&ordered_sums); | 3043 | INIT_LIST_HEAD(&ordered_sums); |
| 2699 | 3044 | ||
| @@ -2722,29 +3067,23 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2722 | 3067 | ||
| 2723 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); | 3068 | src_offset = btrfs_item_ptr_offset(src, start_slot + i); |
| 2724 | 3069 | ||
| 2725 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, | 3070 | if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { |
| 2726 | src_offset, ins_sizes[i]); | ||
| 2727 | |||
| 2728 | if (inode_only == LOG_INODE_EXISTS && | ||
| 2729 | ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { | ||
| 2730 | inode_item = btrfs_item_ptr(dst_path->nodes[0], | 3071 | inode_item = btrfs_item_ptr(dst_path->nodes[0], |
| 2731 | dst_path->slots[0], | 3072 | dst_path->slots[0], |
| 2732 | struct btrfs_inode_item); | 3073 | struct btrfs_inode_item); |
| 2733 | btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0); | 3074 | fill_inode_item(trans, dst_path->nodes[0], inode_item, |
| 2734 | 3075 | inode, inode_only == LOG_INODE_EXISTS); | |
| 2735 | /* set the generation to zero so the recover code | 3076 | } else { |
| 2736 | * can tell the difference between an logging | 3077 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, |
| 2737 | * just to say 'this inode exists' and a logging | 3078 | src_offset, ins_sizes[i]); |
| 2738 | * to say 'update this inode with these values' | ||
| 2739 | */ | ||
| 2740 | btrfs_set_inode_generation(dst_path->nodes[0], | ||
| 2741 | inode_item, 0); | ||
| 2742 | } | 3079 | } |
| 3080 | |||
| 2743 | /* take a reference on file data extents so that truncates | 3081 | /* take a reference on file data extents so that truncates |
| 2744 | * or deletes of this inode don't have to relog the inode | 3082 | * or deletes of this inode don't have to relog the inode |
| 2745 | * again | 3083 | * again |
| 2746 | */ | 3084 | */ |
| 2747 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) { | 3085 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && |
| 3086 | !skip_csum) { | ||
| 2748 | int found_type; | 3087 | int found_type; |
| 2749 | extent = btrfs_item_ptr(src, start_slot + i, | 3088 | extent = btrfs_item_ptr(src, start_slot + i, |
| 2750 | struct btrfs_file_extent_item); | 3089 | struct btrfs_file_extent_item); |
| @@ -2753,8 +3092,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2753 | continue; | 3092 | continue; |
| 2754 | 3093 | ||
| 2755 | found_type = btrfs_file_extent_type(src, extent); | 3094 | found_type = btrfs_file_extent_type(src, extent); |
| 2756 | if (found_type == BTRFS_FILE_EXTENT_REG || | 3095 | if (found_type == BTRFS_FILE_EXTENT_REG) { |
| 2757 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 2758 | u64 ds, dl, cs, cl; | 3096 | u64 ds, dl, cs, cl; |
| 2759 | ds = btrfs_file_extent_disk_bytenr(src, | 3097 | ds = btrfs_file_extent_disk_bytenr(src, |
| 2760 | extent); | 3098 | extent); |
| @@ -2803,6 +3141,299 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2803 | return ret; | 3141 | return ret; |
| 2804 | } | 3142 | } |
| 2805 | 3143 | ||
| 3144 | static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
| 3145 | { | ||
| 3146 | struct extent_map *em1, *em2; | ||
| 3147 | |||
| 3148 | em1 = list_entry(a, struct extent_map, list); | ||
| 3149 | em2 = list_entry(b, struct extent_map, list); | ||
| 3150 | |||
| 3151 | if (em1->start < em2->start) | ||
| 3152 | return -1; | ||
| 3153 | else if (em1->start > em2->start) | ||
| 3154 | return 1; | ||
| 3155 | return 0; | ||
| 3156 | } | ||
| 3157 | |||
| 3158 | static int drop_adjacent_extents(struct btrfs_trans_handle *trans, | ||
| 3159 | struct btrfs_root *root, struct inode *inode, | ||
| 3160 | struct extent_map *em, | ||
| 3161 | struct btrfs_path *path) | ||
| 3162 | { | ||
| 3163 | struct btrfs_file_extent_item *fi; | ||
| 3164 | struct extent_buffer *leaf; | ||
| 3165 | struct btrfs_key key, new_key; | ||
| 3166 | struct btrfs_map_token token; | ||
| 3167 | u64 extent_end; | ||
| 3168 | u64 extent_offset = 0; | ||
| 3169 | int extent_type; | ||
| 3170 | int del_slot = 0; | ||
| 3171 | int del_nr = 0; | ||
| 3172 | int ret = 0; | ||
| 3173 | |||
| 3174 | while (1) { | ||
| 3175 | btrfs_init_map_token(&token); | ||
| 3176 | leaf = path->nodes[0]; | ||
| 3177 | path->slots[0]++; | ||
| 3178 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||
| 3179 | if (del_nr) { | ||
| 3180 | ret = btrfs_del_items(trans, root, path, | ||
| 3181 | del_slot, del_nr); | ||
| 3182 | if (ret) | ||
| 3183 | return ret; | ||
| 3184 | del_nr = 0; | ||
| 3185 | } | ||
| 3186 | |||
| 3187 | ret = btrfs_next_leaf_write(trans, root, path, 1); | ||
| 3188 | if (ret < 0) | ||
| 3189 | return ret; | ||
| 3190 | if (ret > 0) | ||
| 3191 | return 0; | ||
| 3192 | leaf = path->nodes[0]; | ||
| 3193 | } | ||
| 3194 | |||
| 3195 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 3196 | if (key.objectid != btrfs_ino(inode) || | ||
| 3197 | key.type != BTRFS_EXTENT_DATA_KEY || | ||
| 3198 | key.offset >= em->start + em->len) | ||
| 3199 | break; | ||
| 3200 | |||
| 3201 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3202 | struct btrfs_file_extent_item); | ||
| 3203 | extent_type = btrfs_token_file_extent_type(leaf, fi, &token); | ||
| 3204 | if (extent_type == BTRFS_FILE_EXTENT_REG || | ||
| 3205 | extent_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 3206 | extent_offset = btrfs_token_file_extent_offset(leaf, | ||
| 3207 | fi, &token); | ||
| 3208 | extent_end = key.offset + | ||
| 3209 | btrfs_token_file_extent_num_bytes(leaf, fi, | ||
| 3210 | &token); | ||
| 3211 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 3212 | extent_end = key.offset + | ||
| 3213 | btrfs_file_extent_inline_len(leaf, fi); | ||
| 3214 | } else { | ||
| 3215 | BUG(); | ||
| 3216 | } | ||
| 3217 | |||
| 3218 | if (extent_end <= em->len + em->start) { | ||
| 3219 | if (!del_nr) { | ||
| 3220 | del_slot = path->slots[0]; | ||
| 3221 | } | ||
| 3222 | del_nr++; | ||
| 3223 | continue; | ||
| 3224 | } | ||
| 3225 | |||
| 3226 | /* | ||
| 3227 | * Ok so we'll ignore previous items if we log a new extent, | ||
| 3228 | * which can lead to overlapping extents, so if we have an | ||
| 3229 | * existing extent we want to adjust we _have_ to check the next | ||
| 3230 | * guy to make sure we even need this extent anymore, this keeps | ||
| 3231 | * us from panicing in set_item_key_safe. | ||
| 3232 | */ | ||
| 3233 | if (path->slots[0] < btrfs_header_nritems(leaf) - 1) { | ||
| 3234 | struct btrfs_key tmp_key; | ||
| 3235 | |||
| 3236 | btrfs_item_key_to_cpu(leaf, &tmp_key, | ||
| 3237 | path->slots[0] + 1); | ||
| 3238 | if (tmp_key.objectid == btrfs_ino(inode) && | ||
| 3239 | tmp_key.type == BTRFS_EXTENT_DATA_KEY && | ||
| 3240 | tmp_key.offset <= em->start + em->len) { | ||
| 3241 | if (!del_nr) | ||
| 3242 | del_slot = path->slots[0]; | ||
| 3243 | del_nr++; | ||
| 3244 | continue; | ||
| 3245 | } | ||
| 3246 | } | ||
| 3247 | |||
| 3248 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | ||
| 3249 | memcpy(&new_key, &key, sizeof(new_key)); | ||
| 3250 | new_key.offset = em->start + em->len; | ||
| 3251 | btrfs_set_item_key_safe(trans, root, path, &new_key); | ||
| 3252 | extent_offset += em->start + em->len - key.offset; | ||
| 3253 | btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, | ||
| 3254 | &token); | ||
| 3255 | btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end - | ||
| 3256 | (em->start + em->len), | ||
| 3257 | &token); | ||
| 3258 | btrfs_mark_buffer_dirty(leaf); | ||
| 3259 | } | ||
| 3260 | |||
| 3261 | if (del_nr) | ||
| 3262 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | ||
| 3263 | |||
| 3264 | return ret; | ||
| 3265 | } | ||
| 3266 | |||
| 3267 | static int log_one_extent(struct btrfs_trans_handle *trans, | ||
| 3268 | struct inode *inode, struct btrfs_root *root, | ||
| 3269 | struct extent_map *em, struct btrfs_path *path) | ||
| 3270 | { | ||
| 3271 | struct btrfs_root *log = root->log_root; | ||
| 3272 | struct btrfs_file_extent_item *fi; | ||
| 3273 | struct extent_buffer *leaf; | ||
| 3274 | struct list_head ordered_sums; | ||
| 3275 | struct btrfs_map_token token; | ||
| 3276 | struct btrfs_key key; | ||
| 3277 | u64 csum_offset = em->mod_start - em->start; | ||
| 3278 | u64 csum_len = em->mod_len; | ||
| 3279 | u64 extent_offset = em->start - em->orig_start; | ||
| 3280 | u64 block_len; | ||
| 3281 | int ret; | ||
| 3282 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
| 3283 | |||
| 3284 | INIT_LIST_HEAD(&ordered_sums); | ||
| 3285 | btrfs_init_map_token(&token); | ||
| 3286 | key.objectid = btrfs_ino(inode); | ||
| 3287 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
| 3288 | key.offset = em->start; | ||
| 3289 | path->really_keep_locks = 1; | ||
| 3290 | |||
| 3291 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); | ||
| 3292 | if (ret && ret != -EEXIST) { | ||
| 3293 | path->really_keep_locks = 0; | ||
| 3294 | return ret; | ||
| 3295 | } | ||
| 3296 | leaf = path->nodes[0]; | ||
| 3297 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3298 | struct btrfs_file_extent_item); | ||
| 3299 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | ||
| 3300 | &token); | ||
| 3301 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | ||
| 3302 | skip_csum = true; | ||
| 3303 | btrfs_set_token_file_extent_type(leaf, fi, | ||
| 3304 | BTRFS_FILE_EXTENT_PREALLOC, | ||
| 3305 | &token); | ||
| 3306 | } else { | ||
| 3307 | btrfs_set_token_file_extent_type(leaf, fi, | ||
| 3308 | BTRFS_FILE_EXTENT_REG, | ||
| 3309 | &token); | ||
| 3310 | if (em->block_start == 0) | ||
| 3311 | skip_csum = true; | ||
| 3312 | } | ||
| 3313 | |||
| 3314 | block_len = max(em->block_len, em->orig_block_len); | ||
| 3315 | if (em->compress_type != BTRFS_COMPRESS_NONE) { | ||
| 3316 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
| 3317 | em->block_start, | ||
| 3318 | &token); | ||
| 3319 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
| 3320 | &token); | ||
| 3321 | } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { | ||
| 3322 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
| 3323 | em->block_start - | ||
| 3324 | extent_offset, &token); | ||
| 3325 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
| 3326 | &token); | ||
| 3327 | } else { | ||
| 3328 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); | ||
| 3329 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, | ||
| 3330 | &token); | ||
| 3331 | } | ||
| 3332 | |||
| 3333 | btrfs_set_token_file_extent_offset(leaf, fi, | ||
| 3334 | em->start - em->orig_start, | ||
| 3335 | &token); | ||
| 3336 | btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); | ||
| 3337 | btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->len, &token); | ||
| 3338 | btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, | ||
| 3339 | &token); | ||
| 3340 | btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); | ||
| 3341 | btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); | ||
| 3342 | btrfs_mark_buffer_dirty(leaf); | ||
| 3343 | |||
| 3344 | /* | ||
| 3345 | * Have to check the extent to the right of us to make sure it doesn't | ||
| 3346 | * fall in our current range. We're ok if the previous extent is in our | ||
| 3347 | * range since the recovery stuff will run us in key order and thus just | ||
| 3348 | * drop the part we overwrote. | ||
| 3349 | */ | ||
| 3350 | ret = drop_adjacent_extents(trans, log, inode, em, path); | ||
| 3351 | btrfs_release_path(path); | ||
| 3352 | path->really_keep_locks = 0; | ||
| 3353 | if (ret) { | ||
| 3354 | return ret; | ||
| 3355 | } | ||
| 3356 | |||
| 3357 | if (skip_csum) | ||
| 3358 | return 0; | ||
| 3359 | |||
| 3360 | /* block start is already adjusted for the file extent offset. */ | ||
| 3361 | ret = btrfs_lookup_csums_range(log->fs_info->csum_root, | ||
| 3362 | em->block_start + csum_offset, | ||
| 3363 | em->block_start + csum_offset + | ||
| 3364 | csum_len - 1, &ordered_sums, 0); | ||
| 3365 | if (ret) | ||
| 3366 | return ret; | ||
| 3367 | |||
| 3368 | while (!list_empty(&ordered_sums)) { | ||
| 3369 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, | ||
| 3370 | struct btrfs_ordered_sum, | ||
| 3371 | list); | ||
| 3372 | if (!ret) | ||
| 3373 | ret = btrfs_csum_file_blocks(trans, log, sums); | ||
| 3374 | list_del(&sums->list); | ||
| 3375 | kfree(sums); | ||
| 3376 | } | ||
| 3377 | |||
| 3378 | return ret; | ||
| 3379 | } | ||
| 3380 | |||
| 3381 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | ||
| 3382 | struct btrfs_root *root, | ||
| 3383 | struct inode *inode, | ||
| 3384 | struct btrfs_path *path) | ||
| 3385 | { | ||
| 3386 | struct extent_map *em, *n; | ||
| 3387 | struct list_head extents; | ||
| 3388 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
| 3389 | u64 test_gen; | ||
| 3390 | int ret = 0; | ||
| 3391 | |||
| 3392 | INIT_LIST_HEAD(&extents); | ||
| 3393 | |||
| 3394 | write_lock(&tree->lock); | ||
| 3395 | test_gen = root->fs_info->last_trans_committed; | ||
| 3396 | |||
| 3397 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { | ||
| 3398 | list_del_init(&em->list); | ||
| 3399 | if (em->generation <= test_gen) | ||
| 3400 | continue; | ||
| 3401 | /* Need a ref to keep it from getting evicted from cache */ | ||
| 3402 | atomic_inc(&em->refs); | ||
| 3403 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
| 3404 | list_add_tail(&em->list, &extents); | ||
| 3405 | } | ||
| 3406 | |||
| 3407 | list_sort(NULL, &extents, extent_cmp); | ||
| 3408 | |||
| 3409 | while (!list_empty(&extents)) { | ||
| 3410 | em = list_entry(extents.next, struct extent_map, list); | ||
| 3411 | |||
| 3412 | list_del_init(&em->list); | ||
| 3413 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
| 3414 | |||
| 3415 | /* | ||
| 3416 | * If we had an error we just need to delete everybody from our | ||
| 3417 | * private list. | ||
| 3418 | */ | ||
| 3419 | if (ret) { | ||
| 3420 | free_extent_map(em); | ||
| 3421 | continue; | ||
| 3422 | } | ||
| 3423 | |||
| 3424 | write_unlock(&tree->lock); | ||
| 3425 | |||
| 3426 | ret = log_one_extent(trans, inode, root, em, path); | ||
| 3427 | free_extent_map(em); | ||
| 3428 | write_lock(&tree->lock); | ||
| 3429 | } | ||
| 3430 | WARN_ON(!list_empty(&extents)); | ||
| 3431 | write_unlock(&tree->lock); | ||
| 3432 | |||
| 3433 | btrfs_release_path(path); | ||
| 3434 | return ret; | ||
| 3435 | } | ||
| 3436 | |||
| 2806 | /* log a single inode in the tree log. | 3437 | /* log a single inode in the tree log. |
| 2807 | * At least one parent directory for this inode must exist in the tree | 3438 | * At least one parent directory for this inode must exist in the tree |
| 2808 | * or be logged already. | 3439 | * or be logged already. |
| @@ -2832,6 +3463,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2832 | int nritems; | 3463 | int nritems; |
| 2833 | int ins_start_slot = 0; | 3464 | int ins_start_slot = 0; |
| 2834 | int ins_nr; | 3465 | int ins_nr; |
| 3466 | bool fast_search = false; | ||
| 2835 | u64 ino = btrfs_ino(inode); | 3467 | u64 ino = btrfs_ino(inode); |
| 2836 | 3468 | ||
| 2837 | log = root->log_root; | 3469 | log = root->log_root; |
| @@ -2851,21 +3483,26 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2851 | 3483 | ||
| 2852 | max_key.objectid = ino; | 3484 | max_key.objectid = ino; |
| 2853 | 3485 | ||
| 2854 | /* today the code can only do partial logging of directories */ | ||
| 2855 | if (!S_ISDIR(inode->i_mode)) | ||
| 2856 | inode_only = LOG_INODE_ALL; | ||
| 2857 | 3486 | ||
| 2858 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 3487 | /* today the code can only do partial logging of directories */ |
| 3488 | if (S_ISDIR(inode->i_mode) || | ||
| 3489 | (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 3490 | &BTRFS_I(inode)->runtime_flags) && | ||
| 3491 | inode_only == LOG_INODE_EXISTS)) | ||
| 2859 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3492 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
| 2860 | else | 3493 | else |
| 2861 | max_key.type = (u8)-1; | 3494 | max_key.type = (u8)-1; |
| 2862 | max_key.offset = (u64)-1; | 3495 | max_key.offset = (u64)-1; |
| 2863 | 3496 | ||
| 2864 | ret = btrfs_commit_inode_delayed_items(trans, inode); | 3497 | /* Only run delayed items if we are a dir or a new file */ |
| 2865 | if (ret) { | 3498 | if (S_ISDIR(inode->i_mode) || |
| 2866 | btrfs_free_path(path); | 3499 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { |
| 2867 | btrfs_free_path(dst_path); | 3500 | ret = btrfs_commit_inode_delayed_items(trans, inode); |
| 2868 | return ret; | 3501 | if (ret) { |
| 3502 | btrfs_free_path(path); | ||
| 3503 | btrfs_free_path(dst_path); | ||
| 3504 | return ret; | ||
| 3505 | } | ||
| 2869 | } | 3506 | } |
| 2870 | 3507 | ||
| 2871 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3508 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
| @@ -2881,7 +3518,30 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2881 | max_key_type = BTRFS_XATTR_ITEM_KEY; | 3518 | max_key_type = BTRFS_XATTR_ITEM_KEY; |
| 2882 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 3519 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
| 2883 | } else { | 3520 | } else { |
| 2884 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 3521 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
| 3522 | &BTRFS_I(inode)->runtime_flags)) { | ||
| 3523 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
| 3524 | &BTRFS_I(inode)->runtime_flags); | ||
| 3525 | ret = btrfs_truncate_inode_items(trans, log, | ||
| 3526 | inode, 0, 0); | ||
| 3527 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
| 3528 | &BTRFS_I(inode)->runtime_flags)) { | ||
| 3529 | if (inode_only == LOG_INODE_ALL) | ||
| 3530 | fast_search = true; | ||
| 3531 | max_key.type = BTRFS_XATTR_ITEM_KEY; | ||
| 3532 | ret = drop_objectid_items(trans, log, path, ino, | ||
| 3533 | max_key.type); | ||
| 3534 | } else { | ||
| 3535 | if (inode_only == LOG_INODE_ALL) | ||
| 3536 | fast_search = true; | ||
| 3537 | ret = log_inode_item(trans, log, dst_path, inode); | ||
| 3538 | if (ret) { | ||
| 3539 | err = ret; | ||
| 3540 | goto out_unlock; | ||
| 3541 | } | ||
| 3542 | goto log_extents; | ||
| 3543 | } | ||
| 3544 | |||
| 2885 | } | 3545 | } |
| 2886 | if (ret) { | 3546 | if (ret) { |
| 2887 | err = ret; | 3547 | err = ret; |
| @@ -2912,7 +3572,7 @@ again: | |||
| 2912 | goto next_slot; | 3572 | goto next_slot; |
| 2913 | } | 3573 | } |
| 2914 | 3574 | ||
| 2915 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 3575 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
| 2916 | ins_nr, inode_only); | 3576 | ins_nr, inode_only); |
| 2917 | if (ret) { | 3577 | if (ret) { |
| 2918 | err = ret; | 3578 | err = ret; |
| @@ -2930,7 +3590,7 @@ next_slot: | |||
| 2930 | goto again; | 3590 | goto again; |
| 2931 | } | 3591 | } |
| 2932 | if (ins_nr) { | 3592 | if (ins_nr) { |
| 2933 | ret = copy_items(trans, log, dst_path, src, | 3593 | ret = copy_items(trans, inode, dst_path, src, |
| 2934 | ins_start_slot, | 3594 | ins_start_slot, |
| 2935 | ins_nr, inode_only); | 3595 | ins_nr, inode_only); |
| 2936 | if (ret) { | 3596 | if (ret) { |
| @@ -2951,8 +3611,7 @@ next_slot: | |||
| 2951 | break; | 3611 | break; |
| 2952 | } | 3612 | } |
| 2953 | if (ins_nr) { | 3613 | if (ins_nr) { |
| 2954 | ret = copy_items(trans, log, dst_path, src, | 3614 | ret = copy_items(trans, inode, dst_path, src, ins_start_slot, |
| 2955 | ins_start_slot, | ||
| 2956 | ins_nr, inode_only); | 3615 | ins_nr, inode_only); |
| 2957 | if (ret) { | 3616 | if (ret) { |
| 2958 | err = ret; | 3617 | err = ret; |
| @@ -2960,7 +3619,25 @@ next_slot: | |||
| 2960 | } | 3619 | } |
| 2961 | ins_nr = 0; | 3620 | ins_nr = 0; |
| 2962 | } | 3621 | } |
| 2963 | WARN_ON(ins_nr); | 3622 | |
| 3623 | log_extents: | ||
| 3624 | if (fast_search) { | ||
| 3625 | btrfs_release_path(dst_path); | ||
| 3626 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); | ||
| 3627 | if (ret) { | ||
| 3628 | err = ret; | ||
| 3629 | goto out_unlock; | ||
| 3630 | } | ||
| 3631 | } else { | ||
| 3632 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
| 3633 | struct extent_map *em, *n; | ||
| 3634 | |||
| 3635 | write_lock(&tree->lock); | ||
| 3636 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) | ||
| 3637 | list_del_init(&em->list); | ||
| 3638 | write_unlock(&tree->lock); | ||
| 3639 | } | ||
| 3640 | |||
| 2964 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 3641 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
| 2965 | btrfs_release_path(path); | 3642 | btrfs_release_path(path); |
| 2966 | btrfs_release_path(dst_path); | 3643 | btrfs_release_path(dst_path); |
| @@ -2971,6 +3648,7 @@ next_slot: | |||
| 2971 | } | 3648 | } |
| 2972 | } | 3649 | } |
| 2973 | BTRFS_I(inode)->logged_trans = trans->transid; | 3650 | BTRFS_I(inode)->logged_trans = trans->transid; |
| 3651 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | ||
| 2974 | out_unlock: | 3652 | out_unlock: |
| 2975 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 3653 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 2976 | 3654 | ||
| @@ -3138,7 +3816,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 3138 | end_trans: | 3816 | end_trans: |
| 3139 | dput(old_parent); | 3817 | dput(old_parent); |
| 3140 | if (ret < 0) { | 3818 | if (ret < 0) { |
| 3141 | BUG_ON(ret != -ENOSPC); | 3819 | WARN_ON(ret != -ENOSPC); |
| 3142 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3820 | root->fs_info->last_trans_log_full_commit = trans->transid; |
| 3143 | ret = 1; | 3821 | ret = 1; |
| 3144 | } | 3822 | } |
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index ab942f46b3dd..99be4c138db6 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
| @@ -143,14 +143,13 @@ EXPORT_SYMBOL(ulist_free); | |||
| 143 | * In case of allocation failure -ENOMEM is returned and the ulist stays | 143 | * In case of allocation failure -ENOMEM is returned and the ulist stays |
| 144 | * unaltered. | 144 | * unaltered. |
| 145 | */ | 145 | */ |
| 146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 146 | int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask) |
| 147 | gfp_t gfp_mask) | ||
| 148 | { | 147 | { |
| 149 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); | 148 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); |
| 150 | } | 149 | } |
| 151 | 150 | ||
| 152 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | 151 | int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, |
| 153 | unsigned long *old_aux, gfp_t gfp_mask) | 152 | u64 *old_aux, gfp_t gfp_mask) |
| 154 | { | 153 | { |
| 155 | int i; | 154 | int i; |
| 156 | 155 | ||
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 21bdc8ec8130..21a1963439c3 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h | |||
| @@ -33,7 +33,7 @@ struct ulist_iterator { | |||
| 33 | */ | 33 | */ |
| 34 | struct ulist_node { | 34 | struct ulist_node { |
| 35 | u64 val; /* value to store */ | 35 | u64 val; /* value to store */ |
| 36 | unsigned long aux; /* auxiliary value saved along with the val */ | 36 | u64 aux; /* auxiliary value saved along with the val */ |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | struct ulist { | 39 | struct ulist { |
| @@ -65,10 +65,9 @@ void ulist_fini(struct ulist *ulist); | |||
| 65 | void ulist_reinit(struct ulist *ulist); | 65 | void ulist_reinit(struct ulist *ulist); |
| 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); | 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); |
| 67 | void ulist_free(struct ulist *ulist); | 67 | void ulist_free(struct ulist *ulist); |
| 68 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 68 | int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); |
| 69 | gfp_t gfp_mask); | 69 | int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, |
| 70 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | 70 | u64 *old_aux, gfp_t gfp_mask); |
| 71 | unsigned long *old_aux, gfp_t gfp_mask); | ||
| 72 | struct ulist_node *ulist_next(struct ulist *ulist, | 71 | struct ulist_node *ulist_next(struct ulist *ulist, |
| 73 | struct ulist_iterator *uiter); | 72 | struct ulist_iterator *uiter); |
| 74 | 73 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 88b969aeeb71..5cce6aa74012 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -25,7 +25,6 @@ | |||
| 25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
| 26 | #include <linux/ratelimit.h> | 26 | #include <linux/ratelimit.h> |
| 27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
| 28 | #include <asm/div64.h> | ||
| 29 | #include "compat.h" | 28 | #include "compat.h" |
| 30 | #include "ctree.h" | 29 | #include "ctree.h" |
| 31 | #include "extent_map.h" | 30 | #include "extent_map.h" |
| @@ -36,6 +35,8 @@ | |||
| 36 | #include "async-thread.h" | 35 | #include "async-thread.h" |
| 37 | #include "check-integrity.h" | 36 | #include "check-integrity.h" |
| 38 | #include "rcu-string.h" | 37 | #include "rcu-string.h" |
| 38 | #include "math.h" | ||
| 39 | #include "dev-replace.h" | ||
| 39 | 40 | ||
| 40 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 41 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
| 41 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
| @@ -71,6 +72,19 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) | |||
| 71 | kfree(fs_devices); | 72 | kfree(fs_devices); |
| 72 | } | 73 | } |
| 73 | 74 | ||
| 75 | static void btrfs_kobject_uevent(struct block_device *bdev, | ||
| 76 | enum kobject_action action) | ||
| 77 | { | ||
| 78 | int ret; | ||
| 79 | |||
| 80 | ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action); | ||
| 81 | if (ret) | ||
| 82 | pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n", | ||
| 83 | action, | ||
| 84 | kobject_name(&disk_to_dev(bdev->bd_disk)->kobj), | ||
| 85 | &disk_to_dev(bdev->bd_disk)->kobj); | ||
| 86 | } | ||
| 87 | |||
| 74 | void btrfs_cleanup_fs_uuids(void) | 88 | void btrfs_cleanup_fs_uuids(void) |
| 75 | { | 89 | { |
| 76 | struct btrfs_fs_devices *fs_devices; | 90 | struct btrfs_fs_devices *fs_devices; |
| @@ -108,6 +122,44 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) | |||
| 108 | return NULL; | 122 | return NULL; |
| 109 | } | 123 | } |
| 110 | 124 | ||
| 125 | static int | ||
| 126 | btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, | ||
| 127 | int flush, struct block_device **bdev, | ||
| 128 | struct buffer_head **bh) | ||
| 129 | { | ||
| 130 | int ret; | ||
| 131 | |||
| 132 | *bdev = blkdev_get_by_path(device_path, flags, holder); | ||
| 133 | |||
| 134 | if (IS_ERR(*bdev)) { | ||
| 135 | ret = PTR_ERR(*bdev); | ||
| 136 | printk(KERN_INFO "btrfs: open %s failed\n", device_path); | ||
| 137 | goto error; | ||
| 138 | } | ||
| 139 | |||
| 140 | if (flush) | ||
| 141 | filemap_write_and_wait((*bdev)->bd_inode->i_mapping); | ||
| 142 | ret = set_blocksize(*bdev, 4096); | ||
| 143 | if (ret) { | ||
| 144 | blkdev_put(*bdev, flags); | ||
| 145 | goto error; | ||
| 146 | } | ||
| 147 | invalidate_bdev(*bdev); | ||
| 148 | *bh = btrfs_read_dev_super(*bdev); | ||
| 149 | if (!*bh) { | ||
| 150 | ret = -EINVAL; | ||
| 151 | blkdev_put(*bdev, flags); | ||
| 152 | goto error; | ||
| 153 | } | ||
| 154 | |||
| 155 | return 0; | ||
| 156 | |||
| 157 | error: | ||
| 158 | *bdev = NULL; | ||
| 159 | *bh = NULL; | ||
| 160 | return ret; | ||
| 161 | } | ||
| 162 | |||
| 111 | static void requeue_list(struct btrfs_pending_bios *pending_bios, | 163 | static void requeue_list(struct btrfs_pending_bios *pending_bios, |
| 112 | struct bio *head, struct bio *tail) | 164 | struct bio *head, struct bio *tail) |
| 113 | { | 165 | { |
| @@ -467,7 +519,8 @@ error: | |||
| 467 | return ERR_PTR(-ENOMEM); | 519 | return ERR_PTR(-ENOMEM); |
| 468 | } | 520 | } |
| 469 | 521 | ||
| 470 | void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) | 522 | void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, |
| 523 | struct btrfs_fs_devices *fs_devices, int step) | ||
| 471 | { | 524 | { |
| 472 | struct btrfs_device *device, *next; | 525 | struct btrfs_device *device, *next; |
| 473 | 526 | ||
| @@ -480,8 +533,9 @@ again: | |||
| 480 | /* This is the initialized path, it is safe to release the devices. */ | 533 | /* This is the initialized path, it is safe to release the devices. */ |
| 481 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | 534 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
| 482 | if (device->in_fs_metadata) { | 535 | if (device->in_fs_metadata) { |
| 483 | if (!latest_transid || | 536 | if (!device->is_tgtdev_for_dev_replace && |
| 484 | device->generation > latest_transid) { | 537 | (!latest_transid || |
| 538 | device->generation > latest_transid)) { | ||
| 485 | latest_devid = device->devid; | 539 | latest_devid = device->devid; |
| 486 | latest_transid = device->generation; | 540 | latest_transid = device->generation; |
| 487 | latest_bdev = device->bdev; | 541 | latest_bdev = device->bdev; |
| @@ -489,6 +543,21 @@ again: | |||
| 489 | continue; | 543 | continue; |
| 490 | } | 544 | } |
| 491 | 545 | ||
| 546 | if (device->devid == BTRFS_DEV_REPLACE_DEVID) { | ||
| 547 | /* | ||
| 548 | * In the first step, keep the device which has | ||
| 549 | * the correct fsid and the devid that is used | ||
| 550 | * for the dev_replace procedure. | ||
| 551 | * In the second step, the dev_replace state is | ||
| 552 | * read from the device tree and it is known | ||
| 553 | * whether the procedure is really active or | ||
| 554 | * not, which means whether this device is | ||
| 555 | * used or whether it should be removed. | ||
| 556 | */ | ||
| 557 | if (step == 0 || device->is_tgtdev_for_dev_replace) { | ||
| 558 | continue; | ||
| 559 | } | ||
| 560 | } | ||
| 492 | if (device->bdev) { | 561 | if (device->bdev) { |
| 493 | blkdev_put(device->bdev, device->mode); | 562 | blkdev_put(device->bdev, device->mode); |
| 494 | device->bdev = NULL; | 563 | device->bdev = NULL; |
| @@ -497,7 +566,8 @@ again: | |||
| 497 | if (device->writeable) { | 566 | if (device->writeable) { |
| 498 | list_del_init(&device->dev_alloc_list); | 567 | list_del_init(&device->dev_alloc_list); |
| 499 | device->writeable = 0; | 568 | device->writeable = 0; |
| 500 | fs_devices->rw_devices--; | 569 | if (!device->is_tgtdev_for_dev_replace) |
| 570 | fs_devices->rw_devices--; | ||
| 501 | } | 571 | } |
| 502 | list_del_init(&device->dev_list); | 572 | list_del_init(&device->dev_list); |
| 503 | fs_devices->num_devices--; | 573 | fs_devices->num_devices--; |
| @@ -555,7 +625,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
| 555 | if (device->bdev) | 625 | if (device->bdev) |
| 556 | fs_devices->open_devices--; | 626 | fs_devices->open_devices--; |
| 557 | 627 | ||
| 558 | if (device->writeable) { | 628 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
| 559 | list_del_init(&device->dev_alloc_list); | 629 | list_del_init(&device->dev_alloc_list); |
| 560 | fs_devices->rw_devices--; | 630 | fs_devices->rw_devices--; |
| 561 | } | 631 | } |
| @@ -637,18 +707,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 637 | if (!device->name) | 707 | if (!device->name) |
| 638 | continue; | 708 | continue; |
| 639 | 709 | ||
| 640 | bdev = blkdev_get_by_path(device->name->str, flags, holder); | 710 | ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, |
| 641 | if (IS_ERR(bdev)) { | 711 | &bdev, &bh); |
| 642 | printk(KERN_INFO "open %s failed\n", device->name->str); | 712 | if (ret) |
| 643 | goto error; | 713 | continue; |
| 644 | } | ||
| 645 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | ||
| 646 | invalidate_bdev(bdev); | ||
| 647 | set_blocksize(bdev, 4096); | ||
| 648 | |||
| 649 | bh = btrfs_read_dev_super(bdev); | ||
| 650 | if (!bh) | ||
| 651 | goto error_close; | ||
| 652 | 714 | ||
| 653 | disk_super = (struct btrfs_super_block *)bh->b_data; | 715 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| 654 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 716 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
| @@ -687,7 +749,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 687 | fs_devices->rotating = 1; | 749 | fs_devices->rotating = 1; |
| 688 | 750 | ||
| 689 | fs_devices->open_devices++; | 751 | fs_devices->open_devices++; |
| 690 | if (device->writeable) { | 752 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
| 691 | fs_devices->rw_devices++; | 753 | fs_devices->rw_devices++; |
| 692 | list_add(&device->dev_alloc_list, | 754 | list_add(&device->dev_alloc_list, |
| 693 | &fs_devices->alloc_list); | 755 | &fs_devices->alloc_list); |
| @@ -697,9 +759,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 697 | 759 | ||
| 698 | error_brelse: | 760 | error_brelse: |
| 699 | brelse(bh); | 761 | brelse(bh); |
| 700 | error_close: | ||
| 701 | blkdev_put(bdev, flags); | 762 | blkdev_put(bdev, flags); |
| 702 | error: | ||
| 703 | continue; | 763 | continue; |
| 704 | } | 764 | } |
| 705 | if (fs_devices->open_devices == 0) { | 765 | if (fs_devices->open_devices == 0) { |
| @@ -744,40 +804,30 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
| 744 | u64 total_devices; | 804 | u64 total_devices; |
| 745 | 805 | ||
| 746 | flags |= FMODE_EXCL; | 806 | flags |= FMODE_EXCL; |
| 747 | bdev = blkdev_get_by_path(path, flags, holder); | ||
| 748 | |||
| 749 | if (IS_ERR(bdev)) { | ||
| 750 | ret = PTR_ERR(bdev); | ||
| 751 | goto error; | ||
| 752 | } | ||
| 753 | |||
| 754 | mutex_lock(&uuid_mutex); | 807 | mutex_lock(&uuid_mutex); |
| 755 | ret = set_blocksize(bdev, 4096); | 808 | ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh); |
| 756 | if (ret) | 809 | if (ret) |
| 757 | goto error_close; | 810 | goto error; |
| 758 | bh = btrfs_read_dev_super(bdev); | ||
| 759 | if (!bh) { | ||
| 760 | ret = -EINVAL; | ||
| 761 | goto error_close; | ||
| 762 | } | ||
| 763 | disk_super = (struct btrfs_super_block *)bh->b_data; | 811 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| 764 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 812 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
| 765 | transid = btrfs_super_generation(disk_super); | 813 | transid = btrfs_super_generation(disk_super); |
| 766 | total_devices = btrfs_super_num_devices(disk_super); | 814 | total_devices = btrfs_super_num_devices(disk_super); |
| 767 | if (disk_super->label[0]) | 815 | if (disk_super->label[0]) { |
| 816 | if (disk_super->label[BTRFS_LABEL_SIZE - 1]) | ||
| 817 | disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; | ||
| 768 | printk(KERN_INFO "device label %s ", disk_super->label); | 818 | printk(KERN_INFO "device label %s ", disk_super->label); |
| 769 | else | 819 | } else { |
| 770 | printk(KERN_INFO "device fsid %pU ", disk_super->fsid); | 820 | printk(KERN_INFO "device fsid %pU ", disk_super->fsid); |
| 821 | } | ||
| 771 | printk(KERN_CONT "devid %llu transid %llu %s\n", | 822 | printk(KERN_CONT "devid %llu transid %llu %s\n", |
| 772 | (unsigned long long)devid, (unsigned long long)transid, path); | 823 | (unsigned long long)devid, (unsigned long long)transid, path); |
| 773 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 824 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
| 774 | if (!ret && fs_devices_ret) | 825 | if (!ret && fs_devices_ret) |
| 775 | (*fs_devices_ret)->total_devices = total_devices; | 826 | (*fs_devices_ret)->total_devices = total_devices; |
| 776 | brelse(bh); | 827 | brelse(bh); |
| 777 | error_close: | ||
| 778 | mutex_unlock(&uuid_mutex); | ||
| 779 | blkdev_put(bdev, flags); | 828 | blkdev_put(bdev, flags); |
| 780 | error: | 829 | error: |
| 830 | mutex_unlock(&uuid_mutex); | ||
| 781 | return ret; | 831 | return ret; |
| 782 | } | 832 | } |
| 783 | 833 | ||
| @@ -796,7 +846,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | |||
| 796 | 846 | ||
| 797 | *length = 0; | 847 | *length = 0; |
| 798 | 848 | ||
| 799 | if (start >= device->total_bytes) | 849 | if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace) |
| 800 | return 0; | 850 | return 0; |
| 801 | 851 | ||
| 802 | path = btrfs_alloc_path(); | 852 | path = btrfs_alloc_path(); |
| @@ -913,7 +963,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | |||
| 913 | max_hole_size = 0; | 963 | max_hole_size = 0; |
| 914 | hole_size = 0; | 964 | hole_size = 0; |
| 915 | 965 | ||
| 916 | if (search_start >= search_end) { | 966 | if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { |
| 917 | ret = -ENOSPC; | 967 | ret = -ENOSPC; |
| 918 | goto error; | 968 | goto error; |
| 919 | } | 969 | } |
| @@ -1096,6 +1146,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | |||
| 1096 | struct btrfs_key key; | 1146 | struct btrfs_key key; |
| 1097 | 1147 | ||
| 1098 | WARN_ON(!device->in_fs_metadata); | 1148 | WARN_ON(!device->in_fs_metadata); |
| 1149 | WARN_ON(device->is_tgtdev_for_dev_replace); | ||
| 1099 | path = btrfs_alloc_path(); | 1150 | path = btrfs_alloc_path(); |
| 1100 | if (!path) | 1151 | if (!path) |
| 1101 | return -ENOMEM; | 1152 | return -ENOMEM; |
| @@ -1330,16 +1381,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1330 | root->fs_info->avail_system_alloc_bits | | 1381 | root->fs_info->avail_system_alloc_bits | |
| 1331 | root->fs_info->avail_metadata_alloc_bits; | 1382 | root->fs_info->avail_metadata_alloc_bits; |
| 1332 | 1383 | ||
| 1333 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && | 1384 | num_devices = root->fs_info->fs_devices->num_devices; |
| 1334 | root->fs_info->fs_devices->num_devices <= 4) { | 1385 | btrfs_dev_replace_lock(&root->fs_info->dev_replace); |
| 1386 | if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { | ||
| 1387 | WARN_ON(num_devices < 1); | ||
| 1388 | num_devices--; | ||
| 1389 | } | ||
| 1390 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace); | ||
| 1391 | |||
| 1392 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { | ||
| 1335 | printk(KERN_ERR "btrfs: unable to go below four devices " | 1393 | printk(KERN_ERR "btrfs: unable to go below four devices " |
| 1336 | "on raid10\n"); | 1394 | "on raid10\n"); |
| 1337 | ret = -EINVAL; | 1395 | ret = -EINVAL; |
| 1338 | goto out; | 1396 | goto out; |
| 1339 | } | 1397 | } |
| 1340 | 1398 | ||
| 1341 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && | 1399 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { |
| 1342 | root->fs_info->fs_devices->num_devices <= 2) { | ||
| 1343 | printk(KERN_ERR "btrfs: unable to go below two " | 1400 | printk(KERN_ERR "btrfs: unable to go below two " |
| 1344 | "devices on raid1\n"); | 1401 | "devices on raid1\n"); |
| 1345 | ret = -EINVAL; | 1402 | ret = -EINVAL; |
| @@ -1357,7 +1414,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1357 | * is held. | 1414 | * is held. |
| 1358 | */ | 1415 | */ |
| 1359 | list_for_each_entry(tmp, devices, dev_list) { | 1416 | list_for_each_entry(tmp, devices, dev_list) { |
| 1360 | if (tmp->in_fs_metadata && !tmp->bdev) { | 1417 | if (tmp->in_fs_metadata && |
| 1418 | !tmp->is_tgtdev_for_dev_replace && | ||
| 1419 | !tmp->bdev) { | ||
| 1361 | device = tmp; | 1420 | device = tmp; |
| 1362 | break; | 1421 | break; |
| 1363 | } | 1422 | } |
| @@ -1371,24 +1430,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1371 | goto out; | 1430 | goto out; |
| 1372 | } | 1431 | } |
| 1373 | } else { | 1432 | } else { |
| 1374 | bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, | 1433 | ret = btrfs_get_bdev_and_sb(device_path, |
| 1375 | root->fs_info->bdev_holder); | 1434 | FMODE_READ | FMODE_EXCL, |
| 1376 | if (IS_ERR(bdev)) { | 1435 | root->fs_info->bdev_holder, 0, |
| 1377 | ret = PTR_ERR(bdev); | 1436 | &bdev, &bh); |
| 1437 | if (ret) | ||
| 1378 | goto out; | 1438 | goto out; |
| 1379 | } | ||
| 1380 | |||
| 1381 | set_blocksize(bdev, 4096); | ||
| 1382 | invalidate_bdev(bdev); | ||
| 1383 | bh = btrfs_read_dev_super(bdev); | ||
| 1384 | if (!bh) { | ||
| 1385 | ret = -EINVAL; | ||
| 1386 | goto error_close; | ||
| 1387 | } | ||
| 1388 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1439 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| 1389 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 1440 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
| 1390 | dev_uuid = disk_super->dev_item.uuid; | 1441 | dev_uuid = disk_super->dev_item.uuid; |
| 1391 | device = btrfs_find_device(root, devid, dev_uuid, | 1442 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, |
| 1392 | disk_super->fsid); | 1443 | disk_super->fsid); |
| 1393 | if (!device) { | 1444 | if (!device) { |
| 1394 | ret = -ENOENT; | 1445 | ret = -ENOENT; |
| @@ -1396,6 +1447,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1396 | } | 1447 | } |
| 1397 | } | 1448 | } |
| 1398 | 1449 | ||
| 1450 | if (device->is_tgtdev_for_dev_replace) { | ||
| 1451 | pr_err("btrfs: unable to remove the dev_replace target dev\n"); | ||
| 1452 | ret = -EINVAL; | ||
| 1453 | goto error_brelse; | ||
| 1454 | } | ||
| 1455 | |||
| 1399 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { | 1456 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { |
| 1400 | printk(KERN_ERR "btrfs: unable to remove the only writeable " | 1457 | printk(KERN_ERR "btrfs: unable to remove the only writeable " |
| 1401 | "device\n"); | 1458 | "device\n"); |
| @@ -1415,6 +1472,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1415 | if (ret) | 1472 | if (ret) |
| 1416 | goto error_undo; | 1473 | goto error_undo; |
| 1417 | 1474 | ||
| 1475 | /* | ||
| 1476 | * TODO: the superblock still includes this device in its num_devices | ||
| 1477 | * counter although write_all_supers() is not locked out. This | ||
| 1478 | * could give a filesystem state which requires a degraded mount. | ||
| 1479 | */ | ||
| 1418 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); | 1480 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); |
| 1419 | if (ret) | 1481 | if (ret) |
| 1420 | goto error_undo; | 1482 | goto error_undo; |
| @@ -1425,7 +1487,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1425 | spin_unlock(&root->fs_info->free_chunk_lock); | 1487 | spin_unlock(&root->fs_info->free_chunk_lock); |
| 1426 | 1488 | ||
| 1427 | device->in_fs_metadata = 0; | 1489 | device->in_fs_metadata = 0; |
| 1428 | btrfs_scrub_cancel_dev(root, device); | 1490 | btrfs_scrub_cancel_dev(root->fs_info, device); |
| 1429 | 1491 | ||
| 1430 | /* | 1492 | /* |
| 1431 | * the device list mutex makes sure that we don't change | 1493 | * the device list mutex makes sure that we don't change |
| @@ -1475,11 +1537,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1475 | free_fs_devices(cur_devices); | 1537 | free_fs_devices(cur_devices); |
| 1476 | } | 1538 | } |
| 1477 | 1539 | ||
| 1540 | root->fs_info->num_tolerated_disk_barrier_failures = | ||
| 1541 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); | ||
| 1542 | |||
| 1478 | /* | 1543 | /* |
| 1479 | * at this point, the device is zero sized. We want to | 1544 | * at this point, the device is zero sized. We want to |
| 1480 | * remove it from the devices list and zero out the old super | 1545 | * remove it from the devices list and zero out the old super |
| 1481 | */ | 1546 | */ |
| 1482 | if (clear_super) { | 1547 | if (clear_super && disk_super) { |
| 1483 | /* make sure this device isn't detected as part of | 1548 | /* make sure this device isn't detected as part of |
| 1484 | * the FS anymore | 1549 | * the FS anymore |
| 1485 | */ | 1550 | */ |
| @@ -1490,9 +1555,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1490 | 1555 | ||
| 1491 | ret = 0; | 1556 | ret = 0; |
| 1492 | 1557 | ||
| 1558 | /* Notify udev that device has changed */ | ||
| 1559 | btrfs_kobject_uevent(bdev, KOBJ_CHANGE); | ||
| 1560 | |||
| 1493 | error_brelse: | 1561 | error_brelse: |
| 1494 | brelse(bh); | 1562 | brelse(bh); |
| 1495 | error_close: | ||
| 1496 | if (bdev) | 1563 | if (bdev) |
| 1497 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); | 1564 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); |
| 1498 | out: | 1565 | out: |
| @@ -1509,6 +1576,112 @@ error_undo: | |||
| 1509 | goto error_brelse; | 1576 | goto error_brelse; |
| 1510 | } | 1577 | } |
| 1511 | 1578 | ||
| 1579 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | ||
| 1580 | struct btrfs_device *srcdev) | ||
| 1581 | { | ||
| 1582 | WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); | ||
| 1583 | list_del_rcu(&srcdev->dev_list); | ||
| 1584 | list_del_rcu(&srcdev->dev_alloc_list); | ||
| 1585 | fs_info->fs_devices->num_devices--; | ||
| 1586 | if (srcdev->missing) { | ||
| 1587 | fs_info->fs_devices->missing_devices--; | ||
| 1588 | fs_info->fs_devices->rw_devices++; | ||
| 1589 | } | ||
| 1590 | if (srcdev->can_discard) | ||
| 1591 | fs_info->fs_devices->num_can_discard--; | ||
| 1592 | if (srcdev->bdev) | ||
| 1593 | fs_info->fs_devices->open_devices--; | ||
| 1594 | |||
| 1595 | call_rcu(&srcdev->rcu, free_device); | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | ||
| 1599 | struct btrfs_device *tgtdev) | ||
| 1600 | { | ||
| 1601 | struct btrfs_device *next_device; | ||
| 1602 | |||
| 1603 | WARN_ON(!tgtdev); | ||
| 1604 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
| 1605 | if (tgtdev->bdev) { | ||
| 1606 | btrfs_scratch_superblock(tgtdev); | ||
| 1607 | fs_info->fs_devices->open_devices--; | ||
| 1608 | } | ||
| 1609 | fs_info->fs_devices->num_devices--; | ||
| 1610 | if (tgtdev->can_discard) | ||
| 1611 | fs_info->fs_devices->num_can_discard++; | ||
| 1612 | |||
| 1613 | next_device = list_entry(fs_info->fs_devices->devices.next, | ||
| 1614 | struct btrfs_device, dev_list); | ||
| 1615 | if (tgtdev->bdev == fs_info->sb->s_bdev) | ||
| 1616 | fs_info->sb->s_bdev = next_device->bdev; | ||
| 1617 | if (tgtdev->bdev == fs_info->fs_devices->latest_bdev) | ||
| 1618 | fs_info->fs_devices->latest_bdev = next_device->bdev; | ||
| 1619 | list_del_rcu(&tgtdev->dev_list); | ||
| 1620 | |||
| 1621 | call_rcu(&tgtdev->rcu, free_device); | ||
| 1622 | |||
| 1623 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
| 1624 | } | ||
| 1625 | |||
| 1626 | int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, | ||
| 1627 | struct btrfs_device **device) | ||
| 1628 | { | ||
| 1629 | int ret = 0; | ||
| 1630 | struct btrfs_super_block *disk_super; | ||
| 1631 | u64 devid; | ||
| 1632 | u8 *dev_uuid; | ||
| 1633 | struct block_device *bdev; | ||
| 1634 | struct buffer_head *bh; | ||
| 1635 | |||
| 1636 | *device = NULL; | ||
| 1637 | ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ, | ||
| 1638 | root->fs_info->bdev_holder, 0, &bdev, &bh); | ||
| 1639 | if (ret) | ||
| 1640 | return ret; | ||
| 1641 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
| 1642 | devid = btrfs_stack_device_id(&disk_super->dev_item); | ||
| 1643 | dev_uuid = disk_super->dev_item.uuid; | ||
| 1644 | *device = btrfs_find_device(root->fs_info, devid, dev_uuid, | ||
| 1645 | disk_super->fsid); | ||
| 1646 | brelse(bh); | ||
| 1647 | if (!*device) | ||
| 1648 | ret = -ENOENT; | ||
| 1649 | blkdev_put(bdev, FMODE_READ); | ||
| 1650 | return ret; | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, | ||
| 1654 | char *device_path, | ||
| 1655 | struct btrfs_device **device) | ||
| 1656 | { | ||
| 1657 | *device = NULL; | ||
| 1658 | if (strcmp(device_path, "missing") == 0) { | ||
| 1659 | struct list_head *devices; | ||
| 1660 | struct btrfs_device *tmp; | ||
| 1661 | |||
| 1662 | devices = &root->fs_info->fs_devices->devices; | ||
| 1663 | /* | ||
| 1664 | * It is safe to read the devices since the volume_mutex | ||
| 1665 | * is held by the caller. | ||
| 1666 | */ | ||
| 1667 | list_for_each_entry(tmp, devices, dev_list) { | ||
| 1668 | if (tmp->in_fs_metadata && !tmp->bdev) { | ||
| 1669 | *device = tmp; | ||
| 1670 | break; | ||
| 1671 | } | ||
| 1672 | } | ||
| 1673 | |||
| 1674 | if (!*device) { | ||
| 1675 | pr_err("btrfs: no missing device found\n"); | ||
| 1676 | return -ENOENT; | ||
| 1677 | } | ||
| 1678 | |||
| 1679 | return 0; | ||
| 1680 | } else { | ||
| 1681 | return btrfs_find_device_by_path(root, device_path, device); | ||
| 1682 | } | ||
| 1683 | } | ||
| 1684 | |||
| 1512 | /* | 1685 | /* |
| 1513 | * does all the dirty work required for changing file system's UUID. | 1686 | * does all the dirty work required for changing file system's UUID. |
| 1514 | */ | 1687 | */ |
| @@ -1627,7 +1800,8 @@ next_slot: | |||
| 1627 | read_extent_buffer(leaf, fs_uuid, | 1800 | read_extent_buffer(leaf, fs_uuid, |
| 1628 | (unsigned long)btrfs_device_fsid(dev_item), | 1801 | (unsigned long)btrfs_device_fsid(dev_item), |
| 1629 | BTRFS_UUID_SIZE); | 1802 | BTRFS_UUID_SIZE); |
| 1630 | device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); | 1803 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, |
| 1804 | fs_uuid); | ||
| 1631 | BUG_ON(!device); /* Logic error */ | 1805 | BUG_ON(!device); /* Logic error */ |
| 1632 | 1806 | ||
| 1633 | if (device->fs_devices->seeding) { | 1807 | if (device->fs_devices->seeding) { |
| @@ -1675,16 +1849,17 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1675 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | 1849 | filemap_write_and_wait(bdev->bd_inode->i_mapping); |
| 1676 | 1850 | ||
| 1677 | devices = &root->fs_info->fs_devices->devices; | 1851 | devices = &root->fs_info->fs_devices->devices; |
| 1678 | /* | 1852 | |
| 1679 | * we have the volume lock, so we don't need the extra | 1853 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 1680 | * device list mutex while reading the list here. | ||
| 1681 | */ | ||
| 1682 | list_for_each_entry(device, devices, dev_list) { | 1854 | list_for_each_entry(device, devices, dev_list) { |
| 1683 | if (device->bdev == bdev) { | 1855 | if (device->bdev == bdev) { |
| 1684 | ret = -EEXIST; | 1856 | ret = -EEXIST; |
| 1857 | mutex_unlock( | ||
| 1858 | &root->fs_info->fs_devices->device_list_mutex); | ||
| 1685 | goto error; | 1859 | goto error; |
| 1686 | } | 1860 | } |
| 1687 | } | 1861 | } |
| 1862 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 1688 | 1863 | ||
| 1689 | device = kzalloc(sizeof(*device), GFP_NOFS); | 1864 | device = kzalloc(sizeof(*device), GFP_NOFS); |
| 1690 | if (!device) { | 1865 | if (!device) { |
| @@ -1734,6 +1909,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1734 | device->dev_root = root->fs_info->dev_root; | 1909 | device->dev_root = root->fs_info->dev_root; |
| 1735 | device->bdev = bdev; | 1910 | device->bdev = bdev; |
| 1736 | device->in_fs_metadata = 1; | 1911 | device->in_fs_metadata = 1; |
| 1912 | device->is_tgtdev_for_dev_replace = 0; | ||
| 1737 | device->mode = FMODE_EXCL; | 1913 | device->mode = FMODE_EXCL; |
| 1738 | set_blocksize(device->bdev, 4096); | 1914 | set_blocksize(device->bdev, 4096); |
| 1739 | 1915 | ||
| @@ -1775,15 +1951,21 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1775 | 1951 | ||
| 1776 | if (seeding_dev) { | 1952 | if (seeding_dev) { |
| 1777 | ret = init_first_rw_device(trans, root, device); | 1953 | ret = init_first_rw_device(trans, root, device); |
| 1778 | if (ret) | 1954 | if (ret) { |
| 1955 | btrfs_abort_transaction(trans, root, ret); | ||
| 1779 | goto error_trans; | 1956 | goto error_trans; |
| 1957 | } | ||
| 1780 | ret = btrfs_finish_sprout(trans, root); | 1958 | ret = btrfs_finish_sprout(trans, root); |
| 1781 | if (ret) | 1959 | if (ret) { |
| 1960 | btrfs_abort_transaction(trans, root, ret); | ||
| 1782 | goto error_trans; | 1961 | goto error_trans; |
| 1962 | } | ||
| 1783 | } else { | 1963 | } else { |
| 1784 | ret = btrfs_add_device(trans, root, device); | 1964 | ret = btrfs_add_device(trans, root, device); |
| 1785 | if (ret) | 1965 | if (ret) { |
| 1966 | btrfs_abort_transaction(trans, root, ret); | ||
| 1786 | goto error_trans; | 1967 | goto error_trans; |
| 1968 | } | ||
| 1787 | } | 1969 | } |
| 1788 | 1970 | ||
| 1789 | /* | 1971 | /* |
| @@ -1793,6 +1975,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1793 | btrfs_clear_space_info_full(root->fs_info); | 1975 | btrfs_clear_space_info_full(root->fs_info); |
| 1794 | 1976 | ||
| 1795 | unlock_chunks(root); | 1977 | unlock_chunks(root); |
| 1978 | root->fs_info->num_tolerated_disk_barrier_failures = | ||
| 1979 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); | ||
| 1796 | ret = btrfs_commit_transaction(trans, root); | 1980 | ret = btrfs_commit_transaction(trans, root); |
| 1797 | 1981 | ||
| 1798 | if (seeding_dev) { | 1982 | if (seeding_dev) { |
| @@ -1808,13 +1992,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1808 | "Failed to relocate sys chunks after " | 1992 | "Failed to relocate sys chunks after " |
| 1809 | "device initialization. This can be fixed " | 1993 | "device initialization. This can be fixed " |
| 1810 | "using the \"btrfs balance\" command."); | 1994 | "using the \"btrfs balance\" command."); |
| 1995 | trans = btrfs_attach_transaction(root); | ||
| 1996 | if (IS_ERR(trans)) { | ||
| 1997 | if (PTR_ERR(trans) == -ENOENT) | ||
| 1998 | return 0; | ||
| 1999 | return PTR_ERR(trans); | ||
| 2000 | } | ||
| 2001 | ret = btrfs_commit_transaction(trans, root); | ||
| 1811 | } | 2002 | } |
| 1812 | 2003 | ||
| 1813 | return ret; | 2004 | return ret; |
| 1814 | 2005 | ||
| 1815 | error_trans: | 2006 | error_trans: |
| 1816 | unlock_chunks(root); | 2007 | unlock_chunks(root); |
| 1817 | btrfs_abort_transaction(trans, root, ret); | ||
| 1818 | btrfs_end_transaction(trans, root); | 2008 | btrfs_end_transaction(trans, root); |
| 1819 | rcu_string_free(device->name); | 2009 | rcu_string_free(device->name); |
| 1820 | kfree(device); | 2010 | kfree(device); |
| @@ -1827,6 +2017,98 @@ error: | |||
| 1827 | return ret; | 2017 | return ret; |
| 1828 | } | 2018 | } |
| 1829 | 2019 | ||
| 2020 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | ||
| 2021 | struct btrfs_device **device_out) | ||
| 2022 | { | ||
| 2023 | struct request_queue *q; | ||
| 2024 | struct btrfs_device *device; | ||
| 2025 | struct block_device *bdev; | ||
| 2026 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2027 | struct list_head *devices; | ||
| 2028 | struct rcu_string *name; | ||
| 2029 | int ret = 0; | ||
| 2030 | |||
| 2031 | *device_out = NULL; | ||
| 2032 | if (fs_info->fs_devices->seeding) | ||
| 2033 | return -EINVAL; | ||
| 2034 | |||
| 2035 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, | ||
| 2036 | fs_info->bdev_holder); | ||
| 2037 | if (IS_ERR(bdev)) | ||
| 2038 | return PTR_ERR(bdev); | ||
| 2039 | |||
| 2040 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | ||
| 2041 | |||
| 2042 | devices = &fs_info->fs_devices->devices; | ||
| 2043 | list_for_each_entry(device, devices, dev_list) { | ||
| 2044 | if (device->bdev == bdev) { | ||
| 2045 | ret = -EEXIST; | ||
| 2046 | goto error; | ||
| 2047 | } | ||
| 2048 | } | ||
| 2049 | |||
| 2050 | device = kzalloc(sizeof(*device), GFP_NOFS); | ||
| 2051 | if (!device) { | ||
| 2052 | ret = -ENOMEM; | ||
| 2053 | goto error; | ||
| 2054 | } | ||
| 2055 | |||
| 2056 | name = rcu_string_strdup(device_path, GFP_NOFS); | ||
| 2057 | if (!name) { | ||
| 2058 | kfree(device); | ||
| 2059 | ret = -ENOMEM; | ||
| 2060 | goto error; | ||
| 2061 | } | ||
| 2062 | rcu_assign_pointer(device->name, name); | ||
| 2063 | |||
| 2064 | q = bdev_get_queue(bdev); | ||
| 2065 | if (blk_queue_discard(q)) | ||
| 2066 | device->can_discard = 1; | ||
| 2067 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 2068 | device->writeable = 1; | ||
| 2069 | device->work.func = pending_bios_fn; | ||
| 2070 | generate_random_uuid(device->uuid); | ||
| 2071 | device->devid = BTRFS_DEV_REPLACE_DEVID; | ||
| 2072 | spin_lock_init(&device->io_lock); | ||
| 2073 | device->generation = 0; | ||
| 2074 | device->io_width = root->sectorsize; | ||
| 2075 | device->io_align = root->sectorsize; | ||
| 2076 | device->sector_size = root->sectorsize; | ||
| 2077 | device->total_bytes = i_size_read(bdev->bd_inode); | ||
| 2078 | device->disk_total_bytes = device->total_bytes; | ||
| 2079 | device->dev_root = fs_info->dev_root; | ||
| 2080 | device->bdev = bdev; | ||
| 2081 | device->in_fs_metadata = 1; | ||
| 2082 | device->is_tgtdev_for_dev_replace = 1; | ||
| 2083 | device->mode = FMODE_EXCL; | ||
| 2084 | set_blocksize(device->bdev, 4096); | ||
| 2085 | device->fs_devices = fs_info->fs_devices; | ||
| 2086 | list_add(&device->dev_list, &fs_info->fs_devices->devices); | ||
| 2087 | fs_info->fs_devices->num_devices++; | ||
| 2088 | fs_info->fs_devices->open_devices++; | ||
| 2089 | if (device->can_discard) | ||
| 2090 | fs_info->fs_devices->num_can_discard++; | ||
| 2091 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 2092 | |||
| 2093 | *device_out = device; | ||
| 2094 | return ret; | ||
| 2095 | |||
| 2096 | error: | ||
| 2097 | blkdev_put(bdev, FMODE_EXCL); | ||
| 2098 | return ret; | ||
| 2099 | } | ||
| 2100 | |||
| 2101 | void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, | ||
| 2102 | struct btrfs_device *tgtdev) | ||
| 2103 | { | ||
| 2104 | WARN_ON(fs_info->fs_devices->rw_devices == 0); | ||
| 2105 | tgtdev->io_width = fs_info->dev_root->sectorsize; | ||
| 2106 | tgtdev->io_align = fs_info->dev_root->sectorsize; | ||
| 2107 | tgtdev->sector_size = fs_info->dev_root->sectorsize; | ||
| 2108 | tgtdev->dev_root = fs_info->dev_root; | ||
| 2109 | tgtdev->in_fs_metadata = 1; | ||
| 2110 | } | ||
| 2111 | |||
| 1830 | static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, | 2112 | static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, |
| 1831 | struct btrfs_device *device) | 2113 | struct btrfs_device *device) |
| 1832 | { | 2114 | { |
| @@ -1883,7 +2165,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
| 1883 | 2165 | ||
| 1884 | if (!device->writeable) | 2166 | if (!device->writeable) |
| 1885 | return -EACCES; | 2167 | return -EACCES; |
| 1886 | if (new_size <= device->total_bytes) | 2168 | if (new_size <= device->total_bytes || |
| 2169 | device->is_tgtdev_for_dev_replace) | ||
| 1887 | return -EINVAL; | 2170 | return -EINVAL; |
| 1888 | 2171 | ||
| 1889 | btrfs_set_super_total_bytes(super_copy, old_total + diff); | 2172 | btrfs_set_super_total_bytes(super_copy, old_total + diff); |
| @@ -2321,18 +2604,6 @@ static int chunk_profiles_filter(u64 chunk_type, | |||
| 2321 | return 1; | 2604 | return 1; |
| 2322 | } | 2605 | } |
| 2323 | 2606 | ||
| 2324 | static u64 div_factor_fine(u64 num, int factor) | ||
| 2325 | { | ||
| 2326 | if (factor <= 0) | ||
| 2327 | return 0; | ||
| 2328 | if (factor >= 100) | ||
| 2329 | return num; | ||
| 2330 | |||
| 2331 | num *= factor; | ||
| 2332 | do_div(num, 100); | ||
| 2333 | return num; | ||
| 2334 | } | ||
| 2335 | |||
| 2336 | static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, | 2607 | static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, |
| 2337 | struct btrfs_balance_args *bargs) | 2608 | struct btrfs_balance_args *bargs) |
| 2338 | { | 2609 | { |
| @@ -2497,15 +2768,6 @@ static int should_balance_chunk(struct btrfs_root *root, | |||
| 2497 | return 1; | 2768 | return 1; |
| 2498 | } | 2769 | } |
| 2499 | 2770 | ||
| 2500 | static u64 div_factor(u64 num, int factor) | ||
| 2501 | { | ||
| 2502 | if (factor == 10) | ||
| 2503 | return num; | ||
| 2504 | num *= factor; | ||
| 2505 | do_div(num, 10); | ||
| 2506 | return num; | ||
| 2507 | } | ||
| 2508 | |||
| 2509 | static int __btrfs_balance(struct btrfs_fs_info *fs_info) | 2771 | static int __btrfs_balance(struct btrfs_fs_info *fs_info) |
| 2510 | { | 2772 | { |
| 2511 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; | 2773 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; |
| @@ -2533,7 +2795,8 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
| 2533 | size_to_free = div_factor(old_size, 1); | 2795 | size_to_free = div_factor(old_size, 1); |
| 2534 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); | 2796 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); |
| 2535 | if (!device->writeable || | 2797 | if (!device->writeable || |
| 2536 | device->total_bytes - device->bytes_used > size_to_free) | 2798 | device->total_bytes - device->bytes_used > size_to_free || |
| 2799 | device->is_tgtdev_for_dev_replace) | ||
| 2537 | continue; | 2800 | continue; |
| 2538 | 2801 | ||
| 2539 | ret = btrfs_shrink_device(device, old_size - size_to_free); | 2802 | ret = btrfs_shrink_device(device, old_size - size_to_free); |
| @@ -2711,6 +2974,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2711 | u64 allowed; | 2974 | u64 allowed; |
| 2712 | int mixed = 0; | 2975 | int mixed = 0; |
| 2713 | int ret; | 2976 | int ret; |
| 2977 | u64 num_devices; | ||
| 2714 | 2978 | ||
| 2715 | if (btrfs_fs_closing(fs_info) || | 2979 | if (btrfs_fs_closing(fs_info) || |
| 2716 | atomic_read(&fs_info->balance_pause_req) || | 2980 | atomic_read(&fs_info->balance_pause_req) || |
| @@ -2739,10 +3003,17 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2739 | } | 3003 | } |
| 2740 | } | 3004 | } |
| 2741 | 3005 | ||
| 3006 | num_devices = fs_info->fs_devices->num_devices; | ||
| 3007 | btrfs_dev_replace_lock(&fs_info->dev_replace); | ||
| 3008 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { | ||
| 3009 | BUG_ON(num_devices < 1); | ||
| 3010 | num_devices--; | ||
| 3011 | } | ||
| 3012 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 2742 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | 3013 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; |
| 2743 | if (fs_info->fs_devices->num_devices == 1) | 3014 | if (num_devices == 1) |
| 2744 | allowed |= BTRFS_BLOCK_GROUP_DUP; | 3015 | allowed |= BTRFS_BLOCK_GROUP_DUP; |
| 2745 | else if (fs_info->fs_devices->num_devices < 4) | 3016 | else if (num_devices < 4) |
| 2746 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); | 3017 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); |
| 2747 | else | 3018 | else |
| 2748 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | 3019 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | |
| @@ -2804,6 +3075,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2804 | } | 3075 | } |
| 2805 | } | 3076 | } |
| 2806 | 3077 | ||
| 3078 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
| 3079 | int num_tolerated_disk_barrier_failures; | ||
| 3080 | u64 target = bctl->sys.target; | ||
| 3081 | |||
| 3082 | num_tolerated_disk_barrier_failures = | ||
| 3083 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
| 3084 | if (num_tolerated_disk_barrier_failures > 0 && | ||
| 3085 | (target & | ||
| 3086 | (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 | | ||
| 3087 | BTRFS_AVAIL_ALLOC_BIT_SINGLE))) | ||
| 3088 | num_tolerated_disk_barrier_failures = 0; | ||
| 3089 | else if (num_tolerated_disk_barrier_failures > 1 && | ||
| 3090 | (target & | ||
| 3091 | (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))) | ||
| 3092 | num_tolerated_disk_barrier_failures = 1; | ||
| 3093 | |||
| 3094 | fs_info->num_tolerated_disk_barrier_failures = | ||
| 3095 | num_tolerated_disk_barrier_failures; | ||
| 3096 | } | ||
| 3097 | |||
| 2807 | ret = insert_balance_item(fs_info->tree_root, bctl); | 3098 | ret = insert_balance_item(fs_info->tree_root, bctl); |
| 2808 | if (ret && ret != -EEXIST) | 3099 | if (ret && ret != -EEXIST) |
| 2809 | goto out; | 3100 | goto out; |
| @@ -2836,6 +3127,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2836 | __cancel_balance(fs_info); | 3127 | __cancel_balance(fs_info); |
| 2837 | } | 3128 | } |
| 2838 | 3129 | ||
| 3130 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
| 3131 | fs_info->num_tolerated_disk_barrier_failures = | ||
| 3132 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
| 3133 | } | ||
| 3134 | |||
| 2839 | wake_up(&fs_info->balance_wait_q); | 3135 | wake_up(&fs_info->balance_wait_q); |
| 2840 | 3136 | ||
| 2841 | return ret; | 3137 | return ret; |
| @@ -2860,6 +3156,7 @@ static int balance_kthread(void *data) | |||
| 2860 | ret = btrfs_balance(fs_info->balance_ctl, NULL); | 3156 | ret = btrfs_balance(fs_info->balance_ctl, NULL); |
| 2861 | } | 3157 | } |
| 2862 | 3158 | ||
| 3159 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
| 2863 | mutex_unlock(&fs_info->balance_mutex); | 3160 | mutex_unlock(&fs_info->balance_mutex); |
| 2864 | mutex_unlock(&fs_info->volume_mutex); | 3161 | mutex_unlock(&fs_info->volume_mutex); |
| 2865 | 3162 | ||
| @@ -2882,6 +3179,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) | |||
| 2882 | return 0; | 3179 | return 0; |
| 2883 | } | 3180 | } |
| 2884 | 3181 | ||
| 3182 | WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); | ||
| 2885 | tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); | 3183 | tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); |
| 2886 | if (IS_ERR(tsk)) | 3184 | if (IS_ERR(tsk)) |
| 2887 | return PTR_ERR(tsk); | 3185 | return PTR_ERR(tsk); |
| @@ -3038,7 +3336,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 3038 | u64 old_size = device->total_bytes; | 3336 | u64 old_size = device->total_bytes; |
| 3039 | u64 diff = device->total_bytes - new_size; | 3337 | u64 diff = device->total_bytes - new_size; |
| 3040 | 3338 | ||
| 3041 | if (new_size >= device->total_bytes) | 3339 | if (device->is_tgtdev_for_dev_replace) |
| 3042 | return -EINVAL; | 3340 | return -EINVAL; |
| 3043 | 3341 | ||
| 3044 | path = btrfs_alloc_path(); | 3342 | path = btrfs_alloc_path(); |
| @@ -3193,6 +3491,14 @@ static int btrfs_cmp_device_info(const void *a, const void *b) | |||
| 3193 | return 0; | 3491 | return 0; |
| 3194 | } | 3492 | } |
| 3195 | 3493 | ||
| 3494 | struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { | ||
| 3495 | { 2, 1, 0, 4, 2, 2 /* raid10 */ }, | ||
| 3496 | { 1, 1, 2, 2, 2, 2 /* raid1 */ }, | ||
| 3497 | { 1, 2, 1, 1, 1, 2 /* dup */ }, | ||
| 3498 | { 1, 1, 0, 2, 1, 1 /* raid0 */ }, | ||
| 3499 | { 1, 1, 0, 1, 1, 1 /* single */ }, | ||
| 3500 | }; | ||
| 3501 | |||
| 3196 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 3502 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
| 3197 | struct btrfs_root *extent_root, | 3503 | struct btrfs_root *extent_root, |
| 3198 | struct map_lookup **map_ret, | 3504 | struct map_lookup **map_ret, |
| @@ -3222,43 +3528,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3222 | int ndevs; | 3528 | int ndevs; |
| 3223 | int i; | 3529 | int i; |
| 3224 | int j; | 3530 | int j; |
| 3531 | int index; | ||
| 3225 | 3532 | ||
| 3226 | BUG_ON(!alloc_profile_is_valid(type, 0)); | 3533 | BUG_ON(!alloc_profile_is_valid(type, 0)); |
| 3227 | 3534 | ||
| 3228 | if (list_empty(&fs_devices->alloc_list)) | 3535 | if (list_empty(&fs_devices->alloc_list)) |
| 3229 | return -ENOSPC; | 3536 | return -ENOSPC; |
| 3230 | 3537 | ||
| 3231 | sub_stripes = 1; | 3538 | index = __get_raid_index(type); |
| 3232 | dev_stripes = 1; | ||
| 3233 | devs_increment = 1; | ||
| 3234 | ncopies = 1; | ||
| 3235 | devs_max = 0; /* 0 == as many as possible */ | ||
| 3236 | devs_min = 1; | ||
| 3237 | 3539 | ||
| 3238 | /* | 3540 | sub_stripes = btrfs_raid_array[index].sub_stripes; |
| 3239 | * define the properties of each RAID type. | 3541 | dev_stripes = btrfs_raid_array[index].dev_stripes; |
| 3240 | * FIXME: move this to a global table and use it in all RAID | 3542 | devs_max = btrfs_raid_array[index].devs_max; |
| 3241 | * calculation code | 3543 | devs_min = btrfs_raid_array[index].devs_min; |
| 3242 | */ | 3544 | devs_increment = btrfs_raid_array[index].devs_increment; |
| 3243 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 3545 | ncopies = btrfs_raid_array[index].ncopies; |
| 3244 | dev_stripes = 2; | ||
| 3245 | ncopies = 2; | ||
| 3246 | devs_max = 1; | ||
| 3247 | } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | ||
| 3248 | devs_min = 2; | ||
| 3249 | } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | ||
| 3250 | devs_increment = 2; | ||
| 3251 | ncopies = 2; | ||
| 3252 | devs_max = 2; | ||
| 3253 | devs_min = 2; | ||
| 3254 | } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | ||
| 3255 | sub_stripes = 2; | ||
| 3256 | devs_increment = 2; | ||
| 3257 | ncopies = 2; | ||
| 3258 | devs_min = 4; | ||
| 3259 | } else { | ||
| 3260 | devs_max = 1; | ||
| 3261 | } | ||
| 3262 | 3546 | ||
| 3263 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 3547 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
| 3264 | max_stripe_size = 1024 * 1024 * 1024; | 3548 | max_stripe_size = 1024 * 1024 * 1024; |
| @@ -3305,13 +3589,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3305 | cur = cur->next; | 3589 | cur = cur->next; |
| 3306 | 3590 | ||
| 3307 | if (!device->writeable) { | 3591 | if (!device->writeable) { |
| 3308 | printk(KERN_ERR | 3592 | WARN(1, KERN_ERR |
| 3309 | "btrfs: read-only device in alloc_list\n"); | 3593 | "btrfs: read-only device in alloc_list\n"); |
| 3310 | WARN_ON(1); | ||
| 3311 | continue; | 3594 | continue; |
| 3312 | } | 3595 | } |
| 3313 | 3596 | ||
| 3314 | if (!device->in_fs_metadata) | 3597 | if (!device->in_fs_metadata || |
| 3598 | device->is_tgtdev_for_dev_replace) | ||
| 3315 | continue; | 3599 | continue; |
| 3316 | 3600 | ||
| 3317 | if (device->total_bytes > device->bytes_used) | 3601 | if (device->total_bytes > device->bytes_used) |
| @@ -3340,6 +3624,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3340 | devices_info[ndevs].total_avail = total_avail; | 3624 | devices_info[ndevs].total_avail = total_avail; |
| 3341 | devices_info[ndevs].dev = device; | 3625 | devices_info[ndevs].dev = device; |
| 3342 | ++ndevs; | 3626 | ++ndevs; |
| 3627 | WARN_ON(ndevs > fs_devices->rw_devices); | ||
| 3343 | } | 3628 | } |
| 3344 | 3629 | ||
| 3345 | /* | 3630 | /* |
| @@ -3608,12 +3893,16 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
| 3608 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, | 3893 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, |
| 3609 | &sys_chunk_size, &sys_stripe_size, | 3894 | &sys_chunk_size, &sys_stripe_size, |
| 3610 | sys_chunk_offset, alloc_profile); | 3895 | sys_chunk_offset, alloc_profile); |
| 3611 | if (ret) | 3896 | if (ret) { |
| 3612 | goto abort; | 3897 | btrfs_abort_transaction(trans, root, ret); |
| 3898 | goto out; | ||
| 3899 | } | ||
| 3613 | 3900 | ||
| 3614 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); | 3901 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); |
| 3615 | if (ret) | 3902 | if (ret) { |
| 3616 | goto abort; | 3903 | btrfs_abort_transaction(trans, root, ret); |
| 3904 | goto out; | ||
| 3905 | } | ||
| 3617 | 3906 | ||
| 3618 | /* | 3907 | /* |
| 3619 | * Modifying chunk tree needs allocating new blocks from both | 3908 | * Modifying chunk tree needs allocating new blocks from both |
| @@ -3623,19 +3912,19 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
| 3623 | */ | 3912 | */ |
| 3624 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, | 3913 | ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, |
| 3625 | chunk_size, stripe_size); | 3914 | chunk_size, stripe_size); |
| 3626 | if (ret) | 3915 | if (ret) { |
| 3627 | goto abort; | 3916 | btrfs_abort_transaction(trans, root, ret); |
| 3917 | goto out; | ||
| 3918 | } | ||
| 3628 | 3919 | ||
| 3629 | ret = __finish_chunk_alloc(trans, extent_root, sys_map, | 3920 | ret = __finish_chunk_alloc(trans, extent_root, sys_map, |
| 3630 | sys_chunk_offset, sys_chunk_size, | 3921 | sys_chunk_offset, sys_chunk_size, |
| 3631 | sys_stripe_size); | 3922 | sys_stripe_size); |
| 3632 | if (ret) | 3923 | if (ret) |
| 3633 | goto abort; | 3924 | btrfs_abort_transaction(trans, root, ret); |
| 3634 | 3925 | ||
| 3635 | return 0; | 3926 | out: |
| 3636 | 3927 | ||
| 3637 | abort: | ||
| 3638 | btrfs_abort_transaction(trans, root, ret); | ||
| 3639 | return ret; | 3928 | return ret; |
| 3640 | } | 3929 | } |
| 3641 | 3930 | ||
| @@ -3694,8 +3983,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
| 3694 | } | 3983 | } |
| 3695 | } | 3984 | } |
| 3696 | 3985 | ||
| 3697 | int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | 3986 | int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) |
| 3698 | { | 3987 | { |
| 3988 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
| 3699 | struct extent_map *em; | 3989 | struct extent_map *em; |
| 3700 | struct map_lookup *map; | 3990 | struct map_lookup *map; |
| 3701 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 3991 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
| @@ -3715,32 +4005,60 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | |||
| 3715 | else | 4005 | else |
| 3716 | ret = 1; | 4006 | ret = 1; |
| 3717 | free_extent_map(em); | 4007 | free_extent_map(em); |
| 4008 | |||
| 4009 | btrfs_dev_replace_lock(&fs_info->dev_replace); | ||
| 4010 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) | ||
| 4011 | ret++; | ||
| 4012 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 4013 | |||
| 3718 | return ret; | 4014 | return ret; |
| 3719 | } | 4015 | } |
| 3720 | 4016 | ||
| 3721 | static int find_live_mirror(struct map_lookup *map, int first, int num, | 4017 | static int find_live_mirror(struct btrfs_fs_info *fs_info, |
| 3722 | int optimal) | 4018 | struct map_lookup *map, int first, int num, |
| 4019 | int optimal, int dev_replace_is_ongoing) | ||
| 3723 | { | 4020 | { |
| 3724 | int i; | 4021 | int i; |
| 3725 | if (map->stripes[optimal].dev->bdev) | 4022 | int tolerance; |
| 3726 | return optimal; | 4023 | struct btrfs_device *srcdev; |
| 3727 | for (i = first; i < first + num; i++) { | 4024 | |
| 3728 | if (map->stripes[i].dev->bdev) | 4025 | if (dev_replace_is_ongoing && |
| 3729 | return i; | 4026 | fs_info->dev_replace.cont_reading_from_srcdev_mode == |
| 4027 | BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID) | ||
| 4028 | srcdev = fs_info->dev_replace.srcdev; | ||
| 4029 | else | ||
| 4030 | srcdev = NULL; | ||
| 4031 | |||
| 4032 | /* | ||
| 4033 | * try to avoid the drive that is the source drive for a | ||
| 4034 | * dev-replace procedure, only choose it if no other non-missing | ||
| 4035 | * mirror is available | ||
| 4036 | */ | ||
| 4037 | for (tolerance = 0; tolerance < 2; tolerance++) { | ||
| 4038 | if (map->stripes[optimal].dev->bdev && | ||
| 4039 | (tolerance || map->stripes[optimal].dev != srcdev)) | ||
| 4040 | return optimal; | ||
| 4041 | for (i = first; i < first + num; i++) { | ||
| 4042 | if (map->stripes[i].dev->bdev && | ||
| 4043 | (tolerance || map->stripes[i].dev != srcdev)) | ||
| 4044 | return i; | ||
| 4045 | } | ||
| 3730 | } | 4046 | } |
| 4047 | |||
| 3731 | /* we couldn't find one that doesn't fail. Just return something | 4048 | /* we couldn't find one that doesn't fail. Just return something |
| 3732 | * and the io error handling code will clean up eventually | 4049 | * and the io error handling code will clean up eventually |
| 3733 | */ | 4050 | */ |
| 3734 | return optimal; | 4051 | return optimal; |
| 3735 | } | 4052 | } |
| 3736 | 4053 | ||
| 3737 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 4054 | static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
| 3738 | u64 logical, u64 *length, | 4055 | u64 logical, u64 *length, |
| 3739 | struct btrfs_bio **bbio_ret, | 4056 | struct btrfs_bio **bbio_ret, |
| 3740 | int mirror_num) | 4057 | int mirror_num) |
| 3741 | { | 4058 | { |
| 3742 | struct extent_map *em; | 4059 | struct extent_map *em; |
| 3743 | struct map_lookup *map; | 4060 | struct map_lookup *map; |
| 4061 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
| 3744 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 4062 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
| 3745 | u64 offset; | 4063 | u64 offset; |
| 3746 | u64 stripe_offset; | 4064 | u64 stripe_offset; |
| @@ -3754,13 +4072,18 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3754 | int num_stripes; | 4072 | int num_stripes; |
| 3755 | int max_errors = 0; | 4073 | int max_errors = 0; |
| 3756 | struct btrfs_bio *bbio = NULL; | 4074 | struct btrfs_bio *bbio = NULL; |
| 4075 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 4076 | int dev_replace_is_ongoing = 0; | ||
| 4077 | int num_alloc_stripes; | ||
| 4078 | int patch_the_first_stripe_for_dev_replace = 0; | ||
| 4079 | u64 physical_to_patch_in_first_stripe = 0; | ||
| 3757 | 4080 | ||
| 3758 | read_lock(&em_tree->lock); | 4081 | read_lock(&em_tree->lock); |
| 3759 | em = lookup_extent_mapping(em_tree, logical, *length); | 4082 | em = lookup_extent_mapping(em_tree, logical, *length); |
| 3760 | read_unlock(&em_tree->lock); | 4083 | read_unlock(&em_tree->lock); |
| 3761 | 4084 | ||
| 3762 | if (!em) { | 4085 | if (!em) { |
| 3763 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", | 4086 | printk(KERN_CRIT "btrfs: unable to find logical %llu len %llu\n", |
| 3764 | (unsigned long long)logical, | 4087 | (unsigned long long)logical, |
| 3765 | (unsigned long long)*length); | 4088 | (unsigned long long)*length); |
| 3766 | BUG(); | 4089 | BUG(); |
| @@ -3770,9 +4093,6 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3770 | map = (struct map_lookup *)em->bdev; | 4093 | map = (struct map_lookup *)em->bdev; |
| 3771 | offset = logical - em->start; | 4094 | offset = logical - em->start; |
| 3772 | 4095 | ||
| 3773 | if (mirror_num > map->num_stripes) | ||
| 3774 | mirror_num = 0; | ||
| 3775 | |||
| 3776 | stripe_nr = offset; | 4096 | stripe_nr = offset; |
| 3777 | /* | 4097 | /* |
| 3778 | * stripe_nr counts the total number of stripes we have to stride | 4098 | * stripe_nr counts the total number of stripes we have to stride |
| @@ -3799,6 +4119,93 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3799 | if (!bbio_ret) | 4119 | if (!bbio_ret) |
| 3800 | goto out; | 4120 | goto out; |
| 3801 | 4121 | ||
| 4122 | btrfs_dev_replace_lock(dev_replace); | ||
| 4123 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); | ||
| 4124 | if (!dev_replace_is_ongoing) | ||
| 4125 | btrfs_dev_replace_unlock(dev_replace); | ||
| 4126 | |||
| 4127 | if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && | ||
| 4128 | !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && | ||
| 4129 | dev_replace->tgtdev != NULL) { | ||
| 4130 | /* | ||
| 4131 | * in dev-replace case, for repair case (that's the only | ||
| 4132 | * case where the mirror is selected explicitly when | ||
| 4133 | * calling btrfs_map_block), blocks left of the left cursor | ||
| 4134 | * can also be read from the target drive. | ||
| 4135 | * For REQ_GET_READ_MIRRORS, the target drive is added as | ||
| 4136 | * the last one to the array of stripes. For READ, it also | ||
| 4137 | * needs to be supported using the same mirror number. | ||
| 4138 | * If the requested block is not left of the left cursor, | ||
| 4139 | * EIO is returned. This can happen because btrfs_num_copies() | ||
| 4140 | * returns one more in the dev-replace case. | ||
| 4141 | */ | ||
| 4142 | u64 tmp_length = *length; | ||
| 4143 | struct btrfs_bio *tmp_bbio = NULL; | ||
| 4144 | int tmp_num_stripes; | ||
| 4145 | u64 srcdev_devid = dev_replace->srcdev->devid; | ||
| 4146 | int index_srcdev = 0; | ||
| 4147 | int found = 0; | ||
| 4148 | u64 physical_of_found = 0; | ||
| 4149 | |||
| 4150 | ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, | ||
| 4151 | logical, &tmp_length, &tmp_bbio, 0); | ||
| 4152 | if (ret) { | ||
| 4153 | WARN_ON(tmp_bbio != NULL); | ||
| 4154 | goto out; | ||
| 4155 | } | ||
| 4156 | |||
| 4157 | tmp_num_stripes = tmp_bbio->num_stripes; | ||
| 4158 | if (mirror_num > tmp_num_stripes) { | ||
| 4159 | /* | ||
| 4160 | * REQ_GET_READ_MIRRORS does not contain this | ||
| 4161 | * mirror, that means that the requested area | ||
| 4162 | * is not left of the left cursor | ||
| 4163 | */ | ||
| 4164 | ret = -EIO; | ||
| 4165 | kfree(tmp_bbio); | ||
| 4166 | goto out; | ||
| 4167 | } | ||
| 4168 | |||
| 4169 | /* | ||
| 4170 | * process the rest of the function using the mirror_num | ||
| 4171 | * of the source drive. Therefore look it up first. | ||
| 4172 | * At the end, patch the device pointer to the one of the | ||
| 4173 | * target drive. | ||
| 4174 | */ | ||
| 4175 | for (i = 0; i < tmp_num_stripes; i++) { | ||
| 4176 | if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) { | ||
| 4177 | /* | ||
| 4178 | * In case of DUP, in order to keep it | ||
| 4179 | * simple, only add the mirror with the | ||
| 4180 | * lowest physical address | ||
| 4181 | */ | ||
| 4182 | if (found && | ||
| 4183 | physical_of_found <= | ||
| 4184 | tmp_bbio->stripes[i].physical) | ||
| 4185 | continue; | ||
| 4186 | index_srcdev = i; | ||
| 4187 | found = 1; | ||
| 4188 | physical_of_found = | ||
| 4189 | tmp_bbio->stripes[i].physical; | ||
| 4190 | } | ||
| 4191 | } | ||
| 4192 | |||
| 4193 | if (found) { | ||
| 4194 | mirror_num = index_srcdev + 1; | ||
| 4195 | patch_the_first_stripe_for_dev_replace = 1; | ||
| 4196 | physical_to_patch_in_first_stripe = physical_of_found; | ||
| 4197 | } else { | ||
| 4198 | WARN_ON(1); | ||
| 4199 | ret = -EIO; | ||
| 4200 | kfree(tmp_bbio); | ||
| 4201 | goto out; | ||
| 4202 | } | ||
| 4203 | |||
| 4204 | kfree(tmp_bbio); | ||
| 4205 | } else if (mirror_num > map->num_stripes) { | ||
| 4206 | mirror_num = 0; | ||
| 4207 | } | ||
| 4208 | |||
| 3802 | num_stripes = 1; | 4209 | num_stripes = 1; |
| 3803 | stripe_index = 0; | 4210 | stripe_index = 0; |
| 3804 | stripe_nr_orig = stripe_nr; | 4211 | stripe_nr_orig = stripe_nr; |
| @@ -3813,19 +4220,20 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3813 | stripe_nr_end - stripe_nr_orig); | 4220 | stripe_nr_end - stripe_nr_orig); |
| 3814 | stripe_index = do_div(stripe_nr, map->num_stripes); | 4221 | stripe_index = do_div(stripe_nr, map->num_stripes); |
| 3815 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 4222 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
| 3816 | if (rw & (REQ_WRITE | REQ_DISCARD)) | 4223 | if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) |
| 3817 | num_stripes = map->num_stripes; | 4224 | num_stripes = map->num_stripes; |
| 3818 | else if (mirror_num) | 4225 | else if (mirror_num) |
| 3819 | stripe_index = mirror_num - 1; | 4226 | stripe_index = mirror_num - 1; |
| 3820 | else { | 4227 | else { |
| 3821 | stripe_index = find_live_mirror(map, 0, | 4228 | stripe_index = find_live_mirror(fs_info, map, 0, |
| 3822 | map->num_stripes, | 4229 | map->num_stripes, |
| 3823 | current->pid % map->num_stripes); | 4230 | current->pid % map->num_stripes, |
| 4231 | dev_replace_is_ongoing); | ||
| 3824 | mirror_num = stripe_index + 1; | 4232 | mirror_num = stripe_index + 1; |
| 3825 | } | 4233 | } |
| 3826 | 4234 | ||
| 3827 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 4235 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
| 3828 | if (rw & (REQ_WRITE | REQ_DISCARD)) { | 4236 | if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) { |
| 3829 | num_stripes = map->num_stripes; | 4237 | num_stripes = map->num_stripes; |
| 3830 | } else if (mirror_num) { | 4238 | } else if (mirror_num) { |
| 3831 | stripe_index = mirror_num - 1; | 4239 | stripe_index = mirror_num - 1; |
| @@ -3839,7 +4247,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3839 | stripe_index = do_div(stripe_nr, factor); | 4247 | stripe_index = do_div(stripe_nr, factor); |
| 3840 | stripe_index *= map->sub_stripes; | 4248 | stripe_index *= map->sub_stripes; |
| 3841 | 4249 | ||
| 3842 | if (rw & REQ_WRITE) | 4250 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) |
| 3843 | num_stripes = map->sub_stripes; | 4251 | num_stripes = map->sub_stripes; |
| 3844 | else if (rw & REQ_DISCARD) | 4252 | else if (rw & REQ_DISCARD) |
| 3845 | num_stripes = min_t(u64, map->sub_stripes * | 4253 | num_stripes = min_t(u64, map->sub_stripes * |
| @@ -3849,9 +4257,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3849 | stripe_index += mirror_num - 1; | 4257 | stripe_index += mirror_num - 1; |
| 3850 | else { | 4258 | else { |
| 3851 | int old_stripe_index = stripe_index; | 4259 | int old_stripe_index = stripe_index; |
| 3852 | stripe_index = find_live_mirror(map, stripe_index, | 4260 | stripe_index = find_live_mirror(fs_info, map, |
| 4261 | stripe_index, | ||
| 3853 | map->sub_stripes, stripe_index + | 4262 | map->sub_stripes, stripe_index + |
| 3854 | current->pid % map->sub_stripes); | 4263 | current->pid % map->sub_stripes, |
| 4264 | dev_replace_is_ongoing); | ||
| 3855 | mirror_num = stripe_index - old_stripe_index + 1; | 4265 | mirror_num = stripe_index - old_stripe_index + 1; |
| 3856 | } | 4266 | } |
| 3857 | } else { | 4267 | } else { |
| @@ -3865,7 +4275,14 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3865 | } | 4275 | } |
| 3866 | BUG_ON(stripe_index >= map->num_stripes); | 4276 | BUG_ON(stripe_index >= map->num_stripes); |
| 3867 | 4277 | ||
| 3868 | bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS); | 4278 | num_alloc_stripes = num_stripes; |
| 4279 | if (dev_replace_is_ongoing) { | ||
| 4280 | if (rw & (REQ_WRITE | REQ_DISCARD)) | ||
| 4281 | num_alloc_stripes <<= 1; | ||
| 4282 | if (rw & REQ_GET_READ_MIRRORS) | ||
| 4283 | num_alloc_stripes++; | ||
| 4284 | } | ||
| 4285 | bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); | ||
| 3869 | if (!bbio) { | 4286 | if (!bbio) { |
| 3870 | ret = -ENOMEM; | 4287 | ret = -ENOMEM; |
| 3871 | goto out; | 4288 | goto out; |
| @@ -3952,7 +4369,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3952 | } | 4369 | } |
| 3953 | } | 4370 | } |
| 3954 | 4371 | ||
| 3955 | if (rw & REQ_WRITE) { | 4372 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) { |
| 3956 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | 4373 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
| 3957 | BTRFS_BLOCK_GROUP_RAID10 | | 4374 | BTRFS_BLOCK_GROUP_RAID10 | |
| 3958 | BTRFS_BLOCK_GROUP_DUP)) { | 4375 | BTRFS_BLOCK_GROUP_DUP)) { |
| @@ -3960,20 +4377,115 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3960 | } | 4377 | } |
| 3961 | } | 4378 | } |
| 3962 | 4379 | ||
| 4380 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
| 4381 | dev_replace->tgtdev != NULL) { | ||
| 4382 | int index_where_to_add; | ||
| 4383 | u64 srcdev_devid = dev_replace->srcdev->devid; | ||
| 4384 | |||
| 4385 | /* | ||
| 4386 | * duplicate the write operations while the dev replace | ||
| 4387 | * procedure is running. Since the copying of the old disk | ||
| 4388 | * to the new disk takes place at run time while the | ||
| 4389 | * filesystem is mounted writable, the regular write | ||
| 4390 | * operations to the old disk have to be duplicated to go | ||
| 4391 | * to the new disk as well. | ||
| 4392 | * Note that device->missing is handled by the caller, and | ||
| 4393 | * that the write to the old disk is already set up in the | ||
| 4394 | * stripes array. | ||
| 4395 | */ | ||
| 4396 | index_where_to_add = num_stripes; | ||
| 4397 | for (i = 0; i < num_stripes; i++) { | ||
| 4398 | if (bbio->stripes[i].dev->devid == srcdev_devid) { | ||
| 4399 | /* write to new disk, too */ | ||
| 4400 | struct btrfs_bio_stripe *new = | ||
| 4401 | bbio->stripes + index_where_to_add; | ||
| 4402 | struct btrfs_bio_stripe *old = | ||
| 4403 | bbio->stripes + i; | ||
| 4404 | |||
| 4405 | new->physical = old->physical; | ||
| 4406 | new->length = old->length; | ||
| 4407 | new->dev = dev_replace->tgtdev; | ||
| 4408 | index_where_to_add++; | ||
| 4409 | max_errors++; | ||
| 4410 | } | ||
| 4411 | } | ||
| 4412 | num_stripes = index_where_to_add; | ||
| 4413 | } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) && | ||
| 4414 | dev_replace->tgtdev != NULL) { | ||
| 4415 | u64 srcdev_devid = dev_replace->srcdev->devid; | ||
| 4416 | int index_srcdev = 0; | ||
| 4417 | int found = 0; | ||
| 4418 | u64 physical_of_found = 0; | ||
| 4419 | |||
| 4420 | /* | ||
| 4421 | * During the dev-replace procedure, the target drive can | ||
| 4422 | * also be used to read data in case it is needed to repair | ||
| 4423 | * a corrupt block elsewhere. This is possible if the | ||
| 4424 | * requested area is left of the left cursor. In this area, | ||
| 4425 | * the target drive is a full copy of the source drive. | ||
| 4426 | */ | ||
| 4427 | for (i = 0; i < num_stripes; i++) { | ||
| 4428 | if (bbio->stripes[i].dev->devid == srcdev_devid) { | ||
| 4429 | /* | ||
| 4430 | * In case of DUP, in order to keep it | ||
| 4431 | * simple, only add the mirror with the | ||
| 4432 | * lowest physical address | ||
| 4433 | */ | ||
| 4434 | if (found && | ||
| 4435 | physical_of_found <= | ||
| 4436 | bbio->stripes[i].physical) | ||
| 4437 | continue; | ||
| 4438 | index_srcdev = i; | ||
| 4439 | found = 1; | ||
| 4440 | physical_of_found = bbio->stripes[i].physical; | ||
| 4441 | } | ||
| 4442 | } | ||
| 4443 | if (found) { | ||
| 4444 | u64 length = map->stripe_len; | ||
| 4445 | |||
| 4446 | if (physical_of_found + length <= | ||
| 4447 | dev_replace->cursor_left) { | ||
| 4448 | struct btrfs_bio_stripe *tgtdev_stripe = | ||
| 4449 | bbio->stripes + num_stripes; | ||
| 4450 | |||
| 4451 | tgtdev_stripe->physical = physical_of_found; | ||
| 4452 | tgtdev_stripe->length = | ||
| 4453 | bbio->stripes[index_srcdev].length; | ||
| 4454 | tgtdev_stripe->dev = dev_replace->tgtdev; | ||
| 4455 | |||
| 4456 | num_stripes++; | ||
| 4457 | } | ||
| 4458 | } | ||
| 4459 | } | ||
| 4460 | |||
| 3963 | *bbio_ret = bbio; | 4461 | *bbio_ret = bbio; |
| 3964 | bbio->num_stripes = num_stripes; | 4462 | bbio->num_stripes = num_stripes; |
| 3965 | bbio->max_errors = max_errors; | 4463 | bbio->max_errors = max_errors; |
| 3966 | bbio->mirror_num = mirror_num; | 4464 | bbio->mirror_num = mirror_num; |
| 4465 | |||
| 4466 | /* | ||
| 4467 | * this is the case that REQ_READ && dev_replace_is_ongoing && | ||
| 4468 | * mirror_num == num_stripes + 1 && dev_replace target drive is | ||
| 4469 | * available as a mirror | ||
| 4470 | */ | ||
| 4471 | if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) { | ||
| 4472 | WARN_ON(num_stripes > 1); | ||
| 4473 | bbio->stripes[0].dev = dev_replace->tgtdev; | ||
| 4474 | bbio->stripes[0].physical = physical_to_patch_in_first_stripe; | ||
| 4475 | bbio->mirror_num = map->num_stripes + 1; | ||
| 4476 | } | ||
| 3967 | out: | 4477 | out: |
| 4478 | if (dev_replace_is_ongoing) | ||
| 4479 | btrfs_dev_replace_unlock(dev_replace); | ||
| 3968 | free_extent_map(em); | 4480 | free_extent_map(em); |
| 3969 | return ret; | 4481 | return ret; |
| 3970 | } | 4482 | } |
| 3971 | 4483 | ||
| 3972 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 4484 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
| 3973 | u64 logical, u64 *length, | 4485 | u64 logical, u64 *length, |
| 3974 | struct btrfs_bio **bbio_ret, int mirror_num) | 4486 | struct btrfs_bio **bbio_ret, int mirror_num) |
| 3975 | { | 4487 | { |
| 3976 | return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret, | 4488 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, |
| 3977 | mirror_num); | 4489 | mirror_num); |
| 3978 | } | 4490 | } |
| 3979 | 4491 | ||
| @@ -4192,10 +4704,116 @@ static noinline void schedule_bio(struct btrfs_root *root, | |||
| 4192 | &device->work); | 4704 | &device->work); |
| 4193 | } | 4705 | } |
| 4194 | 4706 | ||
| 4707 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, | ||
| 4708 | sector_t sector) | ||
| 4709 | { | ||
| 4710 | struct bio_vec *prev; | ||
| 4711 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 4712 | unsigned short max_sectors = queue_max_sectors(q); | ||
| 4713 | struct bvec_merge_data bvm = { | ||
| 4714 | .bi_bdev = bdev, | ||
| 4715 | .bi_sector = sector, | ||
| 4716 | .bi_rw = bio->bi_rw, | ||
| 4717 | }; | ||
| 4718 | |||
| 4719 | if (bio->bi_vcnt == 0) { | ||
| 4720 | WARN_ON(1); | ||
| 4721 | return 1; | ||
| 4722 | } | ||
| 4723 | |||
| 4724 | prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; | ||
| 4725 | if ((bio->bi_size >> 9) > max_sectors) | ||
| 4726 | return 0; | ||
| 4727 | |||
| 4728 | if (!q->merge_bvec_fn) | ||
| 4729 | return 1; | ||
| 4730 | |||
| 4731 | bvm.bi_size = bio->bi_size - prev->bv_len; | ||
| 4732 | if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) | ||
| 4733 | return 0; | ||
| 4734 | return 1; | ||
| 4735 | } | ||
| 4736 | |||
| 4737 | static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | ||
| 4738 | struct bio *bio, u64 physical, int dev_nr, | ||
| 4739 | int rw, int async) | ||
| 4740 | { | ||
| 4741 | struct btrfs_device *dev = bbio->stripes[dev_nr].dev; | ||
| 4742 | |||
| 4743 | bio->bi_private = bbio; | ||
| 4744 | bio->bi_private = merge_stripe_index_into_bio_private( | ||
| 4745 | bio->bi_private, (unsigned int)dev_nr); | ||
| 4746 | bio->bi_end_io = btrfs_end_bio; | ||
| 4747 | bio->bi_sector = physical >> 9; | ||
| 4748 | #ifdef DEBUG | ||
| 4749 | { | ||
| 4750 | struct rcu_string *name; | ||
| 4751 | |||
| 4752 | rcu_read_lock(); | ||
| 4753 | name = rcu_dereference(dev->name); | ||
| 4754 | pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " | ||
| 4755 | "(%s id %llu), size=%u\n", rw, | ||
| 4756 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | ||
| 4757 | name->str, dev->devid, bio->bi_size); | ||
| 4758 | rcu_read_unlock(); | ||
| 4759 | } | ||
| 4760 | #endif | ||
| 4761 | bio->bi_bdev = dev->bdev; | ||
| 4762 | if (async) | ||
| 4763 | schedule_bio(root, dev, rw, bio); | ||
| 4764 | else | ||
| 4765 | btrfsic_submit_bio(rw, bio); | ||
| 4766 | } | ||
| 4767 | |||
| 4768 | static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | ||
| 4769 | struct bio *first_bio, struct btrfs_device *dev, | ||
| 4770 | int dev_nr, int rw, int async) | ||
| 4771 | { | ||
| 4772 | struct bio_vec *bvec = first_bio->bi_io_vec; | ||
| 4773 | struct bio *bio; | ||
| 4774 | int nr_vecs = bio_get_nr_vecs(dev->bdev); | ||
| 4775 | u64 physical = bbio->stripes[dev_nr].physical; | ||
| 4776 | |||
| 4777 | again: | ||
| 4778 | bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS); | ||
| 4779 | if (!bio) | ||
| 4780 | return -ENOMEM; | ||
| 4781 | |||
| 4782 | while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { | ||
| 4783 | if (bio_add_page(bio, bvec->bv_page, bvec->bv_len, | ||
| 4784 | bvec->bv_offset) < bvec->bv_len) { | ||
| 4785 | u64 len = bio->bi_size; | ||
| 4786 | |||
| 4787 | atomic_inc(&bbio->stripes_pending); | ||
| 4788 | submit_stripe_bio(root, bbio, bio, physical, dev_nr, | ||
| 4789 | rw, async); | ||
| 4790 | physical += len; | ||
| 4791 | goto again; | ||
| 4792 | } | ||
| 4793 | bvec++; | ||
| 4794 | } | ||
| 4795 | |||
| 4796 | submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async); | ||
| 4797 | return 0; | ||
| 4798 | } | ||
| 4799 | |||
| 4800 | static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) | ||
| 4801 | { | ||
| 4802 | atomic_inc(&bbio->error); | ||
| 4803 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | ||
| 4804 | bio->bi_private = bbio->private; | ||
| 4805 | bio->bi_end_io = bbio->end_io; | ||
| 4806 | bio->bi_bdev = (struct block_device *) | ||
| 4807 | (unsigned long)bbio->mirror_num; | ||
| 4808 | bio->bi_sector = logical >> 9; | ||
| 4809 | kfree(bbio); | ||
| 4810 | bio_endio(bio, -EIO); | ||
| 4811 | } | ||
| 4812 | } | ||
| 4813 | |||
| 4195 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | 4814 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, |
| 4196 | int mirror_num, int async_submit) | 4815 | int mirror_num, int async_submit) |
| 4197 | { | 4816 | { |
| 4198 | struct btrfs_mapping_tree *map_tree; | ||
| 4199 | struct btrfs_device *dev; | 4817 | struct btrfs_device *dev; |
| 4200 | struct bio *first_bio = bio; | 4818 | struct bio *first_bio = bio; |
| 4201 | u64 logical = (u64)bio->bi_sector << 9; | 4819 | u64 logical = (u64)bio->bi_sector << 9; |
| @@ -4207,17 +4825,16 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 4207 | struct btrfs_bio *bbio = NULL; | 4825 | struct btrfs_bio *bbio = NULL; |
| 4208 | 4826 | ||
| 4209 | length = bio->bi_size; | 4827 | length = bio->bi_size; |
| 4210 | map_tree = &root->fs_info->mapping_tree; | ||
| 4211 | map_length = length; | 4828 | map_length = length; |
| 4212 | 4829 | ||
| 4213 | ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, | 4830 | ret = btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
| 4214 | mirror_num); | 4831 | mirror_num); |
| 4215 | if (ret) /* -ENOMEM */ | 4832 | if (ret) |
| 4216 | return ret; | 4833 | return ret; |
| 4217 | 4834 | ||
| 4218 | total_devs = bbio->num_stripes; | 4835 | total_devs = bbio->num_stripes; |
| 4219 | if (map_length < length) { | 4836 | if (map_length < length) { |
| 4220 | printk(KERN_CRIT "mapping failed logical %llu bio len %llu " | 4837 | printk(KERN_CRIT "btrfs: mapping failed logical %llu bio len %llu " |
| 4221 | "len %llu\n", (unsigned long long)logical, | 4838 | "len %llu\n", (unsigned long long)logical, |
| 4222 | (unsigned long long)length, | 4839 | (unsigned long long)length, |
| 4223 | (unsigned long long)map_length); | 4840 | (unsigned long long)map_length); |
| @@ -4230,52 +4847,48 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 4230 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 4847 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
| 4231 | 4848 | ||
| 4232 | while (dev_nr < total_devs) { | 4849 | while (dev_nr < total_devs) { |
| 4850 | dev = bbio->stripes[dev_nr].dev; | ||
| 4851 | if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) { | ||
| 4852 | bbio_error(bbio, first_bio, logical); | ||
| 4853 | dev_nr++; | ||
| 4854 | continue; | ||
| 4855 | } | ||
| 4856 | |||
| 4857 | /* | ||
| 4858 | * Check and see if we're ok with this bio based on it's size | ||
| 4859 | * and offset with the given device. | ||
| 4860 | */ | ||
| 4861 | if (!bio_size_ok(dev->bdev, first_bio, | ||
| 4862 | bbio->stripes[dev_nr].physical >> 9)) { | ||
| 4863 | ret = breakup_stripe_bio(root, bbio, first_bio, dev, | ||
| 4864 | dev_nr, rw, async_submit); | ||
| 4865 | BUG_ON(ret); | ||
| 4866 | dev_nr++; | ||
| 4867 | continue; | ||
| 4868 | } | ||
| 4869 | |||
| 4233 | if (dev_nr < total_devs - 1) { | 4870 | if (dev_nr < total_devs - 1) { |
| 4234 | bio = bio_clone(first_bio, GFP_NOFS); | 4871 | bio = bio_clone(first_bio, GFP_NOFS); |
| 4235 | BUG_ON(!bio); /* -ENOMEM */ | 4872 | BUG_ON(!bio); /* -ENOMEM */ |
| 4236 | } else { | 4873 | } else { |
| 4237 | bio = first_bio; | 4874 | bio = first_bio; |
| 4238 | } | 4875 | } |
| 4239 | bio->bi_private = bbio; | 4876 | |
| 4240 | bio->bi_private = merge_stripe_index_into_bio_private( | 4877 | submit_stripe_bio(root, bbio, bio, |
| 4241 | bio->bi_private, (unsigned int)dev_nr); | 4878 | bbio->stripes[dev_nr].physical, dev_nr, rw, |
| 4242 | bio->bi_end_io = btrfs_end_bio; | 4879 | async_submit); |
| 4243 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; | ||
| 4244 | dev = bbio->stripes[dev_nr].dev; | ||
| 4245 | if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { | ||
| 4246 | #ifdef DEBUG | ||
| 4247 | struct rcu_string *name; | ||
| 4248 | |||
| 4249 | rcu_read_lock(); | ||
| 4250 | name = rcu_dereference(dev->name); | ||
| 4251 | pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " | ||
| 4252 | "(%s id %llu), size=%u\n", rw, | ||
| 4253 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | ||
| 4254 | name->str, dev->devid, bio->bi_size); | ||
| 4255 | rcu_read_unlock(); | ||
| 4256 | #endif | ||
| 4257 | bio->bi_bdev = dev->bdev; | ||
| 4258 | if (async_submit) | ||
| 4259 | schedule_bio(root, dev, rw, bio); | ||
| 4260 | else | ||
| 4261 | btrfsic_submit_bio(rw, bio); | ||
| 4262 | } else { | ||
| 4263 | bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 4264 | bio->bi_sector = logical >> 9; | ||
| 4265 | bio_endio(bio, -EIO); | ||
| 4266 | } | ||
| 4267 | dev_nr++; | 4880 | dev_nr++; |
| 4268 | } | 4881 | } |
| 4269 | return 0; | 4882 | return 0; |
| 4270 | } | 4883 | } |
| 4271 | 4884 | ||
| 4272 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | 4885 | struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, |
| 4273 | u8 *uuid, u8 *fsid) | 4886 | u8 *uuid, u8 *fsid) |
| 4274 | { | 4887 | { |
| 4275 | struct btrfs_device *device; | 4888 | struct btrfs_device *device; |
| 4276 | struct btrfs_fs_devices *cur_devices; | 4889 | struct btrfs_fs_devices *cur_devices; |
| 4277 | 4890 | ||
| 4278 | cur_devices = root->fs_info->fs_devices; | 4891 | cur_devices = fs_info->fs_devices; |
| 4279 | while (cur_devices) { | 4892 | while (cur_devices) { |
| 4280 | if (!fsid || | 4893 | if (!fsid || |
| 4281 | !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { | 4894 | !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { |
| @@ -4356,6 +4969,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 4356 | em->bdev = (struct block_device *)map; | 4969 | em->bdev = (struct block_device *)map; |
| 4357 | em->start = logical; | 4970 | em->start = logical; |
| 4358 | em->len = length; | 4971 | em->len = length; |
| 4972 | em->orig_start = 0; | ||
| 4359 | em->block_start = 0; | 4973 | em->block_start = 0; |
| 4360 | em->block_len = em->len; | 4974 | em->block_len = em->len; |
| 4361 | 4975 | ||
| @@ -4373,8 +4987,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 4373 | read_extent_buffer(leaf, uuid, (unsigned long) | 4987 | read_extent_buffer(leaf, uuid, (unsigned long) |
| 4374 | btrfs_stripe_dev_uuid_nr(chunk, i), | 4988 | btrfs_stripe_dev_uuid_nr(chunk, i), |
| 4375 | BTRFS_UUID_SIZE); | 4989 | BTRFS_UUID_SIZE); |
| 4376 | map->stripes[i].dev = btrfs_find_device(root, devid, uuid, | 4990 | map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, |
| 4377 | NULL); | 4991 | uuid, NULL); |
| 4378 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { | 4992 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { |
| 4379 | kfree(map); | 4993 | kfree(map); |
| 4380 | free_extent_map(em); | 4994 | free_extent_map(em); |
| @@ -4415,6 +5029,8 @@ static void fill_device_from_item(struct extent_buffer *leaf, | |||
| 4415 | device->io_align = btrfs_device_io_align(leaf, dev_item); | 5029 | device->io_align = btrfs_device_io_align(leaf, dev_item); |
| 4416 | device->io_width = btrfs_device_io_width(leaf, dev_item); | 5030 | device->io_width = btrfs_device_io_width(leaf, dev_item); |
| 4417 | device->sector_size = btrfs_device_sector_size(leaf, dev_item); | 5031 | device->sector_size = btrfs_device_sector_size(leaf, dev_item); |
| 5032 | WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); | ||
| 5033 | device->is_tgtdev_for_dev_replace = 0; | ||
| 4418 | 5034 | ||
| 4419 | ptr = (unsigned long)btrfs_device_uuid(dev_item); | 5035 | ptr = (unsigned long)btrfs_device_uuid(dev_item); |
| 4420 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); | 5036 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); |
| @@ -4492,7 +5108,7 @@ static int read_one_dev(struct btrfs_root *root, | |||
| 4492 | return ret; | 5108 | return ret; |
| 4493 | } | 5109 | } |
| 4494 | 5110 | ||
| 4495 | device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); | 5111 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); |
| 4496 | if (!device || !device->bdev) { | 5112 | if (!device || !device->bdev) { |
| 4497 | if (!btrfs_test_opt(root, DEGRADED)) | 5113 | if (!btrfs_test_opt(root, DEGRADED)) |
| 4498 | return -EIO; | 5114 | return -EIO; |
| @@ -4525,7 +5141,7 @@ static int read_one_dev(struct btrfs_root *root, | |||
| 4525 | fill_device_from_item(leaf, dev_item, device); | 5141 | fill_device_from_item(leaf, dev_item, device); |
| 4526 | device->dev_root = root->fs_info->dev_root; | 5142 | device->dev_root = root->fs_info->dev_root; |
| 4527 | device->in_fs_metadata = 1; | 5143 | device->in_fs_metadata = 1; |
| 4528 | if (device->writeable) { | 5144 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
| 4529 | device->fs_devices->total_rw_bytes += device->total_bytes; | 5145 | device->fs_devices->total_rw_bytes += device->total_bytes; |
| 4530 | spin_lock(&root->fs_info->free_chunk_lock); | 5146 | spin_lock(&root->fs_info->free_chunk_lock); |
| 4531 | root->fs_info->free_chunk_space += device->total_bytes - | 5147 | root->fs_info->free_chunk_space += device->total_bytes - |
| @@ -4884,7 +5500,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
| 4884 | int i; | 5500 | int i; |
| 4885 | 5501 | ||
| 4886 | mutex_lock(&fs_devices->device_list_mutex); | 5502 | mutex_lock(&fs_devices->device_list_mutex); |
| 4887 | dev = btrfs_find_device(root, stats->devid, NULL, NULL); | 5503 | dev = btrfs_find_device(root->fs_info, stats->devid, NULL, NULL); |
| 4888 | mutex_unlock(&fs_devices->device_list_mutex); | 5504 | mutex_unlock(&fs_devices->device_list_mutex); |
| 4889 | 5505 | ||
| 4890 | if (!dev) { | 5506 | if (!dev) { |
| @@ -4912,3 +5528,21 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
| 4912 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; | 5528 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; |
| 4913 | return 0; | 5529 | return 0; |
| 4914 | } | 5530 | } |
| 5531 | |||
| 5532 | int btrfs_scratch_superblock(struct btrfs_device *device) | ||
| 5533 | { | ||
| 5534 | struct buffer_head *bh; | ||
| 5535 | struct btrfs_super_block *disk_super; | ||
| 5536 | |||
| 5537 | bh = btrfs_read_dev_super(device->bdev); | ||
| 5538 | if (!bh) | ||
| 5539 | return -EINVAL; | ||
| 5540 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
| 5541 | |||
| 5542 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); | ||
| 5543 | set_buffer_dirty(bh); | ||
| 5544 | sync_dirty_buffer(bh); | ||
| 5545 | brelse(bh); | ||
| 5546 | |||
| 5547 | return 0; | ||
| 5548 | } | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 53c06af92e8d..d3c3939ac751 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -50,6 +50,7 @@ struct btrfs_device { | |||
| 50 | int in_fs_metadata; | 50 | int in_fs_metadata; |
| 51 | int missing; | 51 | int missing; |
| 52 | int can_discard; | 52 | int can_discard; |
| 53 | int is_tgtdev_for_dev_replace; | ||
| 53 | 54 | ||
| 54 | spinlock_t io_lock; | 55 | spinlock_t io_lock; |
| 55 | 56 | ||
| @@ -88,7 +89,7 @@ struct btrfs_device { | |||
| 88 | u8 uuid[BTRFS_UUID_SIZE]; | 89 | u8 uuid[BTRFS_UUID_SIZE]; |
| 89 | 90 | ||
| 90 | /* per-device scrub information */ | 91 | /* per-device scrub information */ |
| 91 | struct scrub_dev *scrub_device; | 92 | struct scrub_ctx *scrub_device; |
| 92 | 93 | ||
| 93 | struct btrfs_work work; | 94 | struct btrfs_work work; |
| 94 | struct rcu_head rcu; | 95 | struct rcu_head rcu; |
| @@ -179,6 +180,15 @@ struct btrfs_device_info { | |||
| 179 | u64 total_avail; | 180 | u64 total_avail; |
| 180 | }; | 181 | }; |
| 181 | 182 | ||
| 183 | struct btrfs_raid_attr { | ||
| 184 | int sub_stripes; /* sub_stripes info for map */ | ||
| 185 | int dev_stripes; /* stripes per dev */ | ||
| 186 | int devs_max; /* max devs to use */ | ||
| 187 | int devs_min; /* min devs needed */ | ||
| 188 | int devs_increment; /* ndevs has to be a multiple of this */ | ||
| 189 | int ncopies; /* how many copies to data has */ | ||
| 190 | }; | ||
| 191 | |||
| 182 | struct map_lookup { | 192 | struct map_lookup { |
| 183 | u64 type; | 193 | u64 type; |
| 184 | int io_align; | 194 | int io_align; |
| @@ -248,7 +258,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | |||
| 248 | struct btrfs_device *device, | 258 | struct btrfs_device *device, |
| 249 | u64 chunk_tree, u64 chunk_objectid, | 259 | u64 chunk_tree, u64 chunk_objectid, |
| 250 | u64 chunk_offset, u64 start, u64 num_bytes); | 260 | u64 chunk_offset, u64 start, u64 num_bytes); |
| 251 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 261 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
| 252 | u64 logical, u64 *length, | 262 | u64 logical, u64 *length, |
| 253 | struct btrfs_bio **bbio_ret, int mirror_num); | 263 | struct btrfs_bio **bbio_ret, int mirror_num); |
| 254 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 264 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
| @@ -267,19 +277,27 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 267 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | 277 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, |
| 268 | struct btrfs_fs_devices **fs_devices_ret); | 278 | struct btrfs_fs_devices **fs_devices_ret); |
| 269 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); | 279 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); |
| 270 | void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); | 280 | void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, |
| 281 | struct btrfs_fs_devices *fs_devices, int step); | ||
| 282 | int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, | ||
| 283 | char *device_path, | ||
| 284 | struct btrfs_device **device); | ||
| 285 | int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, | ||
| 286 | struct btrfs_device **device); | ||
| 271 | int btrfs_add_device(struct btrfs_trans_handle *trans, | 287 | int btrfs_add_device(struct btrfs_trans_handle *trans, |
| 272 | struct btrfs_root *root, | 288 | struct btrfs_root *root, |
| 273 | struct btrfs_device *device); | 289 | struct btrfs_device *device); |
| 274 | int btrfs_rm_device(struct btrfs_root *root, char *device_path); | 290 | int btrfs_rm_device(struct btrfs_root *root, char *device_path); |
| 275 | void btrfs_cleanup_fs_uuids(void); | 291 | void btrfs_cleanup_fs_uuids(void); |
| 276 | int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); | 292 | int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); |
| 277 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | 293 | int btrfs_grow_device(struct btrfs_trans_handle *trans, |
| 278 | struct btrfs_device *device, u64 new_size); | 294 | struct btrfs_device *device, u64 new_size); |
| 279 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | 295 | struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, |
| 280 | u8 *uuid, u8 *fsid); | 296 | u8 *uuid, u8 *fsid); |
| 281 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); | 297 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); |
| 282 | int btrfs_init_new_device(struct btrfs_root *root, char *path); | 298 | int btrfs_init_new_device(struct btrfs_root *root, char *path); |
| 299 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | ||
| 300 | struct btrfs_device **device_out); | ||
| 283 | int btrfs_balance(struct btrfs_balance_control *bctl, | 301 | int btrfs_balance(struct btrfs_balance_control *bctl, |
| 284 | struct btrfs_ioctl_balance_args *bargs); | 302 | struct btrfs_ioctl_balance_args *bargs); |
| 285 | int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); | 303 | int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); |
| @@ -296,6 +314,13 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
| 296 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | 314 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); |
| 297 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | 315 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, |
| 298 | struct btrfs_fs_info *fs_info); | 316 | struct btrfs_fs_info *fs_info); |
| 317 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | ||
| 318 | struct btrfs_device *srcdev); | ||
| 319 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | ||
| 320 | struct btrfs_device *tgtdev); | ||
| 321 | void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, | ||
| 322 | struct btrfs_device *tgtdev); | ||
| 323 | int btrfs_scratch_superblock(struct btrfs_device *device); | ||
| 299 | 324 | ||
| 300 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | 325 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, |
| 301 | int index) | 326 | int index) |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 3f4e2d69e83a..446a6848c554 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -122,6 +122,16 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
| 122 | */ | 122 | */ |
| 123 | if (!value) | 123 | if (!value) |
| 124 | goto out; | 124 | goto out; |
| 125 | } else { | ||
| 126 | di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), | ||
| 127 | name, name_len, 0); | ||
| 128 | if (IS_ERR(di)) { | ||
| 129 | ret = PTR_ERR(di); | ||
| 130 | goto out; | ||
| 131 | } | ||
| 132 | if (!di && !value) | ||
| 133 | goto out; | ||
| 134 | btrfs_release_path(path); | ||
| 125 | } | 135 | } |
| 126 | 136 | ||
| 127 | again: | 137 | again: |
| @@ -198,6 +208,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
| 198 | 208 | ||
| 199 | inode_inc_iversion(inode); | 209 | inode_inc_iversion(inode); |
| 200 | inode->i_ctime = CURRENT_TIME; | 210 | inode->i_ctime = CURRENT_TIME; |
| 211 | set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); | ||
| 201 | ret = btrfs_update_inode(trans, root, inode); | 212 | ret = btrfs_update_inode(trans, root, inode); |
| 202 | BUG_ON(ret); | 213 | BUG_ON(ret); |
| 203 | out: | 214 | out: |
| @@ -265,7 +276,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
| 265 | 276 | ||
| 266 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 277 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
| 267 | if (verify_dir_item(root, leaf, di)) | 278 | if (verify_dir_item(root, leaf, di)) |
| 268 | continue; | 279 | goto next; |
| 269 | 280 | ||
| 270 | name_len = btrfs_dir_name_len(leaf, di); | 281 | name_len = btrfs_dir_name_len(leaf, di); |
| 271 | total_size += name_len + 1; | 282 | total_size += name_len + 1; |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 92c20654cc55..9acb846c3e7f 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
| @@ -97,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 97 | *total_in = 0; | 97 | *total_in = 0; |
| 98 | 98 | ||
| 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
| 100 | printk(KERN_WARNING "deflateInit failed\n"); | 100 | printk(KERN_WARNING "btrfs: deflateInit failed\n"); |
| 101 | ret = -1; | 101 | ret = -1; |
| 102 | goto out; | 102 | goto out; |
| 103 | } | 103 | } |
| @@ -125,7 +125,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
| 125 | while (workspace->def_strm.total_in < len) { | 125 | while (workspace->def_strm.total_in < len) { |
| 126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | 126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); |
| 127 | if (ret != Z_OK) { | 127 | if (ret != Z_OK) { |
| 128 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | 128 | printk(KERN_DEBUG "btrfs: deflate in loop returned %d\n", |
| 129 | ret); | 129 | ret); |
| 130 | zlib_deflateEnd(&workspace->def_strm); | 130 | zlib_deflateEnd(&workspace->def_strm); |
| 131 | ret = -1; | 131 | ret = -1; |
| @@ -252,7 +252,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
| 252 | } | 252 | } |
| 253 | 253 | ||
| 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| 255 | printk(KERN_WARNING "inflateInit failed\n"); | 255 | printk(KERN_WARNING "btrfs: inflateInit failed\n"); |
| 256 | return -1; | 256 | return -1; |
| 257 | } | 257 | } |
| 258 | while (workspace->inf_strm.total_in < srclen) { | 258 | while (workspace->inf_strm.total_in < srclen) { |
| @@ -336,7 +336,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
| 336 | } | 336 | } |
| 337 | 337 | ||
| 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
| 339 | printk(KERN_WARNING "inflateInit failed\n"); | 339 | printk(KERN_WARNING "btrfs: inflateInit failed\n"); |
| 340 | return -1; | 340 | return -1; |
| 341 | } | 341 | } |
| 342 | 342 | ||
