diff options
Diffstat (limited to 'fs/btrfs')
34 files changed, 2874 insertions, 923 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 89b156d85d63..761e2cd8fed1 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
| @@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, | |||
| 227 | if (ret > 0) { | 227 | if (ret > 0) { |
| 228 | /* we need an acl */ | 228 | /* we need an acl */ |
| 229 | ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); | 229 | ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); |
| 230 | } else { | ||
| 231 | cache_no_acl(inode); | ||
| 230 | } | 232 | } |
| 233 | } else { | ||
| 234 | cache_no_acl(inode); | ||
| 231 | } | 235 | } |
| 232 | failed: | 236 | failed: |
| 233 | posix_acl_release(acl); | 237 | posix_acl_release(acl); |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index bcec06750232..3f75895c919b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
| @@ -24,22 +24,135 @@ | |||
| 24 | #include "delayed-ref.h" | 24 | #include "delayed-ref.h" |
| 25 | #include "locking.h" | 25 | #include "locking.h" |
| 26 | 26 | ||
| 27 | struct extent_inode_elem { | ||
| 28 | u64 inum; | ||
| 29 | u64 offset; | ||
| 30 | struct extent_inode_elem *next; | ||
| 31 | }; | ||
| 32 | |||
| 33 | static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, | ||
| 34 | struct btrfs_file_extent_item *fi, | ||
| 35 | u64 extent_item_pos, | ||
| 36 | struct extent_inode_elem **eie) | ||
| 37 | { | ||
| 38 | u64 data_offset; | ||
| 39 | u64 data_len; | ||
| 40 | struct extent_inode_elem *e; | ||
| 41 | |||
| 42 | data_offset = btrfs_file_extent_offset(eb, fi); | ||
| 43 | data_len = btrfs_file_extent_num_bytes(eb, fi); | ||
| 44 | |||
| 45 | if (extent_item_pos < data_offset || | ||
| 46 | extent_item_pos >= data_offset + data_len) | ||
| 47 | return 1; | ||
| 48 | |||
| 49 | e = kmalloc(sizeof(*e), GFP_NOFS); | ||
| 50 | if (!e) | ||
| 51 | return -ENOMEM; | ||
| 52 | |||
| 53 | e->next = *eie; | ||
| 54 | e->inum = key->objectid; | ||
| 55 | e->offset = key->offset + (extent_item_pos - data_offset); | ||
| 56 | *eie = e; | ||
| 57 | |||
| 58 | return 0; | ||
| 59 | } | ||
| 60 | |||
| 61 | static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, | ||
| 62 | u64 extent_item_pos, | ||
| 63 | struct extent_inode_elem **eie) | ||
| 64 | { | ||
| 65 | u64 disk_byte; | ||
| 66 | struct btrfs_key key; | ||
| 67 | struct btrfs_file_extent_item *fi; | ||
| 68 | int slot; | ||
| 69 | int nritems; | ||
| 70 | int extent_type; | ||
| 71 | int ret; | ||
| 72 | |||
| 73 | /* | ||
| 74 | * from the shared data ref, we only have the leaf but we need | ||
| 75 | * the key. thus, we must look into all items and see that we | ||
| 76 | * find one (some) with a reference to our extent item. | ||
| 77 | */ | ||
| 78 | nritems = btrfs_header_nritems(eb); | ||
| 79 | for (slot = 0; slot < nritems; ++slot) { | ||
| 80 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
| 81 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 82 | continue; | ||
| 83 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
| 84 | extent_type = btrfs_file_extent_type(eb, fi); | ||
| 85 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
| 86 | continue; | ||
| 87 | /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ | ||
| 88 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
| 89 | if (disk_byte != wanted_disk_byte) | ||
| 90 | continue; | ||
| 91 | |||
| 92 | ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie); | ||
| 93 | if (ret < 0) | ||
| 94 | return ret; | ||
| 95 | } | ||
| 96 | |||
| 97 | return 0; | ||
| 98 | } | ||
| 99 | |||
| 27 | /* | 100 | /* |
| 28 | * this structure records all encountered refs on the way up to the root | 101 | * this structure records all encountered refs on the way up to the root |
| 29 | */ | 102 | */ |
| 30 | struct __prelim_ref { | 103 | struct __prelim_ref { |
| 31 | struct list_head list; | 104 | struct list_head list; |
| 32 | u64 root_id; | 105 | u64 root_id; |
| 33 | struct btrfs_key key; | 106 | struct btrfs_key key_for_search; |
| 34 | int level; | 107 | int level; |
| 35 | int count; | 108 | int count; |
| 109 | struct extent_inode_elem *inode_list; | ||
| 36 | u64 parent; | 110 | u64 parent; |
| 37 | u64 wanted_disk_byte; | 111 | u64 wanted_disk_byte; |
| 38 | }; | 112 | }; |
| 39 | 113 | ||
| 114 | /* | ||
| 115 | * the rules for all callers of this function are: | ||
| 116 | * - obtaining the parent is the goal | ||
| 117 | * - if you add a key, you must know that it is a correct key | ||
| 118 | * - if you cannot add the parent or a correct key, then we will look into the | ||
| 119 | * block later to set a correct key | ||
| 120 | * | ||
| 121 | * delayed refs | ||
| 122 | * ============ | ||
| 123 | * backref type | shared | indirect | shared | indirect | ||
| 124 | * information | tree | tree | data | data | ||
| 125 | * --------------------+--------+----------+--------+---------- | ||
| 126 | * parent logical | y | - | - | - | ||
| 127 | * key to resolve | - | y | y | y | ||
| 128 | * tree block logical | - | - | - | - | ||
| 129 | * root for resolving | y | y | y | y | ||
| 130 | * | ||
| 131 | * - column 1: we've the parent -> done | ||
| 132 | * - column 2, 3, 4: we use the key to find the parent | ||
| 133 | * | ||
| 134 | * on disk refs (inline or keyed) | ||
| 135 | * ============================== | ||
| 136 | * backref type | shared | indirect | shared | indirect | ||
| 137 | * information | tree | tree | data | data | ||
| 138 | * --------------------+--------+----------+--------+---------- | ||
| 139 | * parent logical | y | - | y | - | ||
| 140 | * key to resolve | - | - | - | y | ||
| 141 | * tree block logical | y | y | y | y | ||
| 142 | * root for resolving | - | y | y | y | ||
| 143 | * | ||
| 144 | * - column 1, 3: we've the parent -> done | ||
| 145 | * - column 2: we take the first key from the block to find the parent | ||
| 146 | * (see __add_missing_keys) | ||
| 147 | * - column 4: we use the key to find the parent | ||
| 148 | * | ||
| 149 | * additional information that's available but not required to find the parent | ||
| 150 | * block might help in merging entries to gain some speed. | ||
| 151 | */ | ||
| 152 | |||
| 40 | static int __add_prelim_ref(struct list_head *head, u64 root_id, | 153 | static int __add_prelim_ref(struct list_head *head, u64 root_id, |
| 41 | struct btrfs_key *key, int level, u64 parent, | 154 | struct btrfs_key *key, int level, |
| 42 | u64 wanted_disk_byte, int count) | 155 | u64 parent, u64 wanted_disk_byte, int count) |
| 43 | { | 156 | { |
| 44 | struct __prelim_ref *ref; | 157 | struct __prelim_ref *ref; |
| 45 | 158 | ||
| @@ -50,10 +163,11 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, | |||
| 50 | 163 | ||
| 51 | ref->root_id = root_id; | 164 | ref->root_id = root_id; |
| 52 | if (key) | 165 | if (key) |
| 53 | ref->key = *key; | 166 | ref->key_for_search = *key; |
| 54 | else | 167 | else |
| 55 | memset(&ref->key, 0, sizeof(ref->key)); | 168 | memset(&ref->key_for_search, 0, sizeof(ref->key_for_search)); |
| 56 | 169 | ||
| 170 | ref->inode_list = NULL; | ||
| 57 | ref->level = level; | 171 | ref->level = level; |
| 58 | ref->count = count; | 172 | ref->count = count; |
| 59 | ref->parent = parent; | 173 | ref->parent = parent; |
| @@ -64,18 +178,26 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, | |||
| 64 | } | 178 | } |
| 65 | 179 | ||
| 66 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | 180 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, |
| 67 | struct ulist *parents, | 181 | struct ulist *parents, int level, |
| 68 | struct extent_buffer *eb, int level, | 182 | struct btrfs_key *key, u64 wanted_disk_byte, |
| 69 | u64 wanted_objectid, u64 wanted_disk_byte) | 183 | const u64 *extent_item_pos) |
| 70 | { | 184 | { |
| 71 | int ret; | 185 | int ret; |
| 72 | int slot; | 186 | int slot = path->slots[level]; |
| 187 | struct extent_buffer *eb = path->nodes[level]; | ||
| 73 | struct btrfs_file_extent_item *fi; | 188 | struct btrfs_file_extent_item *fi; |
| 74 | struct btrfs_key key; | 189 | struct extent_inode_elem *eie = NULL; |
| 75 | u64 disk_byte; | 190 | u64 disk_byte; |
| 191 | u64 wanted_objectid = key->objectid; | ||
| 76 | 192 | ||
| 77 | add_parent: | 193 | add_parent: |
| 78 | ret = ulist_add(parents, eb->start, 0, GFP_NOFS); | 194 | if (level == 0 && extent_item_pos) { |
| 195 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
| 196 | ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie); | ||
| 197 | if (ret < 0) | ||
| 198 | return ret; | ||
| 199 | } | ||
| 200 | ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS); | ||
| 79 | if (ret < 0) | 201 | if (ret < 0) |
| 80 | return ret; | 202 | return ret; |
| 81 | 203 | ||
| @@ -89,6 +211,7 @@ add_parent: | |||
| 89 | * repeat this until we don't find any additional EXTENT_DATA items. | 211 | * repeat this until we don't find any additional EXTENT_DATA items. |
| 90 | */ | 212 | */ |
| 91 | while (1) { | 213 | while (1) { |
| 214 | eie = NULL; | ||
| 92 | ret = btrfs_next_leaf(root, path); | 215 | ret = btrfs_next_leaf(root, path); |
| 93 | if (ret < 0) | 216 | if (ret < 0) |
| 94 | return ret; | 217 | return ret; |
| @@ -97,9 +220,9 @@ add_parent: | |||
| 97 | 220 | ||
| 98 | eb = path->nodes[0]; | 221 | eb = path->nodes[0]; |
| 99 | for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { | 222 | for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { |
| 100 | btrfs_item_key_to_cpu(eb, &key, slot); | 223 | btrfs_item_key_to_cpu(eb, key, slot); |
| 101 | if (key.objectid != wanted_objectid || | 224 | if (key->objectid != wanted_objectid || |
| 102 | key.type != BTRFS_EXTENT_DATA_KEY) | 225 | key->type != BTRFS_EXTENT_DATA_KEY) |
| 103 | return 0; | 226 | return 0; |
| 104 | fi = btrfs_item_ptr(eb, slot, | 227 | fi = btrfs_item_ptr(eb, slot, |
| 105 | struct btrfs_file_extent_item); | 228 | struct btrfs_file_extent_item); |
| @@ -118,8 +241,10 @@ add_parent: | |||
| 118 | */ | 241 | */ |
| 119 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | 242 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, |
| 120 | int search_commit_root, | 243 | int search_commit_root, |
| 244 | u64 time_seq, | ||
| 121 | struct __prelim_ref *ref, | 245 | struct __prelim_ref *ref, |
| 122 | struct ulist *parents) | 246 | struct ulist *parents, |
| 247 | const u64 *extent_item_pos) | ||
| 123 | { | 248 | { |
| 124 | struct btrfs_path *path; | 249 | struct btrfs_path *path; |
| 125 | struct btrfs_root *root; | 250 | struct btrfs_root *root; |
| @@ -152,12 +277,13 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
| 152 | goto out; | 277 | goto out; |
| 153 | 278 | ||
| 154 | path->lowest_level = level; | 279 | path->lowest_level = level; |
| 155 | ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0); | 280 | ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); |
| 156 | pr_debug("search slot in root %llu (level %d, ref count %d) returned " | 281 | pr_debug("search slot in root %llu (level %d, ref count %d) returned " |
| 157 | "%d for key (%llu %u %llu)\n", | 282 | "%d for key (%llu %u %llu)\n", |
| 158 | (unsigned long long)ref->root_id, level, ref->count, ret, | 283 | (unsigned long long)ref->root_id, level, ref->count, ret, |
| 159 | (unsigned long long)ref->key.objectid, ref->key.type, | 284 | (unsigned long long)ref->key_for_search.objectid, |
| 160 | (unsigned long long)ref->key.offset); | 285 | ref->key_for_search.type, |
| 286 | (unsigned long long)ref->key_for_search.offset); | ||
| 161 | if (ret < 0) | 287 | if (ret < 0) |
| 162 | goto out; | 288 | goto out; |
| 163 | 289 | ||
| @@ -179,9 +305,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
| 179 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); | 305 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); |
| 180 | } | 306 | } |
| 181 | 307 | ||
| 182 | /* the last two parameters will only be used for level == 0 */ | 308 | ret = add_all_parents(root, path, parents, level, &key, |
| 183 | ret = add_all_parents(root, path, parents, eb, level, key.objectid, | 309 | ref->wanted_disk_byte, extent_item_pos); |
| 184 | ref->wanted_disk_byte); | ||
| 185 | out: | 310 | out: |
| 186 | btrfs_free_path(path); | 311 | btrfs_free_path(path); |
| 187 | return ret; | 312 | return ret; |
| @@ -191,8 +316,9 @@ out: | |||
| 191 | * resolve all indirect backrefs from the list | 316 | * resolve all indirect backrefs from the list |
| 192 | */ | 317 | */ |
| 193 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | 318 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, |
| 194 | int search_commit_root, | 319 | int search_commit_root, u64 time_seq, |
| 195 | struct list_head *head) | 320 | struct list_head *head, |
| 321 | const u64 *extent_item_pos) | ||
| 196 | { | 322 | { |
| 197 | int err; | 323 | int err; |
| 198 | int ret = 0; | 324 | int ret = 0; |
| @@ -201,6 +327,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 201 | struct __prelim_ref *new_ref; | 327 | struct __prelim_ref *new_ref; |
| 202 | struct ulist *parents; | 328 | struct ulist *parents; |
| 203 | struct ulist_node *node; | 329 | struct ulist_node *node; |
| 330 | struct ulist_iterator uiter; | ||
| 204 | 331 | ||
| 205 | parents = ulist_alloc(GFP_NOFS); | 332 | parents = ulist_alloc(GFP_NOFS); |
| 206 | if (!parents) | 333 | if (!parents) |
| @@ -217,7 +344,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 217 | if (ref->count == 0) | 344 | if (ref->count == 0) |
| 218 | continue; | 345 | continue; |
| 219 | err = __resolve_indirect_ref(fs_info, search_commit_root, | 346 | err = __resolve_indirect_ref(fs_info, search_commit_root, |
| 220 | ref, parents); | 347 | time_seq, ref, parents, |
| 348 | extent_item_pos); | ||
| 221 | if (err) { | 349 | if (err) { |
| 222 | if (ret == 0) | 350 | if (ret == 0) |
| 223 | ret = err; | 351 | ret = err; |
| @@ -225,11 +353,14 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 225 | } | 353 | } |
| 226 | 354 | ||
| 227 | /* we put the first parent into the ref at hand */ | 355 | /* we put the first parent into the ref at hand */ |
| 228 | node = ulist_next(parents, NULL); | 356 | ULIST_ITER_INIT(&uiter); |
| 357 | node = ulist_next(parents, &uiter); | ||
| 229 | ref->parent = node ? node->val : 0; | 358 | ref->parent = node ? node->val : 0; |
| 359 | ref->inode_list = | ||
| 360 | node ? (struct extent_inode_elem *)node->aux : 0; | ||
| 230 | 361 | ||
| 231 | /* additional parents require new refs being added here */ | 362 | /* additional parents require new refs being added here */ |
| 232 | while ((node = ulist_next(parents, node))) { | 363 | while ((node = ulist_next(parents, &uiter))) { |
| 233 | new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); | 364 | new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); |
| 234 | if (!new_ref) { | 365 | if (!new_ref) { |
| 235 | ret = -ENOMEM; | 366 | ret = -ENOMEM; |
| @@ -237,6 +368,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 237 | } | 368 | } |
| 238 | memcpy(new_ref, ref, sizeof(*ref)); | 369 | memcpy(new_ref, ref, sizeof(*ref)); |
| 239 | new_ref->parent = node->val; | 370 | new_ref->parent = node->val; |
| 371 | new_ref->inode_list = | ||
| 372 | (struct extent_inode_elem *)node->aux; | ||
| 240 | list_add(&new_ref->list, &ref->list); | 373 | list_add(&new_ref->list, &ref->list); |
| 241 | } | 374 | } |
| 242 | ulist_reinit(parents); | 375 | ulist_reinit(parents); |
| @@ -246,10 +379,65 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
| 246 | return ret; | 379 | return ret; |
| 247 | } | 380 | } |
| 248 | 381 | ||
| 382 | static inline int ref_for_same_block(struct __prelim_ref *ref1, | ||
| 383 | struct __prelim_ref *ref2) | ||
| 384 | { | ||
| 385 | if (ref1->level != ref2->level) | ||
| 386 | return 0; | ||
| 387 | if (ref1->root_id != ref2->root_id) | ||
| 388 | return 0; | ||
| 389 | if (ref1->key_for_search.type != ref2->key_for_search.type) | ||
| 390 | return 0; | ||
| 391 | if (ref1->key_for_search.objectid != ref2->key_for_search.objectid) | ||
| 392 | return 0; | ||
| 393 | if (ref1->key_for_search.offset != ref2->key_for_search.offset) | ||
| 394 | return 0; | ||
| 395 | if (ref1->parent != ref2->parent) | ||
| 396 | return 0; | ||
| 397 | |||
| 398 | return 1; | ||
| 399 | } | ||
| 400 | |||
| 401 | /* | ||
| 402 | * read tree blocks and add keys where required. | ||
| 403 | */ | ||
| 404 | static int __add_missing_keys(struct btrfs_fs_info *fs_info, | ||
| 405 | struct list_head *head) | ||
| 406 | { | ||
| 407 | struct list_head *pos; | ||
| 408 | struct extent_buffer *eb; | ||
| 409 | |||
| 410 | list_for_each(pos, head) { | ||
| 411 | struct __prelim_ref *ref; | ||
| 412 | ref = list_entry(pos, struct __prelim_ref, list); | ||
| 413 | |||
| 414 | if (ref->parent) | ||
| 415 | continue; | ||
| 416 | if (ref->key_for_search.type) | ||
| 417 | continue; | ||
| 418 | BUG_ON(!ref->wanted_disk_byte); | ||
| 419 | eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, | ||
| 420 | fs_info->tree_root->leafsize, 0); | ||
| 421 | BUG_ON(!eb); | ||
| 422 | btrfs_tree_read_lock(eb); | ||
| 423 | if (btrfs_header_level(eb) == 0) | ||
| 424 | btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0); | ||
| 425 | else | ||
| 426 | btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0); | ||
| 427 | btrfs_tree_read_unlock(eb); | ||
| 428 | free_extent_buffer(eb); | ||
| 429 | } | ||
| 430 | return 0; | ||
| 431 | } | ||
| 432 | |||
| 249 | /* | 433 | /* |
| 250 | * merge two lists of backrefs and adjust counts accordingly | 434 | * merge two lists of backrefs and adjust counts accordingly |
| 251 | * | 435 | * |
| 252 | * mode = 1: merge identical keys, if key is set | 436 | * mode = 1: merge identical keys, if key is set |
| 437 | * FIXME: if we add more keys in __add_prelim_ref, we can merge more here. | ||
| 438 | * additionally, we could even add a key range for the blocks we | ||
| 439 | * looked into to merge even more (-> replace unresolved refs by those | ||
| 440 | * having a parent). | ||
| 253 | * mode = 2: merge identical parents | 441 | * mode = 2: merge identical parents |
| 254 | */ | 442 | */ |
| 255 | static int __merge_refs(struct list_head *head, int mode) | 443 | static int __merge_refs(struct list_head *head, int mode) |
| @@ -263,20 +451,21 @@ static int __merge_refs(struct list_head *head, int mode) | |||
| 263 | 451 | ||
| 264 | ref1 = list_entry(pos1, struct __prelim_ref, list); | 452 | ref1 = list_entry(pos1, struct __prelim_ref, list); |
| 265 | 453 | ||
| 266 | if (mode == 1 && ref1->key.type == 0) | ||
| 267 | continue; | ||
| 268 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; | 454 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; |
| 269 | pos2 = n2, n2 = pos2->next) { | 455 | pos2 = n2, n2 = pos2->next) { |
| 270 | struct __prelim_ref *ref2; | 456 | struct __prelim_ref *ref2; |
| 457 | struct __prelim_ref *xchg; | ||
| 271 | 458 | ||
| 272 | ref2 = list_entry(pos2, struct __prelim_ref, list); | 459 | ref2 = list_entry(pos2, struct __prelim_ref, list); |
| 273 | 460 | ||
| 274 | if (mode == 1) { | 461 | if (mode == 1) { |
| 275 | if (memcmp(&ref1->key, &ref2->key, | 462 | if (!ref_for_same_block(ref1, ref2)) |
| 276 | sizeof(ref1->key)) || | ||
| 277 | ref1->level != ref2->level || | ||
| 278 | ref1->root_id != ref2->root_id) | ||
| 279 | continue; | 463 | continue; |
| 464 | if (!ref1->parent && ref2->parent) { | ||
| 465 | xchg = ref1; | ||
| 466 | ref1 = ref2; | ||
| 467 | ref2 = xchg; | ||
| 468 | } | ||
| 280 | ref1->count += ref2->count; | 469 | ref1->count += ref2->count; |
| 281 | } else { | 470 | } else { |
| 282 | if (ref1->parent != ref2->parent) | 471 | if (ref1->parent != ref2->parent) |
| @@ -296,16 +485,17 @@ static int __merge_refs(struct list_head *head, int mode) | |||
| 296 | * smaller or equal that seq to the list | 485 | * smaller or equal that seq to the list |
| 297 | */ | 486 | */ |
| 298 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | 487 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, |
| 299 | struct btrfs_key *info_key, | ||
| 300 | struct list_head *prefs) | 488 | struct list_head *prefs) |
| 301 | { | 489 | { |
| 302 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | 490 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; |
| 303 | struct rb_node *n = &head->node.rb_node; | 491 | struct rb_node *n = &head->node.rb_node; |
| 492 | struct btrfs_key key; | ||
| 493 | struct btrfs_key op_key = {0}; | ||
| 304 | int sgn; | 494 | int sgn; |
| 305 | int ret = 0; | 495 | int ret = 0; |
| 306 | 496 | ||
| 307 | if (extent_op && extent_op->update_key) | 497 | if (extent_op && extent_op->update_key) |
| 308 | btrfs_disk_key_to_cpu(info_key, &extent_op->key); | 498 | btrfs_disk_key_to_cpu(&op_key, &extent_op->key); |
| 309 | 499 | ||
| 310 | while ((n = rb_prev(n))) { | 500 | while ((n = rb_prev(n))) { |
| 311 | struct btrfs_delayed_ref_node *node; | 501 | struct btrfs_delayed_ref_node *node; |
| @@ -337,7 +527,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
| 337 | struct btrfs_delayed_tree_ref *ref; | 527 | struct btrfs_delayed_tree_ref *ref; |
| 338 | 528 | ||
| 339 | ref = btrfs_delayed_node_to_tree_ref(node); | 529 | ref = btrfs_delayed_node_to_tree_ref(node); |
| 340 | ret = __add_prelim_ref(prefs, ref->root, info_key, | 530 | ret = __add_prelim_ref(prefs, ref->root, &op_key, |
| 341 | ref->level + 1, 0, node->bytenr, | 531 | ref->level + 1, 0, node->bytenr, |
| 342 | node->ref_mod * sgn); | 532 | node->ref_mod * sgn); |
| 343 | break; | 533 | break; |
| @@ -346,7 +536,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
| 346 | struct btrfs_delayed_tree_ref *ref; | 536 | struct btrfs_delayed_tree_ref *ref; |
| 347 | 537 | ||
| 348 | ref = btrfs_delayed_node_to_tree_ref(node); | 538 | ref = btrfs_delayed_node_to_tree_ref(node); |
| 349 | ret = __add_prelim_ref(prefs, ref->root, info_key, | 539 | ret = __add_prelim_ref(prefs, ref->root, NULL, |
| 350 | ref->level + 1, ref->parent, | 540 | ref->level + 1, ref->parent, |
| 351 | node->bytenr, | 541 | node->bytenr, |
| 352 | node->ref_mod * sgn); | 542 | node->ref_mod * sgn); |
| @@ -354,8 +544,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
| 354 | } | 544 | } |
| 355 | case BTRFS_EXTENT_DATA_REF_KEY: { | 545 | case BTRFS_EXTENT_DATA_REF_KEY: { |
| 356 | struct btrfs_delayed_data_ref *ref; | 546 | struct btrfs_delayed_data_ref *ref; |
| 357 | struct btrfs_key key; | ||
| 358 | |||
| 359 | ref = btrfs_delayed_node_to_data_ref(node); | 547 | ref = btrfs_delayed_node_to_data_ref(node); |
| 360 | 548 | ||
| 361 | key.objectid = ref->objectid; | 549 | key.objectid = ref->objectid; |
| @@ -368,7 +556,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
| 368 | } | 556 | } |
| 369 | case BTRFS_SHARED_DATA_REF_KEY: { | 557 | case BTRFS_SHARED_DATA_REF_KEY: { |
| 370 | struct btrfs_delayed_data_ref *ref; | 558 | struct btrfs_delayed_data_ref *ref; |
| 371 | struct btrfs_key key; | ||
| 372 | 559 | ||
| 373 | ref = btrfs_delayed_node_to_data_ref(node); | 560 | ref = btrfs_delayed_node_to_data_ref(node); |
| 374 | 561 | ||
| @@ -394,8 +581,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
| 394 | */ | 581 | */ |
| 395 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, | 582 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, |
| 396 | struct btrfs_path *path, u64 bytenr, | 583 | struct btrfs_path *path, u64 bytenr, |
| 397 | struct btrfs_key *info_key, int *info_level, | 584 | int *info_level, struct list_head *prefs) |
| 398 | struct list_head *prefs) | ||
| 399 | { | 585 | { |
| 400 | int ret = 0; | 586 | int ret = 0; |
| 401 | int slot; | 587 | int slot; |
| @@ -411,7 +597,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
| 411 | * enumerate all inline refs | 597 | * enumerate all inline refs |
| 412 | */ | 598 | */ |
| 413 | leaf = path->nodes[0]; | 599 | leaf = path->nodes[0]; |
| 414 | slot = path->slots[0] - 1; | 600 | slot = path->slots[0]; |
| 415 | 601 | ||
| 416 | item_size = btrfs_item_size_nr(leaf, slot); | 602 | item_size = btrfs_item_size_nr(leaf, slot); |
| 417 | BUG_ON(item_size < sizeof(*ei)); | 603 | BUG_ON(item_size < sizeof(*ei)); |
| @@ -424,12 +610,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
| 424 | 610 | ||
| 425 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 611 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 426 | struct btrfs_tree_block_info *info; | 612 | struct btrfs_tree_block_info *info; |
| 427 | struct btrfs_disk_key disk_key; | ||
| 428 | 613 | ||
| 429 | info = (struct btrfs_tree_block_info *)ptr; | 614 | info = (struct btrfs_tree_block_info *)ptr; |
| 430 | *info_level = btrfs_tree_block_level(leaf, info); | 615 | *info_level = btrfs_tree_block_level(leaf, info); |
| 431 | btrfs_tree_block_key(leaf, info, &disk_key); | ||
| 432 | btrfs_disk_key_to_cpu(info_key, &disk_key); | ||
| 433 | ptr += sizeof(struct btrfs_tree_block_info); | 616 | ptr += sizeof(struct btrfs_tree_block_info); |
| 434 | BUG_ON(ptr > end); | 617 | BUG_ON(ptr > end); |
| 435 | } else { | 618 | } else { |
| @@ -447,7 +630,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
| 447 | 630 | ||
| 448 | switch (type) { | 631 | switch (type) { |
| 449 | case BTRFS_SHARED_BLOCK_REF_KEY: | 632 | case BTRFS_SHARED_BLOCK_REF_KEY: |
| 450 | ret = __add_prelim_ref(prefs, 0, info_key, | 633 | ret = __add_prelim_ref(prefs, 0, NULL, |
| 451 | *info_level + 1, offset, | 634 | *info_level + 1, offset, |
| 452 | bytenr, 1); | 635 | bytenr, 1); |
| 453 | break; | 636 | break; |
| @@ -462,8 +645,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
| 462 | break; | 645 | break; |
| 463 | } | 646 | } |
| 464 | case BTRFS_TREE_BLOCK_REF_KEY: | 647 | case BTRFS_TREE_BLOCK_REF_KEY: |
| 465 | ret = __add_prelim_ref(prefs, offset, info_key, | 648 | ret = __add_prelim_ref(prefs, offset, NULL, |
| 466 | *info_level + 1, 0, bytenr, 1); | 649 | *info_level + 1, 0, |
| 650 | bytenr, 1); | ||
| 467 | break; | 651 | break; |
| 468 | case BTRFS_EXTENT_DATA_REF_KEY: { | 652 | case BTRFS_EXTENT_DATA_REF_KEY: { |
| 469 | struct btrfs_extent_data_ref *dref; | 653 | struct btrfs_extent_data_ref *dref; |
| @@ -477,8 +661,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
| 477 | key.type = BTRFS_EXTENT_DATA_KEY; | 661 | key.type = BTRFS_EXTENT_DATA_KEY; |
| 478 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 662 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
| 479 | root = btrfs_extent_data_ref_root(leaf, dref); | 663 | root = btrfs_extent_data_ref_root(leaf, dref); |
| 480 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr, | 664 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
| 481 | count); | 665 | bytenr, count); |
| 482 | break; | 666 | break; |
| 483 | } | 667 | } |
| 484 | default: | 668 | default: |
| @@ -496,8 +680,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
| 496 | */ | 680 | */ |
| 497 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | 681 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, |
| 498 | struct btrfs_path *path, u64 bytenr, | 682 | struct btrfs_path *path, u64 bytenr, |
| 499 | struct btrfs_key *info_key, int info_level, | 683 | int info_level, struct list_head *prefs) |
| 500 | struct list_head *prefs) | ||
| 501 | { | 684 | { |
| 502 | struct btrfs_root *extent_root = fs_info->extent_root; | 685 | struct btrfs_root *extent_root = fs_info->extent_root; |
| 503 | int ret; | 686 | int ret; |
| @@ -527,7 +710,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
| 527 | 710 | ||
| 528 | switch (key.type) { | 711 | switch (key.type) { |
| 529 | case BTRFS_SHARED_BLOCK_REF_KEY: | 712 | case BTRFS_SHARED_BLOCK_REF_KEY: |
| 530 | ret = __add_prelim_ref(prefs, 0, info_key, | 713 | ret = __add_prelim_ref(prefs, 0, NULL, |
| 531 | info_level + 1, key.offset, | 714 | info_level + 1, key.offset, |
| 532 | bytenr, 1); | 715 | bytenr, 1); |
| 533 | break; | 716 | break; |
| @@ -543,8 +726,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
| 543 | break; | 726 | break; |
| 544 | } | 727 | } |
| 545 | case BTRFS_TREE_BLOCK_REF_KEY: | 728 | case BTRFS_TREE_BLOCK_REF_KEY: |
| 546 | ret = __add_prelim_ref(prefs, key.offset, info_key, | 729 | ret = __add_prelim_ref(prefs, key.offset, NULL, |
| 547 | info_level + 1, 0, bytenr, 1); | 730 | info_level + 1, 0, |
| 731 | bytenr, 1); | ||
| 548 | break; | 732 | break; |
| 549 | case BTRFS_EXTENT_DATA_REF_KEY: { | 733 | case BTRFS_EXTENT_DATA_REF_KEY: { |
| 550 | struct btrfs_extent_data_ref *dref; | 734 | struct btrfs_extent_data_ref *dref; |
| @@ -560,7 +744,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
| 560 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 744 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
| 561 | root = btrfs_extent_data_ref_root(leaf, dref); | 745 | root = btrfs_extent_data_ref_root(leaf, dref); |
| 562 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, | 746 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
| 563 | bytenr, count); | 747 | bytenr, count); |
| 564 | break; | 748 | break; |
| 565 | } | 749 | } |
| 566 | default: | 750 | default: |
| @@ -582,11 +766,12 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
| 582 | */ | 766 | */ |
| 583 | static int find_parent_nodes(struct btrfs_trans_handle *trans, | 767 | static int find_parent_nodes(struct btrfs_trans_handle *trans, |
| 584 | struct btrfs_fs_info *fs_info, u64 bytenr, | 768 | struct btrfs_fs_info *fs_info, u64 bytenr, |
| 585 | u64 seq, struct ulist *refs, struct ulist *roots) | 769 | u64 delayed_ref_seq, u64 time_seq, |
| 770 | struct ulist *refs, struct ulist *roots, | ||
| 771 | const u64 *extent_item_pos) | ||
| 586 | { | 772 | { |
| 587 | struct btrfs_key key; | 773 | struct btrfs_key key; |
| 588 | struct btrfs_path *path; | 774 | struct btrfs_path *path; |
| 589 | struct btrfs_key info_key = { 0 }; | ||
| 590 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | 775 | struct btrfs_delayed_ref_root *delayed_refs = NULL; |
| 591 | struct btrfs_delayed_ref_head *head; | 776 | struct btrfs_delayed_ref_head *head; |
| 592 | int info_level = 0; | 777 | int info_level = 0; |
| @@ -645,7 +830,7 @@ again: | |||
| 645 | btrfs_put_delayed_ref(&head->node); | 830 | btrfs_put_delayed_ref(&head->node); |
| 646 | goto again; | 831 | goto again; |
| 647 | } | 832 | } |
| 648 | ret = __add_delayed_refs(head, seq, &info_key, | 833 | ret = __add_delayed_refs(head, delayed_ref_seq, |
| 649 | &prefs_delayed); | 834 | &prefs_delayed); |
| 650 | if (ret) { | 835 | if (ret) { |
| 651 | spin_unlock(&delayed_refs->lock); | 836 | spin_unlock(&delayed_refs->lock); |
| @@ -659,16 +844,17 @@ again: | |||
| 659 | struct extent_buffer *leaf; | 844 | struct extent_buffer *leaf; |
| 660 | int slot; | 845 | int slot; |
| 661 | 846 | ||
| 847 | path->slots[0]--; | ||
| 662 | leaf = path->nodes[0]; | 848 | leaf = path->nodes[0]; |
| 663 | slot = path->slots[0] - 1; | 849 | slot = path->slots[0]; |
| 664 | btrfs_item_key_to_cpu(leaf, &key, slot); | 850 | btrfs_item_key_to_cpu(leaf, &key, slot); |
| 665 | if (key.objectid == bytenr && | 851 | if (key.objectid == bytenr && |
| 666 | key.type == BTRFS_EXTENT_ITEM_KEY) { | 852 | key.type == BTRFS_EXTENT_ITEM_KEY) { |
| 667 | ret = __add_inline_refs(fs_info, path, bytenr, | 853 | ret = __add_inline_refs(fs_info, path, bytenr, |
| 668 | &info_key, &info_level, &prefs); | 854 | &info_level, &prefs); |
| 669 | if (ret) | 855 | if (ret) |
| 670 | goto out; | 856 | goto out; |
| 671 | ret = __add_keyed_refs(fs_info, path, bytenr, &info_key, | 857 | ret = __add_keyed_refs(fs_info, path, bytenr, |
| 672 | info_level, &prefs); | 858 | info_level, &prefs); |
| 673 | if (ret) | 859 | if (ret) |
| 674 | goto out; | 860 | goto out; |
| @@ -676,21 +862,18 @@ again: | |||
| 676 | } | 862 | } |
| 677 | btrfs_release_path(path); | 863 | btrfs_release_path(path); |
| 678 | 864 | ||
| 679 | /* | ||
| 680 | * when adding the delayed refs above, the info_key might not have | ||
| 681 | * been known yet. Go over the list and replace the missing keys | ||
| 682 | */ | ||
| 683 | list_for_each_entry(ref, &prefs_delayed, list) { | ||
| 684 | if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0) | ||
| 685 | memcpy(&ref->key, &info_key, sizeof(ref->key)); | ||
| 686 | } | ||
| 687 | list_splice_init(&prefs_delayed, &prefs); | 865 | list_splice_init(&prefs_delayed, &prefs); |
| 688 | 866 | ||
| 867 | ret = __add_missing_keys(fs_info, &prefs); | ||
| 868 | if (ret) | ||
| 869 | goto out; | ||
| 870 | |||
| 689 | ret = __merge_refs(&prefs, 1); | 871 | ret = __merge_refs(&prefs, 1); |
| 690 | if (ret) | 872 | if (ret) |
| 691 | goto out; | 873 | goto out; |
| 692 | 874 | ||
| 693 | ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs); | 875 | ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, |
| 876 | &prefs, extent_item_pos); | ||
| 694 | if (ret) | 877 | if (ret) |
| 695 | goto out; | 878 | goto out; |
| 696 | 879 | ||
| @@ -709,7 +892,33 @@ again: | |||
| 709 | BUG_ON(ret < 0); | 892 | BUG_ON(ret < 0); |
| 710 | } | 893 | } |
| 711 | if (ref->count && ref->parent) { | 894 | if (ref->count && ref->parent) { |
| 712 | ret = ulist_add(refs, ref->parent, 0, GFP_NOFS); | 895 | struct extent_inode_elem *eie = NULL; |
| 896 | if (extent_item_pos && !ref->inode_list) { | ||
| 897 | u32 bsz; | ||
| 898 | struct extent_buffer *eb; | ||
| 899 | bsz = btrfs_level_size(fs_info->extent_root, | ||
| 900 | info_level); | ||
| 901 | eb = read_tree_block(fs_info->extent_root, | ||
| 902 | ref->parent, bsz, 0); | ||
| 903 | BUG_ON(!eb); | ||
| 904 | ret = find_extent_in_eb(eb, bytenr, | ||
| 905 | *extent_item_pos, &eie); | ||
| 906 | ref->inode_list = eie; | ||
| 907 | free_extent_buffer(eb); | ||
| 908 | } | ||
| 909 | ret = ulist_add_merge(refs, ref->parent, | ||
| 910 | (unsigned long)ref->inode_list, | ||
| 911 | (unsigned long *)&eie, GFP_NOFS); | ||
| 912 | if (!ret && extent_item_pos) { | ||
| 913 | /* | ||
| 914 | * we've recorded that parent, so we must extend | ||
| 915 | * its inode list here | ||
| 916 | */ | ||
| 917 | BUG_ON(!eie); | ||
| 918 | while (eie->next) | ||
| 919 | eie = eie->next; | ||
| 920 | eie->next = ref->inode_list; | ||
| 921 | } | ||
| 713 | BUG_ON(ret < 0); | 922 | BUG_ON(ret < 0); |
| 714 | } | 923 | } |
| 715 | kfree(ref); | 924 | kfree(ref); |
| @@ -734,6 +943,28 @@ out: | |||
| 734 | return ret; | 943 | return ret; |
| 735 | } | 944 | } |
| 736 | 945 | ||
| 946 | static void free_leaf_list(struct ulist *blocks) | ||
| 947 | { | ||
| 948 | struct ulist_node *node = NULL; | ||
| 949 | struct extent_inode_elem *eie; | ||
| 950 | struct extent_inode_elem *eie_next; | ||
| 951 | struct ulist_iterator uiter; | ||
| 952 | |||
| 953 | ULIST_ITER_INIT(&uiter); | ||
| 954 | while ((node = ulist_next(blocks, &uiter))) { | ||
| 955 | if (!node->aux) | ||
| 956 | continue; | ||
| 957 | eie = (struct extent_inode_elem *)node->aux; | ||
| 958 | for (; eie; eie = eie_next) { | ||
| 959 | eie_next = eie->next; | ||
| 960 | kfree(eie); | ||
| 961 | } | ||
| 962 | node->aux = 0; | ||
| 963 | } | ||
| 964 | |||
| 965 | ulist_free(blocks); | ||
| 966 | } | ||
| 967 | |||
| 737 | /* | 968 | /* |
| 738 | * Finds all leafs with a reference to the specified combination of bytenr and | 969 | * Finds all leafs with a reference to the specified combination of bytenr and |
| 739 | * offset. key_list_head will point to a list of corresponding keys (caller must | 970 | * offset. key_list_head will point to a list of corresponding keys (caller must |
| @@ -744,7 +975,9 @@ out: | |||
| 744 | */ | 975 | */ |
| 745 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | 976 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, |
| 746 | struct btrfs_fs_info *fs_info, u64 bytenr, | 977 | struct btrfs_fs_info *fs_info, u64 bytenr, |
| 747 | u64 num_bytes, u64 seq, struct ulist **leafs) | 978 | u64 delayed_ref_seq, u64 time_seq, |
| 979 | struct ulist **leafs, | ||
| 980 | const u64 *extent_item_pos) | ||
| 748 | { | 981 | { |
| 749 | struct ulist *tmp; | 982 | struct ulist *tmp; |
| 750 | int ret; | 983 | int ret; |
| @@ -758,11 +991,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
| 758 | return -ENOMEM; | 991 | return -ENOMEM; |
| 759 | } | 992 | } |
| 760 | 993 | ||
| 761 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp); | 994 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, |
| 995 | time_seq, *leafs, tmp, extent_item_pos); | ||
| 762 | ulist_free(tmp); | 996 | ulist_free(tmp); |
| 763 | 997 | ||
| 764 | if (ret < 0 && ret != -ENOENT) { | 998 | if (ret < 0 && ret != -ENOENT) { |
| 765 | ulist_free(*leafs); | 999 | free_leaf_list(*leafs); |
| 766 | return ret; | 1000 | return ret; |
| 767 | } | 1001 | } |
| 768 | 1002 | ||
| @@ -784,10 +1018,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
| 784 | */ | 1018 | */ |
| 785 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 1019 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
| 786 | struct btrfs_fs_info *fs_info, u64 bytenr, | 1020 | struct btrfs_fs_info *fs_info, u64 bytenr, |
| 787 | u64 num_bytes, u64 seq, struct ulist **roots) | 1021 | u64 delayed_ref_seq, u64 time_seq, |
| 1022 | struct ulist **roots) | ||
| 788 | { | 1023 | { |
| 789 | struct ulist *tmp; | 1024 | struct ulist *tmp; |
| 790 | struct ulist_node *node = NULL; | 1025 | struct ulist_node *node = NULL; |
| 1026 | struct ulist_iterator uiter; | ||
| 791 | int ret; | 1027 | int ret; |
| 792 | 1028 | ||
| 793 | tmp = ulist_alloc(GFP_NOFS); | 1029 | tmp = ulist_alloc(GFP_NOFS); |
| @@ -799,15 +1035,16 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
| 799 | return -ENOMEM; | 1035 | return -ENOMEM; |
| 800 | } | 1036 | } |
| 801 | 1037 | ||
| 1038 | ULIST_ITER_INIT(&uiter); | ||
| 802 | while (1) { | 1039 | while (1) { |
| 803 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, | 1040 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, |
| 804 | tmp, *roots); | 1041 | time_seq, tmp, *roots, NULL); |
| 805 | if (ret < 0 && ret != -ENOENT) { | 1042 | if (ret < 0 && ret != -ENOENT) { |
| 806 | ulist_free(tmp); | 1043 | ulist_free(tmp); |
| 807 | ulist_free(*roots); | 1044 | ulist_free(*roots); |
| 808 | return ret; | 1045 | return ret; |
| 809 | } | 1046 | } |
| 810 | node = ulist_next(tmp, node); | 1047 | node = ulist_next(tmp, &uiter); |
| 811 | if (!node) | 1048 | if (!node) |
| 812 | break; | 1049 | break; |
| 813 | bytenr = node->val; | 1050 | bytenr = node->val; |
| @@ -1093,67 +1330,25 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | |||
| 1093 | return 0; | 1330 | return 0; |
| 1094 | } | 1331 | } |
| 1095 | 1332 | ||
| 1096 | static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical, | 1333 | static int iterate_leaf_refs(struct extent_inode_elem *inode_list, |
| 1097 | u64 orig_extent_item_objectid, | 1334 | u64 root, u64 extent_item_objectid, |
| 1098 | u64 extent_item_pos, u64 root, | ||
| 1099 | iterate_extent_inodes_t *iterate, void *ctx) | 1335 | iterate_extent_inodes_t *iterate, void *ctx) |
| 1100 | { | 1336 | { |
| 1101 | u64 disk_byte; | 1337 | struct extent_inode_elem *eie; |
| 1102 | struct btrfs_key key; | ||
| 1103 | struct btrfs_file_extent_item *fi; | ||
| 1104 | struct extent_buffer *eb; | ||
| 1105 | int slot; | ||
| 1106 | int nritems; | ||
| 1107 | int ret = 0; | 1338 | int ret = 0; |
| 1108 | int extent_type; | ||
| 1109 | u64 data_offset; | ||
| 1110 | u64 data_len; | ||
| 1111 | |||
| 1112 | eb = read_tree_block(fs_info->tree_root, logical, | ||
| 1113 | fs_info->tree_root->leafsize, 0); | ||
| 1114 | if (!eb) | ||
| 1115 | return -EIO; | ||
| 1116 | |||
| 1117 | /* | ||
| 1118 | * from the shared data ref, we only have the leaf but we need | ||
| 1119 | * the key. thus, we must look into all items and see that we | ||
| 1120 | * find one (some) with a reference to our extent item. | ||
| 1121 | */ | ||
| 1122 | nritems = btrfs_header_nritems(eb); | ||
| 1123 | for (slot = 0; slot < nritems; ++slot) { | ||
| 1124 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
| 1125 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 1126 | continue; | ||
| 1127 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
| 1128 | extent_type = btrfs_file_extent_type(eb, fi); | ||
| 1129 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
| 1130 | continue; | ||
| 1131 | /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ | ||
| 1132 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
| 1133 | if (disk_byte != orig_extent_item_objectid) | ||
| 1134 | continue; | ||
| 1135 | |||
| 1136 | data_offset = btrfs_file_extent_offset(eb, fi); | ||
| 1137 | data_len = btrfs_file_extent_num_bytes(eb, fi); | ||
| 1138 | |||
| 1139 | if (extent_item_pos < data_offset || | ||
| 1140 | extent_item_pos >= data_offset + data_len) | ||
| 1141 | continue; | ||
| 1142 | 1339 | ||
| 1340 | for (eie = inode_list; eie; eie = eie->next) { | ||
| 1143 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " | 1341 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " |
| 1144 | "root %llu\n", orig_extent_item_objectid, | 1342 | "root %llu\n", extent_item_objectid, |
| 1145 | key.objectid, key.offset, root); | 1343 | eie->inum, eie->offset, root); |
| 1146 | ret = iterate(key.objectid, | 1344 | ret = iterate(eie->inum, eie->offset, root, ctx); |
| 1147 | key.offset + (extent_item_pos - data_offset), | ||
| 1148 | root, ctx); | ||
| 1149 | if (ret) { | 1345 | if (ret) { |
| 1150 | pr_debug("stopping iteration because ret=%d\n", ret); | 1346 | pr_debug("stopping iteration for %llu due to ret=%d\n", |
| 1347 | extent_item_objectid, ret); | ||
| 1151 | break; | 1348 | break; |
| 1152 | } | 1349 | } |
| 1153 | } | 1350 | } |
| 1154 | 1351 | ||
| 1155 | free_extent_buffer(eb); | ||
| 1156 | |||
| 1157 | return ret; | 1352 | return ret; |
| 1158 | } | 1353 | } |
| 1159 | 1354 | ||
| @@ -1175,7 +1370,10 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
| 1175 | struct ulist *roots = NULL; | 1370 | struct ulist *roots = NULL; |
| 1176 | struct ulist_node *ref_node = NULL; | 1371 | struct ulist_node *ref_node = NULL; |
| 1177 | struct ulist_node *root_node = NULL; | 1372 | struct ulist_node *root_node = NULL; |
| 1178 | struct seq_list seq_elem; | 1373 | struct seq_list seq_elem = {}; |
| 1374 | struct seq_list tree_mod_seq_elem = {}; | ||
| 1375 | struct ulist_iterator ref_uiter; | ||
| 1376 | struct ulist_iterator root_uiter; | ||
| 1179 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | 1377 | struct btrfs_delayed_ref_root *delayed_refs = NULL; |
| 1180 | 1378 | ||
| 1181 | pr_debug("resolving all inodes for extent %llu\n", | 1379 | pr_debug("resolving all inodes for extent %llu\n", |
| @@ -1192,34 +1390,41 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
| 1192 | spin_lock(&delayed_refs->lock); | 1390 | spin_lock(&delayed_refs->lock); |
| 1193 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); | 1391 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); |
| 1194 | spin_unlock(&delayed_refs->lock); | 1392 | spin_unlock(&delayed_refs->lock); |
| 1393 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
| 1195 | } | 1394 | } |
| 1196 | 1395 | ||
| 1197 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, | 1396 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, |
| 1198 | extent_item_pos, seq_elem.seq, | 1397 | seq_elem.seq, tree_mod_seq_elem.seq, &refs, |
| 1199 | &refs); | 1398 | &extent_item_pos); |
| 1200 | |||
| 1201 | if (ret) | 1399 | if (ret) |
| 1202 | goto out; | 1400 | goto out; |
| 1203 | 1401 | ||
| 1204 | while (!ret && (ref_node = ulist_next(refs, ref_node))) { | 1402 | ULIST_ITER_INIT(&ref_uiter); |
| 1205 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1, | 1403 | while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { |
| 1206 | seq_elem.seq, &roots); | 1404 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, |
| 1405 | seq_elem.seq, | ||
| 1406 | tree_mod_seq_elem.seq, &roots); | ||
| 1207 | if (ret) | 1407 | if (ret) |
| 1208 | break; | 1408 | break; |
| 1209 | while (!ret && (root_node = ulist_next(roots, root_node))) { | 1409 | ULIST_ITER_INIT(&root_uiter); |
| 1210 | pr_debug("root %llu references leaf %llu\n", | 1410 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { |
| 1211 | root_node->val, ref_node->val); | 1411 | pr_debug("root %llu references leaf %llu, data list " |
| 1212 | ret = iterate_leaf_refs(fs_info, ref_node->val, | 1412 | "%#lx\n", root_node->val, ref_node->val, |
| 1213 | extent_item_objectid, | 1413 | ref_node->aux); |
| 1214 | extent_item_pos, root_node->val, | 1414 | ret = iterate_leaf_refs( |
| 1215 | iterate, ctx); | 1415 | (struct extent_inode_elem *)ref_node->aux, |
| 1416 | root_node->val, extent_item_objectid, | ||
| 1417 | iterate, ctx); | ||
| 1216 | } | 1418 | } |
| 1419 | ulist_free(roots); | ||
| 1420 | roots = NULL; | ||
| 1217 | } | 1421 | } |
| 1218 | 1422 | ||
| 1219 | ulist_free(refs); | 1423 | free_leaf_list(refs); |
| 1220 | ulist_free(roots); | 1424 | ulist_free(roots); |
| 1221 | out: | 1425 | out: |
| 1222 | if (!search_commit_root) { | 1426 | if (!search_commit_root) { |
| 1427 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
| 1223 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); | 1428 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); |
| 1224 | btrfs_end_transaction(trans, fs_info->extent_root); | 1429 | btrfs_end_transaction(trans, fs_info->extent_root); |
| 1225 | } | 1430 | } |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 57ea2e959e4d..c18d8ac7b795 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
| @@ -58,7 +58,8 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | |||
| 58 | 58 | ||
| 59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
| 60 | struct btrfs_fs_info *fs_info, u64 bytenr, | 60 | struct btrfs_fs_info *fs_info, u64 bytenr, |
| 61 | u64 num_bytes, u64 seq, struct ulist **roots); | 61 | u64 delayed_ref_seq, u64 time_seq, |
| 62 | struct ulist **roots); | ||
| 62 | 63 | ||
| 63 | struct btrfs_data_container *init_data_container(u32 total_bytes); | 64 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
| 64 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | 65 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9b9b15fd5204..e616f8872e69 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -24,6 +24,20 @@ | |||
| 24 | #include "ordered-data.h" | 24 | #include "ordered-data.h" |
| 25 | #include "delayed-inode.h" | 25 | #include "delayed-inode.h" |
| 26 | 26 | ||
| 27 | /* | ||
| 28 | * ordered_data_close is set by truncate when a file that used | ||
| 29 | * to have good data has been truncated to zero. When it is set | ||
| 30 | * the btrfs file release call will add this inode to the | ||
| 31 | * ordered operations list so that we make sure to flush out any | ||
| 32 | * new data the application may have written before commit. | ||
| 33 | */ | ||
| 34 | #define BTRFS_INODE_ORDERED_DATA_CLOSE 0 | ||
| 35 | #define BTRFS_INODE_ORPHAN_META_RESERVED 1 | ||
| 36 | #define BTRFS_INODE_DUMMY 2 | ||
| 37 | #define BTRFS_INODE_IN_DEFRAG 3 | ||
| 38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 | ||
| 39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 | ||
| 40 | |||
| 27 | /* in memory btrfs inode */ | 41 | /* in memory btrfs inode */ |
| 28 | struct btrfs_inode { | 42 | struct btrfs_inode { |
| 29 | /* which subvolume this inode belongs to */ | 43 | /* which subvolume this inode belongs to */ |
| @@ -57,9 +71,6 @@ struct btrfs_inode { | |||
| 57 | /* used to order data wrt metadata */ | 71 | /* used to order data wrt metadata */ |
| 58 | struct btrfs_ordered_inode_tree ordered_tree; | 72 | struct btrfs_ordered_inode_tree ordered_tree; |
| 59 | 73 | ||
| 60 | /* for keeping track of orphaned inodes */ | ||
| 61 | struct list_head i_orphan; | ||
| 62 | |||
| 63 | /* list of all the delalloc inodes in the FS. There are times we need | 74 | /* list of all the delalloc inodes in the FS. There are times we need |
| 64 | * to write all the delalloc pages to disk, and this list is used | 75 | * to write all the delalloc pages to disk, and this list is used |
| 65 | * to walk them all. | 76 | * to walk them all. |
| @@ -78,14 +89,13 @@ struct btrfs_inode { | |||
| 78 | /* the space_info for where this inode's data allocations are done */ | 89 | /* the space_info for where this inode's data allocations are done */ |
| 79 | struct btrfs_space_info *space_info; | 90 | struct btrfs_space_info *space_info; |
| 80 | 91 | ||
| 92 | unsigned long runtime_flags; | ||
| 93 | |||
| 81 | /* full 64 bit generation number, struct vfs_inode doesn't have a big | 94 | /* full 64 bit generation number, struct vfs_inode doesn't have a big |
| 82 | * enough field for this. | 95 | * enough field for this. |
| 83 | */ | 96 | */ |
| 84 | u64 generation; | 97 | u64 generation; |
| 85 | 98 | ||
| 86 | /* sequence number for NFS changes */ | ||
| 87 | u64 sequence; | ||
| 88 | |||
| 89 | /* | 99 | /* |
| 90 | * transid of the trans_handle that last modified this inode | 100 | * transid of the trans_handle that last modified this inode |
| 91 | */ | 101 | */ |
| @@ -145,22 +155,9 @@ struct btrfs_inode { | |||
| 145 | unsigned reserved_extents; | 155 | unsigned reserved_extents; |
| 146 | 156 | ||
| 147 | /* | 157 | /* |
| 148 | * ordered_data_close is set by truncate when a file that used | ||
| 149 | * to have good data has been truncated to zero. When it is set | ||
| 150 | * the btrfs file release call will add this inode to the | ||
| 151 | * ordered operations list so that we make sure to flush out any | ||
| 152 | * new data the application may have written before commit. | ||
| 153 | */ | ||
| 154 | unsigned ordered_data_close:1; | ||
| 155 | unsigned orphan_meta_reserved:1; | ||
| 156 | unsigned dummy_inode:1; | ||
| 157 | unsigned in_defrag:1; | ||
| 158 | unsigned delalloc_meta_reserved:1; | ||
| 159 | |||
| 160 | /* | ||
| 161 | * always compress this one file | 158 | * always compress this one file |
| 162 | */ | 159 | */ |
| 163 | unsigned force_compress:4; | 160 | unsigned force_compress; |
| 164 | 161 | ||
| 165 | struct btrfs_delayed_node *delayed_node; | 162 | struct btrfs_delayed_node *delayed_node; |
| 166 | 163 | ||
| @@ -202,4 +199,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, | |||
| 202 | return false; | 199 | return false; |
| 203 | } | 200 | } |
| 204 | 201 | ||
| 202 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) | ||
| 203 | { | ||
| 204 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 205 | int ret = 0; | ||
| 206 | |||
| 207 | mutex_lock(&root->log_mutex); | ||
| 208 | if (BTRFS_I(inode)->logged_trans == generation && | ||
| 209 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | ||
| 210 | ret = 1; | ||
| 211 | mutex_unlock(&root->log_mutex); | ||
| 212 | return ret; | ||
| 213 | } | ||
| 214 | |||
| 205 | #endif | 215 | #endif |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index c053e90f2006..9cebb1fd6a3c 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
| @@ -103,8 +103,6 @@ | |||
| 103 | #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 | 103 | #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 |
| 104 | #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, | 104 | #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, |
| 105 | * excluding " [...]" */ | 105 | * excluding " [...]" */ |
| 106 | #define BTRFSIC_BLOCK_SIZE PAGE_SIZE | ||
| 107 | |||
| 108 | #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) | 106 | #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) |
| 109 | 107 | ||
| 110 | /* | 108 | /* |
| @@ -210,8 +208,9 @@ struct btrfsic_block_data_ctx { | |||
| 210 | u64 dev_bytenr; /* physical bytenr on device */ | 208 | u64 dev_bytenr; /* physical bytenr on device */ |
| 211 | u32 len; | 209 | u32 len; |
| 212 | struct btrfsic_dev_state *dev; | 210 | struct btrfsic_dev_state *dev; |
| 213 | char *data; | 211 | char **datav; |
| 214 | struct buffer_head *bh; /* do not use if set to NULL */ | 212 | struct page **pagev; |
| 213 | void *mem_to_free; | ||
| 215 | }; | 214 | }; |
| 216 | 215 | ||
| 217 | /* This structure is used to implement recursion without occupying | 216 | /* This structure is used to implement recursion without occupying |
| @@ -243,6 +242,8 @@ struct btrfsic_state { | |||
| 243 | struct btrfs_root *root; | 242 | struct btrfs_root *root; |
| 244 | u64 max_superblock_generation; | 243 | u64 max_superblock_generation; |
| 245 | struct btrfsic_block *latest_superblock; | 244 | struct btrfsic_block *latest_superblock; |
| 245 | u32 metablock_size; | ||
| 246 | u32 datablock_size; | ||
| 246 | }; | 247 | }; |
| 247 | 248 | ||
| 248 | static void btrfsic_block_init(struct btrfsic_block *b); | 249 | static void btrfsic_block_init(struct btrfsic_block *b); |
| @@ -290,8 +291,10 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
| 290 | static int btrfsic_process_metablock(struct btrfsic_state *state, | 291 | static int btrfsic_process_metablock(struct btrfsic_state *state, |
| 291 | struct btrfsic_block *block, | 292 | struct btrfsic_block *block, |
| 292 | struct btrfsic_block_data_ctx *block_ctx, | 293 | struct btrfsic_block_data_ctx *block_ctx, |
| 293 | struct btrfs_header *hdr, | ||
| 294 | int limit_nesting, int force_iodone_flag); | 294 | int limit_nesting, int force_iodone_flag); |
| 295 | static void btrfsic_read_from_block_data( | ||
| 296 | struct btrfsic_block_data_ctx *block_ctx, | ||
| 297 | void *dst, u32 offset, size_t len); | ||
| 295 | static int btrfsic_create_link_to_next_block( | 298 | static int btrfsic_create_link_to_next_block( |
| 296 | struct btrfsic_state *state, | 299 | struct btrfsic_state *state, |
| 297 | struct btrfsic_block *block, | 300 | struct btrfsic_block *block, |
| @@ -318,12 +321,13 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); | |||
| 318 | static int btrfsic_read_block(struct btrfsic_state *state, | 321 | static int btrfsic_read_block(struct btrfsic_state *state, |
| 319 | struct btrfsic_block_data_ctx *block_ctx); | 322 | struct btrfsic_block_data_ctx *block_ctx); |
| 320 | static void btrfsic_dump_database(struct btrfsic_state *state); | 323 | static void btrfsic_dump_database(struct btrfsic_state *state); |
| 324 | static void btrfsic_complete_bio_end_io(struct bio *bio, int err); | ||
| 321 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | 325 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, |
| 322 | const u8 *data, unsigned int size); | 326 | char **datav, unsigned int num_pages); |
| 323 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | 327 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, |
| 324 | u64 dev_bytenr, u8 *mapped_data, | 328 | u64 dev_bytenr, char **mapped_datav, |
| 325 | unsigned int len, struct bio *bio, | 329 | unsigned int num_pages, |
| 326 | int *bio_is_patched, | 330 | struct bio *bio, int *bio_is_patched, |
| 327 | struct buffer_head *bh, | 331 | struct buffer_head *bh, |
| 328 | int submit_bio_bh_rw); | 332 | int submit_bio_bh_rw); |
| 329 | static int btrfsic_process_written_superblock( | 333 | static int btrfsic_process_written_superblock( |
| @@ -375,7 +379,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup( | |||
| 375 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | 379 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, |
| 376 | u64 bytenr, | 380 | u64 bytenr, |
| 377 | struct btrfsic_dev_state *dev_state, | 381 | struct btrfsic_dev_state *dev_state, |
| 378 | u64 dev_bytenr, char *data); | 382 | u64 dev_bytenr); |
| 379 | 383 | ||
| 380 | static struct mutex btrfsic_mutex; | 384 | static struct mutex btrfsic_mutex; |
| 381 | static int btrfsic_is_initialized; | 385 | static int btrfsic_is_initialized; |
| @@ -651,7 +655,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
| 651 | int pass; | 655 | int pass; |
| 652 | 656 | ||
| 653 | BUG_ON(NULL == state); | 657 | BUG_ON(NULL == state); |
| 654 | selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS); | 658 | selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); |
| 655 | if (NULL == selected_super) { | 659 | if (NULL == selected_super) { |
| 656 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | 660 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); |
| 657 | return -1; | 661 | return -1; |
| @@ -718,7 +722,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
| 718 | 722 | ||
| 719 | num_copies = | 723 | num_copies = |
| 720 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 724 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
| 721 | next_bytenr, PAGE_SIZE); | 725 | next_bytenr, state->metablock_size); |
| 722 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 726 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 723 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 727 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| 724 | (unsigned long long)next_bytenr, num_copies); | 728 | (unsigned long long)next_bytenr, num_copies); |
| @@ -727,9 +731,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
| 727 | struct btrfsic_block *next_block; | 731 | struct btrfsic_block *next_block; |
| 728 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | 732 | struct btrfsic_block_data_ctx tmp_next_block_ctx; |
| 729 | struct btrfsic_block_link *l; | 733 | struct btrfsic_block_link *l; |
| 730 | struct btrfs_header *hdr; | ||
| 731 | 734 | ||
| 732 | ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 735 | ret = btrfsic_map_block(state, next_bytenr, |
| 736 | state->metablock_size, | ||
| 733 | &tmp_next_block_ctx, | 737 | &tmp_next_block_ctx, |
| 734 | mirror_num); | 738 | mirror_num); |
| 735 | if (ret) { | 739 | if (ret) { |
| @@ -758,7 +762,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
| 758 | BUG_ON(NULL == l); | 762 | BUG_ON(NULL == l); |
| 759 | 763 | ||
| 760 | ret = btrfsic_read_block(state, &tmp_next_block_ctx); | 764 | ret = btrfsic_read_block(state, &tmp_next_block_ctx); |
| 761 | if (ret < (int)BTRFSIC_BLOCK_SIZE) { | 765 | if (ret < (int)PAGE_CACHE_SIZE) { |
| 762 | printk(KERN_INFO | 766 | printk(KERN_INFO |
| 763 | "btrfsic: read @logical %llu failed!\n", | 767 | "btrfsic: read @logical %llu failed!\n", |
| 764 | (unsigned long long) | 768 | (unsigned long long) |
| @@ -768,11 +772,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
| 768 | return -1; | 772 | return -1; |
| 769 | } | 773 | } |
| 770 | 774 | ||
| 771 | hdr = (struct btrfs_header *)tmp_next_block_ctx.data; | ||
| 772 | ret = btrfsic_process_metablock(state, | 775 | ret = btrfsic_process_metablock(state, |
| 773 | next_block, | 776 | next_block, |
| 774 | &tmp_next_block_ctx, | 777 | &tmp_next_block_ctx, |
| 775 | hdr, | ||
| 776 | BTRFS_MAX_LEVEL + 3, 1); | 778 | BTRFS_MAX_LEVEL + 3, 1); |
| 777 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | 779 | btrfsic_release_block_ctx(&tmp_next_block_ctx); |
| 778 | } | 780 | } |
| @@ -799,7 +801,10 @@ static int btrfsic_process_superblock_dev_mirror( | |||
| 799 | 801 | ||
| 800 | /* super block bytenr is always the unmapped device bytenr */ | 802 | /* super block bytenr is always the unmapped device bytenr */ |
| 801 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); | 803 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); |
| 802 | bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096); | 804 | if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) |
| 805 | return -1; | ||
| 806 | bh = __bread(superblock_bdev, dev_bytenr / 4096, | ||
| 807 | BTRFS_SUPER_INFO_SIZE); | ||
| 803 | if (NULL == bh) | 808 | if (NULL == bh) |
| 804 | return -1; | 809 | return -1; |
| 805 | super_tmp = (struct btrfs_super_block *) | 810 | super_tmp = (struct btrfs_super_block *) |
| @@ -808,7 +813,10 @@ static int btrfsic_process_superblock_dev_mirror( | |||
| 808 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || | 813 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || |
| 809 | strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, | 814 | strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, |
| 810 | sizeof(super_tmp->magic)) || | 815 | sizeof(super_tmp->magic)) || |
| 811 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) { | 816 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || |
| 817 | btrfs_super_nodesize(super_tmp) != state->metablock_size || | ||
| 818 | btrfs_super_leafsize(super_tmp) != state->metablock_size || | ||
| 819 | btrfs_super_sectorsize(super_tmp) != state->datablock_size) { | ||
| 812 | brelse(bh); | 820 | brelse(bh); |
| 813 | return 0; | 821 | return 0; |
| 814 | } | 822 | } |
| @@ -893,7 +901,7 @@ static int btrfsic_process_superblock_dev_mirror( | |||
| 893 | 901 | ||
| 894 | num_copies = | 902 | num_copies = |
| 895 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 903 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
| 896 | next_bytenr, PAGE_SIZE); | 904 | next_bytenr, state->metablock_size); |
| 897 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 905 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 898 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 906 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| 899 | (unsigned long long)next_bytenr, num_copies); | 907 | (unsigned long long)next_bytenr, num_copies); |
| @@ -902,7 +910,8 @@ static int btrfsic_process_superblock_dev_mirror( | |||
| 902 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | 910 | struct btrfsic_block_data_ctx tmp_next_block_ctx; |
| 903 | struct btrfsic_block_link *l; | 911 | struct btrfsic_block_link *l; |
| 904 | 912 | ||
| 905 | if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 913 | if (btrfsic_map_block(state, next_bytenr, |
| 914 | state->metablock_size, | ||
| 906 | &tmp_next_block_ctx, | 915 | &tmp_next_block_ctx, |
| 907 | mirror_num)) { | 916 | mirror_num)) { |
| 908 | printk(KERN_INFO "btrfsic: btrfsic_map_block(" | 917 | printk(KERN_INFO "btrfsic: btrfsic_map_block(" |
| @@ -966,13 +975,15 @@ static int btrfsic_process_metablock( | |||
| 966 | struct btrfsic_state *state, | 975 | struct btrfsic_state *state, |
| 967 | struct btrfsic_block *const first_block, | 976 | struct btrfsic_block *const first_block, |
| 968 | struct btrfsic_block_data_ctx *const first_block_ctx, | 977 | struct btrfsic_block_data_ctx *const first_block_ctx, |
| 969 | struct btrfs_header *const first_hdr, | ||
| 970 | int first_limit_nesting, int force_iodone_flag) | 978 | int first_limit_nesting, int force_iodone_flag) |
| 971 | { | 979 | { |
| 972 | struct btrfsic_stack_frame initial_stack_frame = { 0 }; | 980 | struct btrfsic_stack_frame initial_stack_frame = { 0 }; |
| 973 | struct btrfsic_stack_frame *sf; | 981 | struct btrfsic_stack_frame *sf; |
| 974 | struct btrfsic_stack_frame *next_stack; | 982 | struct btrfsic_stack_frame *next_stack; |
| 983 | struct btrfs_header *const first_hdr = | ||
| 984 | (struct btrfs_header *)first_block_ctx->datav[0]; | ||
| 975 | 985 | ||
| 986 | BUG_ON(!first_hdr); | ||
| 976 | sf = &initial_stack_frame; | 987 | sf = &initial_stack_frame; |
| 977 | sf->error = 0; | 988 | sf->error = 0; |
| 978 | sf->i = -1; | 989 | sf->i = -1; |
| @@ -1012,21 +1023,47 @@ continue_with_current_leaf_stack_frame: | |||
| 1012 | } | 1023 | } |
| 1013 | 1024 | ||
| 1014 | if (sf->i < sf->nr) { | 1025 | if (sf->i < sf->nr) { |
| 1015 | struct btrfs_item *disk_item = leafhdr->items + sf->i; | 1026 | struct btrfs_item disk_item; |
| 1016 | struct btrfs_disk_key *disk_key = &disk_item->key; | 1027 | u32 disk_item_offset = |
| 1028 | (uintptr_t)(leafhdr->items + sf->i) - | ||
| 1029 | (uintptr_t)leafhdr; | ||
| 1030 | struct btrfs_disk_key *disk_key; | ||
| 1017 | u8 type; | 1031 | u8 type; |
| 1018 | const u32 item_offset = le32_to_cpu(disk_item->offset); | 1032 | u32 item_offset; |
| 1019 | 1033 | ||
| 1034 | if (disk_item_offset + sizeof(struct btrfs_item) > | ||
| 1035 | sf->block_ctx->len) { | ||
| 1036 | leaf_item_out_of_bounce_error: | ||
| 1037 | printk(KERN_INFO | ||
| 1038 | "btrfsic: leaf item out of bounce at logical %llu, dev %s\n", | ||
| 1039 | sf->block_ctx->start, | ||
| 1040 | sf->block_ctx->dev->name); | ||
| 1041 | goto one_stack_frame_backwards; | ||
| 1042 | } | ||
| 1043 | btrfsic_read_from_block_data(sf->block_ctx, | ||
| 1044 | &disk_item, | ||
| 1045 | disk_item_offset, | ||
| 1046 | sizeof(struct btrfs_item)); | ||
| 1047 | item_offset = le32_to_cpu(disk_item.offset); | ||
| 1048 | disk_key = &disk_item.key; | ||
| 1020 | type = disk_key->type; | 1049 | type = disk_key->type; |
| 1021 | 1050 | ||
| 1022 | if (BTRFS_ROOT_ITEM_KEY == type) { | 1051 | if (BTRFS_ROOT_ITEM_KEY == type) { |
| 1023 | const struct btrfs_root_item *const root_item = | 1052 | struct btrfs_root_item root_item; |
| 1024 | (struct btrfs_root_item *) | 1053 | u32 root_item_offset; |
| 1025 | (sf->block_ctx->data + | 1054 | u64 next_bytenr; |
| 1026 | offsetof(struct btrfs_leaf, items) + | 1055 | |
| 1027 | item_offset); | 1056 | root_item_offset = item_offset + |
| 1028 | const u64 next_bytenr = | 1057 | offsetof(struct btrfs_leaf, items); |
| 1029 | le64_to_cpu(root_item->bytenr); | 1058 | if (root_item_offset + |
| 1059 | sizeof(struct btrfs_root_item) > | ||
| 1060 | sf->block_ctx->len) | ||
| 1061 | goto leaf_item_out_of_bounce_error; | ||
| 1062 | btrfsic_read_from_block_data( | ||
| 1063 | sf->block_ctx, &root_item, | ||
| 1064 | root_item_offset, | ||
| 1065 | sizeof(struct btrfs_root_item)); | ||
| 1066 | next_bytenr = le64_to_cpu(root_item.bytenr); | ||
| 1030 | 1067 | ||
| 1031 | sf->error = | 1068 | sf->error = |
| 1032 | btrfsic_create_link_to_next_block( | 1069 | btrfsic_create_link_to_next_block( |
| @@ -1041,7 +1078,7 @@ continue_with_current_leaf_stack_frame: | |||
| 1041 | &sf->num_copies, | 1078 | &sf->num_copies, |
| 1042 | &sf->mirror_num, | 1079 | &sf->mirror_num, |
| 1043 | disk_key, | 1080 | disk_key, |
| 1044 | le64_to_cpu(root_item-> | 1081 | le64_to_cpu(root_item. |
| 1045 | generation)); | 1082 | generation)); |
| 1046 | if (sf->error) | 1083 | if (sf->error) |
| 1047 | goto one_stack_frame_backwards; | 1084 | goto one_stack_frame_backwards; |
| @@ -1049,7 +1086,7 @@ continue_with_current_leaf_stack_frame: | |||
| 1049 | if (NULL != sf->next_block) { | 1086 | if (NULL != sf->next_block) { |
| 1050 | struct btrfs_header *const next_hdr = | 1087 | struct btrfs_header *const next_hdr = |
| 1051 | (struct btrfs_header *) | 1088 | (struct btrfs_header *) |
| 1052 | sf->next_block_ctx.data; | 1089 | sf->next_block_ctx.datav[0]; |
| 1053 | 1090 | ||
| 1054 | next_stack = | 1091 | next_stack = |
| 1055 | btrfsic_stack_frame_alloc(); | 1092 | btrfsic_stack_frame_alloc(); |
| @@ -1111,10 +1148,24 @@ continue_with_current_node_stack_frame: | |||
| 1111 | } | 1148 | } |
| 1112 | 1149 | ||
| 1113 | if (sf->i < sf->nr) { | 1150 | if (sf->i < sf->nr) { |
| 1114 | struct btrfs_key_ptr *disk_key_ptr = | 1151 | struct btrfs_key_ptr key_ptr; |
| 1115 | nodehdr->ptrs + sf->i; | 1152 | u32 key_ptr_offset; |
| 1116 | const u64 next_bytenr = | 1153 | u64 next_bytenr; |
| 1117 | le64_to_cpu(disk_key_ptr->blockptr); | 1154 | |
| 1155 | key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) - | ||
| 1156 | (uintptr_t)nodehdr; | ||
| 1157 | if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > | ||
| 1158 | sf->block_ctx->len) { | ||
| 1159 | printk(KERN_INFO | ||
| 1160 | "btrfsic: node item out of bounce at logical %llu, dev %s\n", | ||
| 1161 | sf->block_ctx->start, | ||
| 1162 | sf->block_ctx->dev->name); | ||
| 1163 | goto one_stack_frame_backwards; | ||
| 1164 | } | ||
| 1165 | btrfsic_read_from_block_data( | ||
| 1166 | sf->block_ctx, &key_ptr, key_ptr_offset, | ||
| 1167 | sizeof(struct btrfs_key_ptr)); | ||
| 1168 | next_bytenr = le64_to_cpu(key_ptr.blockptr); | ||
| 1118 | 1169 | ||
| 1119 | sf->error = btrfsic_create_link_to_next_block( | 1170 | sf->error = btrfsic_create_link_to_next_block( |
| 1120 | state, | 1171 | state, |
| @@ -1127,15 +1178,15 @@ continue_with_current_node_stack_frame: | |||
| 1127 | force_iodone_flag, | 1178 | force_iodone_flag, |
| 1128 | &sf->num_copies, | 1179 | &sf->num_copies, |
| 1129 | &sf->mirror_num, | 1180 | &sf->mirror_num, |
| 1130 | &disk_key_ptr->key, | 1181 | &key_ptr.key, |
| 1131 | le64_to_cpu(disk_key_ptr->generation)); | 1182 | le64_to_cpu(key_ptr.generation)); |
| 1132 | if (sf->error) | 1183 | if (sf->error) |
| 1133 | goto one_stack_frame_backwards; | 1184 | goto one_stack_frame_backwards; |
| 1134 | 1185 | ||
| 1135 | if (NULL != sf->next_block) { | 1186 | if (NULL != sf->next_block) { |
| 1136 | struct btrfs_header *const next_hdr = | 1187 | struct btrfs_header *const next_hdr = |
| 1137 | (struct btrfs_header *) | 1188 | (struct btrfs_header *) |
| 1138 | sf->next_block_ctx.data; | 1189 | sf->next_block_ctx.datav[0]; |
| 1139 | 1190 | ||
| 1140 | next_stack = btrfsic_stack_frame_alloc(); | 1191 | next_stack = btrfsic_stack_frame_alloc(); |
| 1141 | if (NULL == next_stack) | 1192 | if (NULL == next_stack) |
| @@ -1181,6 +1232,35 @@ one_stack_frame_backwards: | |||
| 1181 | return sf->error; | 1232 | return sf->error; |
| 1182 | } | 1233 | } |
| 1183 | 1234 | ||
| 1235 | static void btrfsic_read_from_block_data( | ||
| 1236 | struct btrfsic_block_data_ctx *block_ctx, | ||
| 1237 | void *dstv, u32 offset, size_t len) | ||
| 1238 | { | ||
| 1239 | size_t cur; | ||
| 1240 | size_t offset_in_page; | ||
| 1241 | char *kaddr; | ||
| 1242 | char *dst = (char *)dstv; | ||
| 1243 | size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
| 1244 | unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; | ||
| 1245 | |||
| 1246 | WARN_ON(offset + len > block_ctx->len); | ||
| 1247 | offset_in_page = (start_offset + offset) & | ||
| 1248 | ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
| 1249 | |||
| 1250 | while (len > 0) { | ||
| 1251 | cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); | ||
| 1252 | BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >> | ||
| 1253 | PAGE_CACHE_SHIFT); | ||
| 1254 | kaddr = block_ctx->datav[i]; | ||
| 1255 | memcpy(dst, kaddr + offset_in_page, cur); | ||
| 1256 | |||
| 1257 | dst += cur; | ||
| 1258 | len -= cur; | ||
| 1259 | offset_in_page = 0; | ||
| 1260 | i++; | ||
| 1261 | } | ||
| 1262 | } | ||
| 1263 | |||
| 1184 | static int btrfsic_create_link_to_next_block( | 1264 | static int btrfsic_create_link_to_next_block( |
| 1185 | struct btrfsic_state *state, | 1265 | struct btrfsic_state *state, |
| 1186 | struct btrfsic_block *block, | 1266 | struct btrfsic_block *block, |
| @@ -1204,7 +1284,7 @@ static int btrfsic_create_link_to_next_block( | |||
| 1204 | if (0 == *num_copiesp) { | 1284 | if (0 == *num_copiesp) { |
| 1205 | *num_copiesp = | 1285 | *num_copiesp = |
| 1206 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1286 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
| 1207 | next_bytenr, PAGE_SIZE); | 1287 | next_bytenr, state->metablock_size); |
| 1208 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1288 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 1209 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1289 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| 1210 | (unsigned long long)next_bytenr, *num_copiesp); | 1290 | (unsigned long long)next_bytenr, *num_copiesp); |
| @@ -1219,7 +1299,7 @@ static int btrfsic_create_link_to_next_block( | |||
| 1219 | "btrfsic_create_link_to_next_block(mirror_num=%d)\n", | 1299 | "btrfsic_create_link_to_next_block(mirror_num=%d)\n", |
| 1220 | *mirror_nump); | 1300 | *mirror_nump); |
| 1221 | ret = btrfsic_map_block(state, next_bytenr, | 1301 | ret = btrfsic_map_block(state, next_bytenr, |
| 1222 | BTRFSIC_BLOCK_SIZE, | 1302 | state->metablock_size, |
| 1223 | next_block_ctx, *mirror_nump); | 1303 | next_block_ctx, *mirror_nump); |
| 1224 | if (ret) { | 1304 | if (ret) { |
| 1225 | printk(KERN_INFO | 1305 | printk(KERN_INFO |
| @@ -1314,7 +1394,7 @@ static int btrfsic_create_link_to_next_block( | |||
| 1314 | 1394 | ||
| 1315 | if (limit_nesting > 0 && did_alloc_block_link) { | 1395 | if (limit_nesting > 0 && did_alloc_block_link) { |
| 1316 | ret = btrfsic_read_block(state, next_block_ctx); | 1396 | ret = btrfsic_read_block(state, next_block_ctx); |
| 1317 | if (ret < (int)BTRFSIC_BLOCK_SIZE) { | 1397 | if (ret < (int)next_block_ctx->len) { |
| 1318 | printk(KERN_INFO | 1398 | printk(KERN_INFO |
| 1319 | "btrfsic: read block @logical %llu failed!\n", | 1399 | "btrfsic: read block @logical %llu failed!\n", |
| 1320 | (unsigned long long)next_bytenr); | 1400 | (unsigned long long)next_bytenr); |
| @@ -1339,43 +1419,74 @@ static int btrfsic_handle_extent_data( | |||
| 1339 | u32 item_offset, int force_iodone_flag) | 1419 | u32 item_offset, int force_iodone_flag) |
| 1340 | { | 1420 | { |
| 1341 | int ret; | 1421 | int ret; |
| 1342 | struct btrfs_file_extent_item *file_extent_item = | 1422 | struct btrfs_file_extent_item file_extent_item; |
| 1343 | (struct btrfs_file_extent_item *)(block_ctx->data + | 1423 | u64 file_extent_item_offset; |
| 1344 | offsetof(struct btrfs_leaf, | 1424 | u64 next_bytenr; |
| 1345 | items) + item_offset); | 1425 | u64 num_bytes; |
| 1346 | u64 next_bytenr = | 1426 | u64 generation; |
| 1347 | le64_to_cpu(file_extent_item->disk_bytenr) + | ||
| 1348 | le64_to_cpu(file_extent_item->offset); | ||
| 1349 | u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes); | ||
| 1350 | u64 generation = le64_to_cpu(file_extent_item->generation); | ||
| 1351 | struct btrfsic_block_link *l; | 1427 | struct btrfsic_block_link *l; |
| 1352 | 1428 | ||
| 1429 | file_extent_item_offset = offsetof(struct btrfs_leaf, items) + | ||
| 1430 | item_offset; | ||
| 1431 | if (file_extent_item_offset + | ||
| 1432 | offsetof(struct btrfs_file_extent_item, disk_num_bytes) > | ||
| 1433 | block_ctx->len) { | ||
| 1434 | printk(KERN_INFO | ||
| 1435 | "btrfsic: file item out of bounce at logical %llu, dev %s\n", | ||
| 1436 | block_ctx->start, block_ctx->dev->name); | ||
| 1437 | return -1; | ||
| 1438 | } | ||
| 1439 | |||
| 1440 | btrfsic_read_from_block_data(block_ctx, &file_extent_item, | ||
| 1441 | file_extent_item_offset, | ||
| 1442 | offsetof(struct btrfs_file_extent_item, disk_num_bytes)); | ||
| 1443 | if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || | ||
| 1444 | ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { | ||
| 1445 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) | ||
| 1446 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", | ||
| 1447 | file_extent_item.type, | ||
| 1448 | (unsigned long long) | ||
| 1449 | le64_to_cpu(file_extent_item.disk_bytenr)); | ||
| 1450 | return 0; | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > | ||
| 1454 | block_ctx->len) { | ||
| 1455 | printk(KERN_INFO | ||
| 1456 | "btrfsic: file item out of bounce at logical %llu, dev %s\n", | ||
| 1457 | block_ctx->start, block_ctx->dev->name); | ||
| 1458 | return -1; | ||
| 1459 | } | ||
| 1460 | btrfsic_read_from_block_data(block_ctx, &file_extent_item, | ||
| 1461 | file_extent_item_offset, | ||
| 1462 | sizeof(struct btrfs_file_extent_item)); | ||
| 1463 | next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + | ||
| 1464 | le64_to_cpu(file_extent_item.offset); | ||
| 1465 | generation = le64_to_cpu(file_extent_item.generation); | ||
| 1466 | num_bytes = le64_to_cpu(file_extent_item.num_bytes); | ||
| 1467 | generation = le64_to_cpu(file_extent_item.generation); | ||
| 1468 | |||
| 1353 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) | 1469 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) |
| 1354 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," | 1470 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," |
| 1355 | " offset = %llu, num_bytes = %llu\n", | 1471 | " offset = %llu, num_bytes = %llu\n", |
| 1356 | file_extent_item->type, | 1472 | file_extent_item.type, |
| 1357 | (unsigned long long) | 1473 | (unsigned long long) |
| 1358 | le64_to_cpu(file_extent_item->disk_bytenr), | 1474 | le64_to_cpu(file_extent_item.disk_bytenr), |
| 1359 | (unsigned long long) | 1475 | (unsigned long long)le64_to_cpu(file_extent_item.offset), |
| 1360 | le64_to_cpu(file_extent_item->offset), | 1476 | (unsigned long long)num_bytes); |
| 1361 | (unsigned long long) | ||
| 1362 | le64_to_cpu(file_extent_item->num_bytes)); | ||
| 1363 | if (BTRFS_FILE_EXTENT_REG != file_extent_item->type || | ||
| 1364 | ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr)) | ||
| 1365 | return 0; | ||
| 1366 | while (num_bytes > 0) { | 1477 | while (num_bytes > 0) { |
| 1367 | u32 chunk_len; | 1478 | u32 chunk_len; |
| 1368 | int num_copies; | 1479 | int num_copies; |
| 1369 | int mirror_num; | 1480 | int mirror_num; |
| 1370 | 1481 | ||
| 1371 | if (num_bytes > BTRFSIC_BLOCK_SIZE) | 1482 | if (num_bytes > state->datablock_size) |
| 1372 | chunk_len = BTRFSIC_BLOCK_SIZE; | 1483 | chunk_len = state->datablock_size; |
| 1373 | else | 1484 | else |
| 1374 | chunk_len = num_bytes; | 1485 | chunk_len = num_bytes; |
| 1375 | 1486 | ||
| 1376 | num_copies = | 1487 | num_copies = |
| 1377 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1488 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
| 1378 | next_bytenr, PAGE_SIZE); | 1489 | next_bytenr, state->datablock_size); |
| 1379 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1490 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 1380 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1491 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| 1381 | (unsigned long long)next_bytenr, num_copies); | 1492 | (unsigned long long)next_bytenr, num_copies); |
| @@ -1475,8 +1586,9 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | |||
| 1475 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; | 1586 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; |
| 1476 | block_ctx_out->start = bytenr; | 1587 | block_ctx_out->start = bytenr; |
| 1477 | block_ctx_out->len = len; | 1588 | block_ctx_out->len = len; |
| 1478 | block_ctx_out->data = NULL; | 1589 | block_ctx_out->datav = NULL; |
| 1479 | block_ctx_out->bh = NULL; | 1590 | block_ctx_out->pagev = NULL; |
| 1591 | block_ctx_out->mem_to_free = NULL; | ||
| 1480 | 1592 | ||
| 1481 | if (0 == ret) | 1593 | if (0 == ret) |
| 1482 | kfree(multi); | 1594 | kfree(multi); |
| @@ -1496,8 +1608,9 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, | |||
| 1496 | block_ctx_out->dev_bytenr = bytenr; | 1608 | block_ctx_out->dev_bytenr = bytenr; |
| 1497 | block_ctx_out->start = bytenr; | 1609 | block_ctx_out->start = bytenr; |
| 1498 | block_ctx_out->len = len; | 1610 | block_ctx_out->len = len; |
| 1499 | block_ctx_out->data = NULL; | 1611 | block_ctx_out->datav = NULL; |
| 1500 | block_ctx_out->bh = NULL; | 1612 | block_ctx_out->pagev = NULL; |
| 1613 | block_ctx_out->mem_to_free = NULL; | ||
| 1501 | if (NULL != block_ctx_out->dev) { | 1614 | if (NULL != block_ctx_out->dev) { |
| 1502 | return 0; | 1615 | return 0; |
| 1503 | } else { | 1616 | } else { |
| @@ -1508,38 +1621,127 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, | |||
| 1508 | 1621 | ||
| 1509 | static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) | 1622 | static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) |
| 1510 | { | 1623 | { |
| 1511 | if (NULL != block_ctx->bh) { | 1624 | if (block_ctx->mem_to_free) { |
| 1512 | brelse(block_ctx->bh); | 1625 | unsigned int num_pages; |
| 1513 | block_ctx->bh = NULL; | 1626 | |
| 1627 | BUG_ON(!block_ctx->datav); | ||
| 1628 | BUG_ON(!block_ctx->pagev); | ||
| 1629 | num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> | ||
| 1630 | PAGE_CACHE_SHIFT; | ||
| 1631 | while (num_pages > 0) { | ||
| 1632 | num_pages--; | ||
| 1633 | if (block_ctx->datav[num_pages]) { | ||
| 1634 | kunmap(block_ctx->pagev[num_pages]); | ||
| 1635 | block_ctx->datav[num_pages] = NULL; | ||
| 1636 | } | ||
| 1637 | if (block_ctx->pagev[num_pages]) { | ||
| 1638 | __free_page(block_ctx->pagev[num_pages]); | ||
| 1639 | block_ctx->pagev[num_pages] = NULL; | ||
| 1640 | } | ||
| 1641 | } | ||
| 1642 | |||
| 1643 | kfree(block_ctx->mem_to_free); | ||
| 1644 | block_ctx->mem_to_free = NULL; | ||
| 1645 | block_ctx->pagev = NULL; | ||
| 1646 | block_ctx->datav = NULL; | ||
| 1514 | } | 1647 | } |
| 1515 | } | 1648 | } |
| 1516 | 1649 | ||
| 1517 | static int btrfsic_read_block(struct btrfsic_state *state, | 1650 | static int btrfsic_read_block(struct btrfsic_state *state, |
| 1518 | struct btrfsic_block_data_ctx *block_ctx) | 1651 | struct btrfsic_block_data_ctx *block_ctx) |
| 1519 | { | 1652 | { |
| 1520 | block_ctx->bh = NULL; | 1653 | unsigned int num_pages; |
| 1521 | if (block_ctx->dev_bytenr & 4095) { | 1654 | unsigned int i; |
| 1655 | u64 dev_bytenr; | ||
| 1656 | int ret; | ||
| 1657 | |||
| 1658 | BUG_ON(block_ctx->datav); | ||
| 1659 | BUG_ON(block_ctx->pagev); | ||
| 1660 | BUG_ON(block_ctx->mem_to_free); | ||
| 1661 | if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
| 1522 | printk(KERN_INFO | 1662 | printk(KERN_INFO |
| 1523 | "btrfsic: read_block() with unaligned bytenr %llu\n", | 1663 | "btrfsic: read_block() with unaligned bytenr %llu\n", |
| 1524 | (unsigned long long)block_ctx->dev_bytenr); | 1664 | (unsigned long long)block_ctx->dev_bytenr); |
| 1525 | return -1; | 1665 | return -1; |
| 1526 | } | 1666 | } |
| 1527 | if (block_ctx->len > 4096) { | 1667 | |
| 1528 | printk(KERN_INFO | 1668 | num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> |
| 1529 | "btrfsic: read_block() with too huge size %d\n", | 1669 | PAGE_CACHE_SHIFT; |
| 1530 | block_ctx->len); | 1670 | block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) + |
| 1671 | sizeof(*block_ctx->pagev)) * | ||
| 1672 | num_pages, GFP_NOFS); | ||
| 1673 | if (!block_ctx->mem_to_free) | ||
| 1531 | return -1; | 1674 | return -1; |
| 1675 | block_ctx->datav = block_ctx->mem_to_free; | ||
| 1676 | block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); | ||
| 1677 | for (i = 0; i < num_pages; i++) { | ||
| 1678 | block_ctx->pagev[i] = alloc_page(GFP_NOFS); | ||
| 1679 | if (!block_ctx->pagev[i]) | ||
| 1680 | return -1; | ||
| 1532 | } | 1681 | } |
| 1533 | 1682 | ||
| 1534 | block_ctx->bh = __bread(block_ctx->dev->bdev, | 1683 | dev_bytenr = block_ctx->dev_bytenr; |
| 1535 | block_ctx->dev_bytenr >> 12, 4096); | 1684 | for (i = 0; i < num_pages;) { |
| 1536 | if (NULL == block_ctx->bh) | 1685 | struct bio *bio; |
| 1537 | return -1; | 1686 | unsigned int j; |
| 1538 | block_ctx->data = block_ctx->bh->b_data; | 1687 | DECLARE_COMPLETION_ONSTACK(complete); |
| 1688 | |||
| 1689 | bio = bio_alloc(GFP_NOFS, num_pages - i); | ||
| 1690 | if (!bio) { | ||
| 1691 | printk(KERN_INFO | ||
| 1692 | "btrfsic: bio_alloc() for %u pages failed!\n", | ||
| 1693 | num_pages - i); | ||
| 1694 | return -1; | ||
| 1695 | } | ||
| 1696 | bio->bi_bdev = block_ctx->dev->bdev; | ||
| 1697 | bio->bi_sector = dev_bytenr >> 9; | ||
| 1698 | bio->bi_end_io = btrfsic_complete_bio_end_io; | ||
| 1699 | bio->bi_private = &complete; | ||
| 1700 | |||
| 1701 | for (j = i; j < num_pages; j++) { | ||
| 1702 | ret = bio_add_page(bio, block_ctx->pagev[j], | ||
| 1703 | PAGE_CACHE_SIZE, 0); | ||
| 1704 | if (PAGE_CACHE_SIZE != ret) | ||
| 1705 | break; | ||
| 1706 | } | ||
| 1707 | if (j == i) { | ||
| 1708 | printk(KERN_INFO | ||
| 1709 | "btrfsic: error, failed to add a single page!\n"); | ||
| 1710 | return -1; | ||
| 1711 | } | ||
| 1712 | submit_bio(READ, bio); | ||
| 1713 | |||
| 1714 | /* this will also unplug the queue */ | ||
| 1715 | wait_for_completion(&complete); | ||
| 1716 | |||
| 1717 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | ||
| 1718 | printk(KERN_INFO | ||
| 1719 | "btrfsic: read error at logical %llu dev %s!\n", | ||
| 1720 | block_ctx->start, block_ctx->dev->name); | ||
| 1721 | bio_put(bio); | ||
| 1722 | return -1; | ||
| 1723 | } | ||
| 1724 | bio_put(bio); | ||
| 1725 | dev_bytenr += (j - i) * PAGE_CACHE_SIZE; | ||
| 1726 | i = j; | ||
| 1727 | } | ||
| 1728 | for (i = 0; i < num_pages; i++) { | ||
| 1729 | block_ctx->datav[i] = kmap(block_ctx->pagev[i]); | ||
| 1730 | if (!block_ctx->datav[i]) { | ||
| 1731 | printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n", | ||
| 1732 | block_ctx->dev->name); | ||
| 1733 | return -1; | ||
| 1734 | } | ||
| 1735 | } | ||
| 1539 | 1736 | ||
| 1540 | return block_ctx->len; | 1737 | return block_ctx->len; |
| 1541 | } | 1738 | } |
| 1542 | 1739 | ||
| 1740 | static void btrfsic_complete_bio_end_io(struct bio *bio, int err) | ||
| 1741 | { | ||
| 1742 | complete((struct completion *)bio->bi_private); | ||
| 1743 | } | ||
| 1744 | |||
| 1543 | static void btrfsic_dump_database(struct btrfsic_state *state) | 1745 | static void btrfsic_dump_database(struct btrfsic_state *state) |
| 1544 | { | 1746 | { |
| 1545 | struct list_head *elem_all; | 1747 | struct list_head *elem_all; |
| @@ -1617,32 +1819,39 @@ static void btrfsic_dump_database(struct btrfsic_state *state) | |||
| 1617 | * (note that this test fails for the super block) | 1819 | * (note that this test fails for the super block) |
| 1618 | */ | 1820 | */ |
| 1619 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | 1821 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, |
| 1620 | const u8 *data, unsigned int size) | 1822 | char **datav, unsigned int num_pages) |
| 1621 | { | 1823 | { |
| 1622 | struct btrfs_header *h; | 1824 | struct btrfs_header *h; |
| 1623 | u8 csum[BTRFS_CSUM_SIZE]; | 1825 | u8 csum[BTRFS_CSUM_SIZE]; |
| 1624 | u32 crc = ~(u32)0; | 1826 | u32 crc = ~(u32)0; |
| 1625 | int fail = 0; | 1827 | unsigned int i; |
| 1626 | int crc_fail = 0; | ||
| 1627 | 1828 | ||
| 1628 | h = (struct btrfs_header *)data; | 1829 | if (num_pages * PAGE_CACHE_SIZE < state->metablock_size) |
| 1830 | return 1; /* not metadata */ | ||
| 1831 | num_pages = state->metablock_size >> PAGE_CACHE_SHIFT; | ||
| 1832 | h = (struct btrfs_header *)datav[0]; | ||
| 1629 | 1833 | ||
| 1630 | if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) | 1834 | if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) |
| 1631 | fail++; | 1835 | return 1; |
| 1836 | |||
| 1837 | for (i = 0; i < num_pages; i++) { | ||
| 1838 | u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); | ||
| 1839 | size_t sublen = i ? PAGE_CACHE_SIZE : | ||
| 1840 | (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); | ||
| 1632 | 1841 | ||
| 1633 | crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE); | 1842 | crc = crc32c(crc, data, sublen); |
| 1843 | } | ||
| 1634 | btrfs_csum_final(crc, csum); | 1844 | btrfs_csum_final(crc, csum); |
| 1635 | if (memcmp(csum, h->csum, state->csum_size)) | 1845 | if (memcmp(csum, h->csum, state->csum_size)) |
| 1636 | crc_fail++; | 1846 | return 1; |
| 1637 | 1847 | ||
| 1638 | return fail || crc_fail; | 1848 | return 0; /* is metadata */ |
| 1639 | } | 1849 | } |
| 1640 | 1850 | ||
| 1641 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | 1851 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, |
| 1642 | u64 dev_bytenr, | 1852 | u64 dev_bytenr, char **mapped_datav, |
| 1643 | u8 *mapped_data, unsigned int len, | 1853 | unsigned int num_pages, |
| 1644 | struct bio *bio, | 1854 | struct bio *bio, int *bio_is_patched, |
| 1645 | int *bio_is_patched, | ||
| 1646 | struct buffer_head *bh, | 1855 | struct buffer_head *bh, |
| 1647 | int submit_bio_bh_rw) | 1856 | int submit_bio_bh_rw) |
| 1648 | { | 1857 | { |
| @@ -1652,12 +1861,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1652 | int ret; | 1861 | int ret; |
| 1653 | struct btrfsic_state *state = dev_state->state; | 1862 | struct btrfsic_state *state = dev_state->state; |
| 1654 | struct block_device *bdev = dev_state->bdev; | 1863 | struct block_device *bdev = dev_state->bdev; |
| 1864 | unsigned int processed_len; | ||
| 1655 | 1865 | ||
| 1656 | WARN_ON(len > PAGE_SIZE); | ||
| 1657 | is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len)); | ||
| 1658 | if (NULL != bio_is_patched) | 1866 | if (NULL != bio_is_patched) |
| 1659 | *bio_is_patched = 0; | 1867 | *bio_is_patched = 0; |
| 1660 | 1868 | ||
| 1869 | again: | ||
| 1870 | if (num_pages == 0) | ||
| 1871 | return; | ||
| 1872 | |||
| 1873 | processed_len = 0; | ||
| 1874 | is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, | ||
| 1875 | num_pages)); | ||
| 1876 | |||
| 1661 | block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, | 1877 | block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, |
| 1662 | &state->block_hashtable); | 1878 | &state->block_hashtable); |
| 1663 | if (NULL != block) { | 1879 | if (NULL != block) { |
| @@ -1667,8 +1883,16 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1667 | 1883 | ||
| 1668 | if (block->is_superblock) { | 1884 | if (block->is_superblock) { |
| 1669 | bytenr = le64_to_cpu(((struct btrfs_super_block *) | 1885 | bytenr = le64_to_cpu(((struct btrfs_super_block *) |
| 1670 | mapped_data)->bytenr); | 1886 | mapped_datav[0])->bytenr); |
| 1887 | if (num_pages * PAGE_CACHE_SIZE < | ||
| 1888 | BTRFS_SUPER_INFO_SIZE) { | ||
| 1889 | printk(KERN_INFO | ||
| 1890 | "btrfsic: cannot work with too short bios!\n"); | ||
| 1891 | return; | ||
| 1892 | } | ||
| 1671 | is_metadata = 1; | 1893 | is_metadata = 1; |
| 1894 | BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); | ||
| 1895 | processed_len = BTRFS_SUPER_INFO_SIZE; | ||
| 1672 | if (state->print_mask & | 1896 | if (state->print_mask & |
| 1673 | BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { | 1897 | BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { |
| 1674 | printk(KERN_INFO | 1898 | printk(KERN_INFO |
| @@ -1678,12 +1902,18 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1678 | } | 1902 | } |
| 1679 | if (is_metadata) { | 1903 | if (is_metadata) { |
| 1680 | if (!block->is_superblock) { | 1904 | if (!block->is_superblock) { |
| 1905 | if (num_pages * PAGE_CACHE_SIZE < | ||
| 1906 | state->metablock_size) { | ||
| 1907 | printk(KERN_INFO | ||
| 1908 | "btrfsic: cannot work with too short bios!\n"); | ||
| 1909 | return; | ||
| 1910 | } | ||
| 1911 | processed_len = state->metablock_size; | ||
| 1681 | bytenr = le64_to_cpu(((struct btrfs_header *) | 1912 | bytenr = le64_to_cpu(((struct btrfs_header *) |
| 1682 | mapped_data)->bytenr); | 1913 | mapped_datav[0])->bytenr); |
| 1683 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, | 1914 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, |
| 1684 | dev_state, | 1915 | dev_state, |
| 1685 | dev_bytenr, | 1916 | dev_bytenr); |
| 1686 | mapped_data); | ||
| 1687 | } | 1917 | } |
| 1688 | if (block->logical_bytenr != bytenr) { | 1918 | if (block->logical_bytenr != bytenr) { |
| 1689 | printk(KERN_INFO | 1919 | printk(KERN_INFO |
| @@ -1710,6 +1940,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1710 | block->mirror_num, | 1940 | block->mirror_num, |
| 1711 | btrfsic_get_block_type(state, block)); | 1941 | btrfsic_get_block_type(state, block)); |
| 1712 | } else { | 1942 | } else { |
| 1943 | if (num_pages * PAGE_CACHE_SIZE < | ||
| 1944 | state->datablock_size) { | ||
| 1945 | printk(KERN_INFO | ||
| 1946 | "btrfsic: cannot work with too short bios!\n"); | ||
| 1947 | return; | ||
| 1948 | } | ||
| 1949 | processed_len = state->datablock_size; | ||
| 1713 | bytenr = block->logical_bytenr; | 1950 | bytenr = block->logical_bytenr; |
| 1714 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 1951 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
| 1715 | printk(KERN_INFO | 1952 | printk(KERN_INFO |
| @@ -1747,7 +1984,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1747 | le64_to_cpu(block->disk_key.offset), | 1984 | le64_to_cpu(block->disk_key.offset), |
| 1748 | (unsigned long long) | 1985 | (unsigned long long) |
| 1749 | le64_to_cpu(((struct btrfs_header *) | 1986 | le64_to_cpu(((struct btrfs_header *) |
| 1750 | mapped_data)->generation), | 1987 | mapped_datav[0])->generation), |
| 1751 | (unsigned long long) | 1988 | (unsigned long long) |
| 1752 | state->max_superblock_generation); | 1989 | state->max_superblock_generation); |
| 1753 | btrfsic_dump_tree(state); | 1990 | btrfsic_dump_tree(state); |
| @@ -1765,10 +2002,10 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1765 | (unsigned long long)block->generation, | 2002 | (unsigned long long)block->generation, |
| 1766 | (unsigned long long) | 2003 | (unsigned long long) |
| 1767 | le64_to_cpu(((struct btrfs_header *) | 2004 | le64_to_cpu(((struct btrfs_header *) |
| 1768 | mapped_data)->generation)); | 2005 | mapped_datav[0])->generation)); |
| 1769 | /* it would not be safe to go on */ | 2006 | /* it would not be safe to go on */ |
| 1770 | btrfsic_dump_tree(state); | 2007 | btrfsic_dump_tree(state); |
| 1771 | return; | 2008 | goto continue_loop; |
| 1772 | } | 2009 | } |
| 1773 | 2010 | ||
| 1774 | /* | 2011 | /* |
| @@ -1796,18 +2033,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1796 | } | 2033 | } |
| 1797 | 2034 | ||
| 1798 | if (block->is_superblock) | 2035 | if (block->is_superblock) |
| 1799 | ret = btrfsic_map_superblock(state, bytenr, len, | 2036 | ret = btrfsic_map_superblock(state, bytenr, |
| 2037 | processed_len, | ||
| 1800 | bdev, &block_ctx); | 2038 | bdev, &block_ctx); |
| 1801 | else | 2039 | else |
| 1802 | ret = btrfsic_map_block(state, bytenr, len, | 2040 | ret = btrfsic_map_block(state, bytenr, processed_len, |
| 1803 | &block_ctx, 0); | 2041 | &block_ctx, 0); |
| 1804 | if (ret) { | 2042 | if (ret) { |
| 1805 | printk(KERN_INFO | 2043 | printk(KERN_INFO |
| 1806 | "btrfsic: btrfsic_map_block(root @%llu)" | 2044 | "btrfsic: btrfsic_map_block(root @%llu)" |
| 1807 | " failed!\n", (unsigned long long)bytenr); | 2045 | " failed!\n", (unsigned long long)bytenr); |
| 1808 | return; | 2046 | goto continue_loop; |
| 1809 | } | 2047 | } |
| 1810 | block_ctx.data = mapped_data; | 2048 | block_ctx.datav = mapped_datav; |
| 1811 | /* the following is required in case of writes to mirrors, | 2049 | /* the following is required in case of writes to mirrors, |
| 1812 | * use the same that was used for the lookup */ | 2050 | * use the same that was used for the lookup */ |
| 1813 | block_ctx.dev = dev_state; | 2051 | block_ctx.dev = dev_state; |
| @@ -1863,11 +2101,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1863 | block->logical_bytenr = bytenr; | 2101 | block->logical_bytenr = bytenr; |
| 1864 | block->is_metadata = 1; | 2102 | block->is_metadata = 1; |
| 1865 | if (block->is_superblock) { | 2103 | if (block->is_superblock) { |
| 2104 | BUG_ON(PAGE_CACHE_SIZE != | ||
| 2105 | BTRFS_SUPER_INFO_SIZE); | ||
| 1866 | ret = btrfsic_process_written_superblock( | 2106 | ret = btrfsic_process_written_superblock( |
| 1867 | state, | 2107 | state, |
| 1868 | block, | 2108 | block, |
| 1869 | (struct btrfs_super_block *) | 2109 | (struct btrfs_super_block *) |
| 1870 | mapped_data); | 2110 | mapped_datav[0]); |
| 1871 | if (state->print_mask & | 2111 | if (state->print_mask & |
| 1872 | BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { | 2112 | BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { |
| 1873 | printk(KERN_INFO | 2113 | printk(KERN_INFO |
| @@ -1880,8 +2120,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1880 | state, | 2120 | state, |
| 1881 | block, | 2121 | block, |
| 1882 | &block_ctx, | 2122 | &block_ctx, |
| 1883 | (struct btrfs_header *) | ||
| 1884 | block_ctx.data, | ||
| 1885 | 0, 0); | 2123 | 0, 0); |
| 1886 | } | 2124 | } |
| 1887 | if (ret) | 2125 | if (ret) |
| @@ -1912,26 +2150,30 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1912 | u64 bytenr; | 2150 | u64 bytenr; |
| 1913 | 2151 | ||
| 1914 | if (!is_metadata) { | 2152 | if (!is_metadata) { |
| 2153 | processed_len = state->datablock_size; | ||
| 1915 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 2154 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
| 1916 | printk(KERN_INFO "Written block (%s/%llu/?)" | 2155 | printk(KERN_INFO "Written block (%s/%llu/?)" |
| 1917 | " !found in hash table, D.\n", | 2156 | " !found in hash table, D.\n", |
| 1918 | dev_state->name, | 2157 | dev_state->name, |
| 1919 | (unsigned long long)dev_bytenr); | 2158 | (unsigned long long)dev_bytenr); |
| 1920 | if (!state->include_extent_data) | 2159 | if (!state->include_extent_data) { |
| 1921 | return; /* ignore that written D block */ | 2160 | /* ignore that written D block */ |
| 2161 | goto continue_loop; | ||
| 2162 | } | ||
| 1922 | 2163 | ||
| 1923 | /* this is getting ugly for the | 2164 | /* this is getting ugly for the |
| 1924 | * include_extent_data case... */ | 2165 | * include_extent_data case... */ |
| 1925 | bytenr = 0; /* unknown */ | 2166 | bytenr = 0; /* unknown */ |
| 1926 | block_ctx.start = bytenr; | 2167 | block_ctx.start = bytenr; |
| 1927 | block_ctx.len = len; | 2168 | block_ctx.len = processed_len; |
| 1928 | block_ctx.bh = NULL; | 2169 | block_ctx.mem_to_free = NULL; |
| 2170 | block_ctx.pagev = NULL; | ||
| 1929 | } else { | 2171 | } else { |
| 2172 | processed_len = state->metablock_size; | ||
| 1930 | bytenr = le64_to_cpu(((struct btrfs_header *) | 2173 | bytenr = le64_to_cpu(((struct btrfs_header *) |
| 1931 | mapped_data)->bytenr); | 2174 | mapped_datav[0])->bytenr); |
| 1932 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, | 2175 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, |
| 1933 | dev_bytenr, | 2176 | dev_bytenr); |
| 1934 | mapped_data); | ||
| 1935 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 2177 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
| 1936 | printk(KERN_INFO | 2178 | printk(KERN_INFO |
| 1937 | "Written block @%llu (%s/%llu/?)" | 2179 | "Written block @%llu (%s/%llu/?)" |
| @@ -1940,17 +2182,17 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1940 | dev_state->name, | 2182 | dev_state->name, |
| 1941 | (unsigned long long)dev_bytenr); | 2183 | (unsigned long long)dev_bytenr); |
| 1942 | 2184 | ||
| 1943 | ret = btrfsic_map_block(state, bytenr, len, &block_ctx, | 2185 | ret = btrfsic_map_block(state, bytenr, processed_len, |
| 1944 | 0); | 2186 | &block_ctx, 0); |
| 1945 | if (ret) { | 2187 | if (ret) { |
| 1946 | printk(KERN_INFO | 2188 | printk(KERN_INFO |
| 1947 | "btrfsic: btrfsic_map_block(root @%llu)" | 2189 | "btrfsic: btrfsic_map_block(root @%llu)" |
| 1948 | " failed!\n", | 2190 | " failed!\n", |
| 1949 | (unsigned long long)dev_bytenr); | 2191 | (unsigned long long)dev_bytenr); |
| 1950 | return; | 2192 | goto continue_loop; |
| 1951 | } | 2193 | } |
| 1952 | } | 2194 | } |
| 1953 | block_ctx.data = mapped_data; | 2195 | block_ctx.datav = mapped_datav; |
| 1954 | /* the following is required in case of writes to mirrors, | 2196 | /* the following is required in case of writes to mirrors, |
| 1955 | * use the same that was used for the lookup */ | 2197 | * use the same that was used for the lookup */ |
| 1956 | block_ctx.dev = dev_state; | 2198 | block_ctx.dev = dev_state; |
| @@ -1960,7 +2202,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 1960 | if (NULL == block) { | 2202 | if (NULL == block) { |
| 1961 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | 2203 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); |
| 1962 | btrfsic_release_block_ctx(&block_ctx); | 2204 | btrfsic_release_block_ctx(&block_ctx); |
| 1963 | return; | 2205 | goto continue_loop; |
| 1964 | } | 2206 | } |
| 1965 | block->dev_state = dev_state; | 2207 | block->dev_state = dev_state; |
| 1966 | block->dev_bytenr = dev_bytenr; | 2208 | block->dev_bytenr = dev_bytenr; |
| @@ -2020,9 +2262,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 2020 | 2262 | ||
| 2021 | if (is_metadata) { | 2263 | if (is_metadata) { |
| 2022 | ret = btrfsic_process_metablock(state, block, | 2264 | ret = btrfsic_process_metablock(state, block, |
| 2023 | &block_ctx, | 2265 | &block_ctx, 0, 0); |
| 2024 | (struct btrfs_header *) | ||
| 2025 | block_ctx.data, 0, 0); | ||
| 2026 | if (ret) | 2266 | if (ret) |
| 2027 | printk(KERN_INFO | 2267 | printk(KERN_INFO |
| 2028 | "btrfsic: process_metablock(root @%llu)" | 2268 | "btrfsic: process_metablock(root @%llu)" |
| @@ -2031,6 +2271,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
| 2031 | } | 2271 | } |
| 2032 | btrfsic_release_block_ctx(&block_ctx); | 2272 | btrfsic_release_block_ctx(&block_ctx); |
| 2033 | } | 2273 | } |
| 2274 | |||
| 2275 | continue_loop: | ||
| 2276 | BUG_ON(!processed_len); | ||
| 2277 | dev_bytenr += processed_len; | ||
| 2278 | mapped_datav += processed_len >> PAGE_CACHE_SHIFT; | ||
| 2279 | num_pages -= processed_len >> PAGE_CACHE_SHIFT; | ||
| 2280 | goto again; | ||
| 2034 | } | 2281 | } |
| 2035 | 2282 | ||
| 2036 | static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) | 2283 | static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) |
| @@ -2213,7 +2460,7 @@ static int btrfsic_process_written_superblock( | |||
| 2213 | 2460 | ||
| 2214 | num_copies = | 2461 | num_copies = |
| 2215 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2462 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
| 2216 | next_bytenr, PAGE_SIZE); | 2463 | next_bytenr, BTRFS_SUPER_INFO_SIZE); |
| 2217 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 2464 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 2218 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 2465 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| 2219 | (unsigned long long)next_bytenr, num_copies); | 2466 | (unsigned long long)next_bytenr, num_copies); |
| @@ -2224,7 +2471,8 @@ static int btrfsic_process_written_superblock( | |||
| 2224 | printk(KERN_INFO | 2471 | printk(KERN_INFO |
| 2225 | "btrfsic_process_written_superblock(" | 2472 | "btrfsic_process_written_superblock(" |
| 2226 | "mirror_num=%d)\n", mirror_num); | 2473 | "mirror_num=%d)\n", mirror_num); |
| 2227 | ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 2474 | ret = btrfsic_map_block(state, next_bytenr, |
| 2475 | BTRFS_SUPER_INFO_SIZE, | ||
| 2228 | &tmp_next_block_ctx, | 2476 | &tmp_next_block_ctx, |
| 2229 | mirror_num); | 2477 | mirror_num); |
| 2230 | if (ret) { | 2478 | if (ret) { |
| @@ -2689,7 +2937,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( | |||
| 2689 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | 2937 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, |
| 2690 | u64 bytenr, | 2938 | u64 bytenr, |
| 2691 | struct btrfsic_dev_state *dev_state, | 2939 | struct btrfsic_dev_state *dev_state, |
| 2692 | u64 dev_bytenr, char *data) | 2940 | u64 dev_bytenr) |
| 2693 | { | 2941 | { |
| 2694 | int num_copies; | 2942 | int num_copies; |
| 2695 | int mirror_num; | 2943 | int mirror_num; |
| @@ -2698,10 +2946,10 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | |||
| 2698 | int match = 0; | 2946 | int match = 0; |
| 2699 | 2947 | ||
| 2700 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2948 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, |
| 2701 | bytenr, PAGE_SIZE); | 2949 | bytenr, state->metablock_size); |
| 2702 | 2950 | ||
| 2703 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | 2951 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { |
| 2704 | ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, | 2952 | ret = btrfsic_map_block(state, bytenr, state->metablock_size, |
| 2705 | &block_ctx, mirror_num); | 2953 | &block_ctx, mirror_num); |
| 2706 | if (ret) { | 2954 | if (ret) { |
| 2707 | printk(KERN_INFO "btrfsic:" | 2955 | printk(KERN_INFO "btrfsic:" |
| @@ -2727,7 +2975,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | |||
| 2727 | (unsigned long long)bytenr, dev_state->name, | 2975 | (unsigned long long)bytenr, dev_state->name, |
| 2728 | (unsigned long long)dev_bytenr); | 2976 | (unsigned long long)dev_bytenr); |
| 2729 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | 2977 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { |
| 2730 | ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, | 2978 | ret = btrfsic_map_block(state, bytenr, |
| 2979 | state->metablock_size, | ||
| 2731 | &block_ctx, mirror_num); | 2980 | &block_ctx, mirror_num); |
| 2732 | if (ret) | 2981 | if (ret) |
| 2733 | continue; | 2982 | continue; |
| @@ -2781,13 +3030,13 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) | |||
| 2781 | (unsigned long)bh->b_size, bh->b_data, | 3030 | (unsigned long)bh->b_size, bh->b_data, |
| 2782 | bh->b_bdev); | 3031 | bh->b_bdev); |
| 2783 | btrfsic_process_written_block(dev_state, dev_bytenr, | 3032 | btrfsic_process_written_block(dev_state, dev_bytenr, |
| 2784 | bh->b_data, bh->b_size, NULL, | 3033 | &bh->b_data, 1, NULL, |
| 2785 | NULL, bh, rw); | 3034 | NULL, bh, rw); |
| 2786 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { | 3035 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { |
| 2787 | if (dev_state->state->print_mask & | 3036 | if (dev_state->state->print_mask & |
| 2788 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | 3037 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) |
| 2789 | printk(KERN_INFO | 3038 | printk(KERN_INFO |
| 2790 | "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n", | 3039 | "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", |
| 2791 | rw, bh->b_bdev); | 3040 | rw, bh->b_bdev); |
| 2792 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { | 3041 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { |
| 2793 | if ((dev_state->state->print_mask & | 3042 | if ((dev_state->state->print_mask & |
| @@ -2836,6 +3085,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 2836 | unsigned int i; | 3085 | unsigned int i; |
| 2837 | u64 dev_bytenr; | 3086 | u64 dev_bytenr; |
| 2838 | int bio_is_patched; | 3087 | int bio_is_patched; |
| 3088 | char **mapped_datav; | ||
| 2839 | 3089 | ||
| 2840 | dev_bytenr = 512 * bio->bi_sector; | 3090 | dev_bytenr = 512 * bio->bi_sector; |
| 2841 | bio_is_patched = 0; | 3091 | bio_is_patched = 0; |
| @@ -2848,35 +3098,46 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 2848 | (unsigned long long)dev_bytenr, | 3098 | (unsigned long long)dev_bytenr, |
| 2849 | bio->bi_bdev); | 3099 | bio->bi_bdev); |
| 2850 | 3100 | ||
| 3101 | mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, | ||
| 3102 | GFP_NOFS); | ||
| 3103 | if (!mapped_datav) | ||
| 3104 | goto leave; | ||
| 2851 | for (i = 0; i < bio->bi_vcnt; i++) { | 3105 | for (i = 0; i < bio->bi_vcnt; i++) { |
| 2852 | u8 *mapped_data; | 3106 | BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); |
| 2853 | 3107 | mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); | |
| 2854 | mapped_data = kmap(bio->bi_io_vec[i].bv_page); | 3108 | if (!mapped_datav[i]) { |
| 3109 | while (i > 0) { | ||
| 3110 | i--; | ||
| 3111 | kunmap(bio->bi_io_vec[i].bv_page); | ||
| 3112 | } | ||
| 3113 | kfree(mapped_datav); | ||
| 3114 | goto leave; | ||
| 3115 | } | ||
| 2855 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | 3116 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | |
| 2856 | BTRFSIC_PRINT_MASK_VERBOSE) == | 3117 | BTRFSIC_PRINT_MASK_VERBOSE) == |
| 2857 | (dev_state->state->print_mask & | 3118 | (dev_state->state->print_mask & |
| 2858 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | 3119 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | |
| 2859 | BTRFSIC_PRINT_MASK_VERBOSE))) | 3120 | BTRFSIC_PRINT_MASK_VERBOSE))) |
| 2860 | printk(KERN_INFO | 3121 | printk(KERN_INFO |
| 2861 | "#%u: page=%p, mapped=%p, len=%u," | 3122 | "#%u: page=%p, len=%u, offset=%u\n", |
| 2862 | " offset=%u\n", | ||
| 2863 | i, bio->bi_io_vec[i].bv_page, | 3123 | i, bio->bi_io_vec[i].bv_page, |
| 2864 | mapped_data, | ||
| 2865 | bio->bi_io_vec[i].bv_len, | 3124 | bio->bi_io_vec[i].bv_len, |
| 2866 | bio->bi_io_vec[i].bv_offset); | 3125 | bio->bi_io_vec[i].bv_offset); |
| 2867 | btrfsic_process_written_block(dev_state, dev_bytenr, | 3126 | } |
| 2868 | mapped_data, | 3127 | btrfsic_process_written_block(dev_state, dev_bytenr, |
| 2869 | bio->bi_io_vec[i].bv_len, | 3128 | mapped_datav, bio->bi_vcnt, |
| 2870 | bio, &bio_is_patched, | 3129 | bio, &bio_is_patched, |
| 2871 | NULL, rw); | 3130 | NULL, rw); |
| 3131 | while (i > 0) { | ||
| 3132 | i--; | ||
| 2872 | kunmap(bio->bi_io_vec[i].bv_page); | 3133 | kunmap(bio->bi_io_vec[i].bv_page); |
| 2873 | dev_bytenr += bio->bi_io_vec[i].bv_len; | ||
| 2874 | } | 3134 | } |
| 3135 | kfree(mapped_datav); | ||
| 2875 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { | 3136 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { |
| 2876 | if (dev_state->state->print_mask & | 3137 | if (dev_state->state->print_mask & |
| 2877 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | 3138 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) |
| 2878 | printk(KERN_INFO | 3139 | printk(KERN_INFO |
| 2879 | "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n", | 3140 | "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", |
| 2880 | rw, bio->bi_bdev); | 3141 | rw, bio->bi_bdev); |
| 2881 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { | 3142 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { |
| 2882 | if ((dev_state->state->print_mask & | 3143 | if ((dev_state->state->print_mask & |
| @@ -2903,6 +3164,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 2903 | bio->bi_end_io = btrfsic_bio_end_io; | 3164 | bio->bi_end_io = btrfsic_bio_end_io; |
| 2904 | } | 3165 | } |
| 2905 | } | 3166 | } |
| 3167 | leave: | ||
| 2906 | mutex_unlock(&btrfsic_mutex); | 3168 | mutex_unlock(&btrfsic_mutex); |
| 2907 | 3169 | ||
| 2908 | submit_bio(rw, bio); | 3170 | submit_bio(rw, bio); |
| @@ -2917,6 +3179,30 @@ int btrfsic_mount(struct btrfs_root *root, | |||
| 2917 | struct list_head *dev_head = &fs_devices->devices; | 3179 | struct list_head *dev_head = &fs_devices->devices; |
| 2918 | struct btrfs_device *device; | 3180 | struct btrfs_device *device; |
| 2919 | 3181 | ||
| 3182 | if (root->nodesize != root->leafsize) { | ||
| 3183 | printk(KERN_INFO | ||
| 3184 | "btrfsic: cannot handle nodesize %d != leafsize %d!\n", | ||
| 3185 | root->nodesize, root->leafsize); | ||
| 3186 | return -1; | ||
| 3187 | } | ||
| 3188 | if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
| 3189 | printk(KERN_INFO | ||
| 3190 | "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
| 3191 | root->nodesize, (unsigned long)PAGE_CACHE_SIZE); | ||
| 3192 | return -1; | ||
| 3193 | } | ||
| 3194 | if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
| 3195 | printk(KERN_INFO | ||
| 3196 | "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
| 3197 | root->leafsize, (unsigned long)PAGE_CACHE_SIZE); | ||
| 3198 | return -1; | ||
| 3199 | } | ||
| 3200 | if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
| 3201 | printk(KERN_INFO | ||
| 3202 | "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
| 3203 | root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); | ||
| 3204 | return -1; | ||
| 3205 | } | ||
| 2920 | state = kzalloc(sizeof(*state), GFP_NOFS); | 3206 | state = kzalloc(sizeof(*state), GFP_NOFS); |
| 2921 | if (NULL == state) { | 3207 | if (NULL == state) { |
| 2922 | printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); | 3208 | printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); |
| @@ -2933,6 +3219,8 @@ int btrfsic_mount(struct btrfs_root *root, | |||
| 2933 | state->print_mask = print_mask; | 3219 | state->print_mask = print_mask; |
| 2934 | state->include_extent_data = including_extent_data; | 3220 | state->include_extent_data = including_extent_data; |
| 2935 | state->csum_size = 0; | 3221 | state->csum_size = 0; |
| 3222 | state->metablock_size = root->nodesize; | ||
| 3223 | state->datablock_size = root->sectorsize; | ||
| 2936 | INIT_LIST_HEAD(&state->all_blocks_list); | 3224 | INIT_LIST_HEAD(&state->all_blocks_list); |
| 2937 | btrfsic_block_hashtable_init(&state->block_hashtable); | 3225 | btrfsic_block_hashtable_init(&state->block_hashtable); |
| 2938 | btrfsic_block_link_hashtable_init(&state->block_link_hashtable); | 3226 | btrfsic_block_link_hashtable_init(&state->block_link_hashtable); |
| @@ -3049,7 +3337,7 @@ void btrfsic_unmount(struct btrfs_root *root, | |||
| 3049 | btrfsic_block_link_free(l); | 3337 | btrfsic_block_link_free(l); |
| 3050 | } | 3338 | } |
| 3051 | 3339 | ||
| 3052 | if (b_all->is_iodone) | 3340 | if (b_all->is_iodone || b_all->never_written) |
| 3053 | btrfsic_block_free(b_all); | 3341 | btrfsic_block_free(b_all); |
| 3054 | else | 3342 | else |
| 3055 | printk(KERN_INFO "btrfs: attempt to free %c-block" | 3343 | printk(KERN_INFO "btrfs: attempt to free %c-block" |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4106264fbc65..d7a96cfdc50a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
| 21 | #include <linux/rbtree.h> | ||
| 21 | #include "ctree.h" | 22 | #include "ctree.h" |
| 22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
| 23 | #include "transaction.h" | 24 | #include "transaction.h" |
| @@ -37,7 +38,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
| 37 | struct extent_buffer *dst_buf, | 38 | struct extent_buffer *dst_buf, |
| 38 | struct extent_buffer *src_buf); | 39 | struct extent_buffer *src_buf); |
| 39 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 40 | struct btrfs_path *path, int level, int slot); | 41 | struct btrfs_path *path, int level, int slot, |
| 42 | int tree_mod_log); | ||
| 43 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
| 44 | struct extent_buffer *eb); | ||
| 45 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, | ||
| 46 | u32 blocksize, u64 parent_transid, | ||
| 47 | u64 time_seq); | ||
| 48 | struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root, | ||
| 49 | u64 bytenr, u32 blocksize, | ||
| 50 | u64 time_seq); | ||
| 41 | 51 | ||
| 42 | struct btrfs_path *btrfs_alloc_path(void) | 52 | struct btrfs_path *btrfs_alloc_path(void) |
| 43 | { | 53 | { |
| @@ -255,7 +265,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
| 255 | 265 | ||
| 256 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, | 266 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, |
| 257 | new_root_objectid, &disk_key, level, | 267 | new_root_objectid, &disk_key, level, |
| 258 | buf->start, 0, 1); | 268 | buf->start, 0); |
| 259 | if (IS_ERR(cow)) | 269 | if (IS_ERR(cow)) |
| 260 | return PTR_ERR(cow); | 270 | return PTR_ERR(cow); |
| 261 | 271 | ||
| @@ -288,6 +298,434 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
| 288 | return 0; | 298 | return 0; |
| 289 | } | 299 | } |
| 290 | 300 | ||
| 301 | enum mod_log_op { | ||
| 302 | MOD_LOG_KEY_REPLACE, | ||
| 303 | MOD_LOG_KEY_ADD, | ||
| 304 | MOD_LOG_KEY_REMOVE, | ||
| 305 | MOD_LOG_KEY_REMOVE_WHILE_FREEING, | ||
| 306 | MOD_LOG_KEY_REMOVE_WHILE_MOVING, | ||
| 307 | MOD_LOG_MOVE_KEYS, | ||
| 308 | MOD_LOG_ROOT_REPLACE, | ||
| 309 | }; | ||
| 310 | |||
| 311 | struct tree_mod_move { | ||
| 312 | int dst_slot; | ||
| 313 | int nr_items; | ||
| 314 | }; | ||
| 315 | |||
| 316 | struct tree_mod_root { | ||
| 317 | u64 logical; | ||
| 318 | u8 level; | ||
| 319 | }; | ||
| 320 | |||
| 321 | struct tree_mod_elem { | ||
| 322 | struct rb_node node; | ||
| 323 | u64 index; /* shifted logical */ | ||
| 324 | struct seq_list elem; | ||
| 325 | enum mod_log_op op; | ||
| 326 | |||
| 327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ | ||
| 328 | int slot; | ||
| 329 | |||
| 330 | /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ | ||
| 331 | u64 generation; | ||
| 332 | |||
| 333 | /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ | ||
| 334 | struct btrfs_disk_key key; | ||
| 335 | u64 blockptr; | ||
| 336 | |||
| 337 | /* this is used for op == MOD_LOG_MOVE_KEYS */ | ||
| 338 | struct tree_mod_move move; | ||
| 339 | |||
| 340 | /* this is used for op == MOD_LOG_ROOT_REPLACE */ | ||
| 341 | struct tree_mod_root old_root; | ||
| 342 | }; | ||
| 343 | |||
| 344 | static inline void | ||
| 345 | __get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) | ||
| 346 | { | ||
| 347 | elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); | ||
| 348 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
| 349 | } | ||
| 350 | |||
| 351 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
| 352 | struct seq_list *elem) | ||
| 353 | { | ||
| 354 | elem->flags = 1; | ||
| 355 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
| 356 | __get_tree_mod_seq(fs_info, elem); | ||
| 357 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
| 358 | } | ||
| 359 | |||
| 360 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
| 361 | struct seq_list *elem) | ||
| 362 | { | ||
| 363 | struct rb_root *tm_root; | ||
| 364 | struct rb_node *node; | ||
| 365 | struct rb_node *next; | ||
| 366 | struct seq_list *cur_elem; | ||
| 367 | struct tree_mod_elem *tm; | ||
| 368 | u64 min_seq = (u64)-1; | ||
| 369 | u64 seq_putting = elem->seq; | ||
| 370 | |||
| 371 | if (!seq_putting) | ||
| 372 | return; | ||
| 373 | |||
| 374 | BUG_ON(!(elem->flags & 1)); | ||
| 375 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
| 376 | list_del(&elem->list); | ||
| 377 | |||
| 378 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { | ||
| 379 | if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { | ||
| 380 | if (seq_putting > cur_elem->seq) { | ||
| 381 | /* | ||
| 382 | * blocker with lower sequence number exists, we | ||
| 383 | * cannot remove anything from the log | ||
| 384 | */ | ||
| 385 | goto out; | ||
| 386 | } | ||
| 387 | min_seq = cur_elem->seq; | ||
| 388 | } | ||
| 389 | } | ||
| 390 | |||
| 391 | /* | ||
| 392 | * anything that's lower than the lowest existing (read: blocked) | ||
| 393 | * sequence number can be removed from the tree. | ||
| 394 | */ | ||
| 395 | write_lock(&fs_info->tree_mod_log_lock); | ||
| 396 | tm_root = &fs_info->tree_mod_log; | ||
| 397 | for (node = rb_first(tm_root); node; node = next) { | ||
| 398 | next = rb_next(node); | ||
| 399 | tm = container_of(node, struct tree_mod_elem, node); | ||
| 400 | if (tm->elem.seq > min_seq) | ||
| 401 | continue; | ||
| 402 | rb_erase(node, tm_root); | ||
| 403 | list_del(&tm->elem.list); | ||
| 404 | kfree(tm); | ||
| 405 | } | ||
| 406 | write_unlock(&fs_info->tree_mod_log_lock); | ||
| 407 | out: | ||
| 408 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
| 409 | } | ||
| 410 | |||
| 411 | /* | ||
| 412 | * key order of the log: | ||
| 413 | * index -> sequence | ||
| 414 | * | ||
| 415 | * the index is the shifted logical of the *new* root node for root replace | ||
| 416 | * operations, or the shifted logical of the affected block for all other | ||
| 417 | * operations. | ||
| 418 | */ | ||
| 419 | static noinline int | ||
| 420 | __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | ||
| 421 | { | ||
| 422 | struct rb_root *tm_root; | ||
| 423 | struct rb_node **new; | ||
| 424 | struct rb_node *parent = NULL; | ||
| 425 | struct tree_mod_elem *cur; | ||
| 426 | int ret = 0; | ||
| 427 | |||
| 428 | BUG_ON(!tm || !tm->elem.seq); | ||
| 429 | |||
| 430 | write_lock(&fs_info->tree_mod_log_lock); | ||
| 431 | tm_root = &fs_info->tree_mod_log; | ||
| 432 | new = &tm_root->rb_node; | ||
| 433 | while (*new) { | ||
| 434 | cur = container_of(*new, struct tree_mod_elem, node); | ||
| 435 | parent = *new; | ||
| 436 | if (cur->index < tm->index) | ||
| 437 | new = &((*new)->rb_left); | ||
| 438 | else if (cur->index > tm->index) | ||
| 439 | new = &((*new)->rb_right); | ||
| 440 | else if (cur->elem.seq < tm->elem.seq) | ||
| 441 | new = &((*new)->rb_left); | ||
| 442 | else if (cur->elem.seq > tm->elem.seq) | ||
| 443 | new = &((*new)->rb_right); | ||
| 444 | else { | ||
| 445 | kfree(tm); | ||
| 446 | ret = -EEXIST; | ||
| 447 | goto unlock; | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | rb_link_node(&tm->node, parent, new); | ||
| 452 | rb_insert_color(&tm->node, tm_root); | ||
| 453 | unlock: | ||
| 454 | write_unlock(&fs_info->tree_mod_log_lock); | ||
| 455 | return ret; | ||
| 456 | } | ||
| 457 | |||
| 458 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, | ||
| 459 | struct extent_buffer *eb) { | ||
| 460 | smp_mb(); | ||
| 461 | if (list_empty(&(fs_info)->tree_mod_seq_list)) | ||
| 462 | return 1; | ||
| 463 | if (!eb) | ||
| 464 | return 0; | ||
| 465 | if (btrfs_header_level(eb) == 0) | ||
| 466 | return 1; | ||
| 467 | return 0; | ||
| 468 | } | ||
| 469 | |||
| 470 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | ||
| 471 | struct tree_mod_elem **tm_ret) | ||
| 472 | { | ||
| 473 | struct tree_mod_elem *tm; | ||
| 474 | int seq; | ||
| 475 | |||
| 476 | if (tree_mod_dont_log(fs_info, NULL)) | ||
| 477 | return 0; | ||
| 478 | |||
| 479 | tm = *tm_ret = kzalloc(sizeof(*tm), flags); | ||
| 480 | if (!tm) | ||
| 481 | return -ENOMEM; | ||
| 482 | |||
| 483 | tm->elem.flags = 0; | ||
| 484 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
| 485 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
| 486 | /* | ||
| 487 | * someone emptied the list while we were waiting for the lock. | ||
| 488 | * we must not add to the list, because no blocker exists. items | ||
| 489 | * are removed from the list only when the existing blocker is | ||
| 490 | * removed from the list. | ||
| 491 | */ | ||
| 492 | kfree(tm); | ||
| 493 | seq = 0; | ||
| 494 | } else { | ||
| 495 | __get_tree_mod_seq(fs_info, &tm->elem); | ||
| 496 | seq = tm->elem.seq; | ||
| 497 | } | ||
| 498 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
| 499 | |||
| 500 | return seq; | ||
| 501 | } | ||
| 502 | |||
| 503 | static noinline int | ||
| 504 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | ||
| 505 | struct extent_buffer *eb, int slot, | ||
| 506 | enum mod_log_op op, gfp_t flags) | ||
| 507 | { | ||
| 508 | struct tree_mod_elem *tm; | ||
| 509 | int ret; | ||
| 510 | |||
| 511 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
| 512 | if (ret <= 0) | ||
| 513 | return ret; | ||
| 514 | |||
| 515 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
| 516 | if (op != MOD_LOG_KEY_ADD) { | ||
| 517 | btrfs_node_key(eb, &tm->key, slot); | ||
| 518 | tm->blockptr = btrfs_node_blockptr(eb, slot); | ||
| 519 | } | ||
| 520 | tm->op = op; | ||
| 521 | tm->slot = slot; | ||
| 522 | tm->generation = btrfs_node_ptr_generation(eb, slot); | ||
| 523 | |||
| 524 | return __tree_mod_log_insert(fs_info, tm); | ||
| 525 | } | ||
| 526 | |||
| 527 | static noinline int | ||
| 528 | tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
| 529 | int slot, enum mod_log_op op) | ||
| 530 | { | ||
| 531 | return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); | ||
| 532 | } | ||
| 533 | |||
| 534 | static noinline int | ||
| 535 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | ||
| 536 | struct extent_buffer *eb, int dst_slot, int src_slot, | ||
| 537 | int nr_items, gfp_t flags) | ||
| 538 | { | ||
| 539 | struct tree_mod_elem *tm; | ||
| 540 | int ret; | ||
| 541 | int i; | ||
| 542 | |||
| 543 | if (tree_mod_dont_log(fs_info, eb)) | ||
| 544 | return 0; | ||
| 545 | |||
| 546 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | ||
| 547 | ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, | ||
| 548 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | ||
| 549 | BUG_ON(ret < 0); | ||
| 550 | } | ||
| 551 | |||
| 552 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
| 553 | if (ret <= 0) | ||
| 554 | return ret; | ||
| 555 | |||
| 556 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
| 557 | tm->slot = src_slot; | ||
| 558 | tm->move.dst_slot = dst_slot; | ||
| 559 | tm->move.nr_items = nr_items; | ||
| 560 | tm->op = MOD_LOG_MOVE_KEYS; | ||
| 561 | |||
| 562 | return __tree_mod_log_insert(fs_info, tm); | ||
| 563 | } | ||
| 564 | |||
| 565 | static noinline int | ||
| 566 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | ||
| 567 | struct extent_buffer *old_root, | ||
| 568 | struct extent_buffer *new_root, gfp_t flags) | ||
| 569 | { | ||
| 570 | struct tree_mod_elem *tm; | ||
| 571 | int ret; | ||
| 572 | |||
| 573 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
| 574 | if (ret <= 0) | ||
| 575 | return ret; | ||
| 576 | |||
| 577 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | ||
| 578 | tm->old_root.logical = old_root->start; | ||
| 579 | tm->old_root.level = btrfs_header_level(old_root); | ||
| 580 | tm->generation = btrfs_header_generation(old_root); | ||
| 581 | tm->op = MOD_LOG_ROOT_REPLACE; | ||
| 582 | |||
| 583 | return __tree_mod_log_insert(fs_info, tm); | ||
| 584 | } | ||
| 585 | |||
| 586 | static struct tree_mod_elem * | ||
| 587 | __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | ||
| 588 | int smallest) | ||
| 589 | { | ||
| 590 | struct rb_root *tm_root; | ||
| 591 | struct rb_node *node; | ||
| 592 | struct tree_mod_elem *cur = NULL; | ||
| 593 | struct tree_mod_elem *found = NULL; | ||
| 594 | u64 index = start >> PAGE_CACHE_SHIFT; | ||
| 595 | |||
| 596 | read_lock(&fs_info->tree_mod_log_lock); | ||
| 597 | tm_root = &fs_info->tree_mod_log; | ||
| 598 | node = tm_root->rb_node; | ||
| 599 | while (node) { | ||
| 600 | cur = container_of(node, struct tree_mod_elem, node); | ||
| 601 | if (cur->index < index) { | ||
| 602 | node = node->rb_left; | ||
| 603 | } else if (cur->index > index) { | ||
| 604 | node = node->rb_right; | ||
| 605 | } else if (cur->elem.seq < min_seq) { | ||
| 606 | node = node->rb_left; | ||
| 607 | } else if (!smallest) { | ||
| 608 | /* we want the node with the highest seq */ | ||
| 609 | if (found) | ||
| 610 | BUG_ON(found->elem.seq > cur->elem.seq); | ||
| 611 | found = cur; | ||
| 612 | node = node->rb_left; | ||
| 613 | } else if (cur->elem.seq > min_seq) { | ||
| 614 | /* we want the node with the smallest seq */ | ||
| 615 | if (found) | ||
| 616 | BUG_ON(found->elem.seq < cur->elem.seq); | ||
| 617 | found = cur; | ||
| 618 | node = node->rb_right; | ||
| 619 | } else { | ||
| 620 | found = cur; | ||
| 621 | break; | ||
| 622 | } | ||
| 623 | } | ||
| 624 | read_unlock(&fs_info->tree_mod_log_lock); | ||
| 625 | |||
| 626 | return found; | ||
| 627 | } | ||
| 628 | |||
| 629 | /* | ||
| 630 | * this returns the element from the log with the smallest time sequence | ||
| 631 | * value that's in the log (the oldest log item). any element with a time | ||
| 632 | * sequence lower than min_seq will be ignored. | ||
| 633 | */ | ||
| 634 | static struct tree_mod_elem * | ||
| 635 | tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, | ||
| 636 | u64 min_seq) | ||
| 637 | { | ||
| 638 | return __tree_mod_log_search(fs_info, start, min_seq, 1); | ||
| 639 | } | ||
| 640 | |||
| 641 | /* | ||
| 642 | * this returns the element from the log with the largest time sequence | ||
| 643 | * value that's in the log (the most recent log item). any element with | ||
| 644 | * a time sequence lower than min_seq will be ignored. | ||
| 645 | */ | ||
| 646 | static struct tree_mod_elem * | ||
| 647 | tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | ||
| 648 | { | ||
| 649 | return __tree_mod_log_search(fs_info, start, min_seq, 0); | ||
| 650 | } | ||
| 651 | |||
| 652 | static inline void | ||
| 653 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
| 654 | struct extent_buffer *src, unsigned long dst_offset, | ||
| 655 | unsigned long src_offset, int nr_items) | ||
| 656 | { | ||
| 657 | int ret; | ||
| 658 | int i; | ||
| 659 | |||
| 660 | if (tree_mod_dont_log(fs_info, NULL)) | ||
| 661 | return; | ||
| 662 | |||
| 663 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) | ||
| 664 | return; | ||
| 665 | |||
| 666 | /* speed this up by single seq for all operations? */ | ||
| 667 | for (i = 0; i < nr_items; i++) { | ||
| 668 | ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, | ||
| 669 | MOD_LOG_KEY_REMOVE); | ||
| 670 | BUG_ON(ret < 0); | ||
| 671 | ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, | ||
| 672 | MOD_LOG_KEY_ADD); | ||
| 673 | BUG_ON(ret < 0); | ||
| 674 | } | ||
| 675 | } | ||
| 676 | |||
| 677 | static inline void | ||
| 678 | tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
| 679 | int dst_offset, int src_offset, int nr_items) | ||
| 680 | { | ||
| 681 | int ret; | ||
| 682 | ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, | ||
| 683 | nr_items, GFP_NOFS); | ||
| 684 | BUG_ON(ret < 0); | ||
| 685 | } | ||
| 686 | |||
| 687 | static inline void | ||
| 688 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | ||
| 689 | struct extent_buffer *eb, | ||
| 690 | struct btrfs_disk_key *disk_key, int slot, int atomic) | ||
| 691 | { | ||
| 692 | int ret; | ||
| 693 | |||
| 694 | ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, | ||
| 695 | MOD_LOG_KEY_REPLACE, | ||
| 696 | atomic ? GFP_ATOMIC : GFP_NOFS); | ||
| 697 | BUG_ON(ret < 0); | ||
| 698 | } | ||
| 699 | |||
| 700 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
| 701 | struct extent_buffer *eb) | ||
| 702 | { | ||
| 703 | int i; | ||
| 704 | int ret; | ||
| 705 | u32 nritems; | ||
| 706 | |||
| 707 | if (tree_mod_dont_log(fs_info, eb)) | ||
| 708 | return; | ||
| 709 | |||
| 710 | nritems = btrfs_header_nritems(eb); | ||
| 711 | for (i = nritems - 1; i >= 0; i--) { | ||
| 712 | ret = tree_mod_log_insert_key(fs_info, eb, i, | ||
| 713 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
| 714 | BUG_ON(ret < 0); | ||
| 715 | } | ||
| 716 | } | ||
| 717 | |||
| 718 | static inline void | ||
| 719 | tree_mod_log_set_root_pointer(struct btrfs_root *root, | ||
| 720 | struct extent_buffer *new_root_node) | ||
| 721 | { | ||
| 722 | int ret; | ||
| 723 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
| 724 | ret = tree_mod_log_insert_root(root->fs_info, root->node, | ||
| 725 | new_root_node, GFP_NOFS); | ||
| 726 | BUG_ON(ret < 0); | ||
| 727 | } | ||
| 728 | |||
| 291 | /* | 729 | /* |
| 292 | * check if the tree block can be shared by multiple trees | 730 | * check if the tree block can be shared by multiple trees |
| 293 | */ | 731 | */ |
| @@ -409,6 +847,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
| 409 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); | 847 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); |
| 410 | BUG_ON(ret); /* -ENOMEM */ | 848 | BUG_ON(ret); /* -ENOMEM */ |
| 411 | } | 849 | } |
| 850 | /* | ||
| 851 | * don't log freeing in case we're freeing the root node, this | ||
| 852 | * is done by tree_mod_log_set_root_pointer later | ||
| 853 | */ | ||
| 854 | if (buf != root->node && btrfs_header_level(buf) != 0) | ||
| 855 | tree_mod_log_free_eb(root->fs_info, buf); | ||
| 412 | clean_tree_block(trans, root, buf); | 856 | clean_tree_block(trans, root, buf); |
| 413 | *last_ref = 1; | 857 | *last_ref = 1; |
| 414 | } | 858 | } |
| @@ -467,7 +911,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 467 | 911 | ||
| 468 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | 912 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, |
| 469 | root->root_key.objectid, &disk_key, | 913 | root->root_key.objectid, &disk_key, |
| 470 | level, search_start, empty_size, 1); | 914 | level, search_start, empty_size); |
| 471 | if (IS_ERR(cow)) | 915 | if (IS_ERR(cow)) |
| 472 | return PTR_ERR(cow); | 916 | return PTR_ERR(cow); |
| 473 | 917 | ||
| @@ -506,10 +950,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 506 | parent_start = 0; | 950 | parent_start = 0; |
| 507 | 951 | ||
| 508 | extent_buffer_get(cow); | 952 | extent_buffer_get(cow); |
| 953 | tree_mod_log_set_root_pointer(root, cow); | ||
| 509 | rcu_assign_pointer(root->node, cow); | 954 | rcu_assign_pointer(root->node, cow); |
| 510 | 955 | ||
| 511 | btrfs_free_tree_block(trans, root, buf, parent_start, | 956 | btrfs_free_tree_block(trans, root, buf, parent_start, |
| 512 | last_ref, 1); | 957 | last_ref); |
| 513 | free_extent_buffer(buf); | 958 | free_extent_buffer(buf); |
| 514 | add_root_to_dirty_list(root); | 959 | add_root_to_dirty_list(root); |
| 515 | } else { | 960 | } else { |
| @@ -519,13 +964,15 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 519 | parent_start = 0; | 964 | parent_start = 0; |
| 520 | 965 | ||
| 521 | WARN_ON(trans->transid != btrfs_header_generation(parent)); | 966 | WARN_ON(trans->transid != btrfs_header_generation(parent)); |
| 967 | tree_mod_log_insert_key(root->fs_info, parent, parent_slot, | ||
| 968 | MOD_LOG_KEY_REPLACE); | ||
| 522 | btrfs_set_node_blockptr(parent, parent_slot, | 969 | btrfs_set_node_blockptr(parent, parent_slot, |
| 523 | cow->start); | 970 | cow->start); |
| 524 | btrfs_set_node_ptr_generation(parent, parent_slot, | 971 | btrfs_set_node_ptr_generation(parent, parent_slot, |
| 525 | trans->transid); | 972 | trans->transid); |
| 526 | btrfs_mark_buffer_dirty(parent); | 973 | btrfs_mark_buffer_dirty(parent); |
| 527 | btrfs_free_tree_block(trans, root, buf, parent_start, | 974 | btrfs_free_tree_block(trans, root, buf, parent_start, |
| 528 | last_ref, 1); | 975 | last_ref); |
| 529 | } | 976 | } |
| 530 | if (unlock_orig) | 977 | if (unlock_orig) |
| 531 | btrfs_tree_unlock(buf); | 978 | btrfs_tree_unlock(buf); |
| @@ -535,6 +982,210 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 535 | return 0; | 982 | return 0; |
| 536 | } | 983 | } |
| 537 | 984 | ||
| 985 | /* | ||
| 986 | * returns the logical address of the oldest predecessor of the given root. | ||
| 987 | * entries older than time_seq are ignored. | ||
| 988 | */ | ||
| 989 | static struct tree_mod_elem * | ||
| 990 | __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | ||
| 991 | struct btrfs_root *root, u64 time_seq) | ||
| 992 | { | ||
| 993 | struct tree_mod_elem *tm; | ||
| 994 | struct tree_mod_elem *found = NULL; | ||
| 995 | u64 root_logical = root->node->start; | ||
| 996 | int looped = 0; | ||
| 997 | |||
| 998 | if (!time_seq) | ||
| 999 | return 0; | ||
| 1000 | |||
| 1001 | /* | ||
| 1002 | * the very last operation that's logged for a root is the replacement | ||
| 1003 | * operation (if it is replaced at all). this has the index of the *new* | ||
| 1004 | * root, making it the very first operation that's logged for this root. | ||
| 1005 | */ | ||
| 1006 | while (1) { | ||
| 1007 | tm = tree_mod_log_search_oldest(fs_info, root_logical, | ||
| 1008 | time_seq); | ||
| 1009 | if (!looped && !tm) | ||
| 1010 | return 0; | ||
| 1011 | /* | ||
| 1012 | * we must have key remove operations in the log before the | ||
| 1013 | * replace operation. | ||
| 1014 | */ | ||
| 1015 | BUG_ON(!tm); | ||
| 1016 | |||
| 1017 | if (tm->op != MOD_LOG_ROOT_REPLACE) | ||
| 1018 | break; | ||
| 1019 | |||
| 1020 | found = tm; | ||
| 1021 | root_logical = tm->old_root.logical; | ||
| 1022 | BUG_ON(root_logical == root->node->start); | ||
| 1023 | looped = 1; | ||
| 1024 | } | ||
| 1025 | |||
| 1026 | return found; | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | /* | ||
| 1030 | * tm is a pointer to the first operation to rewind within eb. then, all | ||
| 1031 | * previous operations will be rewinded (until we reach something older than | ||
| 1032 | * time_seq). | ||
| 1033 | */ | ||
| 1034 | static void | ||
| 1035 | __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | ||
| 1036 | struct tree_mod_elem *first_tm) | ||
| 1037 | { | ||
| 1038 | u32 n; | ||
| 1039 | struct rb_node *next; | ||
| 1040 | struct tree_mod_elem *tm = first_tm; | ||
| 1041 | unsigned long o_dst; | ||
| 1042 | unsigned long o_src; | ||
| 1043 | unsigned long p_size = sizeof(struct btrfs_key_ptr); | ||
| 1044 | |||
| 1045 | n = btrfs_header_nritems(eb); | ||
| 1046 | while (tm && tm->elem.seq >= time_seq) { | ||
| 1047 | /* | ||
| 1048 | * all the operations are recorded with the operator used for | ||
| 1049 | * the modification. as we're going backwards, we do the | ||
| 1050 | * opposite of each operation here. | ||
| 1051 | */ | ||
| 1052 | switch (tm->op) { | ||
| 1053 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: | ||
| 1054 | BUG_ON(tm->slot < n); | ||
| 1055 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: | ||
| 1056 | case MOD_LOG_KEY_REMOVE: | ||
| 1057 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
| 1058 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
| 1059 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
| 1060 | tm->generation); | ||
| 1061 | n++; | ||
| 1062 | break; | ||
| 1063 | case MOD_LOG_KEY_REPLACE: | ||
| 1064 | BUG_ON(tm->slot >= n); | ||
| 1065 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
| 1066 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
| 1067 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
| 1068 | tm->generation); | ||
| 1069 | break; | ||
| 1070 | case MOD_LOG_KEY_ADD: | ||
| 1071 | if (tm->slot != n - 1) { | ||
| 1072 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
| 1073 | o_src = btrfs_node_key_ptr_offset(tm->slot + 1); | ||
| 1074 | memmove_extent_buffer(eb, o_dst, o_src, p_size); | ||
| 1075 | } | ||
| 1076 | n--; | ||
| 1077 | break; | ||
| 1078 | case MOD_LOG_MOVE_KEYS: | ||
| 1079 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
| 1080 | o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); | ||
| 1081 | memmove_extent_buffer(eb, o_dst, o_src, | ||
| 1082 | tm->move.nr_items * p_size); | ||
| 1083 | break; | ||
| 1084 | case MOD_LOG_ROOT_REPLACE: | ||
| 1085 | /* | ||
| 1086 | * this operation is special. for roots, this must be | ||
| 1087 | * handled explicitly before rewinding. | ||
| 1088 | * for non-roots, this operation may exist if the node | ||
| 1089 | * was a root: root A -> child B; then A gets empty and | ||
| 1090 | * B is promoted to the new root. in the mod log, we'll | ||
| 1091 | * have a root-replace operation for B, a tree block | ||
| 1092 | * that is no root. we simply ignore that operation. | ||
| 1093 | */ | ||
| 1094 | break; | ||
| 1095 | } | ||
| 1096 | next = rb_next(&tm->node); | ||
| 1097 | if (!next) | ||
| 1098 | break; | ||
| 1099 | tm = container_of(next, struct tree_mod_elem, node); | ||
| 1100 | if (tm->index != first_tm->index) | ||
| 1101 | break; | ||
| 1102 | } | ||
| 1103 | btrfs_set_header_nritems(eb, n); | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | static struct extent_buffer * | ||
| 1107 | tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
| 1108 | u64 time_seq) | ||
| 1109 | { | ||
| 1110 | struct extent_buffer *eb_rewin; | ||
| 1111 | struct tree_mod_elem *tm; | ||
| 1112 | |||
| 1113 | if (!time_seq) | ||
| 1114 | return eb; | ||
| 1115 | |||
| 1116 | if (btrfs_header_level(eb) == 0) | ||
| 1117 | return eb; | ||
| 1118 | |||
| 1119 | tm = tree_mod_log_search(fs_info, eb->start, time_seq); | ||
| 1120 | if (!tm) | ||
| 1121 | return eb; | ||
| 1122 | |||
| 1123 | if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { | ||
| 1124 | BUG_ON(tm->slot != 0); | ||
| 1125 | eb_rewin = alloc_dummy_extent_buffer(eb->start, | ||
| 1126 | fs_info->tree_root->nodesize); | ||
| 1127 | BUG_ON(!eb_rewin); | ||
| 1128 | btrfs_set_header_bytenr(eb_rewin, eb->start); | ||
| 1129 | btrfs_set_header_backref_rev(eb_rewin, | ||
| 1130 | btrfs_header_backref_rev(eb)); | ||
| 1131 | btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); | ||
| 1132 | btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); | ||
| 1133 | } else { | ||
| 1134 | eb_rewin = btrfs_clone_extent_buffer(eb); | ||
| 1135 | BUG_ON(!eb_rewin); | ||
| 1136 | } | ||
| 1137 | |||
| 1138 | extent_buffer_get(eb_rewin); | ||
| 1139 | free_extent_buffer(eb); | ||
| 1140 | |||
| 1141 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); | ||
| 1142 | |||
| 1143 | return eb_rewin; | ||
| 1144 | } | ||
| 1145 | |||
| 1146 | static inline struct extent_buffer * | ||
| 1147 | get_old_root(struct btrfs_root *root, u64 time_seq) | ||
| 1148 | { | ||
| 1149 | struct tree_mod_elem *tm; | ||
| 1150 | struct extent_buffer *eb; | ||
| 1151 | struct tree_mod_root *old_root; | ||
| 1152 | u64 old_generation; | ||
| 1153 | |||
| 1154 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); | ||
| 1155 | if (!tm) | ||
| 1156 | return root->node; | ||
| 1157 | |||
| 1158 | old_root = &tm->old_root; | ||
| 1159 | old_generation = tm->generation; | ||
| 1160 | |||
| 1161 | tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); | ||
| 1162 | /* | ||
| 1163 | * there was an item in the log when __tree_mod_log_oldest_root | ||
| 1164 | * returned. this one must not go away, because the time_seq passed to | ||
| 1165 | * us must be blocking its removal. | ||
| 1166 | */ | ||
| 1167 | BUG_ON(!tm); | ||
| 1168 | |||
| 1169 | if (old_root->logical == root->node->start) { | ||
| 1170 | /* there are logged operations for the current root */ | ||
| 1171 | eb = btrfs_clone_extent_buffer(root->node); | ||
| 1172 | } else { | ||
| 1173 | /* there's a root replace operation for the current root */ | ||
| 1174 | eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, | ||
| 1175 | root->nodesize); | ||
| 1176 | btrfs_set_header_bytenr(eb, eb->start); | ||
| 1177 | btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); | ||
| 1178 | btrfs_set_header_owner(eb, root->root_key.objectid); | ||
| 1179 | } | ||
| 1180 | if (!eb) | ||
| 1181 | return NULL; | ||
| 1182 | btrfs_set_header_level(eb, old_root->level); | ||
| 1183 | btrfs_set_header_generation(eb, old_generation); | ||
| 1184 | __tree_mod_log_rewind(eb, time_seq, tm); | ||
| 1185 | |||
| 1186 | return eb; | ||
| 1187 | } | ||
| 1188 | |||
| 538 | static inline int should_cow_block(struct btrfs_trans_handle *trans, | 1189 | static inline int should_cow_block(struct btrfs_trans_handle *trans, |
| 539 | struct btrfs_root *root, | 1190 | struct btrfs_root *root, |
| 540 | struct extent_buffer *buf) | 1191 | struct extent_buffer *buf) |
| @@ -739,7 +1390,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
| 739 | if (!cur) | 1390 | if (!cur) |
| 740 | return -EIO; | 1391 | return -EIO; |
| 741 | } else if (!uptodate) { | 1392 | } else if (!uptodate) { |
| 742 | btrfs_read_buffer(cur, gen); | 1393 | err = btrfs_read_buffer(cur, gen); |
| 1394 | if (err) { | ||
| 1395 | free_extent_buffer(cur); | ||
| 1396 | return err; | ||
| 1397 | } | ||
| 743 | } | 1398 | } |
| 744 | } | 1399 | } |
| 745 | if (search_start == 0) | 1400 | if (search_start == 0) |
| @@ -854,20 +1509,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb, | |||
| 854 | static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 1509 | static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 855 | int level, int *slot) | 1510 | int level, int *slot) |
| 856 | { | 1511 | { |
| 857 | if (level == 0) { | 1512 | if (level == 0) |
| 858 | return generic_bin_search(eb, | 1513 | return generic_bin_search(eb, |
| 859 | offsetof(struct btrfs_leaf, items), | 1514 | offsetof(struct btrfs_leaf, items), |
| 860 | sizeof(struct btrfs_item), | 1515 | sizeof(struct btrfs_item), |
| 861 | key, btrfs_header_nritems(eb), | 1516 | key, btrfs_header_nritems(eb), |
| 862 | slot); | 1517 | slot); |
| 863 | } else { | 1518 | else |
| 864 | return generic_bin_search(eb, | 1519 | return generic_bin_search(eb, |
| 865 | offsetof(struct btrfs_node, ptrs), | 1520 | offsetof(struct btrfs_node, ptrs), |
| 866 | sizeof(struct btrfs_key_ptr), | 1521 | sizeof(struct btrfs_key_ptr), |
| 867 | key, btrfs_header_nritems(eb), | 1522 | key, btrfs_header_nritems(eb), |
| 868 | slot); | 1523 | slot); |
| 869 | } | ||
| 870 | return -1; | ||
| 871 | } | 1524 | } |
| 872 | 1525 | ||
| 873 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 1526 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| @@ -974,6 +1627,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 974 | goto enospc; | 1627 | goto enospc; |
| 975 | } | 1628 | } |
| 976 | 1629 | ||
| 1630 | tree_mod_log_set_root_pointer(root, child); | ||
| 977 | rcu_assign_pointer(root->node, child); | 1631 | rcu_assign_pointer(root->node, child); |
| 978 | 1632 | ||
| 979 | add_root_to_dirty_list(root); | 1633 | add_root_to_dirty_list(root); |
| @@ -987,7 +1641,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 987 | free_extent_buffer(mid); | 1641 | free_extent_buffer(mid); |
| 988 | 1642 | ||
| 989 | root_sub_used(root, mid->len); | 1643 | root_sub_used(root, mid->len); |
| 990 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1644 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
| 991 | /* once for the root ptr */ | 1645 | /* once for the root ptr */ |
| 992 | free_extent_buffer_stale(mid); | 1646 | free_extent_buffer_stale(mid); |
| 993 | return 0; | 1647 | return 0; |
| @@ -1040,14 +1694,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1040 | if (btrfs_header_nritems(right) == 0) { | 1694 | if (btrfs_header_nritems(right) == 0) { |
| 1041 | clean_tree_block(trans, root, right); | 1695 | clean_tree_block(trans, root, right); |
| 1042 | btrfs_tree_unlock(right); | 1696 | btrfs_tree_unlock(right); |
| 1043 | del_ptr(trans, root, path, level + 1, pslot + 1); | 1697 | del_ptr(trans, root, path, level + 1, pslot + 1, 1); |
| 1044 | root_sub_used(root, right->len); | 1698 | root_sub_used(root, right->len); |
| 1045 | btrfs_free_tree_block(trans, root, right, 0, 1, 0); | 1699 | btrfs_free_tree_block(trans, root, right, 0, 1); |
| 1046 | free_extent_buffer_stale(right); | 1700 | free_extent_buffer_stale(right); |
| 1047 | right = NULL; | 1701 | right = NULL; |
| 1048 | } else { | 1702 | } else { |
| 1049 | struct btrfs_disk_key right_key; | 1703 | struct btrfs_disk_key right_key; |
| 1050 | btrfs_node_key(right, &right_key, 0); | 1704 | btrfs_node_key(right, &right_key, 0); |
| 1705 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
| 1706 | &right_key, pslot + 1, 0); | ||
| 1051 | btrfs_set_node_key(parent, &right_key, pslot + 1); | 1707 | btrfs_set_node_key(parent, &right_key, pslot + 1); |
| 1052 | btrfs_mark_buffer_dirty(parent); | 1708 | btrfs_mark_buffer_dirty(parent); |
| 1053 | } | 1709 | } |
| @@ -1082,15 +1738,17 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1082 | if (btrfs_header_nritems(mid) == 0) { | 1738 | if (btrfs_header_nritems(mid) == 0) { |
| 1083 | clean_tree_block(trans, root, mid); | 1739 | clean_tree_block(trans, root, mid); |
| 1084 | btrfs_tree_unlock(mid); | 1740 | btrfs_tree_unlock(mid); |
| 1085 | del_ptr(trans, root, path, level + 1, pslot); | 1741 | del_ptr(trans, root, path, level + 1, pslot, 1); |
| 1086 | root_sub_used(root, mid->len); | 1742 | root_sub_used(root, mid->len); |
| 1087 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1743 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
| 1088 | free_extent_buffer_stale(mid); | 1744 | free_extent_buffer_stale(mid); |
| 1089 | mid = NULL; | 1745 | mid = NULL; |
| 1090 | } else { | 1746 | } else { |
| 1091 | /* update the parent key to reflect our changes */ | 1747 | /* update the parent key to reflect our changes */ |
| 1092 | struct btrfs_disk_key mid_key; | 1748 | struct btrfs_disk_key mid_key; |
| 1093 | btrfs_node_key(mid, &mid_key, 0); | 1749 | btrfs_node_key(mid, &mid_key, 0); |
| 1750 | tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, | ||
| 1751 | pslot, 0); | ||
| 1094 | btrfs_set_node_key(parent, &mid_key, pslot); | 1752 | btrfs_set_node_key(parent, &mid_key, pslot); |
| 1095 | btrfs_mark_buffer_dirty(parent); | 1753 | btrfs_mark_buffer_dirty(parent); |
| 1096 | } | 1754 | } |
| @@ -1188,6 +1846,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 1188 | struct btrfs_disk_key disk_key; | 1846 | struct btrfs_disk_key disk_key; |
| 1189 | orig_slot += left_nr; | 1847 | orig_slot += left_nr; |
| 1190 | btrfs_node_key(mid, &disk_key, 0); | 1848 | btrfs_node_key(mid, &disk_key, 0); |
| 1849 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
| 1850 | &disk_key, pslot, 0); | ||
| 1191 | btrfs_set_node_key(parent, &disk_key, pslot); | 1851 | btrfs_set_node_key(parent, &disk_key, pslot); |
| 1192 | btrfs_mark_buffer_dirty(parent); | 1852 | btrfs_mark_buffer_dirty(parent); |
| 1193 | if (btrfs_header_nritems(left) > orig_slot) { | 1853 | if (btrfs_header_nritems(left) > orig_slot) { |
| @@ -1239,6 +1899,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 1239 | struct btrfs_disk_key disk_key; | 1899 | struct btrfs_disk_key disk_key; |
| 1240 | 1900 | ||
| 1241 | btrfs_node_key(right, &disk_key, 0); | 1901 | btrfs_node_key(right, &disk_key, 0); |
| 1902 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
| 1903 | &disk_key, pslot + 1, 0); | ||
| 1242 | btrfs_set_node_key(parent, &disk_key, pslot + 1); | 1904 | btrfs_set_node_key(parent, &disk_key, pslot + 1); |
| 1243 | btrfs_mark_buffer_dirty(parent); | 1905 | btrfs_mark_buffer_dirty(parent); |
| 1244 | 1906 | ||
| @@ -1496,7 +2158,7 @@ static int | |||
| 1496 | read_block_for_search(struct btrfs_trans_handle *trans, | 2158 | read_block_for_search(struct btrfs_trans_handle *trans, |
| 1497 | struct btrfs_root *root, struct btrfs_path *p, | 2159 | struct btrfs_root *root, struct btrfs_path *p, |
| 1498 | struct extent_buffer **eb_ret, int level, int slot, | 2160 | struct extent_buffer **eb_ret, int level, int slot, |
| 1499 | struct btrfs_key *key) | 2161 | struct btrfs_key *key, u64 time_seq) |
| 1500 | { | 2162 | { |
| 1501 | u64 blocknr; | 2163 | u64 blocknr; |
| 1502 | u64 gen; | 2164 | u64 gen; |
| @@ -1850,7 +2512,7 @@ cow_done: | |||
| 1850 | } | 2512 | } |
| 1851 | 2513 | ||
| 1852 | err = read_block_for_search(trans, root, p, | 2514 | err = read_block_for_search(trans, root, p, |
| 1853 | &b, level, slot, key); | 2515 | &b, level, slot, key, 0); |
| 1854 | if (err == -EAGAIN) | 2516 | if (err == -EAGAIN) |
| 1855 | goto again; | 2517 | goto again; |
| 1856 | if (err) { | 2518 | if (err) { |
| @@ -1922,6 +2584,115 @@ done: | |||
| 1922 | } | 2584 | } |
| 1923 | 2585 | ||
| 1924 | /* | 2586 | /* |
| 2587 | * Like btrfs_search_slot, this looks for a key in the given tree. It uses the | ||
| 2588 | * current state of the tree together with the operations recorded in the tree | ||
| 2589 | * modification log to search for the key in a previous version of this tree, as | ||
| 2590 | * denoted by the time_seq parameter. | ||
| 2591 | * | ||
| 2592 | * Naturally, there is no support for insert, delete or cow operations. | ||
| 2593 | * | ||
| 2594 | * The resulting path and return value will be set up as if we called | ||
| 2595 | * btrfs_search_slot at that point in time with ins_len and cow both set to 0. | ||
| 2596 | */ | ||
| 2597 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
| 2598 | struct btrfs_path *p, u64 time_seq) | ||
| 2599 | { | ||
| 2600 | struct extent_buffer *b; | ||
| 2601 | int slot; | ||
| 2602 | int ret; | ||
| 2603 | int err; | ||
| 2604 | int level; | ||
| 2605 | int lowest_unlock = 1; | ||
| 2606 | u8 lowest_level = 0; | ||
| 2607 | |||
| 2608 | lowest_level = p->lowest_level; | ||
| 2609 | WARN_ON(p->nodes[0] != NULL); | ||
| 2610 | |||
| 2611 | if (p->search_commit_root) { | ||
| 2612 | BUG_ON(time_seq); | ||
| 2613 | return btrfs_search_slot(NULL, root, key, p, 0, 0); | ||
| 2614 | } | ||
| 2615 | |||
| 2616 | again: | ||
| 2617 | b = get_old_root(root, time_seq); | ||
| 2618 | extent_buffer_get(b); | ||
| 2619 | level = btrfs_header_level(b); | ||
| 2620 | btrfs_tree_read_lock(b); | ||
| 2621 | p->locks[level] = BTRFS_READ_LOCK; | ||
| 2622 | |||
| 2623 | while (b) { | ||
| 2624 | level = btrfs_header_level(b); | ||
| 2625 | p->nodes[level] = b; | ||
| 2626 | btrfs_clear_path_blocking(p, NULL, 0); | ||
| 2627 | |||
| 2628 | /* | ||
| 2629 | * we have a lock on b and as long as we aren't changing | ||
| 2630 | * the tree, there is no way to for the items in b to change. | ||
| 2631 | * It is safe to drop the lock on our parent before we | ||
| 2632 | * go through the expensive btree search on b. | ||
| 2633 | */ | ||
| 2634 | btrfs_unlock_up_safe(p, level + 1); | ||
| 2635 | |||
| 2636 | ret = bin_search(b, key, level, &slot); | ||
| 2637 | |||
| 2638 | if (level != 0) { | ||
| 2639 | int dec = 0; | ||
| 2640 | if (ret && slot > 0) { | ||
| 2641 | dec = 1; | ||
| 2642 | slot -= 1; | ||
| 2643 | } | ||
| 2644 | p->slots[level] = slot; | ||
| 2645 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
| 2646 | |||
| 2647 | if (level == lowest_level) { | ||
| 2648 | if (dec) | ||
| 2649 | p->slots[level]++; | ||
| 2650 | goto done; | ||
| 2651 | } | ||
| 2652 | |||
| 2653 | err = read_block_for_search(NULL, root, p, &b, level, | ||
| 2654 | slot, key, time_seq); | ||
| 2655 | if (err == -EAGAIN) | ||
| 2656 | goto again; | ||
| 2657 | if (err) { | ||
| 2658 | ret = err; | ||
| 2659 | goto done; | ||
| 2660 | } | ||
| 2661 | |||
| 2662 | level = btrfs_header_level(b); | ||
| 2663 | err = btrfs_try_tree_read_lock(b); | ||
| 2664 | if (!err) { | ||
| 2665 | btrfs_set_path_blocking(p); | ||
| 2666 | btrfs_tree_read_lock(b); | ||
| 2667 | btrfs_clear_path_blocking(p, b, | ||
| 2668 | BTRFS_READ_LOCK); | ||
| 2669 | } | ||
| 2670 | p->locks[level] = BTRFS_READ_LOCK; | ||
| 2671 | p->nodes[level] = b; | ||
| 2672 | b = tree_mod_log_rewind(root->fs_info, b, time_seq); | ||
| 2673 | if (b != p->nodes[level]) { | ||
| 2674 | btrfs_tree_unlock_rw(p->nodes[level], | ||
| 2675 | p->locks[level]); | ||
| 2676 | p->locks[level] = 0; | ||
| 2677 | p->nodes[level] = b; | ||
| 2678 | } | ||
| 2679 | } else { | ||
| 2680 | p->slots[level] = slot; | ||
| 2681 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
| 2682 | goto done; | ||
| 2683 | } | ||
| 2684 | } | ||
| 2685 | ret = 1; | ||
| 2686 | done: | ||
| 2687 | if (!p->leave_spinning) | ||
| 2688 | btrfs_set_path_blocking(p); | ||
| 2689 | if (ret < 0) | ||
| 2690 | btrfs_release_path(p); | ||
| 2691 | |||
| 2692 | return ret; | ||
| 2693 | } | ||
| 2694 | |||
| 2695 | /* | ||
| 1925 | * adjust the pointers going up the tree, starting at level | 2696 | * adjust the pointers going up the tree, starting at level |
| 1926 | * making sure the right key of each node is points to 'key'. | 2697 | * making sure the right key of each node is points to 'key'. |
| 1927 | * This is used after shifting pointers to the left, so it stops | 2698 | * This is used after shifting pointers to the left, so it stops |
| @@ -1941,6 +2712,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, | |||
| 1941 | if (!path->nodes[i]) | 2712 | if (!path->nodes[i]) |
| 1942 | break; | 2713 | break; |
| 1943 | t = path->nodes[i]; | 2714 | t = path->nodes[i]; |
| 2715 | tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); | ||
| 1944 | btrfs_set_node_key(t, key, tslot); | 2716 | btrfs_set_node_key(t, key, tslot); |
| 1945 | btrfs_mark_buffer_dirty(path->nodes[i]); | 2717 | btrfs_mark_buffer_dirty(path->nodes[i]); |
| 1946 | if (tslot != 0) | 2718 | if (tslot != 0) |
| @@ -2023,12 +2795,16 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
| 2023 | } else | 2795 | } else |
| 2024 | push_items = min(src_nritems - 8, push_items); | 2796 | push_items = min(src_nritems - 8, push_items); |
| 2025 | 2797 | ||
| 2798 | tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, | ||
| 2799 | push_items); | ||
| 2026 | copy_extent_buffer(dst, src, | 2800 | copy_extent_buffer(dst, src, |
| 2027 | btrfs_node_key_ptr_offset(dst_nritems), | 2801 | btrfs_node_key_ptr_offset(dst_nritems), |
| 2028 | btrfs_node_key_ptr_offset(0), | 2802 | btrfs_node_key_ptr_offset(0), |
| 2029 | push_items * sizeof(struct btrfs_key_ptr)); | 2803 | push_items * sizeof(struct btrfs_key_ptr)); |
| 2030 | 2804 | ||
| 2031 | if (push_items < src_nritems) { | 2805 | if (push_items < src_nritems) { |
| 2806 | tree_mod_log_eb_move(root->fs_info, src, 0, push_items, | ||
| 2807 | src_nritems - push_items); | ||
| 2032 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), | 2808 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), |
| 2033 | btrfs_node_key_ptr_offset(push_items), | 2809 | btrfs_node_key_ptr_offset(push_items), |
| 2034 | (src_nritems - push_items) * | 2810 | (src_nritems - push_items) * |
| @@ -2082,11 +2858,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
| 2082 | if (max_push < push_items) | 2858 | if (max_push < push_items) |
| 2083 | push_items = max_push; | 2859 | push_items = max_push; |
| 2084 | 2860 | ||
| 2861 | tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems); | ||
| 2085 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), | 2862 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), |
| 2086 | btrfs_node_key_ptr_offset(0), | 2863 | btrfs_node_key_ptr_offset(0), |
| 2087 | (dst_nritems) * | 2864 | (dst_nritems) * |
| 2088 | sizeof(struct btrfs_key_ptr)); | 2865 | sizeof(struct btrfs_key_ptr)); |
| 2089 | 2866 | ||
| 2867 | tree_mod_log_eb_copy(root->fs_info, dst, src, 0, | ||
| 2868 | src_nritems - push_items, push_items); | ||
| 2090 | copy_extent_buffer(dst, src, | 2869 | copy_extent_buffer(dst, src, |
| 2091 | btrfs_node_key_ptr_offset(0), | 2870 | btrfs_node_key_ptr_offset(0), |
| 2092 | btrfs_node_key_ptr_offset(src_nritems - push_items), | 2871 | btrfs_node_key_ptr_offset(src_nritems - push_items), |
| @@ -2129,7 +2908,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
| 2129 | 2908 | ||
| 2130 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 2909 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
| 2131 | root->root_key.objectid, &lower_key, | 2910 | root->root_key.objectid, &lower_key, |
| 2132 | level, root->node->start, 0, 0); | 2911 | level, root->node->start, 0); |
| 2133 | if (IS_ERR(c)) | 2912 | if (IS_ERR(c)) |
| 2134 | return PTR_ERR(c); | 2913 | return PTR_ERR(c); |
| 2135 | 2914 | ||
| @@ -2161,6 +2940,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
| 2161 | btrfs_mark_buffer_dirty(c); | 2940 | btrfs_mark_buffer_dirty(c); |
| 2162 | 2941 | ||
| 2163 | old = root->node; | 2942 | old = root->node; |
| 2943 | tree_mod_log_set_root_pointer(root, c); | ||
| 2164 | rcu_assign_pointer(root->node, c); | 2944 | rcu_assign_pointer(root->node, c); |
| 2165 | 2945 | ||
| 2166 | /* the super has an extra ref to root->node */ | 2946 | /* the super has an extra ref to root->node */ |
| @@ -2184,10 +2964,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
| 2184 | static void insert_ptr(struct btrfs_trans_handle *trans, | 2964 | static void insert_ptr(struct btrfs_trans_handle *trans, |
| 2185 | struct btrfs_root *root, struct btrfs_path *path, | 2965 | struct btrfs_root *root, struct btrfs_path *path, |
| 2186 | struct btrfs_disk_key *key, u64 bytenr, | 2966 | struct btrfs_disk_key *key, u64 bytenr, |
| 2187 | int slot, int level) | 2967 | int slot, int level, int tree_mod_log) |
| 2188 | { | 2968 | { |
| 2189 | struct extent_buffer *lower; | 2969 | struct extent_buffer *lower; |
| 2190 | int nritems; | 2970 | int nritems; |
| 2971 | int ret; | ||
| 2191 | 2972 | ||
| 2192 | BUG_ON(!path->nodes[level]); | 2973 | BUG_ON(!path->nodes[level]); |
| 2193 | btrfs_assert_tree_locked(path->nodes[level]); | 2974 | btrfs_assert_tree_locked(path->nodes[level]); |
| @@ -2196,11 +2977,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans, | |||
| 2196 | BUG_ON(slot > nritems); | 2977 | BUG_ON(slot > nritems); |
| 2197 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); | 2978 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); |
| 2198 | if (slot != nritems) { | 2979 | if (slot != nritems) { |
| 2980 | if (tree_mod_log && level) | ||
| 2981 | tree_mod_log_eb_move(root->fs_info, lower, slot + 1, | ||
| 2982 | slot, nritems - slot); | ||
| 2199 | memmove_extent_buffer(lower, | 2983 | memmove_extent_buffer(lower, |
| 2200 | btrfs_node_key_ptr_offset(slot + 1), | 2984 | btrfs_node_key_ptr_offset(slot + 1), |
| 2201 | btrfs_node_key_ptr_offset(slot), | 2985 | btrfs_node_key_ptr_offset(slot), |
| 2202 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); | 2986 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); |
| 2203 | } | 2987 | } |
| 2988 | if (tree_mod_log && level) { | ||
| 2989 | ret = tree_mod_log_insert_key(root->fs_info, lower, slot, | ||
| 2990 | MOD_LOG_KEY_ADD); | ||
| 2991 | BUG_ON(ret < 0); | ||
| 2992 | } | ||
| 2204 | btrfs_set_node_key(lower, key, slot); | 2993 | btrfs_set_node_key(lower, key, slot); |
| 2205 | btrfs_set_node_blockptr(lower, slot, bytenr); | 2994 | btrfs_set_node_blockptr(lower, slot, bytenr); |
| 2206 | WARN_ON(trans->transid == 0); | 2995 | WARN_ON(trans->transid == 0); |
| @@ -2252,7 +3041,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 2252 | 3041 | ||
| 2253 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3042 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
| 2254 | root->root_key.objectid, | 3043 | root->root_key.objectid, |
| 2255 | &disk_key, level, c->start, 0, 0); | 3044 | &disk_key, level, c->start, 0); |
| 2256 | if (IS_ERR(split)) | 3045 | if (IS_ERR(split)) |
| 2257 | return PTR_ERR(split); | 3046 | return PTR_ERR(split); |
| 2258 | 3047 | ||
| @@ -2271,7 +3060,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 2271 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | 3060 | (unsigned long)btrfs_header_chunk_tree_uuid(split), |
| 2272 | BTRFS_UUID_SIZE); | 3061 | BTRFS_UUID_SIZE); |
| 2273 | 3062 | ||
| 2274 | 3063 | tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); | |
| 2275 | copy_extent_buffer(split, c, | 3064 | copy_extent_buffer(split, c, |
| 2276 | btrfs_node_key_ptr_offset(0), | 3065 | btrfs_node_key_ptr_offset(0), |
| 2277 | btrfs_node_key_ptr_offset(mid), | 3066 | btrfs_node_key_ptr_offset(mid), |
| @@ -2284,7 +3073,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 2284 | btrfs_mark_buffer_dirty(split); | 3073 | btrfs_mark_buffer_dirty(split); |
| 2285 | 3074 | ||
| 2286 | insert_ptr(trans, root, path, &disk_key, split->start, | 3075 | insert_ptr(trans, root, path, &disk_key, split->start, |
| 2287 | path->slots[level + 1] + 1, level + 1); | 3076 | path->slots[level + 1] + 1, level + 1, 1); |
| 2288 | 3077 | ||
| 2289 | if (path->slots[level] >= mid) { | 3078 | if (path->slots[level] >= mid) { |
| 2290 | path->slots[level] -= mid; | 3079 | path->slots[level] -= mid; |
| @@ -2821,7 +3610,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, | |||
| 2821 | btrfs_set_header_nritems(l, mid); | 3610 | btrfs_set_header_nritems(l, mid); |
| 2822 | btrfs_item_key(right, &disk_key, 0); | 3611 | btrfs_item_key(right, &disk_key, 0); |
| 2823 | insert_ptr(trans, root, path, &disk_key, right->start, | 3612 | insert_ptr(trans, root, path, &disk_key, right->start, |
| 2824 | path->slots[1] + 1, 1); | 3613 | path->slots[1] + 1, 1, 0); |
| 2825 | 3614 | ||
| 2826 | btrfs_mark_buffer_dirty(right); | 3615 | btrfs_mark_buffer_dirty(right); |
| 2827 | btrfs_mark_buffer_dirty(l); | 3616 | btrfs_mark_buffer_dirty(l); |
| @@ -3004,7 +3793,7 @@ again: | |||
| 3004 | 3793 | ||
| 3005 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 3794 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
| 3006 | root->root_key.objectid, | 3795 | root->root_key.objectid, |
| 3007 | &disk_key, 0, l->start, 0, 0); | 3796 | &disk_key, 0, l->start, 0); |
| 3008 | if (IS_ERR(right)) | 3797 | if (IS_ERR(right)) |
| 3009 | return PTR_ERR(right); | 3798 | return PTR_ERR(right); |
| 3010 | 3799 | ||
| @@ -3028,7 +3817,7 @@ again: | |||
| 3028 | if (mid <= slot) { | 3817 | if (mid <= slot) { |
| 3029 | btrfs_set_header_nritems(right, 0); | 3818 | btrfs_set_header_nritems(right, 0); |
| 3030 | insert_ptr(trans, root, path, &disk_key, right->start, | 3819 | insert_ptr(trans, root, path, &disk_key, right->start, |
| 3031 | path->slots[1] + 1, 1); | 3820 | path->slots[1] + 1, 1, 0); |
| 3032 | btrfs_tree_unlock(path->nodes[0]); | 3821 | btrfs_tree_unlock(path->nodes[0]); |
| 3033 | free_extent_buffer(path->nodes[0]); | 3822 | free_extent_buffer(path->nodes[0]); |
| 3034 | path->nodes[0] = right; | 3823 | path->nodes[0] = right; |
| @@ -3037,7 +3826,7 @@ again: | |||
| 3037 | } else { | 3826 | } else { |
| 3038 | btrfs_set_header_nritems(right, 0); | 3827 | btrfs_set_header_nritems(right, 0); |
| 3039 | insert_ptr(trans, root, path, &disk_key, right->start, | 3828 | insert_ptr(trans, root, path, &disk_key, right->start, |
| 3040 | path->slots[1], 1); | 3829 | path->slots[1], 1, 0); |
| 3041 | btrfs_tree_unlock(path->nodes[0]); | 3830 | btrfs_tree_unlock(path->nodes[0]); |
| 3042 | free_extent_buffer(path->nodes[0]); | 3831 | free_extent_buffer(path->nodes[0]); |
| 3043 | path->nodes[0] = right; | 3832 | path->nodes[0] = right; |
| @@ -3749,19 +4538,29 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 3749 | * empty a node. | 4538 | * empty a node. |
| 3750 | */ | 4539 | */ |
| 3751 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 4540 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 3752 | struct btrfs_path *path, int level, int slot) | 4541 | struct btrfs_path *path, int level, int slot, |
| 4542 | int tree_mod_log) | ||
| 3753 | { | 4543 | { |
| 3754 | struct extent_buffer *parent = path->nodes[level]; | 4544 | struct extent_buffer *parent = path->nodes[level]; |
| 3755 | u32 nritems; | 4545 | u32 nritems; |
| 4546 | int ret; | ||
| 3756 | 4547 | ||
| 3757 | nritems = btrfs_header_nritems(parent); | 4548 | nritems = btrfs_header_nritems(parent); |
| 3758 | if (slot != nritems - 1) { | 4549 | if (slot != nritems - 1) { |
| 4550 | if (tree_mod_log && level) | ||
| 4551 | tree_mod_log_eb_move(root->fs_info, parent, slot, | ||
| 4552 | slot + 1, nritems - slot - 1); | ||
| 3759 | memmove_extent_buffer(parent, | 4553 | memmove_extent_buffer(parent, |
| 3760 | btrfs_node_key_ptr_offset(slot), | 4554 | btrfs_node_key_ptr_offset(slot), |
| 3761 | btrfs_node_key_ptr_offset(slot + 1), | 4555 | btrfs_node_key_ptr_offset(slot + 1), |
| 3762 | sizeof(struct btrfs_key_ptr) * | 4556 | sizeof(struct btrfs_key_ptr) * |
| 3763 | (nritems - slot - 1)); | 4557 | (nritems - slot - 1)); |
| 4558 | } else if (tree_mod_log && level) { | ||
| 4559 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, | ||
| 4560 | MOD_LOG_KEY_REMOVE); | ||
| 4561 | BUG_ON(ret < 0); | ||
| 3764 | } | 4562 | } |
| 4563 | |||
| 3765 | nritems--; | 4564 | nritems--; |
| 3766 | btrfs_set_header_nritems(parent, nritems); | 4565 | btrfs_set_header_nritems(parent, nritems); |
| 3767 | if (nritems == 0 && parent == root->node) { | 4566 | if (nritems == 0 && parent == root->node) { |
| @@ -3793,7 +4592,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
| 3793 | struct extent_buffer *leaf) | 4592 | struct extent_buffer *leaf) |
| 3794 | { | 4593 | { |
| 3795 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); | 4594 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); |
| 3796 | del_ptr(trans, root, path, 1, path->slots[1]); | 4595 | del_ptr(trans, root, path, 1, path->slots[1], 1); |
| 3797 | 4596 | ||
| 3798 | /* | 4597 | /* |
| 3799 | * btrfs_free_extent is expensive, we want to make sure we | 4598 | * btrfs_free_extent is expensive, we want to make sure we |
| @@ -3804,7 +4603,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
| 3804 | root_sub_used(root, leaf->len); | 4603 | root_sub_used(root, leaf->len); |
| 3805 | 4604 | ||
| 3806 | extent_buffer_get(leaf); | 4605 | extent_buffer_get(leaf); |
| 3807 | btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); | 4606 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
| 3808 | free_extent_buffer_stale(leaf); | 4607 | free_extent_buffer_stale(leaf); |
| 3809 | } | 4608 | } |
| 3810 | /* | 4609 | /* |
| @@ -4271,7 +5070,7 @@ again: | |||
| 4271 | next = c; | 5070 | next = c; |
| 4272 | next_rw_lock = path->locks[level]; | 5071 | next_rw_lock = path->locks[level]; |
| 4273 | ret = read_block_for_search(NULL, root, path, &next, level, | 5072 | ret = read_block_for_search(NULL, root, path, &next, level, |
| 4274 | slot, &key); | 5073 | slot, &key, 0); |
| 4275 | if (ret == -EAGAIN) | 5074 | if (ret == -EAGAIN) |
| 4276 | goto again; | 5075 | goto again; |
| 4277 | 5076 | ||
| @@ -4308,7 +5107,7 @@ again: | |||
| 4308 | break; | 5107 | break; |
| 4309 | 5108 | ||
| 4310 | ret = read_block_for_search(NULL, root, path, &next, level, | 5109 | ret = read_block_for_search(NULL, root, path, &next, level, |
| 4311 | 0, &key); | 5110 | 0, &key, 0); |
| 4312 | if (ret == -EAGAIN) | 5111 | if (ret == -EAGAIN) |
| 4313 | goto again; | 5112 | goto again; |
| 4314 | 5113 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8fd72331d600..0236d03c6732 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -173,6 +173,9 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
| 173 | #define BTRFS_FT_XATTR 8 | 173 | #define BTRFS_FT_XATTR 8 |
| 174 | #define BTRFS_FT_MAX 9 | 174 | #define BTRFS_FT_MAX 9 |
| 175 | 175 | ||
| 176 | /* ioprio of readahead is set to idle */ | ||
| 177 | #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) | ||
| 178 | |||
| 176 | /* | 179 | /* |
| 177 | * The key defines the order in the tree, and so it also defines (optimal) | 180 | * The key defines the order in the tree, and so it also defines (optimal) |
| 178 | * block layout. | 181 | * block layout. |
| @@ -823,6 +826,14 @@ struct btrfs_csum_item { | |||
| 823 | u8 csum; | 826 | u8 csum; |
| 824 | } __attribute__ ((__packed__)); | 827 | } __attribute__ ((__packed__)); |
| 825 | 828 | ||
| 829 | struct btrfs_dev_stats_item { | ||
| 830 | /* | ||
| 831 | * grow this item struct at the end for future enhancements and keep | ||
| 832 | * the existing values unchanged | ||
| 833 | */ | ||
| 834 | __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
| 835 | } __attribute__ ((__packed__)); | ||
| 836 | |||
| 826 | /* different types of block groups (and chunks) */ | 837 | /* different types of block groups (and chunks) */ |
| 827 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) | 838 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) |
| 828 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) | 839 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) |
| @@ -1129,6 +1140,15 @@ struct btrfs_fs_info { | |||
| 1129 | spinlock_t delayed_iput_lock; | 1140 | spinlock_t delayed_iput_lock; |
| 1130 | struct list_head delayed_iputs; | 1141 | struct list_head delayed_iputs; |
| 1131 | 1142 | ||
| 1143 | /* this protects tree_mod_seq_list */ | ||
| 1144 | spinlock_t tree_mod_seq_lock; | ||
| 1145 | atomic_t tree_mod_seq; | ||
| 1146 | struct list_head tree_mod_seq_list; | ||
| 1147 | |||
| 1148 | /* this protects tree_mod_log */ | ||
| 1149 | rwlock_t tree_mod_log_lock; | ||
| 1150 | struct rb_root tree_mod_log; | ||
| 1151 | |||
| 1132 | atomic_t nr_async_submits; | 1152 | atomic_t nr_async_submits; |
| 1133 | atomic_t async_submit_draining; | 1153 | atomic_t async_submit_draining; |
| 1134 | atomic_t nr_async_bios; | 1154 | atomic_t nr_async_bios; |
| @@ -1375,7 +1395,7 @@ struct btrfs_root { | |||
| 1375 | struct list_head root_list; | 1395 | struct list_head root_list; |
| 1376 | 1396 | ||
| 1377 | spinlock_t orphan_lock; | 1397 | spinlock_t orphan_lock; |
| 1378 | struct list_head orphan_list; | 1398 | atomic_t orphan_inodes; |
| 1379 | struct btrfs_block_rsv *orphan_block_rsv; | 1399 | struct btrfs_block_rsv *orphan_block_rsv; |
| 1380 | int orphan_item_inserted; | 1400 | int orphan_item_inserted; |
| 1381 | int orphan_cleanup_state; | 1401 | int orphan_cleanup_state; |
| @@ -1508,6 +1528,12 @@ struct btrfs_ioctl_defrag_range_args { | |||
| 1508 | #define BTRFS_BALANCE_ITEM_KEY 248 | 1528 | #define BTRFS_BALANCE_ITEM_KEY 248 |
| 1509 | 1529 | ||
| 1510 | /* | 1530 | /* |
| 1531 | * Persistently stores the io stats in the device tree. | ||
| 1532 | * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). | ||
| 1533 | */ | ||
| 1534 | #define BTRFS_DEV_STATS_KEY 249 | ||
| 1535 | |||
| 1536 | /* | ||
| 1511 | * string items are for debugging. They just store a short string of | 1537 | * string items are for debugging. They just store a short string of |
| 1512 | * data in the FS | 1538 | * data in the FS |
| 1513 | */ | 1539 | */ |
| @@ -2415,6 +2441,30 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, | |||
| 2415 | return btrfs_item_size(eb, e) - offset; | 2441 | return btrfs_item_size(eb, e) - offset; |
| 2416 | } | 2442 | } |
| 2417 | 2443 | ||
| 2444 | /* btrfs_dev_stats_item */ | ||
| 2445 | static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, | ||
| 2446 | struct btrfs_dev_stats_item *ptr, | ||
| 2447 | int index) | ||
| 2448 | { | ||
| 2449 | u64 val; | ||
| 2450 | |||
| 2451 | read_extent_buffer(eb, &val, | ||
| 2452 | offsetof(struct btrfs_dev_stats_item, values) + | ||
| 2453 | ((unsigned long)ptr) + (index * sizeof(u64)), | ||
| 2454 | sizeof(val)); | ||
| 2455 | return val; | ||
| 2456 | } | ||
| 2457 | |||
| 2458 | static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb, | ||
| 2459 | struct btrfs_dev_stats_item *ptr, | ||
| 2460 | int index, u64 val) | ||
| 2461 | { | ||
| 2462 | write_extent_buffer(eb, &val, | ||
| 2463 | offsetof(struct btrfs_dev_stats_item, values) + | ||
| 2464 | ((unsigned long)ptr) + (index * sizeof(u64)), | ||
| 2465 | sizeof(val)); | ||
| 2466 | } | ||
| 2467 | |||
| 2418 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | 2468 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
| 2419 | { | 2469 | { |
| 2420 | return sb->s_fs_info; | 2470 | return sb->s_fs_info; |
| @@ -2496,11 +2546,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 2496 | struct btrfs_root *root, u32 blocksize, | 2546 | struct btrfs_root *root, u32 blocksize, |
| 2497 | u64 parent, u64 root_objectid, | 2547 | u64 parent, u64 root_objectid, |
| 2498 | struct btrfs_disk_key *key, int level, | 2548 | struct btrfs_disk_key *key, int level, |
| 2499 | u64 hint, u64 empty_size, int for_cow); | 2549 | u64 hint, u64 empty_size); |
| 2500 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2550 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
| 2501 | struct btrfs_root *root, | 2551 | struct btrfs_root *root, |
| 2502 | struct extent_buffer *buf, | 2552 | struct extent_buffer *buf, |
| 2503 | u64 parent, int last_ref, int for_cow); | 2553 | u64 parent, int last_ref); |
| 2504 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2554 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
| 2505 | struct btrfs_root *root, | 2555 | struct btrfs_root *root, |
| 2506 | u64 bytenr, u32 blocksize, | 2556 | u64 bytenr, u32 blocksize, |
| @@ -2659,6 +2709,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, | |||
| 2659 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | 2709 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root |
| 2660 | *root, struct btrfs_key *key, struct btrfs_path *p, int | 2710 | *root, struct btrfs_key *key, struct btrfs_path *p, int |
| 2661 | ins_len, int cow); | 2711 | ins_len, int cow); |
| 2712 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
| 2713 | struct btrfs_path *p, u64 time_seq); | ||
| 2662 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 2714 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
| 2663 | struct btrfs_root *root, struct extent_buffer *parent, | 2715 | struct btrfs_root *root, struct extent_buffer *parent, |
| 2664 | int start_slot, int cache_only, u64 *last_ret, | 2716 | int start_slot, int cache_only, u64 *last_ret, |
| @@ -2922,7 +2974,6 @@ int btrfs_readpage(struct file *file, struct page *page); | |||
| 2922 | void btrfs_evict_inode(struct inode *inode); | 2974 | void btrfs_evict_inode(struct inode *inode); |
| 2923 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); | 2975 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); |
| 2924 | int btrfs_dirty_inode(struct inode *inode); | 2976 | int btrfs_dirty_inode(struct inode *inode); |
| 2925 | int btrfs_update_time(struct file *file); | ||
| 2926 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2977 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
| 2927 | void btrfs_destroy_inode(struct inode *inode); | 2978 | void btrfs_destroy_inode(struct inode *inode); |
| 2928 | int btrfs_drop_inode(struct inode *inode); | 2979 | int btrfs_drop_inode(struct inode *inode); |
| @@ -3098,4 +3149,23 @@ void btrfs_reada_detach(void *handle); | |||
| 3098 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 3149 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, |
| 3099 | u64 start, int err); | 3150 | u64 start, int err); |
| 3100 | 3151 | ||
| 3152 | /* delayed seq elem */ | ||
| 3153 | struct seq_list { | ||
| 3154 | struct list_head list; | ||
| 3155 | u64 seq; | ||
| 3156 | u32 flags; | ||
| 3157 | }; | ||
| 3158 | |||
| 3159 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
| 3160 | struct seq_list *elem); | ||
| 3161 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
| 3162 | struct seq_list *elem); | ||
| 3163 | |||
| 3164 | static inline int is_fstree(u64 rootid) | ||
| 3165 | { | ||
| 3166 | if (rootid == BTRFS_FS_TREE_OBJECTID || | ||
| 3167 | (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
| 3168 | return 1; | ||
| 3169 | return 0; | ||
| 3170 | } | ||
| 3101 | #endif | 3171 | #endif |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 03e3748d84d0..c18d0442ae6d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 669 | return ret; | 669 | return ret; |
| 670 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { | 670 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { |
| 671 | spin_lock(&BTRFS_I(inode)->lock); | 671 | spin_lock(&BTRFS_I(inode)->lock); |
| 672 | if (BTRFS_I(inode)->delalloc_meta_reserved) { | 672 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
| 673 | BTRFS_I(inode)->delalloc_meta_reserved = 0; | 673 | &BTRFS_I(inode)->runtime_flags)) { |
| 674 | spin_unlock(&BTRFS_I(inode)->lock); | 674 | spin_unlock(&BTRFS_I(inode)->lock); |
| 675 | release = true; | 675 | release = true; |
| 676 | goto migrate; | 676 | goto migrate; |
| @@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | |||
| 1706 | btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); | 1706 | btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); |
| 1707 | btrfs_set_stack_inode_generation(inode_item, | 1707 | btrfs_set_stack_inode_generation(inode_item, |
| 1708 | BTRFS_I(inode)->generation); | 1708 | BTRFS_I(inode)->generation); |
| 1709 | btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence); | 1709 | btrfs_set_stack_inode_sequence(inode_item, inode->i_version); |
| 1710 | btrfs_set_stack_inode_transid(inode_item, trans->transid); | 1710 | btrfs_set_stack_inode_transid(inode_item, trans->transid); |
| 1711 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); | 1711 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); |
| 1712 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); | 1712 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); |
| @@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) | |||
| 1754 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); | 1754 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); |
| 1755 | inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); | 1755 | inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); |
| 1756 | BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); | 1756 | BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); |
| 1757 | BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); | 1757 | inode->i_version = btrfs_stack_inode_sequence(inode_item); |
| 1758 | inode->i_rdev = 0; | 1758 | inode->i_rdev = 0; |
| 1759 | *rdev = btrfs_stack_inode_rdev(inode_item); | 1759 | *rdev = btrfs_stack_inode_rdev(inode_item); |
| 1760 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); | 1760 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 69f22e3ab3bc..13ae7b04790e 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
| @@ -525,7 +525,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
| 525 | ref->is_head = 0; | 525 | ref->is_head = 0; |
| 526 | ref->in_tree = 1; | 526 | ref->in_tree = 1; |
| 527 | 527 | ||
| 528 | if (need_ref_seq(for_cow, ref_root)) | 528 | if (is_fstree(ref_root)) |
| 529 | seq = inc_delayed_seq(delayed_refs); | 529 | seq = inc_delayed_seq(delayed_refs); |
| 530 | ref->seq = seq; | 530 | ref->seq = seq; |
| 531 | 531 | ||
| @@ -584,7 +584,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
| 584 | ref->is_head = 0; | 584 | ref->is_head = 0; |
| 585 | ref->in_tree = 1; | 585 | ref->in_tree = 1; |
| 586 | 586 | ||
| 587 | if (need_ref_seq(for_cow, ref_root)) | 587 | if (is_fstree(ref_root)) |
| 588 | seq = inc_delayed_seq(delayed_refs); | 588 | seq = inc_delayed_seq(delayed_refs); |
| 589 | ref->seq = seq; | 589 | ref->seq = seq; |
| 590 | 590 | ||
| @@ -658,10 +658,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
| 658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, | 658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, |
| 659 | num_bytes, parent, ref_root, level, action, | 659 | num_bytes, parent, ref_root, level, action, |
| 660 | for_cow); | 660 | for_cow); |
| 661 | if (!need_ref_seq(for_cow, ref_root) && | 661 | if (!is_fstree(ref_root) && |
| 662 | waitqueue_active(&delayed_refs->seq_wait)) | 662 | waitqueue_active(&delayed_refs->seq_wait)) |
| 663 | wake_up(&delayed_refs->seq_wait); | 663 | wake_up(&delayed_refs->seq_wait); |
| 664 | spin_unlock(&delayed_refs->lock); | 664 | spin_unlock(&delayed_refs->lock); |
| 665 | |||
| 665 | return 0; | 666 | return 0; |
| 666 | } | 667 | } |
| 667 | 668 | ||
| @@ -706,10 +707,11 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
| 706 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, | 707 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, |
| 707 | num_bytes, parent, ref_root, owner, offset, | 708 | num_bytes, parent, ref_root, owner, offset, |
| 708 | action, for_cow); | 709 | action, for_cow); |
| 709 | if (!need_ref_seq(for_cow, ref_root) && | 710 | if (!is_fstree(ref_root) && |
| 710 | waitqueue_active(&delayed_refs->seq_wait)) | 711 | waitqueue_active(&delayed_refs->seq_wait)) |
| 711 | wake_up(&delayed_refs->seq_wait); | 712 | wake_up(&delayed_refs->seq_wait); |
| 712 | spin_unlock(&delayed_refs->lock); | 713 | spin_unlock(&delayed_refs->lock); |
| 714 | |||
| 713 | return 0; | 715 | return 0; |
| 714 | } | 716 | } |
| 715 | 717 | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index d8f244d94925..413927fb9957 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
| @@ -195,11 +195,6 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
| 195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
| 196 | struct list_head *cluster, u64 search_start); | 196 | struct list_head *cluster, u64 search_start); |
| 197 | 197 | ||
| 198 | struct seq_list { | ||
| 199 | struct list_head list; | ||
| 200 | u64 seq; | ||
| 201 | }; | ||
| 202 | |||
| 203 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) | 198 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) |
| 204 | { | 199 | { |
| 205 | assert_spin_locked(&delayed_refs->lock); | 200 | assert_spin_locked(&delayed_refs->lock); |
| @@ -230,25 +225,6 @@ int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | |||
| 230 | u64 seq); | 225 | u64 seq); |
| 231 | 226 | ||
| 232 | /* | 227 | /* |
| 233 | * delayed refs with a ref_seq > 0 must be held back during backref walking. | ||
| 234 | * this only applies to items in one of the fs-trees. for_cow items never need | ||
| 235 | * to be held back, so they won't get a ref_seq number. | ||
| 236 | */ | ||
| 237 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
| 238 | { | ||
| 239 | if (for_cow) | ||
| 240 | return 0; | ||
| 241 | |||
| 242 | if (rootid == BTRFS_FS_TREE_OBJECTID) | ||
| 243 | return 1; | ||
| 244 | |||
| 245 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
| 246 | return 1; | ||
| 247 | |||
| 248 | return 0; | ||
| 249 | } | ||
| 250 | |||
| 251 | /* | ||
| 252 | * a node might live in a head or a regular ref, this lets you | 228 | * a node might live in a head or a regular ref, this lets you |
| 253 | * test for the proper type to use. | 229 | * test for the proper type to use. |
| 254 | */ | 230 | */ |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a7ffc88a7dbe..7ae51decf6d3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1153 | root->orphan_block_rsv = NULL; | 1153 | root->orphan_block_rsv = NULL; |
| 1154 | 1154 | ||
| 1155 | INIT_LIST_HEAD(&root->dirty_list); | 1155 | INIT_LIST_HEAD(&root->dirty_list); |
| 1156 | INIT_LIST_HEAD(&root->orphan_list); | ||
| 1157 | INIT_LIST_HEAD(&root->root_list); | 1156 | INIT_LIST_HEAD(&root->root_list); |
| 1158 | spin_lock_init(&root->orphan_lock); | 1157 | spin_lock_init(&root->orphan_lock); |
| 1159 | spin_lock_init(&root->inode_lock); | 1158 | spin_lock_init(&root->inode_lock); |
| @@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1166 | atomic_set(&root->log_commit[0], 0); | 1165 | atomic_set(&root->log_commit[0], 0); |
| 1167 | atomic_set(&root->log_commit[1], 0); | 1166 | atomic_set(&root->log_commit[1], 0); |
| 1168 | atomic_set(&root->log_writers, 0); | 1167 | atomic_set(&root->log_writers, 0); |
| 1168 | atomic_set(&root->orphan_inodes, 0); | ||
| 1169 | root->log_batch = 0; | 1169 | root->log_batch = 0; |
| 1170 | root->log_transid = 0; | 1170 | root->log_transid = 0; |
| 1171 | root->last_log_commit = 0; | 1171 | root->last_log_commit = 0; |
| @@ -1252,7 +1252,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
| 1252 | 1252 | ||
| 1253 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1253 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
| 1254 | BTRFS_TREE_LOG_OBJECTID, NULL, | 1254 | BTRFS_TREE_LOG_OBJECTID, NULL, |
| 1255 | 0, 0, 0, 0); | 1255 | 0, 0, 0); |
| 1256 | if (IS_ERR(leaf)) { | 1256 | if (IS_ERR(leaf)) { |
| 1257 | kfree(root); | 1257 | kfree(root); |
| 1258 | return ERR_CAST(leaf); | 1258 | return ERR_CAST(leaf); |
| @@ -1914,11 +1914,14 @@ int open_ctree(struct super_block *sb, | |||
| 1914 | spin_lock_init(&fs_info->delayed_iput_lock); | 1914 | spin_lock_init(&fs_info->delayed_iput_lock); |
| 1915 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1915 | spin_lock_init(&fs_info->defrag_inodes_lock); |
| 1916 | spin_lock_init(&fs_info->free_chunk_lock); | 1916 | spin_lock_init(&fs_info->free_chunk_lock); |
| 1917 | spin_lock_init(&fs_info->tree_mod_seq_lock); | ||
| 1918 | rwlock_init(&fs_info->tree_mod_log_lock); | ||
| 1917 | mutex_init(&fs_info->reloc_mutex); | 1919 | mutex_init(&fs_info->reloc_mutex); |
| 1918 | 1920 | ||
| 1919 | init_completion(&fs_info->kobj_unregister); | 1921 | init_completion(&fs_info->kobj_unregister); |
| 1920 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1922 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
| 1921 | INIT_LIST_HEAD(&fs_info->space_info); | 1923 | INIT_LIST_HEAD(&fs_info->space_info); |
| 1924 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | ||
| 1922 | btrfs_mapping_init(&fs_info->mapping_tree); | 1925 | btrfs_mapping_init(&fs_info->mapping_tree); |
| 1923 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | 1926 | btrfs_init_block_rsv(&fs_info->global_block_rsv); |
| 1924 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | 1927 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); |
| @@ -1931,12 +1934,14 @@ int open_ctree(struct super_block *sb, | |||
| 1931 | atomic_set(&fs_info->async_submit_draining, 0); | 1934 | atomic_set(&fs_info->async_submit_draining, 0); |
| 1932 | atomic_set(&fs_info->nr_async_bios, 0); | 1935 | atomic_set(&fs_info->nr_async_bios, 0); |
| 1933 | atomic_set(&fs_info->defrag_running, 0); | 1936 | atomic_set(&fs_info->defrag_running, 0); |
| 1937 | atomic_set(&fs_info->tree_mod_seq, 0); | ||
| 1934 | fs_info->sb = sb; | 1938 | fs_info->sb = sb; |
| 1935 | fs_info->max_inline = 8192 * 1024; | 1939 | fs_info->max_inline = 8192 * 1024; |
| 1936 | fs_info->metadata_ratio = 0; | 1940 | fs_info->metadata_ratio = 0; |
| 1937 | fs_info->defrag_inodes = RB_ROOT; | 1941 | fs_info->defrag_inodes = RB_ROOT; |
| 1938 | fs_info->trans_no_join = 0; | 1942 | fs_info->trans_no_join = 0; |
| 1939 | fs_info->free_chunk_space = 0; | 1943 | fs_info->free_chunk_space = 0; |
| 1944 | fs_info->tree_mod_log = RB_ROOT; | ||
| 1940 | 1945 | ||
| 1941 | /* readahead state */ | 1946 | /* readahead state */ |
| 1942 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | 1947 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); |
| @@ -2001,7 +2006,8 @@ int open_ctree(struct super_block *sb, | |||
| 2001 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | 2006 | BTRFS_I(fs_info->btree_inode)->root = tree_root; |
| 2002 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | 2007 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, |
| 2003 | sizeof(struct btrfs_key)); | 2008 | sizeof(struct btrfs_key)); |
| 2004 | BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; | 2009 | set_bit(BTRFS_INODE_DUMMY, |
| 2010 | &BTRFS_I(fs_info->btree_inode)->runtime_flags); | ||
| 2005 | insert_inode_hash(fs_info->btree_inode); | 2011 | insert_inode_hash(fs_info->btree_inode); |
| 2006 | 2012 | ||
| 2007 | spin_lock_init(&fs_info->block_group_cache_lock); | 2013 | spin_lock_init(&fs_info->block_group_cache_lock); |
| @@ -2353,6 +2359,13 @@ retry_root_backup: | |||
| 2353 | fs_info->generation = generation; | 2359 | fs_info->generation = generation; |
| 2354 | fs_info->last_trans_committed = generation; | 2360 | fs_info->last_trans_committed = generation; |
| 2355 | 2361 | ||
| 2362 | ret = btrfs_init_dev_stats(fs_info); | ||
| 2363 | if (ret) { | ||
| 2364 | printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", | ||
| 2365 | ret); | ||
| 2366 | goto fail_block_groups; | ||
| 2367 | } | ||
| 2368 | |||
| 2356 | ret = btrfs_init_space_info(fs_info); | 2369 | ret = btrfs_init_space_info(fs_info); |
| 2357 | if (ret) { | 2370 | if (ret) { |
| 2358 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | 2371 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); |
| @@ -2556,18 +2569,19 @@ recovery_tree_root: | |||
| 2556 | 2569 | ||
| 2557 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 2570 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
| 2558 | { | 2571 | { |
| 2559 | char b[BDEVNAME_SIZE]; | ||
| 2560 | |||
| 2561 | if (uptodate) { | 2572 | if (uptodate) { |
| 2562 | set_buffer_uptodate(bh); | 2573 | set_buffer_uptodate(bh); |
| 2563 | } else { | 2574 | } else { |
| 2575 | struct btrfs_device *device = (struct btrfs_device *) | ||
| 2576 | bh->b_private; | ||
| 2577 | |||
| 2564 | printk_ratelimited(KERN_WARNING "lost page write due to " | 2578 | printk_ratelimited(KERN_WARNING "lost page write due to " |
| 2565 | "I/O error on %s\n", | 2579 | "I/O error on %s\n", device->name); |
| 2566 | bdevname(bh->b_bdev, b)); | ||
| 2567 | /* note, we dont' set_buffer_write_io_error because we have | 2580 | /* note, we dont' set_buffer_write_io_error because we have |
| 2568 | * our own ways of dealing with the IO errors | 2581 | * our own ways of dealing with the IO errors |
| 2569 | */ | 2582 | */ |
| 2570 | clear_buffer_uptodate(bh); | 2583 | clear_buffer_uptodate(bh); |
| 2584 | btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); | ||
| 2571 | } | 2585 | } |
| 2572 | unlock_buffer(bh); | 2586 | unlock_buffer(bh); |
| 2573 | put_bh(bh); | 2587 | put_bh(bh); |
| @@ -2682,6 +2696,7 @@ static int write_dev_supers(struct btrfs_device *device, | |||
| 2682 | set_buffer_uptodate(bh); | 2696 | set_buffer_uptodate(bh); |
| 2683 | lock_buffer(bh); | 2697 | lock_buffer(bh); |
| 2684 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2698 | bh->b_end_io = btrfs_end_buffer_write_sync; |
| 2699 | bh->b_private = device; | ||
| 2685 | } | 2700 | } |
| 2686 | 2701 | ||
| 2687 | /* | 2702 | /* |
| @@ -2740,6 +2755,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
| 2740 | } | 2755 | } |
| 2741 | if (!bio_flagged(bio, BIO_UPTODATE)) { | 2756 | if (!bio_flagged(bio, BIO_UPTODATE)) { |
| 2742 | ret = -EIO; | 2757 | ret = -EIO; |
| 2758 | if (!bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
| 2759 | btrfs_dev_stat_inc_and_print(device, | ||
| 2760 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
| 2743 | } | 2761 | } |
| 2744 | 2762 | ||
| 2745 | /* drop the reference from the wait == 0 run */ | 2763 | /* drop the reference from the wait == 0 run */ |
| @@ -2753,7 +2771,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
| 2753 | * one reference for us, and we leave it for the | 2771 | * one reference for us, and we leave it for the |
| 2754 | * caller | 2772 | * caller |
| 2755 | */ | 2773 | */ |
| 2756 | device->flush_bio = NULL;; | 2774 | device->flush_bio = NULL; |
| 2757 | bio = bio_alloc(GFP_NOFS, 0); | 2775 | bio = bio_alloc(GFP_NOFS, 0); |
| 2758 | if (!bio) | 2776 | if (!bio) |
| 2759 | return -ENOMEM; | 2777 | return -ENOMEM; |
| @@ -2902,19 +2920,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
| 2902 | return ret; | 2920 | return ret; |
| 2903 | } | 2921 | } |
| 2904 | 2922 | ||
| 2905 | /* Kill all outstanding I/O */ | ||
| 2906 | void btrfs_abort_devices(struct btrfs_root *root) | ||
| 2907 | { | ||
| 2908 | struct list_head *head; | ||
| 2909 | struct btrfs_device *dev; | ||
| 2910 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 2911 | head = &root->fs_info->fs_devices->devices; | ||
| 2912 | list_for_each_entry_rcu(dev, head, dev_list) { | ||
| 2913 | blk_abort_queue(dev->bdev->bd_disk->queue); | ||
| 2914 | } | ||
| 2915 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 2916 | } | ||
| 2917 | |||
| 2918 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2923 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
| 2919 | { | 2924 | { |
| 2920 | spin_lock(&fs_info->fs_roots_radix_lock); | 2925 | spin_lock(&fs_info->fs_roots_radix_lock); |
| @@ -3671,17 +3676,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
| 3671 | return 0; | 3676 | return 0; |
| 3672 | } | 3677 | } |
| 3673 | 3678 | ||
| 3674 | static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page, | ||
| 3675 | u64 start, u64 end, | ||
| 3676 | struct extent_state *state) | ||
| 3677 | { | ||
| 3678 | struct super_block *sb = page->mapping->host->i_sb; | ||
| 3679 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
| 3680 | btrfs_error(fs_info, -EIO, | ||
| 3681 | "Error occured while writing out btree at %llu", start); | ||
| 3682 | return -EIO; | ||
| 3683 | } | ||
| 3684 | |||
| 3685 | static struct extent_io_ops btree_extent_io_ops = { | 3679 | static struct extent_io_ops btree_extent_io_ops = { |
| 3686 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3680 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
| 3687 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3681 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
| @@ -3689,5 +3683,4 @@ static struct extent_io_ops btree_extent_io_ops = { | |||
| 3689 | .submit_bio_hook = btree_submit_bio_hook, | 3683 | .submit_bio_hook = btree_submit_bio_hook, |
| 3690 | /* note we're sharing with inode.c for the merge bio hook */ | 3684 | /* note we're sharing with inode.c for the merge bio hook */ |
| 3691 | .merge_bio_hook = btrfs_merge_bio_hook, | 3685 | .merge_bio_hook = btrfs_merge_bio_hook, |
| 3692 | .writepage_io_failed_hook = btree_writepage_io_failed_hook, | ||
| 3693 | }; | 3686 | }; |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index ab1830aaf0ed..05b3fab39f7e 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -89,7 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
| 89 | int btrfs_cleanup_transaction(struct btrfs_root *root); | 89 | int btrfs_cleanup_transaction(struct btrfs_root *root); |
| 90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, | 90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, |
| 91 | struct btrfs_root *root); | 91 | struct btrfs_root *root); |
| 92 | void btrfs_abort_devices(struct btrfs_root *root); | ||
| 93 | 92 | ||
| 94 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 95 | void btrfs_init_lockdep(void); | 94 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index e887ee62b6d4..614f34a899c2 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
| @@ -13,15 +13,14 @@ | |||
| 13 | parent_root_objectid) / 4) | 13 | parent_root_objectid) / 4) |
| 14 | #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) | 14 | #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) |
| 15 | 15 | ||
| 16 | static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | 16 | static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, |
| 17 | int connectable) | 17 | struct inode *parent) |
| 18 | { | 18 | { |
| 19 | struct btrfs_fid *fid = (struct btrfs_fid *)fh; | 19 | struct btrfs_fid *fid = (struct btrfs_fid *)fh; |
| 20 | struct inode *inode = dentry->d_inode; | ||
| 21 | int len = *max_len; | 20 | int len = *max_len; |
| 22 | int type; | 21 | int type; |
| 23 | 22 | ||
| 24 | if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { | 23 | if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) { |
| 25 | *max_len = BTRFS_FID_SIZE_CONNECTABLE; | 24 | *max_len = BTRFS_FID_SIZE_CONNECTABLE; |
| 26 | return 255; | 25 | return 255; |
| 27 | } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { | 26 | } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { |
| @@ -36,19 +35,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
| 36 | fid->root_objectid = BTRFS_I(inode)->root->objectid; | 35 | fid->root_objectid = BTRFS_I(inode)->root->objectid; |
| 37 | fid->gen = inode->i_generation; | 36 | fid->gen = inode->i_generation; |
| 38 | 37 | ||
| 39 | if (connectable && !S_ISDIR(inode->i_mode)) { | 38 | if (parent) { |
| 40 | struct inode *parent; | ||
| 41 | u64 parent_root_id; | 39 | u64 parent_root_id; |
| 42 | 40 | ||
| 43 | spin_lock(&dentry->d_lock); | ||
| 44 | |||
| 45 | parent = dentry->d_parent->d_inode; | ||
| 46 | fid->parent_objectid = BTRFS_I(parent)->location.objectid; | 41 | fid->parent_objectid = BTRFS_I(parent)->location.objectid; |
| 47 | fid->parent_gen = parent->i_generation; | 42 | fid->parent_gen = parent->i_generation; |
| 48 | parent_root_id = BTRFS_I(parent)->root->objectid; | 43 | parent_root_id = BTRFS_I(parent)->root->objectid; |
| 49 | 44 | ||
| 50 | spin_unlock(&dentry->d_lock); | ||
| 51 | |||
| 52 | if (parent_root_id != fid->root_objectid) { | 45 | if (parent_root_id != fid->root_objectid) { |
| 53 | fid->parent_root_objectid = parent_root_id; | 46 | fid->parent_root_objectid = parent_root_id; |
| 54 | len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; | 47 | len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 49fd7b66d57b..4b5a1e1bdefb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -3578,7 +3578,7 @@ again: | |||
| 3578 | space_info->chunk_alloc = 0; | 3578 | space_info->chunk_alloc = 0; |
| 3579 | spin_unlock(&space_info->lock); | 3579 | spin_unlock(&space_info->lock); |
| 3580 | out: | 3580 | out: |
| 3581 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3581 | mutex_unlock(&fs_info->chunk_mutex); |
| 3582 | return ret; | 3582 | return ret; |
| 3583 | } | 3583 | } |
| 3584 | 3584 | ||
| @@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode) | |||
| 4355 | BTRFS_I(inode)->outstanding_extents--; | 4355 | BTRFS_I(inode)->outstanding_extents--; |
| 4356 | 4356 | ||
| 4357 | if (BTRFS_I(inode)->outstanding_extents == 0 && | 4357 | if (BTRFS_I(inode)->outstanding_extents == 0 && |
| 4358 | BTRFS_I(inode)->delalloc_meta_reserved) { | 4358 | test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
| 4359 | &BTRFS_I(inode)->runtime_flags)) | ||
| 4359 | drop_inode_space = 1; | 4360 | drop_inode_space = 1; |
| 4360 | BTRFS_I(inode)->delalloc_meta_reserved = 0; | ||
| 4361 | } | ||
| 4362 | 4361 | ||
| 4363 | /* | 4362 | /* |
| 4364 | * If we have more or the same amount of outsanding extents than we have | 4363 | * If we have more or the same amount of outsanding extents than we have |
| @@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4465 | * Add an item to reserve for updating the inode when we complete the | 4464 | * Add an item to reserve for updating the inode when we complete the |
| 4466 | * delalloc io. | 4465 | * delalloc io. |
| 4467 | */ | 4466 | */ |
| 4468 | if (!BTRFS_I(inode)->delalloc_meta_reserved) { | 4467 | if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
| 4468 | &BTRFS_I(inode)->runtime_flags)) { | ||
| 4469 | nr_extents++; | 4469 | nr_extents++; |
| 4470 | extra_reserve = 1; | 4470 | extra_reserve = 1; |
| 4471 | } | 4471 | } |
| @@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4511 | 4511 | ||
| 4512 | spin_lock(&BTRFS_I(inode)->lock); | 4512 | spin_lock(&BTRFS_I(inode)->lock); |
| 4513 | if (extra_reserve) { | 4513 | if (extra_reserve) { |
| 4514 | BTRFS_I(inode)->delalloc_meta_reserved = 1; | 4514 | set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
| 4515 | &BTRFS_I(inode)->runtime_flags); | ||
| 4515 | nr_extents--; | 4516 | nr_extents--; |
| 4516 | } | 4517 | } |
| 4517 | BTRFS_I(inode)->reserved_extents += nr_extents; | 4518 | BTRFS_I(inode)->reserved_extents += nr_extents; |
| @@ -5217,7 +5218,7 @@ out: | |||
| 5217 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 5218 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
| 5218 | struct btrfs_root *root, | 5219 | struct btrfs_root *root, |
| 5219 | struct extent_buffer *buf, | 5220 | struct extent_buffer *buf, |
| 5220 | u64 parent, int last_ref, int for_cow) | 5221 | u64 parent, int last_ref) |
| 5221 | { | 5222 | { |
| 5222 | struct btrfs_block_group_cache *cache = NULL; | 5223 | struct btrfs_block_group_cache *cache = NULL; |
| 5223 | int ret; | 5224 | int ret; |
| @@ -5227,7 +5228,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
| 5227 | buf->start, buf->len, | 5228 | buf->start, buf->len, |
| 5228 | parent, root->root_key.objectid, | 5229 | parent, root->root_key.objectid, |
| 5229 | btrfs_header_level(buf), | 5230 | btrfs_header_level(buf), |
| 5230 | BTRFS_DROP_DELAYED_REF, NULL, for_cow); | 5231 | BTRFS_DROP_DELAYED_REF, NULL, 0); |
| 5231 | BUG_ON(ret); /* -ENOMEM */ | 5232 | BUG_ON(ret); /* -ENOMEM */ |
| 5232 | } | 5233 | } |
| 5233 | 5234 | ||
| @@ -6249,7 +6250,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 6249 | struct btrfs_root *root, u32 blocksize, | 6250 | struct btrfs_root *root, u32 blocksize, |
| 6250 | u64 parent, u64 root_objectid, | 6251 | u64 parent, u64 root_objectid, |
| 6251 | struct btrfs_disk_key *key, int level, | 6252 | struct btrfs_disk_key *key, int level, |
| 6252 | u64 hint, u64 empty_size, int for_cow) | 6253 | u64 hint, u64 empty_size) |
| 6253 | { | 6254 | { |
| 6254 | struct btrfs_key ins; | 6255 | struct btrfs_key ins; |
| 6255 | struct btrfs_block_rsv *block_rsv; | 6256 | struct btrfs_block_rsv *block_rsv; |
| @@ -6297,7 +6298,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 6297 | ins.objectid, | 6298 | ins.objectid, |
| 6298 | ins.offset, parent, root_objectid, | 6299 | ins.offset, parent, root_objectid, |
| 6299 | level, BTRFS_ADD_DELAYED_EXTENT, | 6300 | level, BTRFS_ADD_DELAYED_EXTENT, |
| 6300 | extent_op, for_cow); | 6301 | extent_op, 0); |
| 6301 | BUG_ON(ret); /* -ENOMEM */ | 6302 | BUG_ON(ret); /* -ENOMEM */ |
| 6302 | } | 6303 | } |
| 6303 | return buf; | 6304 | return buf; |
| @@ -6715,7 +6716,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 6715 | btrfs_header_owner(path->nodes[level + 1])); | 6716 | btrfs_header_owner(path->nodes[level + 1])); |
| 6716 | } | 6717 | } |
| 6717 | 6718 | ||
| 6718 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0); | 6719 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
| 6719 | out: | 6720 | out: |
| 6720 | wc->refs[level] = 0; | 6721 | wc->refs[level] = 0; |
| 6721 | wc->flags[level] = 0; | 6722 | wc->flags[level] = 0; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c9018a05036e..2c8f7b204617 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
| 186 | return parent; | 186 | return parent; |
| 187 | } | 187 | } |
| 188 | 188 | ||
| 189 | entry = rb_entry(node, struct tree_entry, rb_node); | ||
| 190 | rb_link_node(node, parent, p); | 189 | rb_link_node(node, parent, p); |
| 191 | rb_insert_color(node, root); | 190 | rb_insert_color(node, root); |
| 192 | return NULL; | 191 | return NULL; |
| @@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state) | |||
| 413 | 412 | ||
| 414 | /* | 413 | /* |
| 415 | * utility function to clear some bits in an extent state struct. | 414 | * utility function to clear some bits in an extent state struct. |
| 416 | * it will optionally wake up any one waiting on this state (wake == 1) | 415 | * it will optionally wake up any one waiting on this state (wake == 1). |
| 417 | * | 416 | * |
| 418 | * If no bits are set on the state struct after clearing things, the | 417 | * If no bits are set on the state struct after clearing things, the |
| 419 | * struct is freed and removed from the tree | 418 | * struct is freed and removed from the tree |
| @@ -570,10 +569,8 @@ hit_next: | |||
| 570 | if (err) | 569 | if (err) |
| 571 | goto out; | 570 | goto out; |
| 572 | if (state->end <= end) { | 571 | if (state->end <= end) { |
| 573 | clear_state_bit(tree, state, &bits, wake); | 572 | state = clear_state_bit(tree, state, &bits, wake); |
| 574 | if (last_end == (u64)-1) | 573 | goto next; |
| 575 | goto out; | ||
| 576 | start = last_end + 1; | ||
| 577 | } | 574 | } |
| 578 | goto search_again; | 575 | goto search_again; |
| 579 | } | 576 | } |
| @@ -781,7 +778,6 @@ hit_next: | |||
| 781 | * Just lock what we found and keep going | 778 | * Just lock what we found and keep going |
| 782 | */ | 779 | */ |
| 783 | if (state->start == start && state->end <= end) { | 780 | if (state->start == start && state->end <= end) { |
| 784 | struct rb_node *next_node; | ||
| 785 | if (state->state & exclusive_bits) { | 781 | if (state->state & exclusive_bits) { |
| 786 | *failed_start = state->start; | 782 | *failed_start = state->start; |
| 787 | err = -EEXIST; | 783 | err = -EEXIST; |
| @@ -789,20 +785,15 @@ hit_next: | |||
| 789 | } | 785 | } |
| 790 | 786 | ||
| 791 | set_state_bits(tree, state, &bits); | 787 | set_state_bits(tree, state, &bits); |
| 792 | |||
| 793 | cache_state(state, cached_state); | 788 | cache_state(state, cached_state); |
| 794 | merge_state(tree, state); | 789 | merge_state(tree, state); |
| 795 | if (last_end == (u64)-1) | 790 | if (last_end == (u64)-1) |
| 796 | goto out; | 791 | goto out; |
| 797 | |||
| 798 | start = last_end + 1; | 792 | start = last_end + 1; |
| 799 | next_node = rb_next(&state->rb_node); | 793 | state = next_state(state); |
| 800 | if (next_node && start < end && prealloc && !need_resched()) { | 794 | if (start < end && state && state->start == start && |
| 801 | state = rb_entry(next_node, struct extent_state, | 795 | !need_resched()) |
| 802 | rb_node); | 796 | goto hit_next; |
| 803 | if (state->start == start) | ||
| 804 | goto hit_next; | ||
| 805 | } | ||
| 806 | goto search_again; | 797 | goto search_again; |
| 807 | } | 798 | } |
| 808 | 799 | ||
| @@ -845,6 +836,10 @@ hit_next: | |||
| 845 | if (last_end == (u64)-1) | 836 | if (last_end == (u64)-1) |
| 846 | goto out; | 837 | goto out; |
| 847 | start = last_end + 1; | 838 | start = last_end + 1; |
| 839 | state = next_state(state); | ||
| 840 | if (start < end && state && state->start == start && | ||
| 841 | !need_resched()) | ||
| 842 | goto hit_next; | ||
| 848 | } | 843 | } |
| 849 | goto search_again; | 844 | goto search_again; |
| 850 | } | 845 | } |
| @@ -994,21 +989,14 @@ hit_next: | |||
| 994 | * Just lock what we found and keep going | 989 | * Just lock what we found and keep going |
| 995 | */ | 990 | */ |
| 996 | if (state->start == start && state->end <= end) { | 991 | if (state->start == start && state->end <= end) { |
| 997 | struct rb_node *next_node; | ||
| 998 | |||
| 999 | set_state_bits(tree, state, &bits); | 992 | set_state_bits(tree, state, &bits); |
| 1000 | clear_state_bit(tree, state, &clear_bits, 0); | 993 | state = clear_state_bit(tree, state, &clear_bits, 0); |
| 1001 | if (last_end == (u64)-1) | 994 | if (last_end == (u64)-1) |
| 1002 | goto out; | 995 | goto out; |
| 1003 | |||
| 1004 | start = last_end + 1; | 996 | start = last_end + 1; |
| 1005 | next_node = rb_next(&state->rb_node); | 997 | if (start < end && state && state->start == start && |
| 1006 | if (next_node && start < end && prealloc && !need_resched()) { | 998 | !need_resched()) |
| 1007 | state = rb_entry(next_node, struct extent_state, | 999 | goto hit_next; |
| 1008 | rb_node); | ||
| 1009 | if (state->start == start) | ||
| 1010 | goto hit_next; | ||
| 1011 | } | ||
| 1012 | goto search_again; | 1000 | goto search_again; |
| 1013 | } | 1001 | } |
| 1014 | 1002 | ||
| @@ -1042,10 +1030,13 @@ hit_next: | |||
| 1042 | goto out; | 1030 | goto out; |
| 1043 | if (state->end <= end) { | 1031 | if (state->end <= end) { |
| 1044 | set_state_bits(tree, state, &bits); | 1032 | set_state_bits(tree, state, &bits); |
| 1045 | clear_state_bit(tree, state, &clear_bits, 0); | 1033 | state = clear_state_bit(tree, state, &clear_bits, 0); |
| 1046 | if (last_end == (u64)-1) | 1034 | if (last_end == (u64)-1) |
| 1047 | goto out; | 1035 | goto out; |
| 1048 | start = last_end + 1; | 1036 | start = last_end + 1; |
| 1037 | if (start < end && state && state->start == start && | ||
| 1038 | !need_resched()) | ||
| 1039 | goto hit_next; | ||
| 1049 | } | 1040 | } |
| 1050 | goto search_again; | 1041 | goto search_again; |
| 1051 | } | 1042 | } |
| @@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 1173 | cached_state, mask); | 1164 | cached_state, mask); |
| 1174 | } | 1165 | } |
| 1175 | 1166 | ||
| 1176 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 1167 | int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 1177 | u64 end, struct extent_state **cached_state, | 1168 | struct extent_state **cached_state, gfp_t mask) |
| 1178 | gfp_t mask) | ||
| 1179 | { | 1169 | { |
| 1180 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, | 1170 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
| 1181 | cached_state, mask); | 1171 | cached_state, mask); |
| @@ -1293,7 +1283,7 @@ out: | |||
| 1293 | * returned if we find something, and *start_ret and *end_ret are | 1283 | * returned if we find something, and *start_ret and *end_ret are |
| 1294 | * set to reflect the state struct that was found. | 1284 | * set to reflect the state struct that was found. |
| 1295 | * | 1285 | * |
| 1296 | * If nothing was found, 1 is returned, < 0 on error | 1286 | * If nothing was found, 1 is returned. If found something, return 0. |
| 1297 | */ | 1287 | */ |
| 1298 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 1288 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
| 1299 | u64 *start_ret, u64 *end_ret, int bits) | 1289 | u64 *start_ret, u64 *end_ret, int bits) |
| @@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
| 1923 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 1913 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
| 1924 | /* try to remap that extent elsewhere? */ | 1914 | /* try to remap that extent elsewhere? */ |
| 1925 | bio_put(bio); | 1915 | bio_put(bio); |
| 1916 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); | ||
| 1926 | return -EIO; | 1917 | return -EIO; |
| 1927 | } | 1918 | } |
| 1928 | 1919 | ||
| @@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) | |||
| 2222 | uptodate = 0; | 2213 | uptodate = 0; |
| 2223 | } | 2214 | } |
| 2224 | 2215 | ||
| 2225 | if (!uptodate && tree->ops && | ||
| 2226 | tree->ops->writepage_io_failed_hook) { | ||
| 2227 | ret = tree->ops->writepage_io_failed_hook(NULL, page, | ||
| 2228 | start, end, NULL); | ||
| 2229 | /* Writeback already completed */ | ||
| 2230 | if (ret == 0) | ||
| 2231 | return 1; | ||
| 2232 | } | ||
| 2233 | |||
| 2234 | if (!uptodate) { | 2216 | if (!uptodate) { |
| 2235 | clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); | ||
| 2236 | ClearPageUptodate(page); | 2217 | ClearPageUptodate(page); |
| 2237 | SetPageError(page); | 2218 | SetPageError(page); |
| 2238 | } | 2219 | } |
| @@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
| 2347 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 2328 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
| 2348 | ret = tree->ops->readpage_end_io_hook(page, start, end, | 2329 | ret = tree->ops->readpage_end_io_hook(page, start, end, |
| 2349 | state, mirror); | 2330 | state, mirror); |
| 2350 | if (ret) | 2331 | if (ret) { |
| 2332 | /* no IO indicated but software detected errors | ||
| 2333 | * in the block, either checksum errors or | ||
| 2334 | * issues with the contents */ | ||
| 2335 | struct btrfs_root *root = | ||
| 2336 | BTRFS_I(page->mapping->host)->root; | ||
| 2337 | struct btrfs_device *device; | ||
| 2338 | |||
| 2351 | uptodate = 0; | 2339 | uptodate = 0; |
| 2352 | else | 2340 | device = btrfs_find_device_for_logical( |
| 2341 | root, start, mirror); | ||
| 2342 | if (device) | ||
| 2343 | btrfs_dev_stat_inc_and_print(device, | ||
| 2344 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
| 2345 | } else { | ||
| 2353 | clean_io_failure(start, page); | 2346 | clean_io_failure(start, page); |
| 2347 | } | ||
| 2354 | } | 2348 | } |
| 2355 | 2349 | ||
| 2356 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { | 2350 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { |
| @@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb, | |||
| 3164 | u64 offset = eb->start; | 3158 | u64 offset = eb->start; |
| 3165 | unsigned long i, num_pages; | 3159 | unsigned long i, num_pages; |
| 3166 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); | 3160 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); |
| 3167 | int ret; | 3161 | int ret = 0; |
| 3168 | 3162 | ||
| 3169 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3163 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
| 3170 | num_pages = num_extent_pages(eb->start, eb->len); | 3164 | num_pages = num_extent_pages(eb->start, eb->len); |
| @@ -3930,6 +3924,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
| 3930 | eb->start = start; | 3924 | eb->start = start; |
| 3931 | eb->len = len; | 3925 | eb->len = len; |
| 3932 | eb->tree = tree; | 3926 | eb->tree = tree; |
| 3927 | eb->bflags = 0; | ||
| 3933 | rwlock_init(&eb->lock); | 3928 | rwlock_init(&eb->lock); |
| 3934 | atomic_set(&eb->write_locks, 0); | 3929 | atomic_set(&eb->write_locks, 0); |
| 3935 | atomic_set(&eb->read_locks, 0); | 3930 | atomic_set(&eb->read_locks, 0); |
| @@ -3967,6 +3962,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
| 3967 | return eb; | 3962 | return eb; |
| 3968 | } | 3963 | } |
| 3969 | 3964 | ||
| 3965 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) | ||
| 3966 | { | ||
| 3967 | unsigned long i; | ||
| 3968 | struct page *p; | ||
| 3969 | struct extent_buffer *new; | ||
| 3970 | unsigned long num_pages = num_extent_pages(src->start, src->len); | ||
| 3971 | |||
| 3972 | new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); | ||
| 3973 | if (new == NULL) | ||
| 3974 | return NULL; | ||
| 3975 | |||
| 3976 | for (i = 0; i < num_pages; i++) { | ||
| 3977 | p = alloc_page(GFP_ATOMIC); | ||
| 3978 | BUG_ON(!p); | ||
| 3979 | attach_extent_buffer_page(new, p); | ||
| 3980 | WARN_ON(PageDirty(p)); | ||
| 3981 | SetPageUptodate(p); | ||
| 3982 | new->pages[i] = p; | ||
| 3983 | } | ||
| 3984 | |||
| 3985 | copy_extent_buffer(new, src, 0, 0, src->len); | ||
| 3986 | set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); | ||
| 3987 | set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); | ||
| 3988 | |||
| 3989 | return new; | ||
| 3990 | } | ||
| 3991 | |||
| 3992 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) | ||
| 3993 | { | ||
| 3994 | struct extent_buffer *eb; | ||
| 3995 | unsigned long num_pages = num_extent_pages(0, len); | ||
| 3996 | unsigned long i; | ||
| 3997 | |||
| 3998 | eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); | ||
| 3999 | if (!eb) | ||
| 4000 | return NULL; | ||
| 4001 | |||
| 4002 | for (i = 0; i < num_pages; i++) { | ||
| 4003 | eb->pages[i] = alloc_page(GFP_ATOMIC); | ||
| 4004 | if (!eb->pages[i]) | ||
| 4005 | goto err; | ||
| 4006 | } | ||
| 4007 | set_extent_buffer_uptodate(eb); | ||
| 4008 | btrfs_set_header_nritems(eb, 0); | ||
| 4009 | set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | ||
| 4010 | |||
| 4011 | return eb; | ||
| 4012 | err: | ||
| 4013 | for (i--; i > 0; i--) | ||
| 4014 | __free_page(eb->pages[i]); | ||
| 4015 | __free_extent_buffer(eb); | ||
| 4016 | return NULL; | ||
| 4017 | } | ||
| 4018 | |||
| 3970 | static int extent_buffer_under_io(struct extent_buffer *eb) | 4019 | static int extent_buffer_under_io(struct extent_buffer *eb) |
| 3971 | { | 4020 | { |
| 3972 | return (atomic_read(&eb->io_pages) || | 4021 | return (atomic_read(&eb->io_pages) || |
| @@ -3981,18 +4030,21 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
| 3981 | unsigned long start_idx) | 4030 | unsigned long start_idx) |
| 3982 | { | 4031 | { |
| 3983 | unsigned long index; | 4032 | unsigned long index; |
| 4033 | unsigned long num_pages; | ||
| 3984 | struct page *page; | 4034 | struct page *page; |
| 4035 | int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | ||
| 3985 | 4036 | ||
| 3986 | BUG_ON(extent_buffer_under_io(eb)); | 4037 | BUG_ON(extent_buffer_under_io(eb)); |
| 3987 | 4038 | ||
| 3988 | index = num_extent_pages(eb->start, eb->len); | 4039 | num_pages = num_extent_pages(eb->start, eb->len); |
| 4040 | index = start_idx + num_pages; | ||
| 3989 | if (start_idx >= index) | 4041 | if (start_idx >= index) |
| 3990 | return; | 4042 | return; |
| 3991 | 4043 | ||
| 3992 | do { | 4044 | do { |
| 3993 | index--; | 4045 | index--; |
| 3994 | page = extent_buffer_page(eb, index); | 4046 | page = extent_buffer_page(eb, index); |
| 3995 | if (page) { | 4047 | if (page && mapped) { |
| 3996 | spin_lock(&page->mapping->private_lock); | 4048 | spin_lock(&page->mapping->private_lock); |
| 3997 | /* | 4049 | /* |
| 3998 | * We do this since we'll remove the pages after we've | 4050 | * We do this since we'll remove the pages after we've |
| @@ -4017,6 +4069,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
| 4017 | } | 4069 | } |
| 4018 | spin_unlock(&page->mapping->private_lock); | 4070 | spin_unlock(&page->mapping->private_lock); |
| 4019 | 4071 | ||
| 4072 | } | ||
| 4073 | if (page) { | ||
| 4020 | /* One for when we alloced the page */ | 4074 | /* One for when we alloced the page */ |
| 4021 | page_cache_release(page); | 4075 | page_cache_release(page); |
| 4022 | } | 4076 | } |
| @@ -4235,14 +4289,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | |||
| 4235 | { | 4289 | { |
| 4236 | WARN_ON(atomic_read(&eb->refs) == 0); | 4290 | WARN_ON(atomic_read(&eb->refs) == 0); |
| 4237 | if (atomic_dec_and_test(&eb->refs)) { | 4291 | if (atomic_dec_and_test(&eb->refs)) { |
| 4238 | struct extent_io_tree *tree = eb->tree; | 4292 | if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) { |
| 4293 | spin_unlock(&eb->refs_lock); | ||
| 4294 | } else { | ||
| 4295 | struct extent_io_tree *tree = eb->tree; | ||
| 4239 | 4296 | ||
| 4240 | spin_unlock(&eb->refs_lock); | 4297 | spin_unlock(&eb->refs_lock); |
| 4241 | 4298 | ||
| 4242 | spin_lock(&tree->buffer_lock); | 4299 | spin_lock(&tree->buffer_lock); |
| 4243 | radix_tree_delete(&tree->buffer, | 4300 | radix_tree_delete(&tree->buffer, |
| 4244 | eb->start >> PAGE_CACHE_SHIFT); | 4301 | eb->start >> PAGE_CACHE_SHIFT); |
| 4245 | spin_unlock(&tree->buffer_lock); | 4302 | spin_unlock(&tree->buffer_lock); |
| 4303 | } | ||
| 4246 | 4304 | ||
| 4247 | /* Should be safe to release our pages at this point */ | 4305 | /* Should be safe to release our pages at this point */ |
| 4248 | btrfs_release_extent_buffer_page(eb, 0); | 4306 | btrfs_release_extent_buffer_page(eb, 0); |
| @@ -4260,6 +4318,10 @@ void free_extent_buffer(struct extent_buffer *eb) | |||
| 4260 | 4318 | ||
| 4261 | spin_lock(&eb->refs_lock); | 4319 | spin_lock(&eb->refs_lock); |
| 4262 | if (atomic_read(&eb->refs) == 2 && | 4320 | if (atomic_read(&eb->refs) == 2 && |
| 4321 | test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) | ||
| 4322 | atomic_dec(&eb->refs); | ||
| 4323 | |||
| 4324 | if (atomic_read(&eb->refs) == 2 && | ||
| 4263 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && | 4325 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && |
| 4264 | !extent_buffer_under_io(eb) && | 4326 | !extent_buffer_under_io(eb) && |
| 4265 | test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) | 4327 | test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b516c3b8dec6..25900af5b15d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #define EXTENT_BUFFER_STALE 6 | 39 | #define EXTENT_BUFFER_STALE 6 |
| 40 | #define EXTENT_BUFFER_WRITEBACK 7 | 40 | #define EXTENT_BUFFER_WRITEBACK 7 |
| 41 | #define EXTENT_BUFFER_IOERR 8 | 41 | #define EXTENT_BUFFER_IOERR 8 |
| 42 | #define EXTENT_BUFFER_DUMMY 9 | ||
| 42 | 43 | ||
| 43 | /* these are flags for extent_clear_unlock_delalloc */ | 44 | /* these are flags for extent_clear_unlock_delalloc */ |
| 44 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 45 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
| @@ -75,9 +76,6 @@ struct extent_io_ops { | |||
| 75 | unsigned long bio_flags); | 76 | unsigned long bio_flags); |
| 76 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | 77 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); |
| 77 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); | 78 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); |
| 78 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, | ||
| 79 | u64 start, u64 end, | ||
| 80 | struct extent_state *state); | ||
| 81 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, | 79 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, |
| 82 | struct extent_state *state, int mirror); | 80 | struct extent_state *state, int mirror); |
| 83 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 81 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
| @@ -225,6 +223,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 225 | struct extent_state **cached_state, gfp_t mask); | 223 | struct extent_state **cached_state, gfp_t mask); |
| 226 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 224 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 227 | struct extent_state **cached_state, gfp_t mask); | 225 | struct extent_state **cached_state, gfp_t mask); |
| 226 | int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 227 | struct extent_state **cached_state, gfp_t mask); | ||
| 228 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 228 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| 229 | gfp_t mask); | 229 | gfp_t mask); |
| 230 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 230 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| @@ -265,6 +265,8 @@ void set_page_extent_mapped(struct page *page); | |||
| 265 | 265 | ||
| 266 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | 266 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, |
| 267 | u64 start, unsigned long len); | 267 | u64 start, unsigned long len); |
| 268 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); | ||
| 269 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); | ||
| 268 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | 270 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, |
| 269 | u64 start, unsigned long len); | 271 | u64 start, unsigned long len); |
| 270 | void free_extent_buffer(struct extent_buffer *eb); | 272 | void free_extent_buffer(struct extent_buffer *eb); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 53bf2d764bbc..70dc8ca73e25 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -65,6 +65,21 @@ struct inode_defrag { | |||
| 65 | int cycled; | 65 | int cycled; |
| 66 | }; | 66 | }; |
| 67 | 67 | ||
| 68 | static int __compare_inode_defrag(struct inode_defrag *defrag1, | ||
| 69 | struct inode_defrag *defrag2) | ||
| 70 | { | ||
| 71 | if (defrag1->root > defrag2->root) | ||
| 72 | return 1; | ||
| 73 | else if (defrag1->root < defrag2->root) | ||
| 74 | return -1; | ||
| 75 | else if (defrag1->ino > defrag2->ino) | ||
| 76 | return 1; | ||
| 77 | else if (defrag1->ino < defrag2->ino) | ||
| 78 | return -1; | ||
| 79 | else | ||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | |||
| 68 | /* pop a record for an inode into the defrag tree. The lock | 83 | /* pop a record for an inode into the defrag tree. The lock |
| 69 | * must be held already | 84 | * must be held already |
| 70 | * | 85 | * |
| @@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode, | |||
| 81 | struct inode_defrag *entry; | 96 | struct inode_defrag *entry; |
| 82 | struct rb_node **p; | 97 | struct rb_node **p; |
| 83 | struct rb_node *parent = NULL; | 98 | struct rb_node *parent = NULL; |
| 99 | int ret; | ||
| 84 | 100 | ||
| 85 | p = &root->fs_info->defrag_inodes.rb_node; | 101 | p = &root->fs_info->defrag_inodes.rb_node; |
| 86 | while (*p) { | 102 | while (*p) { |
| 87 | parent = *p; | 103 | parent = *p; |
| 88 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 104 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
| 89 | 105 | ||
| 90 | if (defrag->ino < entry->ino) | 106 | ret = __compare_inode_defrag(defrag, entry); |
| 107 | if (ret < 0) | ||
| 91 | p = &parent->rb_left; | 108 | p = &parent->rb_left; |
| 92 | else if (defrag->ino > entry->ino) | 109 | else if (ret > 0) |
| 93 | p = &parent->rb_right; | 110 | p = &parent->rb_right; |
| 94 | else { | 111 | else { |
| 95 | /* if we're reinserting an entry for | 112 | /* if we're reinserting an entry for |
| @@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode, | |||
| 103 | goto exists; | 120 | goto exists; |
| 104 | } | 121 | } |
| 105 | } | 122 | } |
| 106 | BTRFS_I(inode)->in_defrag = 1; | 123 | set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
| 107 | rb_link_node(&defrag->rb_node, parent, p); | 124 | rb_link_node(&defrag->rb_node, parent, p); |
| 108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | 125 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); |
| 109 | return; | 126 | return; |
| @@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 131 | if (btrfs_fs_closing(root->fs_info)) | 148 | if (btrfs_fs_closing(root->fs_info)) |
| 132 | return 0; | 149 | return 0; |
| 133 | 150 | ||
| 134 | if (BTRFS_I(inode)->in_defrag) | 151 | if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) |
| 135 | return 0; | 152 | return 0; |
| 136 | 153 | ||
| 137 | if (trans) | 154 | if (trans) |
| @@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 148 | defrag->root = root->root_key.objectid; | 165 | defrag->root = root->root_key.objectid; |
| 149 | 166 | ||
| 150 | spin_lock(&root->fs_info->defrag_inodes_lock); | 167 | spin_lock(&root->fs_info->defrag_inodes_lock); |
| 151 | if (!BTRFS_I(inode)->in_defrag) | 168 | if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) |
| 152 | __btrfs_add_inode_defrag(inode, defrag); | 169 | __btrfs_add_inode_defrag(inode, defrag); |
| 153 | else | 170 | else |
| 154 | kfree(defrag); | 171 | kfree(defrag); |
| @@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 159 | /* | 176 | /* |
| 160 | * must be called with the defrag_inodes lock held | 177 | * must be called with the defrag_inodes lock held |
| 161 | */ | 178 | */ |
| 162 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, | 179 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, |
| 180 | u64 root, u64 ino, | ||
| 163 | struct rb_node **next) | 181 | struct rb_node **next) |
| 164 | { | 182 | { |
| 165 | struct inode_defrag *entry = NULL; | 183 | struct inode_defrag *entry = NULL; |
| 184 | struct inode_defrag tmp; | ||
| 166 | struct rb_node *p; | 185 | struct rb_node *p; |
| 167 | struct rb_node *parent = NULL; | 186 | struct rb_node *parent = NULL; |
| 187 | int ret; | ||
| 188 | |||
| 189 | tmp.ino = ino; | ||
| 190 | tmp.root = root; | ||
| 168 | 191 | ||
| 169 | p = info->defrag_inodes.rb_node; | 192 | p = info->defrag_inodes.rb_node; |
| 170 | while (p) { | 193 | while (p) { |
| 171 | parent = p; | 194 | parent = p; |
| 172 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 195 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
| 173 | 196 | ||
| 174 | if (ino < entry->ino) | 197 | ret = __compare_inode_defrag(&tmp, entry); |
| 198 | if (ret < 0) | ||
| 175 | p = parent->rb_left; | 199 | p = parent->rb_left; |
| 176 | else if (ino > entry->ino) | 200 | else if (ret > 0) |
| 177 | p = parent->rb_right; | 201 | p = parent->rb_right; |
| 178 | else | 202 | else |
| 179 | return entry; | 203 | return entry; |
| 180 | } | 204 | } |
| 181 | 205 | ||
| 182 | if (next) { | 206 | if (next) { |
| 183 | while (parent && ino > entry->ino) { | 207 | while (parent && __compare_inode_defrag(&tmp, entry) > 0) { |
| 184 | parent = rb_next(parent); | 208 | parent = rb_next(parent); |
| 185 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 209 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
| 186 | } | 210 | } |
| @@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
| 202 | struct btrfs_key key; | 226 | struct btrfs_key key; |
| 203 | struct btrfs_ioctl_defrag_range_args range; | 227 | struct btrfs_ioctl_defrag_range_args range; |
| 204 | u64 first_ino = 0; | 228 | u64 first_ino = 0; |
| 229 | u64 root_objectid = 0; | ||
| 205 | int num_defrag; | 230 | int num_defrag; |
| 206 | int defrag_batch = 1024; | 231 | int defrag_batch = 1024; |
| 207 | 232 | ||
| @@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
| 214 | n = NULL; | 239 | n = NULL; |
| 215 | 240 | ||
| 216 | /* find an inode to defrag */ | 241 | /* find an inode to defrag */ |
| 217 | defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); | 242 | defrag = btrfs_find_defrag_inode(fs_info, root_objectid, |
| 243 | first_ino, &n); | ||
| 218 | if (!defrag) { | 244 | if (!defrag) { |
| 219 | if (n) | 245 | if (n) { |
| 220 | defrag = rb_entry(n, struct inode_defrag, rb_node); | 246 | defrag = rb_entry(n, struct inode_defrag, |
| 221 | else if (first_ino) { | 247 | rb_node); |
| 248 | } else if (root_objectid || first_ino) { | ||
| 249 | root_objectid = 0; | ||
| 222 | first_ino = 0; | 250 | first_ino = 0; |
| 223 | continue; | 251 | continue; |
| 224 | } else { | 252 | } else { |
| @@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
| 228 | 256 | ||
| 229 | /* remove it from the rbtree */ | 257 | /* remove it from the rbtree */ |
| 230 | first_ino = defrag->ino + 1; | 258 | first_ino = defrag->ino + 1; |
| 259 | root_objectid = defrag->root; | ||
| 231 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | 260 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); |
| 232 | 261 | ||
| 233 | if (btrfs_fs_closing(fs_info)) | 262 | if (btrfs_fs_closing(fs_info)) |
| @@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
| 252 | goto next; | 281 | goto next; |
| 253 | 282 | ||
| 254 | /* do a chunk of defrag */ | 283 | /* do a chunk of defrag */ |
| 255 | BTRFS_I(inode)->in_defrag = 0; | 284 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
| 256 | range.start = defrag->last_offset; | 285 | range.start = defrag->last_offset; |
| 257 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | 286 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, |
| 258 | defrag_batch); | 287 | defrag_batch); |
| @@ -1404,12 +1433,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1404 | goto out; | 1433 | goto out; |
| 1405 | } | 1434 | } |
| 1406 | 1435 | ||
| 1407 | err = btrfs_update_time(file); | 1436 | err = file_update_time(file); |
| 1408 | if (err) { | 1437 | if (err) { |
| 1409 | mutex_unlock(&inode->i_mutex); | 1438 | mutex_unlock(&inode->i_mutex); |
| 1410 | goto out; | 1439 | goto out; |
| 1411 | } | 1440 | } |
| 1412 | BTRFS_I(inode)->sequence++; | ||
| 1413 | 1441 | ||
| 1414 | start_pos = round_down(pos, root->sectorsize); | 1442 | start_pos = round_down(pos, root->sectorsize); |
| 1415 | if (start_pos > i_size_read(inode)) { | 1443 | if (start_pos > i_size_read(inode)) { |
| @@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
| 1466 | * flush down new bytes that may have been written if the | 1494 | * flush down new bytes that may have been written if the |
| 1467 | * application were using truncate to replace a file in place. | 1495 | * application were using truncate to replace a file in place. |
| 1468 | */ | 1496 | */ |
| 1469 | if (BTRFS_I(inode)->ordered_data_close) { | 1497 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
| 1470 | BTRFS_I(inode)->ordered_data_close = 0; | 1498 | &BTRFS_I(inode)->runtime_flags)) { |
| 1471 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | 1499 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); |
| 1472 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 1500 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
| 1473 | filemap_flush(inode->i_mapping); | 1501 | filemap_flush(inode->i_mapping); |
| @@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1498 | 1526 | ||
| 1499 | trace_btrfs_sync_file(file, datasync); | 1527 | trace_btrfs_sync_file(file, datasync); |
| 1500 | 1528 | ||
| 1501 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
| 1502 | if (ret) | ||
| 1503 | return ret; | ||
| 1504 | mutex_lock(&inode->i_mutex); | 1529 | mutex_lock(&inode->i_mutex); |
| 1505 | 1530 | ||
| 1506 | /* we wait first, since the writeback may change the inode */ | 1531 | /* |
| 1532 | * we wait first, since the writeback may change the inode, also wait | ||
| 1533 | * ordered range does a filemap_write_and_wait_range which is why we | ||
| 1534 | * don't do it above like other file systems. | ||
| 1535 | */ | ||
| 1507 | root->log_batch++; | 1536 | root->log_batch++; |
| 1508 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1537 | btrfs_wait_ordered_range(inode, start, end); |
| 1509 | root->log_batch++; | 1538 | root->log_batch++; |
| 1510 | 1539 | ||
| 1511 | /* | 1540 | /* |
| @@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1523 | * syncing | 1552 | * syncing |
| 1524 | */ | 1553 | */ |
| 1525 | smp_mb(); | 1554 | smp_mb(); |
| 1526 | if (BTRFS_I(inode)->last_trans <= | 1555 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || |
| 1556 | BTRFS_I(inode)->last_trans <= | ||
| 1527 | root->fs_info->last_trans_committed) { | 1557 | root->fs_info->last_trans_committed) { |
| 1528 | BTRFS_I(inode)->last_trans = 0; | 1558 | BTRFS_I(inode)->last_trans = 0; |
| 1529 | mutex_unlock(&inode->i_mutex); | 1559 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 202008ec367d..81296c57405a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -33,6 +33,8 @@ | |||
| 33 | 33 | ||
| 34 | static int link_free_space(struct btrfs_free_space_ctl *ctl, | 34 | static int link_free_space(struct btrfs_free_space_ctl *ctl, |
| 35 | struct btrfs_free_space *info); | 35 | struct btrfs_free_space *info); |
| 36 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl, | ||
| 37 | struct btrfs_free_space *info); | ||
| 36 | 38 | ||
| 37 | static struct inode *__lookup_free_space_inode(struct btrfs_root *root, | 39 | static struct inode *__lookup_free_space_inode(struct btrfs_root *root, |
| 38 | struct btrfs_path *path, | 40 | struct btrfs_path *path, |
| @@ -75,7 +77,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, | |||
| 75 | return ERR_PTR(-ENOENT); | 77 | return ERR_PTR(-ENOENT); |
| 76 | } | 78 | } |
| 77 | 79 | ||
| 78 | inode->i_mapping->flags &= ~__GFP_FS; | 80 | mapping_set_gfp_mask(inode->i_mapping, |
| 81 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); | ||
| 79 | 82 | ||
| 80 | return inode; | 83 | return inode; |
| 81 | } | 84 | } |
| @@ -365,7 +368,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, | |||
| 365 | 368 | ||
| 366 | static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) | 369 | static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) |
| 367 | { | 370 | { |
| 368 | u64 *val; | 371 | __le64 *val; |
| 369 | 372 | ||
| 370 | io_ctl_map_page(io_ctl, 1); | 373 | io_ctl_map_page(io_ctl, 1); |
| 371 | 374 | ||
| @@ -388,7 +391,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) | |||
| 388 | 391 | ||
| 389 | static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) | 392 | static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) |
| 390 | { | 393 | { |
| 391 | u64 *gen; | 394 | __le64 *gen; |
| 392 | 395 | ||
| 393 | /* | 396 | /* |
| 394 | * Skip the crc area. If we don't check crcs then we just have a 64bit | 397 | * Skip the crc area. If we don't check crcs then we just have a 64bit |
| @@ -584,6 +587,44 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl, | |||
| 584 | return 0; | 587 | return 0; |
| 585 | } | 588 | } |
| 586 | 589 | ||
| 590 | /* | ||
| 591 | * Since we attach pinned extents after the fact we can have contiguous sections | ||
| 592 | * of free space that are split up in entries. This poses a problem with the | ||
| 593 | * tree logging stuff since it could have allocated across what appears to be 2 | ||
| 594 | * entries since we would have merged the entries when adding the pinned extents | ||
| 595 | * back to the free space cache. So run through the space cache that we just | ||
| 596 | * loaded and merge contiguous entries. This will make the log replay stuff not | ||
| 597 | * blow up and it will make for nicer allocator behavior. | ||
| 598 | */ | ||
| 599 | static void merge_space_tree(struct btrfs_free_space_ctl *ctl) | ||
| 600 | { | ||
| 601 | struct btrfs_free_space *e, *prev = NULL; | ||
| 602 | struct rb_node *n; | ||
| 603 | |||
| 604 | again: | ||
| 605 | spin_lock(&ctl->tree_lock); | ||
| 606 | for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { | ||
| 607 | e = rb_entry(n, struct btrfs_free_space, offset_index); | ||
| 608 | if (!prev) | ||
| 609 | goto next; | ||
| 610 | if (e->bitmap || prev->bitmap) | ||
| 611 | goto next; | ||
| 612 | if (prev->offset + prev->bytes == e->offset) { | ||
| 613 | unlink_free_space(ctl, prev); | ||
| 614 | unlink_free_space(ctl, e); | ||
| 615 | prev->bytes += e->bytes; | ||
| 616 | kmem_cache_free(btrfs_free_space_cachep, e); | ||
| 617 | link_free_space(ctl, prev); | ||
| 618 | prev = NULL; | ||
| 619 | spin_unlock(&ctl->tree_lock); | ||
| 620 | goto again; | ||
| 621 | } | ||
| 622 | next: | ||
| 623 | prev = e; | ||
| 624 | } | ||
| 625 | spin_unlock(&ctl->tree_lock); | ||
| 626 | } | ||
| 627 | |||
| 587 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | 628 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, |
| 588 | struct btrfs_free_space_ctl *ctl, | 629 | struct btrfs_free_space_ctl *ctl, |
| 589 | struct btrfs_path *path, u64 offset) | 630 | struct btrfs_path *path, u64 offset) |
| @@ -726,6 +767,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
| 726 | } | 767 | } |
| 727 | 768 | ||
| 728 | io_ctl_drop_pages(&io_ctl); | 769 | io_ctl_drop_pages(&io_ctl); |
| 770 | merge_space_tree(ctl); | ||
| 729 | ret = 1; | 771 | ret = 1; |
| 730 | out: | 772 | out: |
| 731 | io_ctl_free(&io_ctl); | 773 | io_ctl_free(&io_ctl); |
| @@ -972,9 +1014,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
| 972 | goto out; | 1014 | goto out; |
| 973 | 1015 | ||
| 974 | 1016 | ||
| 975 | ret = filemap_write_and_wait(inode->i_mapping); | 1017 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
| 976 | if (ret) | ||
| 977 | goto out; | ||
| 978 | 1018 | ||
| 979 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 1019 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
| 980 | key.offset = offset; | 1020 | key.offset = offset; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 61b16c641ce0..f6ab6f5e635a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
| 89 | 89 | ||
| 90 | static int btrfs_setsize(struct inode *inode, loff_t newsize); | 90 | static int btrfs_setsize(struct inode *inode, loff_t newsize); |
| 91 | static int btrfs_truncate(struct inode *inode); | 91 | static int btrfs_truncate(struct inode *inode); |
| 92 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | 92 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); |
| 93 | static noinline int cow_file_range(struct inode *inode, | 93 | static noinline int cow_file_range(struct inode *inode, |
| 94 | struct page *locked_page, | 94 | struct page *locked_page, |
| 95 | u64 start, u64 end, int *page_started, | 95 | u64 start, u64 end, int *page_started, |
| @@ -257,10 +257,13 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 257 | ret = insert_inline_extent(trans, root, inode, start, | 257 | ret = insert_inline_extent(trans, root, inode, start, |
| 258 | inline_len, compressed_size, | 258 | inline_len, compressed_size, |
| 259 | compress_type, compressed_pages); | 259 | compress_type, compressed_pages); |
| 260 | if (ret) { | 260 | if (ret && ret != -ENOSPC) { |
| 261 | btrfs_abort_transaction(trans, root, ret); | 261 | btrfs_abort_transaction(trans, root, ret); |
| 262 | return ret; | 262 | return ret; |
| 263 | } else if (ret == -ENOSPC) { | ||
| 264 | return 1; | ||
| 263 | } | 265 | } |
| 266 | |||
| 264 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | 267 | btrfs_delalloc_release_metadata(inode, end + 1 - start); |
| 265 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 268 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
| 266 | return 0; | 269 | return 0; |
| @@ -1572,11 +1575,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 1572 | if (btrfs_is_free_space_inode(root, inode)) | 1575 | if (btrfs_is_free_space_inode(root, inode)) |
| 1573 | metadata = 2; | 1576 | metadata = 2; |
| 1574 | 1577 | ||
| 1575 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); | ||
| 1576 | if (ret) | ||
| 1577 | return ret; | ||
| 1578 | |||
| 1579 | if (!(rw & REQ_WRITE)) { | 1578 | if (!(rw & REQ_WRITE)) { |
| 1579 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); | ||
| 1580 | if (ret) | ||
| 1581 | return ret; | ||
| 1582 | |||
| 1580 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1583 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
| 1581 | return btrfs_submit_compressed_read(inode, bio, | 1584 | return btrfs_submit_compressed_read(inode, bio, |
| 1582 | mirror_num, bio_flags); | 1585 | mirror_num, bio_flags); |
| @@ -1815,25 +1818,24 @@ out: | |||
| 1815 | * an ordered extent if the range of bytes in the file it covers are | 1818 | * an ordered extent if the range of bytes in the file it covers are |
| 1816 | * fully written. | 1819 | * fully written. |
| 1817 | */ | 1820 | */ |
| 1818 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1821 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) |
| 1819 | { | 1822 | { |
| 1823 | struct inode *inode = ordered_extent->inode; | ||
| 1820 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1824 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1821 | struct btrfs_trans_handle *trans = NULL; | 1825 | struct btrfs_trans_handle *trans = NULL; |
| 1822 | struct btrfs_ordered_extent *ordered_extent = NULL; | ||
| 1823 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1826 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 1824 | struct extent_state *cached_state = NULL; | 1827 | struct extent_state *cached_state = NULL; |
| 1825 | int compress_type = 0; | 1828 | int compress_type = 0; |
| 1826 | int ret; | 1829 | int ret; |
| 1827 | bool nolock; | 1830 | bool nolock; |
| 1828 | 1831 | ||
| 1829 | ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, | ||
| 1830 | end - start + 1); | ||
| 1831 | if (!ret) | ||
| 1832 | return 0; | ||
| 1833 | BUG_ON(!ordered_extent); /* Logic error */ | ||
| 1834 | |||
| 1835 | nolock = btrfs_is_free_space_inode(root, inode); | 1832 | nolock = btrfs_is_free_space_inode(root, inode); |
| 1836 | 1833 | ||
| 1834 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { | ||
| 1835 | ret = -EIO; | ||
| 1836 | goto out; | ||
| 1837 | } | ||
| 1838 | |||
| 1837 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1839 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
| 1838 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ | 1840 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ |
| 1839 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1841 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| @@ -1889,12 +1891,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1889 | ordered_extent->file_offset, | 1891 | ordered_extent->file_offset, |
| 1890 | ordered_extent->len); | 1892 | ordered_extent->len); |
| 1891 | } | 1893 | } |
| 1892 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | 1894 | |
| 1893 | ordered_extent->file_offset + | ||
| 1894 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | ||
| 1895 | if (ret < 0) { | 1895 | if (ret < 0) { |
| 1896 | btrfs_abort_transaction(trans, root, ret); | 1896 | btrfs_abort_transaction(trans, root, ret); |
| 1897 | goto out; | 1897 | goto out_unlock; |
| 1898 | } | 1898 | } |
| 1899 | 1899 | ||
| 1900 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1900 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
| @@ -1905,10 +1905,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1905 | ret = btrfs_update_inode_fallback(trans, root, inode); | 1905 | ret = btrfs_update_inode_fallback(trans, root, inode); |
| 1906 | if (ret) { /* -ENOMEM or corruption */ | 1906 | if (ret) { /* -ENOMEM or corruption */ |
| 1907 | btrfs_abort_transaction(trans, root, ret); | 1907 | btrfs_abort_transaction(trans, root, ret); |
| 1908 | goto out; | 1908 | goto out_unlock; |
| 1909 | } | 1909 | } |
| 1910 | } | 1910 | } |
| 1911 | ret = 0; | 1911 | ret = 0; |
| 1912 | out_unlock: | ||
| 1913 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | ||
| 1914 | ordered_extent->file_offset + | ||
| 1915 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | ||
| 1912 | out: | 1916 | out: |
| 1913 | if (root != root->fs_info->tree_root) | 1917 | if (root != root->fs_info->tree_root) |
| 1914 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1918 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
| @@ -1919,26 +1923,57 @@ out: | |||
| 1919 | btrfs_end_transaction(trans, root); | 1923 | btrfs_end_transaction(trans, root); |
| 1920 | } | 1924 | } |
| 1921 | 1925 | ||
| 1926 | if (ret) | ||
| 1927 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, | ||
| 1928 | ordered_extent->file_offset + | ||
| 1929 | ordered_extent->len - 1, NULL, GFP_NOFS); | ||
| 1930 | |||
| 1931 | /* | ||
| 1932 | * This needs to be done to make sure anybody waiting knows we are done | ||
| 1933 | * updating everything for this ordered extent. | ||
| 1934 | */ | ||
| 1935 | btrfs_remove_ordered_extent(inode, ordered_extent); | ||
| 1936 | |||
| 1922 | /* once for us */ | 1937 | /* once for us */ |
| 1923 | btrfs_put_ordered_extent(ordered_extent); | 1938 | btrfs_put_ordered_extent(ordered_extent); |
| 1924 | /* once for the tree */ | 1939 | /* once for the tree */ |
| 1925 | btrfs_put_ordered_extent(ordered_extent); | 1940 | btrfs_put_ordered_extent(ordered_extent); |
| 1926 | 1941 | ||
| 1927 | return 0; | 1942 | return ret; |
| 1928 | out_unlock: | 1943 | } |
| 1929 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | 1944 | |
| 1930 | ordered_extent->file_offset + | 1945 | static void finish_ordered_fn(struct btrfs_work *work) |
| 1931 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | 1946 | { |
| 1932 | goto out; | 1947 | struct btrfs_ordered_extent *ordered_extent; |
| 1948 | ordered_extent = container_of(work, struct btrfs_ordered_extent, work); | ||
| 1949 | btrfs_finish_ordered_io(ordered_extent); | ||
| 1933 | } | 1950 | } |
| 1934 | 1951 | ||
| 1935 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1952 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
| 1936 | struct extent_state *state, int uptodate) | 1953 | struct extent_state *state, int uptodate) |
| 1937 | { | 1954 | { |
| 1955 | struct inode *inode = page->mapping->host; | ||
| 1956 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1957 | struct btrfs_ordered_extent *ordered_extent = NULL; | ||
| 1958 | struct btrfs_workers *workers; | ||
| 1959 | |||
| 1938 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 1960 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
| 1939 | 1961 | ||
| 1940 | ClearPagePrivate2(page); | 1962 | ClearPagePrivate2(page); |
| 1941 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1963 | if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, |
| 1964 | end - start + 1, uptodate)) | ||
| 1965 | return 0; | ||
| 1966 | |||
| 1967 | ordered_extent->work.func = finish_ordered_fn; | ||
| 1968 | ordered_extent->work.flags = 0; | ||
| 1969 | |||
| 1970 | if (btrfs_is_free_space_inode(root, inode)) | ||
| 1971 | workers = &root->fs_info->endio_freespace_worker; | ||
| 1972 | else | ||
| 1973 | workers = &root->fs_info->endio_write_workers; | ||
| 1974 | btrfs_queue_worker(workers, &ordered_extent->work); | ||
| 1975 | |||
| 1976 | return 0; | ||
| 1942 | } | 1977 | } |
| 1943 | 1978 | ||
| 1944 | /* | 1979 | /* |
| @@ -2072,12 +2107,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | |||
| 2072 | struct btrfs_block_rsv *block_rsv; | 2107 | struct btrfs_block_rsv *block_rsv; |
| 2073 | int ret; | 2108 | int ret; |
| 2074 | 2109 | ||
| 2075 | if (!list_empty(&root->orphan_list) || | 2110 | if (atomic_read(&root->orphan_inodes) || |
| 2076 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | 2111 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) |
| 2077 | return; | 2112 | return; |
| 2078 | 2113 | ||
| 2079 | spin_lock(&root->orphan_lock); | 2114 | spin_lock(&root->orphan_lock); |
| 2080 | if (!list_empty(&root->orphan_list)) { | 2115 | if (atomic_read(&root->orphan_inodes)) { |
| 2081 | spin_unlock(&root->orphan_lock); | 2116 | spin_unlock(&root->orphan_lock); |
| 2082 | return; | 2117 | return; |
| 2083 | } | 2118 | } |
| @@ -2134,8 +2169,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2134 | block_rsv = NULL; | 2169 | block_rsv = NULL; |
| 2135 | } | 2170 | } |
| 2136 | 2171 | ||
| 2137 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2172 | if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
| 2138 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2173 | &BTRFS_I(inode)->runtime_flags)) { |
| 2139 | #if 0 | 2174 | #if 0 |
| 2140 | /* | 2175 | /* |
| 2141 | * For proper ENOSPC handling, we should do orphan | 2176 | * For proper ENOSPC handling, we should do orphan |
| @@ -2148,12 +2183,12 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2148 | insert = 1; | 2183 | insert = 1; |
| 2149 | #endif | 2184 | #endif |
| 2150 | insert = 1; | 2185 | insert = 1; |
| 2186 | atomic_dec(&root->orphan_inodes); | ||
| 2151 | } | 2187 | } |
| 2152 | 2188 | ||
| 2153 | if (!BTRFS_I(inode)->orphan_meta_reserved) { | 2189 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
| 2154 | BTRFS_I(inode)->orphan_meta_reserved = 1; | 2190 | &BTRFS_I(inode)->runtime_flags)) |
| 2155 | reserve = 1; | 2191 | reserve = 1; |
| 2156 | } | ||
| 2157 | spin_unlock(&root->orphan_lock); | 2192 | spin_unlock(&root->orphan_lock); |
| 2158 | 2193 | ||
| 2159 | /* grab metadata reservation from transaction handle */ | 2194 | /* grab metadata reservation from transaction handle */ |
| @@ -2166,6 +2201,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2166 | if (insert >= 1) { | 2201 | if (insert >= 1) { |
| 2167 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); | 2202 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); |
| 2168 | if (ret && ret != -EEXIST) { | 2203 | if (ret && ret != -EEXIST) { |
| 2204 | clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, | ||
| 2205 | &BTRFS_I(inode)->runtime_flags); | ||
| 2169 | btrfs_abort_transaction(trans, root, ret); | 2206 | btrfs_abort_transaction(trans, root, ret); |
| 2170 | return ret; | 2207 | return ret; |
| 2171 | } | 2208 | } |
| @@ -2196,15 +2233,13 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2196 | int ret = 0; | 2233 | int ret = 0; |
| 2197 | 2234 | ||
| 2198 | spin_lock(&root->orphan_lock); | 2235 | spin_lock(&root->orphan_lock); |
| 2199 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2236 | if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
| 2200 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2237 | &BTRFS_I(inode)->runtime_flags)) |
| 2201 | delete_item = 1; | 2238 | delete_item = 1; |
| 2202 | } | ||
| 2203 | 2239 | ||
| 2204 | if (BTRFS_I(inode)->orphan_meta_reserved) { | 2240 | if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
| 2205 | BTRFS_I(inode)->orphan_meta_reserved = 0; | 2241 | &BTRFS_I(inode)->runtime_flags)) |
| 2206 | release_rsv = 1; | 2242 | release_rsv = 1; |
| 2207 | } | ||
| 2208 | spin_unlock(&root->orphan_lock); | 2243 | spin_unlock(&root->orphan_lock); |
| 2209 | 2244 | ||
| 2210 | if (trans && delete_item) { | 2245 | if (trans && delete_item) { |
| @@ -2212,8 +2247,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2212 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ | 2247 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ |
| 2213 | } | 2248 | } |
| 2214 | 2249 | ||
| 2215 | if (release_rsv) | 2250 | if (release_rsv) { |
| 2216 | btrfs_orphan_release_metadata(inode); | 2251 | btrfs_orphan_release_metadata(inode); |
| 2252 | atomic_dec(&root->orphan_inodes); | ||
| 2253 | } | ||
| 2217 | 2254 | ||
| 2218 | return 0; | 2255 | return 0; |
| 2219 | } | 2256 | } |
| @@ -2341,6 +2378,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2341 | ret = PTR_ERR(trans); | 2378 | ret = PTR_ERR(trans); |
| 2342 | goto out; | 2379 | goto out; |
| 2343 | } | 2380 | } |
| 2381 | printk(KERN_ERR "auto deleting %Lu\n", | ||
| 2382 | found_key.objectid); | ||
| 2344 | ret = btrfs_del_orphan_item(trans, root, | 2383 | ret = btrfs_del_orphan_item(trans, root, |
| 2345 | found_key.objectid); | 2384 | found_key.objectid); |
| 2346 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ | 2385 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ |
| @@ -2352,9 +2391,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2352 | * add this inode to the orphan list so btrfs_orphan_del does | 2391 | * add this inode to the orphan list so btrfs_orphan_del does |
| 2353 | * the proper thing when we hit it | 2392 | * the proper thing when we hit it |
| 2354 | */ | 2393 | */ |
| 2355 | spin_lock(&root->orphan_lock); | 2394 | set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
| 2356 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2395 | &BTRFS_I(inode)->runtime_flags); |
| 2357 | spin_unlock(&root->orphan_lock); | ||
| 2358 | 2396 | ||
| 2359 | /* if we have links, this was a truncate, let's do that */ | 2397 | /* if we have links, this was a truncate, let's do that */ |
| 2360 | if (inode->i_nlink) { | 2398 | if (inode->i_nlink) { |
| @@ -2510,7 +2548,7 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
| 2510 | 2548 | ||
| 2511 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); | 2549 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); |
| 2512 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); | 2550 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); |
| 2513 | BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item); | 2551 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); |
| 2514 | inode->i_generation = BTRFS_I(inode)->generation; | 2552 | inode->i_generation = BTRFS_I(inode)->generation; |
| 2515 | inode->i_rdev = 0; | 2553 | inode->i_rdev = 0; |
| 2516 | rdev = btrfs_inode_rdev(leaf, inode_item); | 2554 | rdev = btrfs_inode_rdev(leaf, inode_item); |
| @@ -2594,7 +2632,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
| 2594 | 2632 | ||
| 2595 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | 2633 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); |
| 2596 | btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); | 2634 | btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); |
| 2597 | btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence); | 2635 | btrfs_set_inode_sequence(leaf, item, inode->i_version); |
| 2598 | btrfs_set_inode_transid(leaf, item, trans->transid); | 2636 | btrfs_set_inode_transid(leaf, item, trans->transid); |
| 2599 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2637 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
| 2600 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2638 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
| @@ -2752,6 +2790,8 @@ err: | |||
| 2752 | goto out; | 2790 | goto out; |
| 2753 | 2791 | ||
| 2754 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 2792 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
| 2793 | inode_inc_iversion(inode); | ||
| 2794 | inode_inc_iversion(dir); | ||
| 2755 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2795 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
| 2756 | btrfs_update_inode(trans, root, dir); | 2796 | btrfs_update_inode(trans, root, dir); |
| 2757 | out: | 2797 | out: |
| @@ -3089,6 +3129,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
| 3089 | } | 3129 | } |
| 3090 | 3130 | ||
| 3091 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 3131 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
| 3132 | inode_inc_iversion(dir); | ||
| 3092 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 3133 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
| 3093 | ret = btrfs_update_inode(trans, root, dir); | 3134 | ret = btrfs_update_inode(trans, root, dir); |
| 3094 | if (ret) | 3135 | if (ret) |
| @@ -3607,7 +3648,8 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) | |||
| 3607 | * any new writes get down to disk quickly. | 3648 | * any new writes get down to disk quickly. |
| 3608 | */ | 3649 | */ |
| 3609 | if (newsize == 0) | 3650 | if (newsize == 0) |
| 3610 | BTRFS_I(inode)->ordered_data_close = 1; | 3651 | set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
| 3652 | &BTRFS_I(inode)->runtime_flags); | ||
| 3611 | 3653 | ||
| 3612 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | 3654 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
| 3613 | truncate_setsize(inode, newsize); | 3655 | truncate_setsize(inode, newsize); |
| @@ -3638,6 +3680,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 3638 | 3680 | ||
| 3639 | if (attr->ia_valid) { | 3681 | if (attr->ia_valid) { |
| 3640 | setattr_copy(inode, attr); | 3682 | setattr_copy(inode, attr); |
| 3683 | inode_inc_iversion(inode); | ||
| 3641 | err = btrfs_dirty_inode(inode); | 3684 | err = btrfs_dirty_inode(inode); |
| 3642 | 3685 | ||
| 3643 | if (!err && attr->ia_valid & ATTR_MODE) | 3686 | if (!err && attr->ia_valid & ATTR_MODE) |
| @@ -3671,7 +3714,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3671 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3714 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
| 3672 | 3715 | ||
| 3673 | if (root->fs_info->log_root_recovering) { | 3716 | if (root->fs_info->log_root_recovering) { |
| 3674 | BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); | 3717 | BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
| 3718 | &BTRFS_I(inode)->runtime_flags)); | ||
| 3675 | goto no_delete; | 3719 | goto no_delete; |
| 3676 | } | 3720 | } |
| 3677 | 3721 | ||
| @@ -3756,7 +3800,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3756 | btrfs_end_transaction(trans, root); | 3800 | btrfs_end_transaction(trans, root); |
| 3757 | btrfs_btree_balance_dirty(root, nr); | 3801 | btrfs_btree_balance_dirty(root, nr); |
| 3758 | no_delete: | 3802 | no_delete: |
| 3759 | end_writeback(inode); | 3803 | clear_inode(inode); |
| 3760 | return; | 3804 | return; |
| 3761 | } | 3805 | } |
| 3762 | 3806 | ||
| @@ -4066,7 +4110,7 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
| 4066 | 4110 | ||
| 4067 | BTRFS_I(inode)->root = root; | 4111 | BTRFS_I(inode)->root = root; |
| 4068 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4112 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
| 4069 | BTRFS_I(inode)->dummy_inode = 1; | 4113 | set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); |
| 4070 | 4114 | ||
| 4071 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; | 4115 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; |
| 4072 | inode->i_op = &btrfs_dir_ro_inode_operations; | 4116 | inode->i_op = &btrfs_dir_ro_inode_operations; |
| @@ -4370,7 +4414,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 4370 | int ret = 0; | 4414 | int ret = 0; |
| 4371 | bool nolock = false; | 4415 | bool nolock = false; |
| 4372 | 4416 | ||
| 4373 | if (BTRFS_I(inode)->dummy_inode) | 4417 | if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) |
| 4374 | return 0; | 4418 | return 0; |
| 4375 | 4419 | ||
| 4376 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) | 4420 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) |
| @@ -4403,7 +4447,7 @@ int btrfs_dirty_inode(struct inode *inode) | |||
| 4403 | struct btrfs_trans_handle *trans; | 4447 | struct btrfs_trans_handle *trans; |
| 4404 | int ret; | 4448 | int ret; |
| 4405 | 4449 | ||
| 4406 | if (BTRFS_I(inode)->dummy_inode) | 4450 | if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) |
| 4407 | return 0; | 4451 | return 0; |
| 4408 | 4452 | ||
| 4409 | trans = btrfs_join_transaction(root); | 4453 | trans = btrfs_join_transaction(root); |
| @@ -4431,46 +4475,18 @@ int btrfs_dirty_inode(struct inode *inode) | |||
| 4431 | * This is a copy of file_update_time. We need this so we can return error on | 4475 | * This is a copy of file_update_time. We need this so we can return error on |
| 4432 | * ENOSPC for updating the inode in the case of file write and mmap writes. | 4476 | * ENOSPC for updating the inode in the case of file write and mmap writes. |
| 4433 | */ | 4477 | */ |
| 4434 | int btrfs_update_time(struct file *file) | 4478 | static int btrfs_update_time(struct inode *inode, struct timespec *now, |
| 4479 | int flags) | ||
| 4435 | { | 4480 | { |
| 4436 | struct inode *inode = file->f_path.dentry->d_inode; | 4481 | if (flags & S_VERSION) |
| 4437 | struct timespec now; | ||
| 4438 | int ret; | ||
| 4439 | enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; | ||
| 4440 | |||
| 4441 | /* First try to exhaust all avenues to not sync */ | ||
| 4442 | if (IS_NOCMTIME(inode)) | ||
| 4443 | return 0; | ||
| 4444 | |||
| 4445 | now = current_fs_time(inode->i_sb); | ||
| 4446 | if (!timespec_equal(&inode->i_mtime, &now)) | ||
| 4447 | sync_it = S_MTIME; | ||
| 4448 | |||
| 4449 | if (!timespec_equal(&inode->i_ctime, &now)) | ||
| 4450 | sync_it |= S_CTIME; | ||
| 4451 | |||
| 4452 | if (IS_I_VERSION(inode)) | ||
| 4453 | sync_it |= S_VERSION; | ||
| 4454 | |||
| 4455 | if (!sync_it) | ||
| 4456 | return 0; | ||
| 4457 | |||
| 4458 | /* Finally allowed to write? Takes lock. */ | ||
| 4459 | if (mnt_want_write_file(file)) | ||
| 4460 | return 0; | ||
| 4461 | |||
| 4462 | /* Only change inode inside the lock region */ | ||
| 4463 | if (sync_it & S_VERSION) | ||
| 4464 | inode_inc_iversion(inode); | 4482 | inode_inc_iversion(inode); |
| 4465 | if (sync_it & S_CTIME) | 4483 | if (flags & S_CTIME) |
| 4466 | inode->i_ctime = now; | 4484 | inode->i_ctime = *now; |
| 4467 | if (sync_it & S_MTIME) | 4485 | if (flags & S_MTIME) |
| 4468 | inode->i_mtime = now; | 4486 | inode->i_mtime = *now; |
| 4469 | ret = btrfs_dirty_inode(inode); | 4487 | if (flags & S_ATIME) |
| 4470 | if (!ret) | 4488 | inode->i_atime = *now; |
| 4471 | mark_inode_dirty_sync(inode); | 4489 | return btrfs_dirty_inode(inode); |
| 4472 | mnt_drop_write(file->f_path.mnt); | ||
| 4473 | return ret; | ||
| 4474 | } | 4490 | } |
| 4475 | 4491 | ||
| 4476 | /* | 4492 | /* |
| @@ -4730,6 +4746,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
| 4730 | 4746 | ||
| 4731 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 4747 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
| 4732 | name_len * 2); | 4748 | name_len * 2); |
| 4749 | inode_inc_iversion(parent_inode); | ||
| 4733 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 4750 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; |
| 4734 | ret = btrfs_update_inode(trans, root, parent_inode); | 4751 | ret = btrfs_update_inode(trans, root, parent_inode); |
| 4735 | if (ret) | 4752 | if (ret) |
| @@ -4937,6 +4954,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4937 | } | 4954 | } |
| 4938 | 4955 | ||
| 4939 | btrfs_inc_nlink(inode); | 4956 | btrfs_inc_nlink(inode); |
| 4957 | inode_inc_iversion(inode); | ||
| 4940 | inode->i_ctime = CURRENT_TIME; | 4958 | inode->i_ctime = CURRENT_TIME; |
| 4941 | ihold(inode); | 4959 | ihold(inode); |
| 4942 | 4960 | ||
| @@ -5903,9 +5921,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
| 5903 | struct btrfs_dio_private *dip = bio->bi_private; | 5921 | struct btrfs_dio_private *dip = bio->bi_private; |
| 5904 | struct inode *inode = dip->inode; | 5922 | struct inode *inode = dip->inode; |
| 5905 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5923 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 5906 | struct btrfs_trans_handle *trans; | ||
| 5907 | struct btrfs_ordered_extent *ordered = NULL; | 5924 | struct btrfs_ordered_extent *ordered = NULL; |
| 5908 | struct extent_state *cached_state = NULL; | ||
| 5909 | u64 ordered_offset = dip->logical_offset; | 5925 | u64 ordered_offset = dip->logical_offset; |
| 5910 | u64 ordered_bytes = dip->bytes; | 5926 | u64 ordered_bytes = dip->bytes; |
| 5911 | int ret; | 5927 | int ret; |
| @@ -5915,73 +5931,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
| 5915 | again: | 5931 | again: |
| 5916 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, | 5932 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, |
| 5917 | &ordered_offset, | 5933 | &ordered_offset, |
| 5918 | ordered_bytes); | 5934 | ordered_bytes, !err); |
| 5919 | if (!ret) | 5935 | if (!ret) |
| 5920 | goto out_test; | 5936 | goto out_test; |
| 5921 | 5937 | ||
| 5922 | BUG_ON(!ordered); | 5938 | ordered->work.func = finish_ordered_fn; |
| 5923 | 5939 | ordered->work.flags = 0; | |
| 5924 | trans = btrfs_join_transaction(root); | 5940 | btrfs_queue_worker(&root->fs_info->endio_write_workers, |
| 5925 | if (IS_ERR(trans)) { | 5941 | &ordered->work); |
| 5926 | err = -ENOMEM; | ||
| 5927 | goto out; | ||
| 5928 | } | ||
| 5929 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 5930 | |||
| 5931 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
| 5932 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
| 5933 | if (!ret) | ||
| 5934 | err = btrfs_update_inode_fallback(trans, root, inode); | ||
| 5935 | goto out; | ||
| 5936 | } | ||
| 5937 | |||
| 5938 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
| 5939 | ordered->file_offset + ordered->len - 1, 0, | ||
| 5940 | &cached_state); | ||
| 5941 | |||
| 5942 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
| 5943 | ret = btrfs_mark_extent_written(trans, inode, | ||
| 5944 | ordered->file_offset, | ||
| 5945 | ordered->file_offset + | ||
| 5946 | ordered->len); | ||
| 5947 | if (ret) { | ||
| 5948 | err = ret; | ||
| 5949 | goto out_unlock; | ||
| 5950 | } | ||
| 5951 | } else { | ||
| 5952 | ret = insert_reserved_file_extent(trans, inode, | ||
| 5953 | ordered->file_offset, | ||
| 5954 | ordered->start, | ||
| 5955 | ordered->disk_len, | ||
| 5956 | ordered->len, | ||
| 5957 | ordered->len, | ||
| 5958 | 0, 0, 0, | ||
| 5959 | BTRFS_FILE_EXTENT_REG); | ||
| 5960 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
| 5961 | ordered->file_offset, ordered->len); | ||
| 5962 | if (ret) { | ||
| 5963 | err = ret; | ||
| 5964 | WARN_ON(1); | ||
| 5965 | goto out_unlock; | ||
| 5966 | } | ||
| 5967 | } | ||
| 5968 | |||
| 5969 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
| 5970 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
| 5971 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) | ||
| 5972 | btrfs_update_inode_fallback(trans, root, inode); | ||
| 5973 | ret = 0; | ||
| 5974 | out_unlock: | ||
| 5975 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
| 5976 | ordered->file_offset + ordered->len - 1, | ||
| 5977 | &cached_state, GFP_NOFS); | ||
| 5978 | out: | ||
| 5979 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
| 5980 | btrfs_end_transaction(trans, root); | ||
| 5981 | ordered_offset = ordered->file_offset + ordered->len; | ||
| 5982 | btrfs_put_ordered_extent(ordered); | ||
| 5983 | btrfs_put_ordered_extent(ordered); | ||
| 5984 | |||
| 5985 | out_test: | 5942 | out_test: |
| 5986 | /* | 5943 | /* |
| 5987 | * our bio might span multiple ordered extents. If we haven't | 5944 | * our bio might span multiple ordered extents. If we haven't |
| @@ -5990,12 +5947,12 @@ out_test: | |||
| 5990 | if (ordered_offset < dip->logical_offset + dip->bytes) { | 5947 | if (ordered_offset < dip->logical_offset + dip->bytes) { |
| 5991 | ordered_bytes = dip->logical_offset + dip->bytes - | 5948 | ordered_bytes = dip->logical_offset + dip->bytes - |
| 5992 | ordered_offset; | 5949 | ordered_offset; |
| 5950 | ordered = NULL; | ||
| 5993 | goto again; | 5951 | goto again; |
| 5994 | } | 5952 | } |
| 5995 | out_done: | 5953 | out_done: |
| 5996 | bio->bi_private = dip->private; | 5954 | bio->bi_private = dip->private; |
| 5997 | 5955 | ||
| 5998 | kfree(dip->csums); | ||
| 5999 | kfree(dip); | 5956 | kfree(dip); |
| 6000 | 5957 | ||
| 6001 | /* If we had an error make sure to clear the uptodate flag */ | 5958 | /* If we had an error make sure to clear the uptodate flag */ |
| @@ -6063,9 +6020,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
| 6063 | int ret; | 6020 | int ret; |
| 6064 | 6021 | ||
| 6065 | bio_get(bio); | 6022 | bio_get(bio); |
| 6066 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 6023 | |
| 6067 | if (ret) | 6024 | if (!write) { |
| 6068 | goto err; | 6025 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
| 6026 | if (ret) | ||
| 6027 | goto err; | ||
| 6028 | } | ||
| 6069 | 6029 | ||
| 6070 | if (skip_sum) | 6030 | if (skip_sum) |
| 6071 | goto map; | 6031 | goto map; |
| @@ -6485,13 +6445,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) | |||
| 6485 | 6445 | ||
| 6486 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) | 6446 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) |
| 6487 | { | 6447 | { |
| 6448 | struct inode *inode = page->mapping->host; | ||
| 6488 | struct extent_io_tree *tree; | 6449 | struct extent_io_tree *tree; |
| 6489 | struct btrfs_ordered_extent *ordered; | 6450 | struct btrfs_ordered_extent *ordered; |
| 6490 | struct extent_state *cached_state = NULL; | 6451 | struct extent_state *cached_state = NULL; |
| 6491 | u64 page_start = page_offset(page); | 6452 | u64 page_start = page_offset(page); |
| 6492 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 6453 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 6493 | 6454 | ||
| 6494 | |||
| 6495 | /* | 6455 | /* |
| 6496 | * we have the page locked, so new writeback can't start, | 6456 | * we have the page locked, so new writeback can't start, |
| 6497 | * and the dirty bit won't be cleared while we are here. | 6457 | * and the dirty bit won't be cleared while we are here. |
| @@ -6501,13 +6461,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 6501 | */ | 6461 | */ |
| 6502 | wait_on_page_writeback(page); | 6462 | wait_on_page_writeback(page); |
| 6503 | 6463 | ||
| 6504 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 6464 | tree = &BTRFS_I(inode)->io_tree; |
| 6505 | if (offset) { | 6465 | if (offset) { |
| 6506 | btrfs_releasepage(page, GFP_NOFS); | 6466 | btrfs_releasepage(page, GFP_NOFS); |
| 6507 | return; | 6467 | return; |
| 6508 | } | 6468 | } |
| 6509 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state); | 6469 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state); |
| 6510 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 6470 | ordered = btrfs_lookup_ordered_extent(inode, |
| 6511 | page_offset(page)); | 6471 | page_offset(page)); |
| 6512 | if (ordered) { | 6472 | if (ordered) { |
| 6513 | /* | 6473 | /* |
| @@ -6522,9 +6482,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 6522 | * whoever cleared the private bit is responsible | 6482 | * whoever cleared the private bit is responsible |
| 6523 | * for the finish_ordered_io | 6483 | * for the finish_ordered_io |
| 6524 | */ | 6484 | */ |
| 6525 | if (TestClearPagePrivate2(page)) { | 6485 | if (TestClearPagePrivate2(page) && |
| 6526 | btrfs_finish_ordered_io(page->mapping->host, | 6486 | btrfs_dec_test_ordered_pending(inode, &ordered, page_start, |
| 6527 | page_start, page_end); | 6487 | PAGE_CACHE_SIZE, 1)) { |
| 6488 | btrfs_finish_ordered_io(ordered); | ||
| 6528 | } | 6489 | } |
| 6529 | btrfs_put_ordered_extent(ordered); | 6490 | btrfs_put_ordered_extent(ordered); |
| 6530 | cached_state = NULL; | 6491 | cached_state = NULL; |
| @@ -6576,7 +6537,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 6576 | 6537 | ||
| 6577 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 6538 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 6578 | if (!ret) { | 6539 | if (!ret) { |
| 6579 | ret = btrfs_update_time(vma->vm_file); | 6540 | ret = file_update_time(vma->vm_file); |
| 6580 | reserved = 1; | 6541 | reserved = 1; |
| 6581 | } | 6542 | } |
| 6582 | if (ret) { | 6543 | if (ret) { |
| @@ -6771,7 +6732,8 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6771 | * using truncate to replace the contents of the file will | 6732 | * using truncate to replace the contents of the file will |
| 6772 | * end up with a zero length file after a crash. | 6733 | * end up with a zero length file after a crash. |
| 6773 | */ | 6734 | */ |
| 6774 | if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) | 6735 | if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
| 6736 | &BTRFS_I(inode)->runtime_flags)) | ||
| 6775 | btrfs_add_ordered_operation(trans, root, inode); | 6737 | btrfs_add_ordered_operation(trans, root, inode); |
| 6776 | 6738 | ||
| 6777 | while (1) { | 6739 | while (1) { |
| @@ -6894,7 +6856,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6894 | ei->root = NULL; | 6856 | ei->root = NULL; |
| 6895 | ei->space_info = NULL; | 6857 | ei->space_info = NULL; |
| 6896 | ei->generation = 0; | 6858 | ei->generation = 0; |
| 6897 | ei->sequence = 0; | ||
| 6898 | ei->last_trans = 0; | 6859 | ei->last_trans = 0; |
| 6899 | ei->last_sub_trans = 0; | 6860 | ei->last_sub_trans = 0; |
| 6900 | ei->logged_trans = 0; | 6861 | ei->logged_trans = 0; |
| @@ -6909,11 +6870,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6909 | ei->outstanding_extents = 0; | 6870 | ei->outstanding_extents = 0; |
| 6910 | ei->reserved_extents = 0; | 6871 | ei->reserved_extents = 0; |
| 6911 | 6872 | ||
| 6912 | ei->ordered_data_close = 0; | 6873 | ei->runtime_flags = 0; |
| 6913 | ei->orphan_meta_reserved = 0; | ||
| 6914 | ei->dummy_inode = 0; | ||
| 6915 | ei->in_defrag = 0; | ||
| 6916 | ei->delalloc_meta_reserved = 0; | ||
| 6917 | ei->force_compress = BTRFS_COMPRESS_NONE; | 6874 | ei->force_compress = BTRFS_COMPRESS_NONE; |
| 6918 | 6875 | ||
| 6919 | ei->delayed_node = NULL; | 6876 | ei->delayed_node = NULL; |
| @@ -6927,7 +6884,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6927 | mutex_init(&ei->log_mutex); | 6884 | mutex_init(&ei->log_mutex); |
| 6928 | mutex_init(&ei->delalloc_mutex); | 6885 | mutex_init(&ei->delalloc_mutex); |
| 6929 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6886 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| 6930 | INIT_LIST_HEAD(&ei->i_orphan); | ||
| 6931 | INIT_LIST_HEAD(&ei->delalloc_inodes); | 6887 | INIT_LIST_HEAD(&ei->delalloc_inodes); |
| 6932 | INIT_LIST_HEAD(&ei->ordered_operations); | 6888 | INIT_LIST_HEAD(&ei->ordered_operations); |
| 6933 | RB_CLEAR_NODE(&ei->rb_node); | 6889 | RB_CLEAR_NODE(&ei->rb_node); |
| @@ -6972,13 +6928,12 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 6972 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6928 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 6973 | } | 6929 | } |
| 6974 | 6930 | ||
| 6975 | spin_lock(&root->orphan_lock); | 6931 | if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
| 6976 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6932 | &BTRFS_I(inode)->runtime_flags)) { |
| 6977 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", | 6933 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", |
| 6978 | (unsigned long long)btrfs_ino(inode)); | 6934 | (unsigned long long)btrfs_ino(inode)); |
| 6979 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6935 | atomic_dec(&root->orphan_inodes); |
| 6980 | } | 6936 | } |
| 6981 | spin_unlock(&root->orphan_lock); | ||
| 6982 | 6937 | ||
| 6983 | while (1) { | 6938 | while (1) { |
| 6984 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6939 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
| @@ -7193,6 +7148,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 7193 | if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) | 7148 | if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) |
| 7194 | btrfs_add_ordered_operation(trans, root, old_inode); | 7149 | btrfs_add_ordered_operation(trans, root, old_inode); |
| 7195 | 7150 | ||
| 7151 | inode_inc_iversion(old_dir); | ||
| 7152 | inode_inc_iversion(new_dir); | ||
| 7153 | inode_inc_iversion(old_inode); | ||
| 7196 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 7154 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
| 7197 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 7155 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
| 7198 | old_inode->i_ctime = ctime; | 7156 | old_inode->i_ctime = ctime; |
| @@ -7219,6 +7177,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 7219 | } | 7177 | } |
| 7220 | 7178 | ||
| 7221 | if (new_inode) { | 7179 | if (new_inode) { |
| 7180 | inode_inc_iversion(new_inode); | ||
| 7222 | new_inode->i_ctime = CURRENT_TIME; | 7181 | new_inode->i_ctime = CURRENT_TIME; |
| 7223 | if (unlikely(btrfs_ino(new_inode) == | 7182 | if (unlikely(btrfs_ino(new_inode) == |
| 7224 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | 7183 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
| @@ -7490,6 +7449,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
| 7490 | cur_offset += ins.offset; | 7449 | cur_offset += ins.offset; |
| 7491 | *alloc_hint = ins.objectid + ins.offset; | 7450 | *alloc_hint = ins.objectid + ins.offset; |
| 7492 | 7451 | ||
| 7452 | inode_inc_iversion(inode); | ||
| 7493 | inode->i_ctime = CURRENT_TIME; | 7453 | inode->i_ctime = CURRENT_TIME; |
| 7494 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 7454 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
| 7495 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 7455 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
| @@ -7647,6 +7607,7 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
| 7647 | .permission = btrfs_permission, | 7607 | .permission = btrfs_permission, |
| 7648 | .fiemap = btrfs_fiemap, | 7608 | .fiemap = btrfs_fiemap, |
| 7649 | .get_acl = btrfs_get_acl, | 7609 | .get_acl = btrfs_get_acl, |
| 7610 | .update_time = btrfs_update_time, | ||
| 7650 | }; | 7611 | }; |
| 7651 | static const struct inode_operations btrfs_special_inode_operations = { | 7612 | static const struct inode_operations btrfs_special_inode_operations = { |
| 7652 | .getattr = btrfs_getattr, | 7613 | .getattr = btrfs_getattr, |
| @@ -7657,6 +7618,7 @@ static const struct inode_operations btrfs_special_inode_operations = { | |||
| 7657 | .listxattr = btrfs_listxattr, | 7618 | .listxattr = btrfs_listxattr, |
| 7658 | .removexattr = btrfs_removexattr, | 7619 | .removexattr = btrfs_removexattr, |
| 7659 | .get_acl = btrfs_get_acl, | 7620 | .get_acl = btrfs_get_acl, |
| 7621 | .update_time = btrfs_update_time, | ||
| 7660 | }; | 7622 | }; |
| 7661 | static const struct inode_operations btrfs_symlink_inode_operations = { | 7623 | static const struct inode_operations btrfs_symlink_inode_operations = { |
| 7662 | .readlink = generic_readlink, | 7624 | .readlink = generic_readlink, |
| @@ -7670,6 +7632,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
| 7670 | .listxattr = btrfs_listxattr, | 7632 | .listxattr = btrfs_listxattr, |
| 7671 | .removexattr = btrfs_removexattr, | 7633 | .removexattr = btrfs_removexattr, |
| 7672 | .get_acl = btrfs_get_acl, | 7634 | .get_acl = btrfs_get_acl, |
| 7635 | .update_time = btrfs_update_time, | ||
| 7673 | }; | 7636 | }; |
| 7674 | 7637 | ||
| 7675 | const struct dentry_operations btrfs_dentry_operations = { | 7638 | const struct dentry_operations btrfs_dentry_operations = { |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14f8e1faa46e..24b776c08d99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -261,6 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
| 261 | } | 261 | } |
| 262 | 262 | ||
| 263 | btrfs_update_iflags(inode); | 263 | btrfs_update_iflags(inode); |
| 264 | inode_inc_iversion(inode); | ||
| 264 | inode->i_ctime = CURRENT_TIME; | 265 | inode->i_ctime = CURRENT_TIME; |
| 265 | ret = btrfs_update_inode(trans, root, inode); | 266 | ret = btrfs_update_inode(trans, root, inode); |
| 266 | 267 | ||
| @@ -367,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 367 | return PTR_ERR(trans); | 368 | return PTR_ERR(trans); |
| 368 | 369 | ||
| 369 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 370 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
| 370 | 0, objectid, NULL, 0, 0, 0, 0); | 371 | 0, objectid, NULL, 0, 0, 0); |
| 371 | if (IS_ERR(leaf)) { | 372 | if (IS_ERR(leaf)) { |
| 372 | ret = PTR_ERR(leaf); | 373 | ret = PTR_ERR(leaf); |
| 373 | goto fail; | 374 | goto fail; |
| @@ -2262,10 +2263,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
| 2262 | di_args->bytes_used = dev->bytes_used; | 2263 | di_args->bytes_used = dev->bytes_used; |
| 2263 | di_args->total_bytes = dev->total_bytes; | 2264 | di_args->total_bytes = dev->total_bytes; |
| 2264 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); | 2265 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); |
| 2265 | if (dev->name) | 2266 | if (dev->name) { |
| 2266 | strncpy(di_args->path, dev->name, sizeof(di_args->path)); | 2267 | strncpy(di_args->path, dev->name, sizeof(di_args->path)); |
| 2267 | else | 2268 | di_args->path[sizeof(di_args->path) - 1] = 0; |
| 2269 | } else { | ||
| 2268 | di_args->path[0] = '\0'; | 2270 | di_args->path[0] = '\0'; |
| 2271 | } | ||
| 2269 | 2272 | ||
| 2270 | out: | 2273 | out: |
| 2271 | if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) | 2274 | if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) |
| @@ -2622,6 +2625,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 2622 | btrfs_mark_buffer_dirty(leaf); | 2625 | btrfs_mark_buffer_dirty(leaf); |
| 2623 | btrfs_release_path(path); | 2626 | btrfs_release_path(path); |
| 2624 | 2627 | ||
| 2628 | inode_inc_iversion(inode); | ||
| 2625 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2629 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
| 2626 | 2630 | ||
| 2627 | /* | 2631 | /* |
| @@ -2914,7 +2918,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
| 2914 | up_read(&info->groups_sem); | 2918 | up_read(&info->groups_sem); |
| 2915 | } | 2919 | } |
| 2916 | 2920 | ||
| 2917 | user_dest = (struct btrfs_ioctl_space_info *) | 2921 | user_dest = (struct btrfs_ioctl_space_info __user *) |
| 2918 | (arg + sizeof(struct btrfs_ioctl_space_args)); | 2922 | (arg + sizeof(struct btrfs_ioctl_space_args)); |
| 2919 | 2923 | ||
| 2920 | if (copy_to_user(user_dest, dest_orig, alloc_size)) | 2924 | if (copy_to_user(user_dest, dest_orig, alloc_size)) |
| @@ -3042,6 +3046,28 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, | |||
| 3042 | return ret; | 3046 | return ret; |
| 3043 | } | 3047 | } |
| 3044 | 3048 | ||
| 3049 | static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, | ||
| 3050 | void __user *arg, int reset_after_read) | ||
| 3051 | { | ||
| 3052 | struct btrfs_ioctl_get_dev_stats *sa; | ||
| 3053 | int ret; | ||
| 3054 | |||
| 3055 | if (reset_after_read && !capable(CAP_SYS_ADMIN)) | ||
| 3056 | return -EPERM; | ||
| 3057 | |||
| 3058 | sa = memdup_user(arg, sizeof(*sa)); | ||
| 3059 | if (IS_ERR(sa)) | ||
| 3060 | return PTR_ERR(sa); | ||
| 3061 | |||
| 3062 | ret = btrfs_get_dev_stats(root, sa, reset_after_read); | ||
| 3063 | |||
| 3064 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
| 3065 | ret = -EFAULT; | ||
| 3066 | |||
| 3067 | kfree(sa); | ||
| 3068 | return ret; | ||
| 3069 | } | ||
| 3070 | |||
| 3045 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | 3071 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) |
| 3046 | { | 3072 | { |
| 3047 | int ret = 0; | 3073 | int ret = 0; |
| @@ -3212,8 +3238,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, | |||
| 3212 | } | 3238 | } |
| 3213 | } | 3239 | } |
| 3214 | 3240 | ||
| 3215 | static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) | 3241 | static long btrfs_ioctl_balance(struct file *file, void __user *arg) |
| 3216 | { | 3242 | { |
| 3243 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3217 | struct btrfs_fs_info *fs_info = root->fs_info; | 3244 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3218 | struct btrfs_ioctl_balance_args *bargs; | 3245 | struct btrfs_ioctl_balance_args *bargs; |
| 3219 | struct btrfs_balance_control *bctl; | 3246 | struct btrfs_balance_control *bctl; |
| @@ -3225,6 +3252,10 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) | |||
| 3225 | if (fs_info->sb->s_flags & MS_RDONLY) | 3252 | if (fs_info->sb->s_flags & MS_RDONLY) |
| 3226 | return -EROFS; | 3253 | return -EROFS; |
| 3227 | 3254 | ||
| 3255 | ret = mnt_want_write(file->f_path.mnt); | ||
| 3256 | if (ret) | ||
| 3257 | return ret; | ||
| 3258 | |||
| 3228 | mutex_lock(&fs_info->volume_mutex); | 3259 | mutex_lock(&fs_info->volume_mutex); |
| 3229 | mutex_lock(&fs_info->balance_mutex); | 3260 | mutex_lock(&fs_info->balance_mutex); |
| 3230 | 3261 | ||
| @@ -3291,6 +3322,7 @@ out_bargs: | |||
| 3291 | out: | 3322 | out: |
| 3292 | mutex_unlock(&fs_info->balance_mutex); | 3323 | mutex_unlock(&fs_info->balance_mutex); |
| 3293 | mutex_unlock(&fs_info->volume_mutex); | 3324 | mutex_unlock(&fs_info->volume_mutex); |
| 3325 | mnt_drop_write(file->f_path.mnt); | ||
| 3294 | return ret; | 3326 | return ret; |
| 3295 | } | 3327 | } |
| 3296 | 3328 | ||
| @@ -3386,7 +3418,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 3386 | case BTRFS_IOC_DEV_INFO: | 3418 | case BTRFS_IOC_DEV_INFO: |
| 3387 | return btrfs_ioctl_dev_info(root, argp); | 3419 | return btrfs_ioctl_dev_info(root, argp); |
| 3388 | case BTRFS_IOC_BALANCE: | 3420 | case BTRFS_IOC_BALANCE: |
| 3389 | return btrfs_ioctl_balance(root, NULL); | 3421 | return btrfs_ioctl_balance(file, NULL); |
| 3390 | case BTRFS_IOC_CLONE: | 3422 | case BTRFS_IOC_CLONE: |
| 3391 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); | 3423 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); |
| 3392 | case BTRFS_IOC_CLONE_RANGE: | 3424 | case BTRFS_IOC_CLONE_RANGE: |
| @@ -3419,11 +3451,15 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 3419 | case BTRFS_IOC_SCRUB_PROGRESS: | 3451 | case BTRFS_IOC_SCRUB_PROGRESS: |
| 3420 | return btrfs_ioctl_scrub_progress(root, argp); | 3452 | return btrfs_ioctl_scrub_progress(root, argp); |
| 3421 | case BTRFS_IOC_BALANCE_V2: | 3453 | case BTRFS_IOC_BALANCE_V2: |
| 3422 | return btrfs_ioctl_balance(root, argp); | 3454 | return btrfs_ioctl_balance(file, argp); |
| 3423 | case BTRFS_IOC_BALANCE_CTL: | 3455 | case BTRFS_IOC_BALANCE_CTL: |
| 3424 | return btrfs_ioctl_balance_ctl(root, arg); | 3456 | return btrfs_ioctl_balance_ctl(root, arg); |
| 3425 | case BTRFS_IOC_BALANCE_PROGRESS: | 3457 | case BTRFS_IOC_BALANCE_PROGRESS: |
| 3426 | return btrfs_ioctl_balance_progress(root, argp); | 3458 | return btrfs_ioctl_balance_progress(root, argp); |
| 3459 | case BTRFS_IOC_GET_DEV_STATS: | ||
| 3460 | return btrfs_ioctl_get_dev_stats(root, argp, 0); | ||
| 3461 | case BTRFS_IOC_GET_AND_RESET_DEV_STATS: | ||
| 3462 | return btrfs_ioctl_get_dev_stats(root, argp, 1); | ||
| 3427 | } | 3463 | } |
| 3428 | 3464 | ||
| 3429 | return -ENOTTY; | 3465 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 086e6bdae1c4..497c530724cf 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
| @@ -266,6 +266,35 @@ struct btrfs_ioctl_logical_ino_args { | |||
| 266 | __u64 inodes; | 266 | __u64 inodes; |
| 267 | }; | 267 | }; |
| 268 | 268 | ||
| 269 | enum btrfs_dev_stat_values { | ||
| 270 | /* disk I/O failure stats */ | ||
| 271 | BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
| 272 | BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
| 273 | BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
| 274 | |||
| 275 | /* stats for indirect indications for I/O failures */ | ||
| 276 | BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or | ||
| 277 | * contents is illegal: this is an | ||
| 278 | * indication that the block was damaged | ||
| 279 | * during read or write, or written to | ||
| 280 | * wrong location or read from wrong | ||
| 281 | * location */ | ||
| 282 | BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not | ||
| 283 | * been written */ | ||
| 284 | |||
| 285 | BTRFS_DEV_STAT_VALUES_MAX | ||
| 286 | }; | ||
| 287 | |||
| 288 | struct btrfs_ioctl_get_dev_stats { | ||
| 289 | __u64 devid; /* in */ | ||
| 290 | __u64 nr_items; /* in/out */ | ||
| 291 | |||
| 292 | /* out values: */ | ||
| 293 | __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
| 294 | |||
| 295 | __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ | ||
| 296 | }; | ||
| 297 | |||
| 269 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ | 298 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ |
| 270 | struct btrfs_ioctl_vol_args) | 299 | struct btrfs_ioctl_vol_args) |
| 271 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ | 300 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ |
| @@ -330,5 +359,9 @@ struct btrfs_ioctl_logical_ino_args { | |||
| 330 | struct btrfs_ioctl_ino_path_args) | 359 | struct btrfs_ioctl_ino_path_args) |
| 331 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ | 360 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ |
| 332 | struct btrfs_ioctl_ino_path_args) | 361 | struct btrfs_ioctl_ino_path_args) |
| 362 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ | ||
| 363 | struct btrfs_ioctl_get_dev_stats) | ||
| 364 | #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ | ||
| 365 | struct btrfs_ioctl_get_dev_stats) | ||
| 333 | 366 | ||
| 334 | #endif | 367 | #endif |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index bbf6d0d9aebe..9e138cdc36c5 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 196 | entry->len = len; | 196 | entry->len = len; |
| 197 | entry->disk_len = disk_len; | 197 | entry->disk_len = disk_len; |
| 198 | entry->bytes_left = len; | 198 | entry->bytes_left = len; |
| 199 | entry->inode = inode; | 199 | entry->inode = igrab(inode); |
| 200 | entry->compress_type = compress_type; | 200 | entry->compress_type = compress_type; |
| 201 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 201 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
| 202 | set_bit(type, &entry->flags); | 202 | set_bit(type, &entry->flags); |
| @@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 212 | 212 | ||
| 213 | trace_btrfs_ordered_extent_add(inode, entry); | 213 | trace_btrfs_ordered_extent_add(inode, entry); |
| 214 | 214 | ||
| 215 | spin_lock(&tree->lock); | 215 | spin_lock_irq(&tree->lock); |
| 216 | node = tree_insert(&tree->tree, file_offset, | 216 | node = tree_insert(&tree->tree, file_offset, |
| 217 | &entry->rb_node); | 217 | &entry->rb_node); |
| 218 | if (node) | 218 | if (node) |
| 219 | ordered_data_tree_panic(inode, -EEXIST, file_offset); | 219 | ordered_data_tree_panic(inode, -EEXIST, file_offset); |
| 220 | spin_unlock(&tree->lock); | 220 | spin_unlock_irq(&tree->lock); |
| 221 | 221 | ||
| 222 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 222 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
| 223 | list_add_tail(&entry->root_extent_list, | 223 | list_add_tail(&entry->root_extent_list, |
| @@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode, | |||
| 264 | struct btrfs_ordered_inode_tree *tree; | 264 | struct btrfs_ordered_inode_tree *tree; |
| 265 | 265 | ||
| 266 | tree = &BTRFS_I(inode)->ordered_tree; | 266 | tree = &BTRFS_I(inode)->ordered_tree; |
| 267 | spin_lock(&tree->lock); | 267 | spin_lock_irq(&tree->lock); |
| 268 | list_add_tail(&sum->list, &entry->list); | 268 | list_add_tail(&sum->list, &entry->list); |
| 269 | spin_unlock(&tree->lock); | 269 | spin_unlock_irq(&tree->lock); |
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | /* | 272 | /* |
| @@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode, | |||
| 283 | */ | 283 | */ |
| 284 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | 284 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, |
| 285 | struct btrfs_ordered_extent **cached, | 285 | struct btrfs_ordered_extent **cached, |
| 286 | u64 *file_offset, u64 io_size) | 286 | u64 *file_offset, u64 io_size, int uptodate) |
| 287 | { | 287 | { |
| 288 | struct btrfs_ordered_inode_tree *tree; | 288 | struct btrfs_ordered_inode_tree *tree; |
| 289 | struct rb_node *node; | 289 | struct rb_node *node; |
| 290 | struct btrfs_ordered_extent *entry = NULL; | 290 | struct btrfs_ordered_extent *entry = NULL; |
| 291 | int ret; | 291 | int ret; |
| 292 | unsigned long flags; | ||
| 292 | u64 dec_end; | 293 | u64 dec_end; |
| 293 | u64 dec_start; | 294 | u64 dec_start; |
| 294 | u64 to_dec; | 295 | u64 to_dec; |
| 295 | 296 | ||
| 296 | tree = &BTRFS_I(inode)->ordered_tree; | 297 | tree = &BTRFS_I(inode)->ordered_tree; |
| 297 | spin_lock(&tree->lock); | 298 | spin_lock_irqsave(&tree->lock, flags); |
| 298 | node = tree_search(tree, *file_offset); | 299 | node = tree_search(tree, *file_offset); |
| 299 | if (!node) { | 300 | if (!node) { |
| 300 | ret = 1; | 301 | ret = 1; |
| @@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, | |||
| 323 | (unsigned long long)to_dec); | 324 | (unsigned long long)to_dec); |
| 324 | } | 325 | } |
| 325 | entry->bytes_left -= to_dec; | 326 | entry->bytes_left -= to_dec; |
| 327 | if (!uptodate) | ||
| 328 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | ||
| 329 | |||
| 326 | if (entry->bytes_left == 0) | 330 | if (entry->bytes_left == 0) |
| 327 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 331 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 328 | else | 332 | else |
| @@ -332,7 +336,7 @@ out: | |||
| 332 | *cached = entry; | 336 | *cached = entry; |
| 333 | atomic_inc(&entry->refs); | 337 | atomic_inc(&entry->refs); |
| 334 | } | 338 | } |
| 335 | spin_unlock(&tree->lock); | 339 | spin_unlock_irqrestore(&tree->lock, flags); |
| 336 | return ret == 0; | 340 | return ret == 0; |
| 337 | } | 341 | } |
| 338 | 342 | ||
| @@ -347,15 +351,21 @@ out: | |||
| 347 | */ | 351 | */ |
| 348 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 352 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
| 349 | struct btrfs_ordered_extent **cached, | 353 | struct btrfs_ordered_extent **cached, |
| 350 | u64 file_offset, u64 io_size) | 354 | u64 file_offset, u64 io_size, int uptodate) |
| 351 | { | 355 | { |
| 352 | struct btrfs_ordered_inode_tree *tree; | 356 | struct btrfs_ordered_inode_tree *tree; |
| 353 | struct rb_node *node; | 357 | struct rb_node *node; |
| 354 | struct btrfs_ordered_extent *entry = NULL; | 358 | struct btrfs_ordered_extent *entry = NULL; |
| 359 | unsigned long flags; | ||
| 355 | int ret; | 360 | int ret; |
| 356 | 361 | ||
| 357 | tree = &BTRFS_I(inode)->ordered_tree; | 362 | tree = &BTRFS_I(inode)->ordered_tree; |
| 358 | spin_lock(&tree->lock); | 363 | spin_lock_irqsave(&tree->lock, flags); |
| 364 | if (cached && *cached) { | ||
| 365 | entry = *cached; | ||
| 366 | goto have_entry; | ||
| 367 | } | ||
| 368 | |||
| 359 | node = tree_search(tree, file_offset); | 369 | node = tree_search(tree, file_offset); |
| 360 | if (!node) { | 370 | if (!node) { |
| 361 | ret = 1; | 371 | ret = 1; |
| @@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 363 | } | 373 | } |
| 364 | 374 | ||
| 365 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 375 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
| 376 | have_entry: | ||
| 366 | if (!offset_in_entry(entry, file_offset)) { | 377 | if (!offset_in_entry(entry, file_offset)) { |
| 367 | ret = 1; | 378 | ret = 1; |
| 368 | goto out; | 379 | goto out; |
| @@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 374 | (unsigned long long)io_size); | 385 | (unsigned long long)io_size); |
| 375 | } | 386 | } |
| 376 | entry->bytes_left -= io_size; | 387 | entry->bytes_left -= io_size; |
| 388 | if (!uptodate) | ||
| 389 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | ||
| 390 | |||
| 377 | if (entry->bytes_left == 0) | 391 | if (entry->bytes_left == 0) |
| 378 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 392 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 379 | else | 393 | else |
| @@ -383,7 +397,7 @@ out: | |||
| 383 | *cached = entry; | 397 | *cached = entry; |
| 384 | atomic_inc(&entry->refs); | 398 | atomic_inc(&entry->refs); |
| 385 | } | 399 | } |
| 386 | spin_unlock(&tree->lock); | 400 | spin_unlock_irqrestore(&tree->lock, flags); |
| 387 | return ret == 0; | 401 | return ret == 0; |
| 388 | } | 402 | } |
| 389 | 403 | ||
| @@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
| 399 | trace_btrfs_ordered_extent_put(entry->inode, entry); | 413 | trace_btrfs_ordered_extent_put(entry->inode, entry); |
| 400 | 414 | ||
| 401 | if (atomic_dec_and_test(&entry->refs)) { | 415 | if (atomic_dec_and_test(&entry->refs)) { |
| 416 | if (entry->inode) | ||
| 417 | btrfs_add_delayed_iput(entry->inode); | ||
| 402 | while (!list_empty(&entry->list)) { | 418 | while (!list_empty(&entry->list)) { |
| 403 | cur = entry->list.next; | 419 | cur = entry->list.next; |
| 404 | sum = list_entry(cur, struct btrfs_ordered_sum, list); | 420 | sum = list_entry(cur, struct btrfs_ordered_sum, list); |
| @@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
| 411 | 427 | ||
| 412 | /* | 428 | /* |
| 413 | * remove an ordered extent from the tree. No references are dropped | 429 | * remove an ordered extent from the tree. No references are dropped |
| 414 | * and you must wake_up entry->wait. You must hold the tree lock | 430 | * and waiters are woken up. |
| 415 | * while you call this function. | ||
| 416 | */ | 431 | */ |
| 417 | static void __btrfs_remove_ordered_extent(struct inode *inode, | 432 | void btrfs_remove_ordered_extent(struct inode *inode, |
| 418 | struct btrfs_ordered_extent *entry) | 433 | struct btrfs_ordered_extent *entry) |
| 419 | { | 434 | { |
| 420 | struct btrfs_ordered_inode_tree *tree; | 435 | struct btrfs_ordered_inode_tree *tree; |
| 421 | struct btrfs_root *root = BTRFS_I(inode)->root; | 436 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 422 | struct rb_node *node; | 437 | struct rb_node *node; |
| 423 | 438 | ||
| 424 | tree = &BTRFS_I(inode)->ordered_tree; | 439 | tree = &BTRFS_I(inode)->ordered_tree; |
| 440 | spin_lock_irq(&tree->lock); | ||
| 425 | node = &entry->rb_node; | 441 | node = &entry->rb_node; |
| 426 | rb_erase(node, &tree->tree); | 442 | rb_erase(node, &tree->tree); |
| 427 | tree->last = NULL; | 443 | tree->last = NULL; |
| 428 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 444 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
| 445 | spin_unlock_irq(&tree->lock); | ||
| 429 | 446 | ||
| 430 | spin_lock(&root->fs_info->ordered_extent_lock); | 447 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 431 | list_del_init(&entry->root_extent_list); | 448 | list_del_init(&entry->root_extent_list); |
| @@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode, | |||
| 442 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 459 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
| 443 | } | 460 | } |
| 444 | spin_unlock(&root->fs_info->ordered_extent_lock); | 461 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 445 | } | ||
| 446 | |||
| 447 | /* | ||
| 448 | * remove an ordered extent from the tree. No references are dropped | ||
| 449 | * but any waiters are woken. | ||
| 450 | */ | ||
| 451 | void btrfs_remove_ordered_extent(struct inode *inode, | ||
| 452 | struct btrfs_ordered_extent *entry) | ||
| 453 | { | ||
| 454 | struct btrfs_ordered_inode_tree *tree; | ||
| 455 | |||
| 456 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 457 | spin_lock(&tree->lock); | ||
| 458 | __btrfs_remove_ordered_extent(inode, entry); | ||
| 459 | spin_unlock(&tree->lock); | ||
| 460 | wake_up(&entry->wait); | 462 | wake_up(&entry->wait); |
| 461 | } | 463 | } |
| 462 | 464 | ||
| @@ -621,19 +623,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 621 | if (orig_end > INT_LIMIT(loff_t)) | 623 | if (orig_end > INT_LIMIT(loff_t)) |
| 622 | orig_end = INT_LIMIT(loff_t); | 624 | orig_end = INT_LIMIT(loff_t); |
| 623 | } | 625 | } |
| 624 | again: | 626 | |
| 625 | /* start IO across the range first to instantiate any delalloc | 627 | /* start IO across the range first to instantiate any delalloc |
| 626 | * extents | 628 | * extents |
| 627 | */ | 629 | */ |
| 628 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); | 630 | filemap_write_and_wait_range(inode->i_mapping, start, orig_end); |
| 629 | |||
| 630 | /* The compression code will leave pages locked but return from | ||
| 631 | * writepage without setting the page writeback. Starting again | ||
| 632 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | ||
| 633 | */ | ||
| 634 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); | ||
| 635 | |||
| 636 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); | ||
| 637 | 631 | ||
| 638 | end = orig_end; | 632 | end = orig_end; |
| 639 | found = 0; | 633 | found = 0; |
| @@ -657,11 +651,6 @@ again: | |||
| 657 | break; | 651 | break; |
| 658 | end--; | 652 | end--; |
| 659 | } | 653 | } |
| 660 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | ||
| 661 | EXTENT_DELALLOC, 0, NULL)) { | ||
| 662 | schedule_timeout(1); | ||
| 663 | goto again; | ||
| 664 | } | ||
| 665 | } | 654 | } |
| 666 | 655 | ||
| 667 | /* | 656 | /* |
| @@ -676,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
| 676 | struct btrfs_ordered_extent *entry = NULL; | 665 | struct btrfs_ordered_extent *entry = NULL; |
| 677 | 666 | ||
| 678 | tree = &BTRFS_I(inode)->ordered_tree; | 667 | tree = &BTRFS_I(inode)->ordered_tree; |
| 679 | spin_lock(&tree->lock); | 668 | spin_lock_irq(&tree->lock); |
| 680 | node = tree_search(tree, file_offset); | 669 | node = tree_search(tree, file_offset); |
| 681 | if (!node) | 670 | if (!node) |
| 682 | goto out; | 671 | goto out; |
| @@ -687,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
| 687 | if (entry) | 676 | if (entry) |
| 688 | atomic_inc(&entry->refs); | 677 | atomic_inc(&entry->refs); |
| 689 | out: | 678 | out: |
| 690 | spin_unlock(&tree->lock); | 679 | spin_unlock_irq(&tree->lock); |
| 691 | return entry; | 680 | return entry; |
| 692 | } | 681 | } |
| 693 | 682 | ||
| @@ -703,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
| 703 | struct btrfs_ordered_extent *entry = NULL; | 692 | struct btrfs_ordered_extent *entry = NULL; |
| 704 | 693 | ||
| 705 | tree = &BTRFS_I(inode)->ordered_tree; | 694 | tree = &BTRFS_I(inode)->ordered_tree; |
| 706 | spin_lock(&tree->lock); | 695 | spin_lock_irq(&tree->lock); |
| 707 | node = tree_search(tree, file_offset); | 696 | node = tree_search(tree, file_offset); |
| 708 | if (!node) { | 697 | if (!node) { |
| 709 | node = tree_search(tree, file_offset + len); | 698 | node = tree_search(tree, file_offset + len); |
| @@ -728,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
| 728 | out: | 717 | out: |
| 729 | if (entry) | 718 | if (entry) |
| 730 | atomic_inc(&entry->refs); | 719 | atomic_inc(&entry->refs); |
| 731 | spin_unlock(&tree->lock); | 720 | spin_unlock_irq(&tree->lock); |
| 732 | return entry; | 721 | return entry; |
| 733 | } | 722 | } |
| 734 | 723 | ||
| @@ -744,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | |||
| 744 | struct btrfs_ordered_extent *entry = NULL; | 733 | struct btrfs_ordered_extent *entry = NULL; |
| 745 | 734 | ||
| 746 | tree = &BTRFS_I(inode)->ordered_tree; | 735 | tree = &BTRFS_I(inode)->ordered_tree; |
| 747 | spin_lock(&tree->lock); | 736 | spin_lock_irq(&tree->lock); |
| 748 | node = tree_search(tree, file_offset); | 737 | node = tree_search(tree, file_offset); |
| 749 | if (!node) | 738 | if (!node) |
| 750 | goto out; | 739 | goto out; |
| @@ -752,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | |||
| 752 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 741 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
| 753 | atomic_inc(&entry->refs); | 742 | atomic_inc(&entry->refs); |
| 754 | out: | 743 | out: |
| 755 | spin_unlock(&tree->lock); | 744 | spin_unlock_irq(&tree->lock); |
| 756 | return entry; | 745 | return entry; |
| 757 | } | 746 | } |
| 758 | 747 | ||
| @@ -764,7 +753,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 764 | struct btrfs_ordered_extent *ordered) | 753 | struct btrfs_ordered_extent *ordered) |
| 765 | { | 754 | { |
| 766 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 755 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
| 767 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 768 | u64 disk_i_size; | 756 | u64 disk_i_size; |
| 769 | u64 new_i_size; | 757 | u64 new_i_size; |
| 770 | u64 i_size_test; | 758 | u64 i_size_test; |
| @@ -779,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 779 | else | 767 | else |
| 780 | offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); | 768 | offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); |
| 781 | 769 | ||
| 782 | spin_lock(&tree->lock); | 770 | spin_lock_irq(&tree->lock); |
| 783 | disk_i_size = BTRFS_I(inode)->disk_i_size; | 771 | disk_i_size = BTRFS_I(inode)->disk_i_size; |
| 784 | 772 | ||
| 785 | /* truncate file */ | 773 | /* truncate file */ |
| @@ -798,14 +786,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 798 | } | 786 | } |
| 799 | 787 | ||
| 800 | /* | 788 | /* |
| 801 | * we can't update the disk_isize if there are delalloc bytes | ||
| 802 | * between disk_i_size and this ordered extent | ||
| 803 | */ | ||
| 804 | if (test_range_bit(io_tree, disk_i_size, offset - 1, | ||
| 805 | EXTENT_DELALLOC, 0, NULL)) { | ||
| 806 | goto out; | ||
| 807 | } | ||
| 808 | /* | ||
| 809 | * walk backward from this ordered extent to disk_i_size. | 789 | * walk backward from this ordered extent to disk_i_size. |
| 810 | * if we find an ordered extent then we can't update disk i_size | 790 | * if we find an ordered extent then we can't update disk i_size |
| 811 | * yet | 791 | * yet |
| @@ -825,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 825 | } | 805 | } |
| 826 | node = prev; | 806 | node = prev; |
| 827 | } | 807 | } |
| 828 | while (node) { | 808 | for (; node; node = rb_prev(node)) { |
| 829 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 809 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
| 810 | |||
| 811 | /* We treat this entry as if it doesnt exist */ | ||
| 812 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
| 813 | continue; | ||
| 830 | if (test->file_offset + test->len <= disk_i_size) | 814 | if (test->file_offset + test->len <= disk_i_size) |
| 831 | break; | 815 | break; |
| 832 | if (test->file_offset >= i_size) | 816 | if (test->file_offset >= i_size) |
| 833 | break; | 817 | break; |
| 834 | if (test->file_offset >= disk_i_size) | 818 | if (test->file_offset >= disk_i_size) |
| 835 | goto out; | 819 | goto out; |
| 836 | node = rb_prev(node); | ||
| 837 | } | 820 | } |
| 838 | new_i_size = min_t(u64, offset, i_size); | 821 | new_i_size = min_t(u64, offset, i_size); |
| 839 | 822 | ||
| @@ -851,43 +834,49 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 851 | else | 834 | else |
| 852 | node = rb_first(&tree->tree); | 835 | node = rb_first(&tree->tree); |
| 853 | } | 836 | } |
| 854 | i_size_test = 0; | 837 | |
| 855 | if (node) { | 838 | /* |
| 856 | /* | 839 | * We are looking for an area between our current extent and the next |
| 857 | * do we have an area where IO might have finished | 840 | * ordered extent to update the i_size to. There are 3 cases here |
| 858 | * between our ordered extent and the next one. | 841 | * |
| 859 | */ | 842 | * 1) We don't actually have anything and we can update to i_size. |
| 843 | * 2) We have stuff but they already did their i_size update so again we | ||
| 844 | * can just update to i_size. | ||
| 845 | * 3) We have an outstanding ordered extent so the most we can update | ||
| 846 | * our disk_i_size to is the start of the next offset. | ||
| 847 | */ | ||
| 848 | i_size_test = i_size; | ||
| 849 | for (; node; node = rb_next(node)) { | ||
| 860 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 850 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
| 861 | if (test->file_offset > offset) | 851 | |
| 852 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
| 853 | continue; | ||
| 854 | if (test->file_offset > offset) { | ||
| 862 | i_size_test = test->file_offset; | 855 | i_size_test = test->file_offset; |
| 863 | } else { | 856 | break; |
| 864 | i_size_test = i_size; | 857 | } |
| 865 | } | 858 | } |
| 866 | 859 | ||
| 867 | /* | 860 | /* |
| 868 | * i_size_test is the end of a region after this ordered | 861 | * i_size_test is the end of a region after this ordered |
| 869 | * extent where there are no ordered extents. As long as there | 862 | * extent where there are no ordered extents, we can safely set |
| 870 | * are no delalloc bytes in this area, it is safe to update | 863 | * disk_i_size to this. |
| 871 | * disk_i_size to the end of the region. | ||
| 872 | */ | 864 | */ |
| 873 | if (i_size_test > offset && | 865 | if (i_size_test > offset) |
| 874 | !test_range_bit(io_tree, offset, i_size_test - 1, | ||
| 875 | EXTENT_DELALLOC, 0, NULL)) { | ||
| 876 | new_i_size = min_t(u64, i_size_test, i_size); | 866 | new_i_size = min_t(u64, i_size_test, i_size); |
| 877 | } | ||
| 878 | BTRFS_I(inode)->disk_i_size = new_i_size; | 867 | BTRFS_I(inode)->disk_i_size = new_i_size; |
| 879 | ret = 0; | 868 | ret = 0; |
| 880 | out: | 869 | out: |
| 881 | /* | 870 | /* |
| 882 | * we need to remove the ordered extent with the tree lock held | 871 | * We need to do this because we can't remove ordered extents until |
| 883 | * so that other people calling this function don't find our fully | 872 | * after the disk_i_size has been updated and then the inode has been |
| 884 | * processed ordered entry and skip updating the i_size | 873 | * updated to reflect the change, so we need to tell anybody who finds |
| 874 | * this ordered extent that we've already done all the real work, we | ||
| 875 | * just haven't completed all the other work. | ||
| 885 | */ | 876 | */ |
| 886 | if (ordered) | 877 | if (ordered) |
| 887 | __btrfs_remove_ordered_extent(inode, ordered); | 878 | set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags); |
| 888 | spin_unlock(&tree->lock); | 879 | spin_unlock_irq(&tree->lock); |
| 889 | if (ordered) | ||
| 890 | wake_up(&ordered->wait); | ||
| 891 | return ret; | 880 | return ret; |
| 892 | } | 881 | } |
| 893 | 882 | ||
| @@ -912,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
| 912 | if (!ordered) | 901 | if (!ordered) |
| 913 | return 1; | 902 | return 1; |
| 914 | 903 | ||
| 915 | spin_lock(&tree->lock); | 904 | spin_lock_irq(&tree->lock); |
| 916 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { | 905 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { |
| 917 | if (disk_bytenr >= ordered_sum->bytenr) { | 906 | if (disk_bytenr >= ordered_sum->bytenr) { |
| 918 | num_sectors = ordered_sum->len / sectorsize; | 907 | num_sectors = ordered_sum->len / sectorsize; |
| @@ -927,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
| 927 | } | 916 | } |
| 928 | } | 917 | } |
| 929 | out: | 918 | out: |
| 930 | spin_unlock(&tree->lock); | 919 | spin_unlock_irq(&tree->lock); |
| 931 | btrfs_put_ordered_extent(ordered); | 920 | btrfs_put_ordered_extent(ordered); |
| 932 | return ret; | 921 | return ret; |
| 933 | } | 922 | } |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c355ad4dc1a6..e03c560d2997 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -74,6 +74,12 @@ struct btrfs_ordered_sum { | |||
| 74 | 74 | ||
| 75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | 75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ |
| 76 | 76 | ||
| 77 | #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ | ||
| 78 | |||
| 79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent | ||
| 80 | * has done its due diligence in updating | ||
| 81 | * the isize. */ | ||
| 82 | |||
| 77 | struct btrfs_ordered_extent { | 83 | struct btrfs_ordered_extent { |
| 78 | /* logical offset in the file */ | 84 | /* logical offset in the file */ |
| 79 | u64 file_offset; | 85 | u64 file_offset; |
| @@ -113,6 +119,8 @@ struct btrfs_ordered_extent { | |||
| 113 | 119 | ||
| 114 | /* a per root list of all the pending ordered extents */ | 120 | /* a per root list of all the pending ordered extents */ |
| 115 | struct list_head root_extent_list; | 121 | struct list_head root_extent_list; |
| 122 | |||
| 123 | struct btrfs_work work; | ||
| 116 | }; | 124 | }; |
| 117 | 125 | ||
| 118 | 126 | ||
| @@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
| 143 | struct btrfs_ordered_extent *entry); | 151 | struct btrfs_ordered_extent *entry); |
| 144 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 152 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
| 145 | struct btrfs_ordered_extent **cached, | 153 | struct btrfs_ordered_extent **cached, |
| 146 | u64 file_offset, u64 io_size); | 154 | u64 file_offset, u64 io_size, int uptodate); |
| 147 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | 155 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, |
| 148 | struct btrfs_ordered_extent **cached, | 156 | struct btrfs_ordered_extent **cached, |
| 149 | u64 *file_offset, u64 io_size); | 157 | u64 *file_offset, u64 io_size, |
| 158 | int uptodate); | ||
| 150 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 159 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
| 151 | u64 start, u64 len, u64 disk_len, int type); | 160 | u64 start, u64 len, u64 disk_len, int type); |
| 152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 161 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f38e452486b8..5e23684887eb 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
| @@ -294,6 +294,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
| 294 | btrfs_dev_extent_chunk_offset(l, dev_extent), | 294 | btrfs_dev_extent_chunk_offset(l, dev_extent), |
| 295 | (unsigned long long) | 295 | (unsigned long long) |
| 296 | btrfs_dev_extent_length(l, dev_extent)); | 296 | btrfs_dev_extent_length(l, dev_extent)); |
| 297 | case BTRFS_DEV_STATS_KEY: | ||
| 298 | printk(KERN_INFO "\t\tdevice stats\n"); | ||
| 299 | break; | ||
| 297 | }; | 300 | }; |
| 298 | } | 301 | } |
| 299 | } | 302 | } |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index ac5d01085884..48a4882d8ad5 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -718,13 +718,18 @@ static void reada_start_machine_worker(struct btrfs_work *work) | |||
| 718 | { | 718 | { |
| 719 | struct reada_machine_work *rmw; | 719 | struct reada_machine_work *rmw; |
| 720 | struct btrfs_fs_info *fs_info; | 720 | struct btrfs_fs_info *fs_info; |
| 721 | int old_ioprio; | ||
| 721 | 722 | ||
| 722 | rmw = container_of(work, struct reada_machine_work, work); | 723 | rmw = container_of(work, struct reada_machine_work, work); |
| 723 | fs_info = rmw->fs_info; | 724 | fs_info = rmw->fs_info; |
| 724 | 725 | ||
| 725 | kfree(rmw); | 726 | kfree(rmw); |
| 726 | 727 | ||
| 728 | old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), | ||
| 729 | task_nice_ioprio(current)); | ||
| 730 | set_task_ioprio(current, BTRFS_IOPRIO_READA); | ||
| 727 | __reada_start_machine(fs_info); | 731 | __reada_start_machine(fs_info); |
| 732 | set_task_ioprio(current, old_ioprio); | ||
| 728 | } | 733 | } |
| 729 | 734 | ||
| 730 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) | 735 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2f3d6f917fb3..a38cfa4f251e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -50,7 +50,7 @@ struct scrub_dev; | |||
| 50 | struct scrub_page { | 50 | struct scrub_page { |
| 51 | struct scrub_block *sblock; | 51 | struct scrub_block *sblock; |
| 52 | struct page *page; | 52 | struct page *page; |
| 53 | struct block_device *bdev; | 53 | struct btrfs_device *dev; |
| 54 | u64 flags; /* extent flags */ | 54 | u64 flags; /* extent flags */ |
| 55 | u64 generation; | 55 | u64 generation; |
| 56 | u64 logical; | 56 | u64 logical; |
| @@ -86,6 +86,7 @@ struct scrub_block { | |||
| 86 | unsigned int header_error:1; | 86 | unsigned int header_error:1; |
| 87 | unsigned int checksum_error:1; | 87 | unsigned int checksum_error:1; |
| 88 | unsigned int no_io_error_seen:1; | 88 | unsigned int no_io_error_seen:1; |
| 89 | unsigned int generation_error:1; /* also sets header_error */ | ||
| 89 | }; | 90 | }; |
| 90 | }; | 91 | }; |
| 91 | 92 | ||
| @@ -675,6 +676,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 675 | sdev->stat.read_errors++; | 676 | sdev->stat.read_errors++; |
| 676 | sdev->stat.uncorrectable_errors++; | 677 | sdev->stat.uncorrectable_errors++; |
| 677 | spin_unlock(&sdev->stat_lock); | 678 | spin_unlock(&sdev->stat_lock); |
| 679 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 680 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 678 | goto out; | 681 | goto out; |
| 679 | } | 682 | } |
| 680 | 683 | ||
| @@ -686,6 +689,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 686 | sdev->stat.read_errors++; | 689 | sdev->stat.read_errors++; |
| 687 | sdev->stat.uncorrectable_errors++; | 690 | sdev->stat.uncorrectable_errors++; |
| 688 | spin_unlock(&sdev->stat_lock); | 691 | spin_unlock(&sdev->stat_lock); |
| 692 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 693 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 689 | goto out; | 694 | goto out; |
| 690 | } | 695 | } |
| 691 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); | 696 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); |
| @@ -699,6 +704,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 699 | sdev->stat.read_errors++; | 704 | sdev->stat.read_errors++; |
| 700 | sdev->stat.uncorrectable_errors++; | 705 | sdev->stat.uncorrectable_errors++; |
| 701 | spin_unlock(&sdev->stat_lock); | 706 | spin_unlock(&sdev->stat_lock); |
| 707 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 708 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 702 | goto out; | 709 | goto out; |
| 703 | } | 710 | } |
| 704 | 711 | ||
| @@ -725,12 +732,16 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 725 | spin_unlock(&sdev->stat_lock); | 732 | spin_unlock(&sdev->stat_lock); |
| 726 | if (__ratelimit(&_rs)) | 733 | if (__ratelimit(&_rs)) |
| 727 | scrub_print_warning("i/o error", sblock_to_check); | 734 | scrub_print_warning("i/o error", sblock_to_check); |
| 735 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 736 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 728 | } else if (sblock_bad->checksum_error) { | 737 | } else if (sblock_bad->checksum_error) { |
| 729 | spin_lock(&sdev->stat_lock); | 738 | spin_lock(&sdev->stat_lock); |
| 730 | sdev->stat.csum_errors++; | 739 | sdev->stat.csum_errors++; |
| 731 | spin_unlock(&sdev->stat_lock); | 740 | spin_unlock(&sdev->stat_lock); |
| 732 | if (__ratelimit(&_rs)) | 741 | if (__ratelimit(&_rs)) |
| 733 | scrub_print_warning("checksum error", sblock_to_check); | 742 | scrub_print_warning("checksum error", sblock_to_check); |
| 743 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 744 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
| 734 | } else if (sblock_bad->header_error) { | 745 | } else if (sblock_bad->header_error) { |
| 735 | spin_lock(&sdev->stat_lock); | 746 | spin_lock(&sdev->stat_lock); |
| 736 | sdev->stat.verify_errors++; | 747 | sdev->stat.verify_errors++; |
| @@ -738,6 +749,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 738 | if (__ratelimit(&_rs)) | 749 | if (__ratelimit(&_rs)) |
| 739 | scrub_print_warning("checksum/header error", | 750 | scrub_print_warning("checksum/header error", |
| 740 | sblock_to_check); | 751 | sblock_to_check); |
| 752 | if (sblock_bad->generation_error) | ||
| 753 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 754 | BTRFS_DEV_STAT_GENERATION_ERRS); | ||
| 755 | else | ||
| 756 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 757 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
| 741 | } | 758 | } |
| 742 | 759 | ||
| 743 | if (sdev->readonly) | 760 | if (sdev->readonly) |
| @@ -998,8 +1015,8 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 998 | page = sblock->pagev + page_index; | 1015 | page = sblock->pagev + page_index; |
| 999 | page->logical = logical; | 1016 | page->logical = logical; |
| 1000 | page->physical = bbio->stripes[mirror_index].physical; | 1017 | page->physical = bbio->stripes[mirror_index].physical; |
| 1001 | /* for missing devices, bdev is NULL */ | 1018 | /* for missing devices, dev->bdev is NULL */ |
| 1002 | page->bdev = bbio->stripes[mirror_index].dev->bdev; | 1019 | page->dev = bbio->stripes[mirror_index].dev; |
| 1003 | page->mirror_num = mirror_index + 1; | 1020 | page->mirror_num = mirror_index + 1; |
| 1004 | page->page = alloc_page(GFP_NOFS); | 1021 | page->page = alloc_page(GFP_NOFS); |
| 1005 | if (!page->page) { | 1022 | if (!page->page) { |
| @@ -1043,7 +1060,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1043 | struct scrub_page *page = sblock->pagev + page_num; | 1060 | struct scrub_page *page = sblock->pagev + page_num; |
| 1044 | DECLARE_COMPLETION_ONSTACK(complete); | 1061 | DECLARE_COMPLETION_ONSTACK(complete); |
| 1045 | 1062 | ||
| 1046 | if (page->bdev == NULL) { | 1063 | if (page->dev->bdev == NULL) { |
| 1047 | page->io_error = 1; | 1064 | page->io_error = 1; |
| 1048 | sblock->no_io_error_seen = 0; | 1065 | sblock->no_io_error_seen = 0; |
| 1049 | continue; | 1066 | continue; |
| @@ -1053,7 +1070,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1053 | bio = bio_alloc(GFP_NOFS, 1); | 1070 | bio = bio_alloc(GFP_NOFS, 1); |
| 1054 | if (!bio) | 1071 | if (!bio) |
| 1055 | return -EIO; | 1072 | return -EIO; |
| 1056 | bio->bi_bdev = page->bdev; | 1073 | bio->bi_bdev = page->dev->bdev; |
| 1057 | bio->bi_sector = page->physical >> 9; | 1074 | bio->bi_sector = page->physical >> 9; |
| 1058 | bio->bi_end_io = scrub_complete_bio_end_io; | 1075 | bio->bi_end_io = scrub_complete_bio_end_io; |
| 1059 | bio->bi_private = &complete; | 1076 | bio->bi_private = &complete; |
| @@ -1102,11 +1119,14 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 1102 | h = (struct btrfs_header *)mapped_buffer; | 1119 | h = (struct btrfs_header *)mapped_buffer; |
| 1103 | 1120 | ||
| 1104 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || | 1121 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || |
| 1105 | generation != le64_to_cpu(h->generation) || | ||
| 1106 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || | 1122 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || |
| 1107 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1123 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
| 1108 | BTRFS_UUID_SIZE)) | 1124 | BTRFS_UUID_SIZE)) { |
| 1109 | sblock->header_error = 1; | 1125 | sblock->header_error = 1; |
| 1126 | } else if (generation != le64_to_cpu(h->generation)) { | ||
| 1127 | sblock->header_error = 1; | ||
| 1128 | sblock->generation_error = 1; | ||
| 1129 | } | ||
| 1110 | csum = h->csum; | 1130 | csum = h->csum; |
| 1111 | } else { | 1131 | } else { |
| 1112 | if (!have_csum) | 1132 | if (!have_csum) |
| @@ -1182,7 +1202,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1182 | bio = bio_alloc(GFP_NOFS, 1); | 1202 | bio = bio_alloc(GFP_NOFS, 1); |
| 1183 | if (!bio) | 1203 | if (!bio) |
| 1184 | return -EIO; | 1204 | return -EIO; |
| 1185 | bio->bi_bdev = page_bad->bdev; | 1205 | bio->bi_bdev = page_bad->dev->bdev; |
| 1186 | bio->bi_sector = page_bad->physical >> 9; | 1206 | bio->bi_sector = page_bad->physical >> 9; |
| 1187 | bio->bi_end_io = scrub_complete_bio_end_io; | 1207 | bio->bi_end_io = scrub_complete_bio_end_io; |
| 1188 | bio->bi_private = &complete; | 1208 | bio->bi_private = &complete; |
| @@ -1196,6 +1216,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1196 | 1216 | ||
| 1197 | /* this will also unplug the queue */ | 1217 | /* this will also unplug the queue */ |
| 1198 | wait_for_completion(&complete); | 1218 | wait_for_completion(&complete); |
| 1219 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
| 1220 | btrfs_dev_stat_inc_and_print(page_bad->dev, | ||
| 1221 | BTRFS_DEV_STAT_WRITE_ERRS); | ||
| 1222 | bio_put(bio); | ||
| 1223 | return -EIO; | ||
| 1224 | } | ||
| 1199 | bio_put(bio); | 1225 | bio_put(bio); |
| 1200 | } | 1226 | } |
| 1201 | 1227 | ||
| @@ -1352,7 +1378,8 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1352 | u64 mapped_size; | 1378 | u64 mapped_size; |
| 1353 | void *p; | 1379 | void *p; |
| 1354 | u32 crc = ~(u32)0; | 1380 | u32 crc = ~(u32)0; |
| 1355 | int fail = 0; | 1381 | int fail_gen = 0; |
| 1382 | int fail_cor = 0; | ||
| 1356 | u64 len; | 1383 | u64 len; |
| 1357 | int index; | 1384 | int index; |
| 1358 | 1385 | ||
| @@ -1363,13 +1390,13 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1363 | memcpy(on_disk_csum, s->csum, sdev->csum_size); | 1390 | memcpy(on_disk_csum, s->csum, sdev->csum_size); |
| 1364 | 1391 | ||
| 1365 | if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) | 1392 | if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) |
| 1366 | ++fail; | 1393 | ++fail_cor; |
| 1367 | 1394 | ||
| 1368 | if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) | 1395 | if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) |
| 1369 | ++fail; | 1396 | ++fail_gen; |
| 1370 | 1397 | ||
| 1371 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1398 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) |
| 1372 | ++fail; | 1399 | ++fail_cor; |
| 1373 | 1400 | ||
| 1374 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; | 1401 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; |
| 1375 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 1402 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
| @@ -1394,9 +1421,9 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1394 | 1421 | ||
| 1395 | btrfs_csum_final(crc, calculated_csum); | 1422 | btrfs_csum_final(crc, calculated_csum); |
| 1396 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) | 1423 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) |
| 1397 | ++fail; | 1424 | ++fail_cor; |
| 1398 | 1425 | ||
| 1399 | if (fail) { | 1426 | if (fail_cor + fail_gen) { |
| 1400 | /* | 1427 | /* |
| 1401 | * if we find an error in a super block, we just report it. | 1428 | * if we find an error in a super block, we just report it. |
| 1402 | * They will get written with the next transaction commit | 1429 | * They will get written with the next transaction commit |
| @@ -1405,9 +1432,15 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1405 | spin_lock(&sdev->stat_lock); | 1432 | spin_lock(&sdev->stat_lock); |
| 1406 | ++sdev->stat.super_errors; | 1433 | ++sdev->stat.super_errors; |
| 1407 | spin_unlock(&sdev->stat_lock); | 1434 | spin_unlock(&sdev->stat_lock); |
| 1435 | if (fail_cor) | ||
| 1436 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 1437 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
| 1438 | else | ||
| 1439 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 1440 | BTRFS_DEV_STAT_GENERATION_ERRS); | ||
| 1408 | } | 1441 | } |
| 1409 | 1442 | ||
| 1410 | return fail; | 1443 | return fail_cor + fail_gen; |
| 1411 | } | 1444 | } |
| 1412 | 1445 | ||
| 1413 | static void scrub_block_get(struct scrub_block *sblock) | 1446 | static void scrub_block_get(struct scrub_block *sblock) |
| @@ -1551,7 +1584,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1551 | return -ENOMEM; | 1584 | return -ENOMEM; |
| 1552 | } | 1585 | } |
| 1553 | spage->sblock = sblock; | 1586 | spage->sblock = sblock; |
| 1554 | spage->bdev = sdev->dev->bdev; | 1587 | spage->dev = sdev->dev; |
| 1555 | spage->flags = flags; | 1588 | spage->flags = flags; |
| 1556 | spage->generation = gen; | 1589 | spage->generation = gen; |
| 1557 | spage->logical = logical; | 1590 | spage->logical = logical; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c5f8fca4195f..96eb9fef7bd2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -188,7 +188,8 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) | |||
| 188 | va_start(args, fmt); | 188 | va_start(args, fmt); |
| 189 | 189 | ||
| 190 | if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { | 190 | if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { |
| 191 | strncpy(lvl, fmt, 3); | 191 | memcpy(lvl, fmt, 3); |
| 192 | lvl[3] = '\0'; | ||
| 192 | fmt += 3; | 193 | fmt += 3; |
| 193 | type = logtypes[fmt[1] - '0']; | 194 | type = logtypes[fmt[1] - '0']; |
| 194 | } else | 195 | } else |
| @@ -435,11 +436,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 435 | case Opt_thread_pool: | 436 | case Opt_thread_pool: |
| 436 | intarg = 0; | 437 | intarg = 0; |
| 437 | match_int(&args[0], &intarg); | 438 | match_int(&args[0], &intarg); |
| 438 | if (intarg) { | 439 | if (intarg) |
| 439 | info->thread_pool_size = intarg; | 440 | info->thread_pool_size = intarg; |
| 440 | printk(KERN_INFO "btrfs: thread pool %d\n", | ||
| 441 | info->thread_pool_size); | ||
| 442 | } | ||
| 443 | break; | 441 | break; |
| 444 | case Opt_max_inline: | 442 | case Opt_max_inline: |
| 445 | num = match_strdup(&args[0]); | 443 | num = match_strdup(&args[0]); |
| @@ -769,7 +767,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
| 769 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 767 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
| 770 | sb->s_flags |= MS_POSIXACL; | 768 | sb->s_flags |= MS_POSIXACL; |
| 771 | #endif | 769 | #endif |
| 772 | 770 | sb->s_flags |= MS_I_VERSION; | |
| 773 | err = open_ctree(sb, fs_devices, (char *)data); | 771 | err = open_ctree(sb, fs_devices, (char *)data); |
| 774 | if (err) { | 772 | if (err) { |
| 775 | printk("btrfs: open_ctree failed\n"); | 773 | printk("btrfs: open_ctree failed\n"); |
| @@ -925,63 +923,48 @@ static inline int is_subvolume_inode(struct inode *inode) | |||
| 925 | */ | 923 | */ |
| 926 | static char *setup_root_args(char *args) | 924 | static char *setup_root_args(char *args) |
| 927 | { | 925 | { |
| 928 | unsigned copied = 0; | 926 | unsigned len = strlen(args) + 2 + 1; |
| 929 | unsigned len = strlen(args) + 2; | 927 | char *src, *dst, *buf; |
| 930 | char *pos; | ||
| 931 | char *ret; | ||
| 932 | 928 | ||
| 933 | /* | 929 | /* |
| 934 | * We need the same args as before, but minus | 930 | * We need the same args as before, but with this substitution: |
| 935 | * | 931 | * s!subvol=[^,]+!subvolid=0! |
| 936 | * subvol=a | ||
| 937 | * | ||
| 938 | * and add | ||
| 939 | * | ||
| 940 | * subvolid=0 | ||
| 941 | * | 932 | * |
| 942 | * which is a difference of 2 characters, so we allocate strlen(args) + | 933 | * Since the replacement string is up to 2 bytes longer than the |
| 943 | * 2 characters. | 934 | * original, allocate strlen(args) + 2 + 1 bytes. |
| 944 | */ | 935 | */ |
| 945 | ret = kzalloc(len * sizeof(char), GFP_NOFS); | ||
| 946 | if (!ret) | ||
| 947 | return NULL; | ||
| 948 | pos = strstr(args, "subvol="); | ||
| 949 | 936 | ||
| 937 | src = strstr(args, "subvol="); | ||
| 950 | /* This shouldn't happen, but just in case.. */ | 938 | /* This shouldn't happen, but just in case.. */ |
| 951 | if (!pos) { | 939 | if (!src) |
| 952 | kfree(ret); | 940 | return NULL; |
| 941 | |||
| 942 | buf = dst = kmalloc(len, GFP_NOFS); | ||
| 943 | if (!buf) | ||
| 953 | return NULL; | 944 | return NULL; |
| 954 | } | ||
| 955 | 945 | ||
| 956 | /* | 946 | /* |
| 957 | * The subvol=<> arg is not at the front of the string, copy everybody | 947 | * If the subvol= arg is not at the start of the string, |
| 958 | * up to that into ret. | 948 | * copy whatever precedes it into buf. |
| 959 | */ | 949 | */ |
| 960 | if (pos != args) { | 950 | if (src != args) { |
| 961 | *pos = '\0'; | 951 | *src++ = '\0'; |
| 962 | strcpy(ret, args); | 952 | strcpy(buf, args); |
| 963 | copied += strlen(args); | 953 | dst += strlen(args); |
| 964 | pos++; | ||
| 965 | } | 954 | } |
| 966 | 955 | ||
| 967 | strncpy(ret + copied, "subvolid=0", len - copied); | 956 | strcpy(dst, "subvolid=0"); |
| 968 | 957 | dst += strlen("subvolid=0"); | |
| 969 | /* Length of subvolid=0 */ | ||
| 970 | copied += 10; | ||
| 971 | 958 | ||
| 972 | /* | 959 | /* |
| 973 | * If there is no , after the subvol= option then we know there's no | 960 | * If there is a "," after the original subvol=... string, |
| 974 | * other options and we can just return. | 961 | * copy that suffix into our buffer. Otherwise, we're done. |
| 975 | */ | 962 | */ |
| 976 | pos = strchr(pos, ','); | 963 | src = strchr(src, ','); |
| 977 | if (!pos) | 964 | if (src) |
| 978 | return ret; | 965 | strcpy(dst, src); |
| 979 | 966 | ||
| 980 | /* Copy the rest of the arguments into our buffer */ | 967 | return buf; |
| 981 | strncpy(ret + copied, pos, len - copied); | ||
| 982 | copied += strlen(pos); | ||
| 983 | |||
| 984 | return ret; | ||
| 985 | } | 968 | } |
| 986 | 969 | ||
| 987 | static struct dentry *mount_subvol(const char *subvol_name, int flags, | 970 | static struct dentry *mount_subvol(const char *subvol_name, int flags, |
| @@ -1118,6 +1101,40 @@ error_fs_info: | |||
| 1118 | return ERR_PTR(error); | 1101 | return ERR_PTR(error); |
| 1119 | } | 1102 | } |
| 1120 | 1103 | ||
| 1104 | static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) | ||
| 1105 | { | ||
| 1106 | spin_lock_irq(&workers->lock); | ||
| 1107 | workers->max_workers = new_limit; | ||
| 1108 | spin_unlock_irq(&workers->lock); | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | ||
| 1112 | int new_pool_size, int old_pool_size) | ||
| 1113 | { | ||
| 1114 | if (new_pool_size == old_pool_size) | ||
| 1115 | return; | ||
| 1116 | |||
| 1117 | fs_info->thread_pool_size = new_pool_size; | ||
| 1118 | |||
| 1119 | printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n", | ||
| 1120 | old_pool_size, new_pool_size); | ||
| 1121 | |||
| 1122 | btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); | ||
| 1123 | btrfs_set_max_workers(&fs_info->workers, new_pool_size); | ||
| 1124 | btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); | ||
| 1125 | btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); | ||
| 1126 | btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); | ||
| 1127 | btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); | ||
| 1128 | btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); | ||
| 1129 | btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); | ||
| 1130 | btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); | ||
| 1131 | btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); | ||
| 1132 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | ||
| 1133 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | ||
| 1134 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | ||
| 1135 | btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size); | ||
| 1136 | } | ||
| 1137 | |||
| 1121 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) | 1138 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) |
| 1122 | { | 1139 | { |
| 1123 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 1140 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
| @@ -1137,6 +1154,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1137 | goto restore; | 1154 | goto restore; |
| 1138 | } | 1155 | } |
| 1139 | 1156 | ||
| 1157 | btrfs_resize_thread_pool(fs_info, | ||
| 1158 | fs_info->thread_pool_size, old_thread_pool_size); | ||
| 1159 | |||
| 1140 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 1160 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
| 1141 | return 0; | 1161 | return 0; |
| 1142 | 1162 | ||
| @@ -1180,7 +1200,8 @@ restore: | |||
| 1180 | fs_info->compress_type = old_compress_type; | 1200 | fs_info->compress_type = old_compress_type; |
| 1181 | fs_info->max_inline = old_max_inline; | 1201 | fs_info->max_inline = old_max_inline; |
| 1182 | fs_info->alloc_start = old_alloc_start; | 1202 | fs_info->alloc_start = old_alloc_start; |
| 1183 | fs_info->thread_pool_size = old_thread_pool_size; | 1203 | btrfs_resize_thread_pool(fs_info, |
| 1204 | old_thread_pool_size, fs_info->thread_pool_size); | ||
| 1184 | fs_info->metadata_ratio = old_metadata_ratio; | 1205 | fs_info->metadata_ratio = old_metadata_ratio; |
| 1185 | return ret; | 1206 | return ret; |
| 1186 | } | 1207 | } |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 36422254ef67..1791c6e3d834 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include "locking.h" | 28 | #include "locking.h" |
| 29 | #include "tree-log.h" | 29 | #include "tree-log.h" |
| 30 | #include "inode-map.h" | 30 | #include "inode-map.h" |
| 31 | #include "volumes.h" | ||
| 31 | 32 | ||
| 32 | #define BTRFS_ROOT_TRANS_TAG 0 | 33 | #define BTRFS_ROOT_TRANS_TAG 0 |
| 33 | 34 | ||
| @@ -55,48 +56,49 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
| 55 | static noinline int join_transaction(struct btrfs_root *root, int nofail) | 56 | static noinline int join_transaction(struct btrfs_root *root, int nofail) |
| 56 | { | 57 | { |
| 57 | struct btrfs_transaction *cur_trans; | 58 | struct btrfs_transaction *cur_trans; |
| 59 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 58 | 60 | ||
| 59 | spin_lock(&root->fs_info->trans_lock); | 61 | spin_lock(&fs_info->trans_lock); |
| 60 | loop: | 62 | loop: |
| 61 | /* The file system has been taken offline. No new transactions. */ | 63 | /* The file system has been taken offline. No new transactions. */ |
| 62 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 64 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
| 63 | spin_unlock(&root->fs_info->trans_lock); | 65 | spin_unlock(&fs_info->trans_lock); |
| 64 | return -EROFS; | 66 | return -EROFS; |
| 65 | } | 67 | } |
| 66 | 68 | ||
| 67 | if (root->fs_info->trans_no_join) { | 69 | if (fs_info->trans_no_join) { |
| 68 | if (!nofail) { | 70 | if (!nofail) { |
| 69 | spin_unlock(&root->fs_info->trans_lock); | 71 | spin_unlock(&fs_info->trans_lock); |
| 70 | return -EBUSY; | 72 | return -EBUSY; |
| 71 | } | 73 | } |
| 72 | } | 74 | } |
| 73 | 75 | ||
| 74 | cur_trans = root->fs_info->running_transaction; | 76 | cur_trans = fs_info->running_transaction; |
| 75 | if (cur_trans) { | 77 | if (cur_trans) { |
| 76 | if (cur_trans->aborted) { | 78 | if (cur_trans->aborted) { |
| 77 | spin_unlock(&root->fs_info->trans_lock); | 79 | spin_unlock(&fs_info->trans_lock); |
| 78 | return cur_trans->aborted; | 80 | return cur_trans->aborted; |
| 79 | } | 81 | } |
| 80 | atomic_inc(&cur_trans->use_count); | 82 | atomic_inc(&cur_trans->use_count); |
| 81 | atomic_inc(&cur_trans->num_writers); | 83 | atomic_inc(&cur_trans->num_writers); |
| 82 | cur_trans->num_joined++; | 84 | cur_trans->num_joined++; |
| 83 | spin_unlock(&root->fs_info->trans_lock); | 85 | spin_unlock(&fs_info->trans_lock); |
| 84 | return 0; | 86 | return 0; |
| 85 | } | 87 | } |
| 86 | spin_unlock(&root->fs_info->trans_lock); | 88 | spin_unlock(&fs_info->trans_lock); |
| 87 | 89 | ||
| 88 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 90 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
| 89 | if (!cur_trans) | 91 | if (!cur_trans) |
| 90 | return -ENOMEM; | 92 | return -ENOMEM; |
| 91 | 93 | ||
| 92 | spin_lock(&root->fs_info->trans_lock); | 94 | spin_lock(&fs_info->trans_lock); |
| 93 | if (root->fs_info->running_transaction) { | 95 | if (fs_info->running_transaction) { |
| 94 | /* | 96 | /* |
| 95 | * someone started a transaction after we unlocked. Make sure | 97 | * someone started a transaction after we unlocked. Make sure |
| 96 | * to redo the trans_no_join checks above | 98 | * to redo the trans_no_join checks above |
| 97 | */ | 99 | */ |
| 98 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 100 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
| 99 | cur_trans = root->fs_info->running_transaction; | 101 | cur_trans = fs_info->running_transaction; |
| 100 | goto loop; | 102 | goto loop; |
| 101 | } | 103 | } |
| 102 | 104 | ||
| @@ -121,20 +123,38 @@ loop: | |||
| 121 | cur_trans->delayed_refs.flushing = 0; | 123 | cur_trans->delayed_refs.flushing = 0; |
| 122 | cur_trans->delayed_refs.run_delayed_start = 0; | 124 | cur_trans->delayed_refs.run_delayed_start = 0; |
| 123 | cur_trans->delayed_refs.seq = 1; | 125 | cur_trans->delayed_refs.seq = 1; |
| 126 | |||
| 127 | /* | ||
| 128 | * although the tree mod log is per file system and not per transaction, | ||
| 129 | * the log must never go across transaction boundaries. | ||
| 130 | */ | ||
| 131 | smp_mb(); | ||
| 132 | if (!list_empty(&fs_info->tree_mod_seq_list)) { | ||
| 133 | printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when " | ||
| 134 | "creating a fresh transaction\n"); | ||
| 135 | WARN_ON(1); | ||
| 136 | } | ||
| 137 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) { | ||
| 138 | printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when " | ||
| 139 | "creating a fresh transaction\n"); | ||
| 140 | WARN_ON(1); | ||
| 141 | } | ||
| 142 | atomic_set(&fs_info->tree_mod_seq, 0); | ||
| 143 | |||
| 124 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); | 144 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); |
| 125 | spin_lock_init(&cur_trans->commit_lock); | 145 | spin_lock_init(&cur_trans->commit_lock); |
| 126 | spin_lock_init(&cur_trans->delayed_refs.lock); | 146 | spin_lock_init(&cur_trans->delayed_refs.lock); |
| 127 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); | 147 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); |
| 128 | 148 | ||
| 129 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 149 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
| 130 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 150 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
| 131 | extent_io_tree_init(&cur_trans->dirty_pages, | 151 | extent_io_tree_init(&cur_trans->dirty_pages, |
| 132 | root->fs_info->btree_inode->i_mapping); | 152 | fs_info->btree_inode->i_mapping); |
| 133 | root->fs_info->generation++; | 153 | fs_info->generation++; |
| 134 | cur_trans->transid = root->fs_info->generation; | 154 | cur_trans->transid = fs_info->generation; |
| 135 | root->fs_info->running_transaction = cur_trans; | 155 | fs_info->running_transaction = cur_trans; |
| 136 | cur_trans->aborted = 0; | 156 | cur_trans->aborted = 0; |
| 137 | spin_unlock(&root->fs_info->trans_lock); | 157 | spin_unlock(&fs_info->trans_lock); |
| 138 | 158 | ||
| 139 | return 0; | 159 | return 0; |
| 140 | } | 160 | } |
| @@ -758,6 +778,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 758 | if (ret) | 778 | if (ret) |
| 759 | return ret; | 779 | return ret; |
| 760 | 780 | ||
| 781 | ret = btrfs_run_dev_stats(trans, root->fs_info); | ||
| 782 | BUG_ON(ret); | ||
| 783 | |||
| 761 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 784 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
| 762 | next = fs_info->dirty_cowonly_roots.next; | 785 | next = fs_info->dirty_cowonly_roots.next; |
| 763 | list_del_init(next); | 786 | list_del_init(next); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index eb1ae908582c..2017d0ff511c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -1628,7 +1628,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
| 1628 | int i; | 1628 | int i; |
| 1629 | int ret; | 1629 | int ret; |
| 1630 | 1630 | ||
| 1631 | btrfs_read_buffer(eb, gen); | 1631 | ret = btrfs_read_buffer(eb, gen); |
| 1632 | if (ret) | ||
| 1633 | return ret; | ||
| 1632 | 1634 | ||
| 1633 | level = btrfs_header_level(eb); | 1635 | level = btrfs_header_level(eb); |
| 1634 | 1636 | ||
| @@ -1749,7 +1751,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
| 1749 | 1751 | ||
| 1750 | path->slots[*level]++; | 1752 | path->slots[*level]++; |
| 1751 | if (wc->free) { | 1753 | if (wc->free) { |
| 1752 | btrfs_read_buffer(next, ptr_gen); | 1754 | ret = btrfs_read_buffer(next, ptr_gen); |
| 1755 | if (ret) { | ||
| 1756 | free_extent_buffer(next); | ||
| 1757 | return ret; | ||
| 1758 | } | ||
| 1753 | 1759 | ||
| 1754 | btrfs_tree_lock(next); | 1760 | btrfs_tree_lock(next); |
| 1755 | btrfs_set_lock_blocking(next); | 1761 | btrfs_set_lock_blocking(next); |
| @@ -1766,7 +1772,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
| 1766 | free_extent_buffer(next); | 1772 | free_extent_buffer(next); |
| 1767 | continue; | 1773 | continue; |
| 1768 | } | 1774 | } |
| 1769 | btrfs_read_buffer(next, ptr_gen); | 1775 | ret = btrfs_read_buffer(next, ptr_gen); |
| 1776 | if (ret) { | ||
| 1777 | free_extent_buffer(next); | ||
| 1778 | return ret; | ||
| 1779 | } | ||
| 1770 | 1780 | ||
| 1771 | WARN_ON(*level <= 0); | 1781 | WARN_ON(*level <= 0); |
| 1772 | if (path->nodes[*level-1]) | 1782 | if (path->nodes[*level-1]) |
| @@ -2657,6 +2667,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2657 | btrfs_release_path(path); | 2667 | btrfs_release_path(path); |
| 2658 | } | 2668 | } |
| 2659 | btrfs_release_path(path); | 2669 | btrfs_release_path(path); |
| 2670 | if (ret > 0) | ||
| 2671 | ret = 0; | ||
| 2660 | return ret; | 2672 | return ret; |
| 2661 | } | 2673 | } |
| 2662 | 2674 | ||
| @@ -3028,21 +3040,6 @@ out: | |||
| 3028 | return ret; | 3040 | return ret; |
| 3029 | } | 3041 | } |
| 3030 | 3042 | ||
| 3031 | static int inode_in_log(struct btrfs_trans_handle *trans, | ||
| 3032 | struct inode *inode) | ||
| 3033 | { | ||
| 3034 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3035 | int ret = 0; | ||
| 3036 | |||
| 3037 | mutex_lock(&root->log_mutex); | ||
| 3038 | if (BTRFS_I(inode)->logged_trans == trans->transid && | ||
| 3039 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | ||
| 3040 | ret = 1; | ||
| 3041 | mutex_unlock(&root->log_mutex); | ||
| 3042 | return ret; | ||
| 3043 | } | ||
| 3044 | |||
| 3045 | |||
| 3046 | /* | 3043 | /* |
| 3047 | * helper function around btrfs_log_inode to make sure newly created | 3044 | * helper function around btrfs_log_inode to make sure newly created |
| 3048 | * parent directories also end up in the log. A minimal inode and backref | 3045 | * parent directories also end up in the log. A minimal inode and backref |
| @@ -3083,7 +3080,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 3083 | if (ret) | 3080 | if (ret) |
| 3084 | goto end_no_trans; | 3081 | goto end_no_trans; |
| 3085 | 3082 | ||
| 3086 | if (inode_in_log(trans, inode)) { | 3083 | if (btrfs_inode_in_log(inode, trans->transid)) { |
| 3087 | ret = BTRFS_NO_LOG_SYNC; | 3084 | ret = BTRFS_NO_LOG_SYNC; |
| 3088 | goto end_no_trans; | 3085 | goto end_no_trans; |
| 3089 | } | 3086 | } |
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 12f5147bd2b1..ab942f46b3dd 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
| @@ -23,9 +23,9 @@ | |||
| 23 | * | 23 | * |
| 24 | * ulist = ulist_alloc(); | 24 | * ulist = ulist_alloc(); |
| 25 | * ulist_add(ulist, root); | 25 | * ulist_add(ulist, root); |
| 26 | * elem = NULL; | 26 | * ULIST_ITER_INIT(&uiter); |
| 27 | * | 27 | * |
| 28 | * while ((elem = ulist_next(ulist, elem)) { | 28 | * while ((elem = ulist_next(ulist, &uiter)) { |
| 29 | * for (all child nodes n in elem) | 29 | * for (all child nodes n in elem) |
| 30 | * ulist_add(ulist, n); | 30 | * ulist_add(ulist, n); |
| 31 | * do something useful with the node; | 31 | * do something useful with the node; |
| @@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit); | |||
| 95 | * | 95 | * |
| 96 | * The allocated ulist will be returned in an initialized state. | 96 | * The allocated ulist will be returned in an initialized state. |
| 97 | */ | 97 | */ |
| 98 | struct ulist *ulist_alloc(unsigned long gfp_mask) | 98 | struct ulist *ulist_alloc(gfp_t gfp_mask) |
| 99 | { | 99 | { |
| 100 | struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); | 100 | struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); |
| 101 | 101 | ||
| @@ -144,13 +144,22 @@ EXPORT_SYMBOL(ulist_free); | |||
| 144 | * unaltered. | 144 | * unaltered. |
| 145 | */ | 145 | */ |
| 146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, |
| 147 | unsigned long gfp_mask) | 147 | gfp_t gfp_mask) |
| 148 | { | ||
| 149 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); | ||
| 150 | } | ||
| 151 | |||
| 152 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | ||
| 153 | unsigned long *old_aux, gfp_t gfp_mask) | ||
| 148 | { | 154 | { |
| 149 | int i; | 155 | int i; |
| 150 | 156 | ||
| 151 | for (i = 0; i < ulist->nnodes; ++i) { | 157 | for (i = 0; i < ulist->nnodes; ++i) { |
| 152 | if (ulist->nodes[i].val == val) | 158 | if (ulist->nodes[i].val == val) { |
| 159 | if (old_aux) | ||
| 160 | *old_aux = ulist->nodes[i].aux; | ||
| 153 | return 0; | 161 | return 0; |
| 162 | } | ||
| 154 | } | 163 | } |
| 155 | 164 | ||
| 156 | if (ulist->nnodes >= ulist->nodes_alloced) { | 165 | if (ulist->nnodes >= ulist->nodes_alloced) { |
| @@ -188,33 +197,26 @@ EXPORT_SYMBOL(ulist_add); | |||
| 188 | /** | 197 | /** |
| 189 | * ulist_next - iterate ulist | 198 | * ulist_next - iterate ulist |
| 190 | * @ulist: ulist to iterate | 199 | * @ulist: ulist to iterate |
| 191 | * @prev: previously returned element or %NULL to start iteration | 200 | * @uiter: iterator variable, initialized with ULIST_ITER_INIT(&iterator) |
| 192 | * | 201 | * |
| 193 | * Note: locking must be provided by the caller. In case of rwlocks only read | 202 | * Note: locking must be provided by the caller. In case of rwlocks only read |
| 194 | * locking is needed | 203 | * locking is needed |
| 195 | * | 204 | * |
| 196 | * This function is used to iterate an ulist. The iteration is started with | 205 | * This function is used to iterate an ulist. |
| 197 | * @prev = %NULL. It returns the next element from the ulist or %NULL when the | 206 | * It returns the next element from the ulist or %NULL when the |
| 198 | * end is reached. No guarantee is made with respect to the order in which | 207 | * end is reached. No guarantee is made with respect to the order in which |
| 199 | * the elements are returned. They might neither be returned in order of | 208 | * the elements are returned. They might neither be returned in order of |
| 200 | * addition nor in ascending order. | 209 | * addition nor in ascending order. |
| 201 | * It is allowed to call ulist_add during an enumeration. Newly added items | 210 | * It is allowed to call ulist_add during an enumeration. Newly added items |
| 202 | * are guaranteed to show up in the running enumeration. | 211 | * are guaranteed to show up in the running enumeration. |
| 203 | */ | 212 | */ |
| 204 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev) | 213 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter) |
| 205 | { | 214 | { |
| 206 | int next; | ||
| 207 | |||
| 208 | if (ulist->nnodes == 0) | 215 | if (ulist->nnodes == 0) |
| 209 | return NULL; | 216 | return NULL; |
| 210 | 217 | if (uiter->i < 0 || uiter->i >= ulist->nnodes) | |
| 211 | if (!prev) | ||
| 212 | return &ulist->nodes[0]; | ||
| 213 | |||
| 214 | next = (prev - ulist->nodes) + 1; | ||
| 215 | if (next < 0 || next >= ulist->nnodes) | ||
| 216 | return NULL; | 218 | return NULL; |
| 217 | 219 | ||
| 218 | return &ulist->nodes[next]; | 220 | return &ulist->nodes[uiter->i++]; |
| 219 | } | 221 | } |
| 220 | EXPORT_SYMBOL(ulist_next); | 222 | EXPORT_SYMBOL(ulist_next); |
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 2e25dec58ec0..21bdc8ec8130 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h | |||
| @@ -24,6 +24,10 @@ | |||
| 24 | */ | 24 | */ |
| 25 | #define ULIST_SIZE 16 | 25 | #define ULIST_SIZE 16 |
| 26 | 26 | ||
| 27 | struct ulist_iterator { | ||
| 28 | int i; | ||
| 29 | }; | ||
| 30 | |||
| 27 | /* | 31 | /* |
| 28 | * element of the list | 32 | * element of the list |
| 29 | */ | 33 | */ |
| @@ -59,10 +63,15 @@ struct ulist { | |||
| 59 | void ulist_init(struct ulist *ulist); | 63 | void ulist_init(struct ulist *ulist); |
| 60 | void ulist_fini(struct ulist *ulist); | 64 | void ulist_fini(struct ulist *ulist); |
| 61 | void ulist_reinit(struct ulist *ulist); | 65 | void ulist_reinit(struct ulist *ulist); |
| 62 | struct ulist *ulist_alloc(unsigned long gfp_mask); | 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); |
| 63 | void ulist_free(struct ulist *ulist); | 67 | void ulist_free(struct ulist *ulist); |
| 64 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 68 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, |
| 65 | unsigned long gfp_mask); | 69 | gfp_t gfp_mask); |
| 66 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev); | 70 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, |
| 71 | unsigned long *old_aux, gfp_t gfp_mask); | ||
| 72 | struct ulist_node *ulist_next(struct ulist *ulist, | ||
| 73 | struct ulist_iterator *uiter); | ||
| 74 | |||
| 75 | #define ULIST_ITER_INIT(uiter) ((uiter)->i = 0) | ||
| 67 | 76 | ||
| 68 | #endif | 77 | #endif |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1411b99555a4..7782020996fe 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
| 24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
| 25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
| 26 | #include <linux/ratelimit.h> | ||
| 26 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
| 27 | #include <asm/div64.h> | 28 | #include <asm/div64.h> |
| 28 | #include "compat.h" | 29 | #include "compat.h" |
| @@ -39,6 +40,8 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
| 39 | struct btrfs_root *root, | 40 | struct btrfs_root *root, |
| 40 | struct btrfs_device *device); | 41 | struct btrfs_device *device); |
| 41 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root); | 42 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root); |
| 43 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev); | ||
| 44 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); | ||
| 42 | 45 | ||
| 43 | static DEFINE_MUTEX(uuid_mutex); | 46 | static DEFINE_MUTEX(uuid_mutex); |
| 44 | static LIST_HEAD(fs_uuids); | 47 | static LIST_HEAD(fs_uuids); |
| @@ -361,6 +364,7 @@ static noinline int device_list_add(const char *path, | |||
| 361 | return -ENOMEM; | 364 | return -ENOMEM; |
| 362 | } | 365 | } |
| 363 | device->devid = devid; | 366 | device->devid = devid; |
| 367 | device->dev_stats_valid = 0; | ||
| 364 | device->work.func = pending_bios_fn; | 368 | device->work.func = pending_bios_fn; |
| 365 | memcpy(device->uuid, disk_super->dev_item.uuid, | 369 | memcpy(device->uuid, disk_super->dev_item.uuid, |
| 366 | BTRFS_UUID_SIZE); | 370 | BTRFS_UUID_SIZE); |
| @@ -1633,7 +1637,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1633 | int ret = 0; | 1637 | int ret = 0; |
| 1634 | 1638 | ||
| 1635 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) | 1639 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) |
| 1636 | return -EINVAL; | 1640 | return -EROFS; |
| 1637 | 1641 | ||
| 1638 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, | 1642 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, |
| 1639 | root->fs_info->bdev_holder); | 1643 | root->fs_info->bdev_holder); |
| @@ -4001,13 +4005,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 4001 | return 0; | 4005 | return 0; |
| 4002 | } | 4006 | } |
| 4003 | 4007 | ||
| 4008 | static void *merge_stripe_index_into_bio_private(void *bi_private, | ||
| 4009 | unsigned int stripe_index) | ||
| 4010 | { | ||
| 4011 | /* | ||
| 4012 | * with single, dup, RAID0, RAID1 and RAID10, stripe_index is | ||
| 4013 | * at most 1. | ||
| 4014 | * The alternative solution (instead of stealing bits from the | ||
| 4015 | * pointer) would be to allocate an intermediate structure | ||
| 4016 | * that contains the old private pointer plus the stripe_index. | ||
| 4017 | */ | ||
| 4018 | BUG_ON((((uintptr_t)bi_private) & 3) != 0); | ||
| 4019 | BUG_ON(stripe_index > 3); | ||
| 4020 | return (void *)(((uintptr_t)bi_private) | stripe_index); | ||
| 4021 | } | ||
| 4022 | |||
| 4023 | static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private) | ||
| 4024 | { | ||
| 4025 | return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3)); | ||
| 4026 | } | ||
| 4027 | |||
| 4028 | static unsigned int extract_stripe_index_from_bio_private(void *bi_private) | ||
| 4029 | { | ||
| 4030 | return (unsigned int)((uintptr_t)bi_private) & 3; | ||
| 4031 | } | ||
| 4032 | |||
| 4004 | static void btrfs_end_bio(struct bio *bio, int err) | 4033 | static void btrfs_end_bio(struct bio *bio, int err) |
| 4005 | { | 4034 | { |
| 4006 | struct btrfs_bio *bbio = bio->bi_private; | 4035 | struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); |
| 4007 | int is_orig_bio = 0; | 4036 | int is_orig_bio = 0; |
| 4008 | 4037 | ||
| 4009 | if (err) | 4038 | if (err) { |
| 4010 | atomic_inc(&bbio->error); | 4039 | atomic_inc(&bbio->error); |
| 4040 | if (err == -EIO || err == -EREMOTEIO) { | ||
| 4041 | unsigned int stripe_index = | ||
| 4042 | extract_stripe_index_from_bio_private( | ||
| 4043 | bio->bi_private); | ||
| 4044 | struct btrfs_device *dev; | ||
| 4045 | |||
| 4046 | BUG_ON(stripe_index >= bbio->num_stripes); | ||
| 4047 | dev = bbio->stripes[stripe_index].dev; | ||
| 4048 | if (bio->bi_rw & WRITE) | ||
| 4049 | btrfs_dev_stat_inc(dev, | ||
| 4050 | BTRFS_DEV_STAT_WRITE_ERRS); | ||
| 4051 | else | ||
| 4052 | btrfs_dev_stat_inc(dev, | ||
| 4053 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 4054 | if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) | ||
| 4055 | btrfs_dev_stat_inc(dev, | ||
| 4056 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
| 4057 | btrfs_dev_stat_print_on_error(dev); | ||
| 4058 | } | ||
| 4059 | } | ||
| 4011 | 4060 | ||
| 4012 | if (bio == bbio->orig_bio) | 4061 | if (bio == bbio->orig_bio) |
| 4013 | is_orig_bio = 1; | 4062 | is_orig_bio = 1; |
| @@ -4149,6 +4198,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 4149 | bio = first_bio; | 4198 | bio = first_bio; |
| 4150 | } | 4199 | } |
| 4151 | bio->bi_private = bbio; | 4200 | bio->bi_private = bbio; |
| 4201 | bio->bi_private = merge_stripe_index_into_bio_private( | ||
| 4202 | bio->bi_private, (unsigned int)dev_nr); | ||
| 4152 | bio->bi_end_io = btrfs_end_bio; | 4203 | bio->bi_end_io = btrfs_end_bio; |
| 4153 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; | 4204 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; |
| 4154 | dev = bbio->stripes[dev_nr].dev; | 4205 | dev = bbio->stripes[dev_nr].dev; |
| @@ -4509,6 +4560,28 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
| 4509 | return ret; | 4560 | return ret; |
| 4510 | } | 4561 | } |
| 4511 | 4562 | ||
| 4563 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
| 4564 | u64 logical, int mirror_num) | ||
| 4565 | { | ||
| 4566 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
| 4567 | int ret; | ||
| 4568 | u64 map_length = 0; | ||
| 4569 | struct btrfs_bio *bbio = NULL; | ||
| 4570 | struct btrfs_device *device; | ||
| 4571 | |||
| 4572 | BUG_ON(mirror_num == 0); | ||
| 4573 | ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, | ||
| 4574 | mirror_num); | ||
| 4575 | if (ret) { | ||
| 4576 | BUG_ON(bbio != NULL); | ||
| 4577 | return NULL; | ||
| 4578 | } | ||
| 4579 | BUG_ON(mirror_num != bbio->mirror_num); | ||
| 4580 | device = bbio->stripes[mirror_num - 1].dev; | ||
| 4581 | kfree(bbio); | ||
| 4582 | return device; | ||
| 4583 | } | ||
| 4584 | |||
| 4512 | int btrfs_read_chunk_tree(struct btrfs_root *root) | 4585 | int btrfs_read_chunk_tree(struct btrfs_root *root) |
| 4513 | { | 4586 | { |
| 4514 | struct btrfs_path *path; | 4587 | struct btrfs_path *path; |
| @@ -4583,3 +4656,230 @@ error: | |||
| 4583 | btrfs_free_path(path); | 4656 | btrfs_free_path(path); |
| 4584 | return ret; | 4657 | return ret; |
| 4585 | } | 4658 | } |
| 4659 | |||
| 4660 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) | ||
| 4661 | { | ||
| 4662 | int i; | ||
| 4663 | |||
| 4664 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
| 4665 | btrfs_dev_stat_reset(dev, i); | ||
| 4666 | } | ||
| 4667 | |||
| 4668 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) | ||
| 4669 | { | ||
| 4670 | struct btrfs_key key; | ||
| 4671 | struct btrfs_key found_key; | ||
| 4672 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
| 4673 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
| 4674 | struct extent_buffer *eb; | ||
| 4675 | int slot; | ||
| 4676 | int ret = 0; | ||
| 4677 | struct btrfs_device *device; | ||
| 4678 | struct btrfs_path *path = NULL; | ||
| 4679 | int i; | ||
| 4680 | |||
| 4681 | path = btrfs_alloc_path(); | ||
| 4682 | if (!path) { | ||
| 4683 | ret = -ENOMEM; | ||
| 4684 | goto out; | ||
| 4685 | } | ||
| 4686 | |||
| 4687 | mutex_lock(&fs_devices->device_list_mutex); | ||
| 4688 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
| 4689 | int item_size; | ||
| 4690 | struct btrfs_dev_stats_item *ptr; | ||
| 4691 | |||
| 4692 | key.objectid = 0; | ||
| 4693 | key.type = BTRFS_DEV_STATS_KEY; | ||
| 4694 | key.offset = device->devid; | ||
| 4695 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); | ||
| 4696 | if (ret) { | ||
| 4697 | printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", | ||
| 4698 | device->name, (unsigned long long)device->devid); | ||
| 4699 | __btrfs_reset_dev_stats(device); | ||
| 4700 | device->dev_stats_valid = 1; | ||
| 4701 | btrfs_release_path(path); | ||
| 4702 | continue; | ||
| 4703 | } | ||
| 4704 | slot = path->slots[0]; | ||
| 4705 | eb = path->nodes[0]; | ||
| 4706 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
| 4707 | item_size = btrfs_item_size_nr(eb, slot); | ||
| 4708 | |||
| 4709 | ptr = btrfs_item_ptr(eb, slot, | ||
| 4710 | struct btrfs_dev_stats_item); | ||
| 4711 | |||
| 4712 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { | ||
| 4713 | if (item_size >= (1 + i) * sizeof(__le64)) | ||
| 4714 | btrfs_dev_stat_set(device, i, | ||
| 4715 | btrfs_dev_stats_value(eb, ptr, i)); | ||
| 4716 | else | ||
| 4717 | btrfs_dev_stat_reset(device, i); | ||
| 4718 | } | ||
| 4719 | |||
| 4720 | device->dev_stats_valid = 1; | ||
| 4721 | btrfs_dev_stat_print_on_load(device); | ||
| 4722 | btrfs_release_path(path); | ||
| 4723 | } | ||
| 4724 | mutex_unlock(&fs_devices->device_list_mutex); | ||
| 4725 | |||
| 4726 | out: | ||
| 4727 | btrfs_free_path(path); | ||
| 4728 | return ret < 0 ? ret : 0; | ||
| 4729 | } | ||
| 4730 | |||
| 4731 | static int update_dev_stat_item(struct btrfs_trans_handle *trans, | ||
| 4732 | struct btrfs_root *dev_root, | ||
| 4733 | struct btrfs_device *device) | ||
| 4734 | { | ||
| 4735 | struct btrfs_path *path; | ||
| 4736 | struct btrfs_key key; | ||
| 4737 | struct extent_buffer *eb; | ||
| 4738 | struct btrfs_dev_stats_item *ptr; | ||
| 4739 | int ret; | ||
| 4740 | int i; | ||
| 4741 | |||
| 4742 | key.objectid = 0; | ||
| 4743 | key.type = BTRFS_DEV_STATS_KEY; | ||
| 4744 | key.offset = device->devid; | ||
| 4745 | |||
| 4746 | path = btrfs_alloc_path(); | ||
| 4747 | BUG_ON(!path); | ||
| 4748 | ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); | ||
| 4749 | if (ret < 0) { | ||
| 4750 | printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", | ||
| 4751 | ret, device->name); | ||
| 4752 | goto out; | ||
| 4753 | } | ||
| 4754 | |||
| 4755 | if (ret == 0 && | ||
| 4756 | btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) { | ||
| 4757 | /* need to delete old one and insert a new one */ | ||
| 4758 | ret = btrfs_del_item(trans, dev_root, path); | ||
| 4759 | if (ret != 0) { | ||
| 4760 | printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", | ||
| 4761 | device->name, ret); | ||
| 4762 | goto out; | ||
| 4763 | } | ||
| 4764 | ret = 1; | ||
| 4765 | } | ||
| 4766 | |||
| 4767 | if (ret == 1) { | ||
| 4768 | /* need to insert a new item */ | ||
| 4769 | btrfs_release_path(path); | ||
| 4770 | ret = btrfs_insert_empty_item(trans, dev_root, path, | ||
| 4771 | &key, sizeof(*ptr)); | ||
| 4772 | if (ret < 0) { | ||
| 4773 | printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", | ||
| 4774 | device->name, ret); | ||
| 4775 | goto out; | ||
| 4776 | } | ||
| 4777 | } | ||
| 4778 | |||
| 4779 | eb = path->nodes[0]; | ||
| 4780 | ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item); | ||
| 4781 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
| 4782 | btrfs_set_dev_stats_value(eb, ptr, i, | ||
| 4783 | btrfs_dev_stat_read(device, i)); | ||
| 4784 | btrfs_mark_buffer_dirty(eb); | ||
| 4785 | |||
| 4786 | out: | ||
| 4787 | btrfs_free_path(path); | ||
| 4788 | return ret; | ||
| 4789 | } | ||
| 4790 | |||
| 4791 | /* | ||
| 4792 | * called from commit_transaction. Writes all changed device stats to disk. | ||
| 4793 | */ | ||
| 4794 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | ||
| 4795 | struct btrfs_fs_info *fs_info) | ||
| 4796 | { | ||
| 4797 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
| 4798 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
| 4799 | struct btrfs_device *device; | ||
| 4800 | int ret = 0; | ||
| 4801 | |||
| 4802 | mutex_lock(&fs_devices->device_list_mutex); | ||
| 4803 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
| 4804 | if (!device->dev_stats_valid || !device->dev_stats_dirty) | ||
| 4805 | continue; | ||
| 4806 | |||
| 4807 | ret = update_dev_stat_item(trans, dev_root, device); | ||
| 4808 | if (!ret) | ||
| 4809 | device->dev_stats_dirty = 0; | ||
| 4810 | } | ||
| 4811 | mutex_unlock(&fs_devices->device_list_mutex); | ||
| 4812 | |||
| 4813 | return ret; | ||
| 4814 | } | ||
| 4815 | |||
| 4816 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index) | ||
| 4817 | { | ||
| 4818 | btrfs_dev_stat_inc(dev, index); | ||
| 4819 | btrfs_dev_stat_print_on_error(dev); | ||
| 4820 | } | ||
| 4821 | |||
| 4822 | void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) | ||
| 4823 | { | ||
| 4824 | if (!dev->dev_stats_valid) | ||
| 4825 | return; | ||
| 4826 | printk_ratelimited(KERN_ERR | ||
| 4827 | "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", | ||
| 4828 | dev->name, | ||
| 4829 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), | ||
| 4830 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), | ||
| 4831 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), | ||
| 4832 | btrfs_dev_stat_read(dev, | ||
| 4833 | BTRFS_DEV_STAT_CORRUPTION_ERRS), | ||
| 4834 | btrfs_dev_stat_read(dev, | ||
| 4835 | BTRFS_DEV_STAT_GENERATION_ERRS)); | ||
| 4836 | } | ||
| 4837 | |||
| 4838 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) | ||
| 4839 | { | ||
| 4840 | printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", | ||
| 4841 | dev->name, | ||
| 4842 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), | ||
| 4843 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), | ||
| 4844 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), | ||
| 4845 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS), | ||
| 4846 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS)); | ||
| 4847 | } | ||
| 4848 | |||
| 4849 | int btrfs_get_dev_stats(struct btrfs_root *root, | ||
| 4850 | struct btrfs_ioctl_get_dev_stats *stats, | ||
| 4851 | int reset_after_read) | ||
| 4852 | { | ||
| 4853 | struct btrfs_device *dev; | ||
| 4854 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
| 4855 | int i; | ||
| 4856 | |||
| 4857 | mutex_lock(&fs_devices->device_list_mutex); | ||
| 4858 | dev = btrfs_find_device(root, stats->devid, NULL, NULL); | ||
| 4859 | mutex_unlock(&fs_devices->device_list_mutex); | ||
| 4860 | |||
| 4861 | if (!dev) { | ||
| 4862 | printk(KERN_WARNING | ||
| 4863 | "btrfs: get dev_stats failed, device not found\n"); | ||
| 4864 | return -ENODEV; | ||
| 4865 | } else if (!dev->dev_stats_valid) { | ||
| 4866 | printk(KERN_WARNING | ||
| 4867 | "btrfs: get dev_stats failed, not yet valid\n"); | ||
| 4868 | return -ENODEV; | ||
| 4869 | } else if (reset_after_read) { | ||
| 4870 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { | ||
| 4871 | if (stats->nr_items > i) | ||
| 4872 | stats->values[i] = | ||
| 4873 | btrfs_dev_stat_read_and_reset(dev, i); | ||
| 4874 | else | ||
| 4875 | btrfs_dev_stat_reset(dev, i); | ||
| 4876 | } | ||
| 4877 | } else { | ||
| 4878 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
| 4879 | if (stats->nr_items > i) | ||
| 4880 | stats->values[i] = btrfs_dev_stat_read(dev, i); | ||
| 4881 | } | ||
| 4882 | if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX) | ||
| 4883 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; | ||
| 4884 | return 0; | ||
| 4885 | } | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bb6b03f97aaa..3406a88ca83e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
| 23 | #include <linux/sort.h> | 23 | #include <linux/sort.h> |
| 24 | #include "async-thread.h" | 24 | #include "async-thread.h" |
| 25 | #include "ioctl.h" | ||
| 25 | 26 | ||
| 26 | #define BTRFS_STRIPE_LEN (64 * 1024) | 27 | #define BTRFS_STRIPE_LEN (64 * 1024) |
| 27 | 28 | ||
| @@ -106,6 +107,11 @@ struct btrfs_device { | |||
| 106 | struct completion flush_wait; | 107 | struct completion flush_wait; |
| 107 | int nobarriers; | 108 | int nobarriers; |
| 108 | 109 | ||
| 110 | /* disk I/O failure stats. For detailed description refer to | ||
| 111 | * enum btrfs_dev_stat_values in ioctl.h */ | ||
| 112 | int dev_stats_valid; | ||
| 113 | int dev_stats_dirty; /* counters need to be written to disk */ | ||
| 114 | atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
| 109 | }; | 115 | }; |
| 110 | 116 | ||
| 111 | struct btrfs_fs_devices { | 117 | struct btrfs_fs_devices { |
| @@ -281,4 +287,50 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); | |||
| 281 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 287 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
| 282 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | 288 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, |
| 283 | u64 *start, u64 *max_avail); | 289 | u64 *start, u64 *max_avail); |
| 290 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
| 291 | u64 logical, int mirror_num); | ||
| 292 | void btrfs_dev_stat_print_on_error(struct btrfs_device *device); | ||
| 293 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); | ||
| 294 | int btrfs_get_dev_stats(struct btrfs_root *root, | ||
| 295 | struct btrfs_ioctl_get_dev_stats *stats, | ||
| 296 | int reset_after_read); | ||
| 297 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | ||
| 298 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | ||
| 299 | struct btrfs_fs_info *fs_info); | ||
| 300 | |||
| 301 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | ||
| 302 | int index) | ||
| 303 | { | ||
| 304 | atomic_inc(dev->dev_stat_values + index); | ||
| 305 | dev->dev_stats_dirty = 1; | ||
| 306 | } | ||
| 307 | |||
| 308 | static inline int btrfs_dev_stat_read(struct btrfs_device *dev, | ||
| 309 | int index) | ||
| 310 | { | ||
| 311 | return atomic_read(dev->dev_stat_values + index); | ||
| 312 | } | ||
| 313 | |||
| 314 | static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, | ||
| 315 | int index) | ||
| 316 | { | ||
| 317 | int ret; | ||
| 318 | |||
| 319 | ret = atomic_xchg(dev->dev_stat_values + index, 0); | ||
| 320 | dev->dev_stats_dirty = 1; | ||
| 321 | return ret; | ||
| 322 | } | ||
| 323 | |||
| 324 | static inline void btrfs_dev_stat_set(struct btrfs_device *dev, | ||
| 325 | int index, unsigned long val) | ||
| 326 | { | ||
| 327 | atomic_set(dev->dev_stat_values + index, val); | ||
| 328 | dev->dev_stats_dirty = 1; | ||
| 329 | } | ||
| 330 | |||
| 331 | static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, | ||
| 332 | int index) | ||
| 333 | { | ||
| 334 | btrfs_dev_stat_set(dev, index, 0); | ||
| 335 | } | ||
| 284 | #endif | 336 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index e7a5659087e6..3f4e2d69e83a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -196,6 +196,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
| 196 | if (ret) | 196 | if (ret) |
| 197 | goto out; | 197 | goto out; |
| 198 | 198 | ||
| 199 | inode_inc_iversion(inode); | ||
| 199 | inode->i_ctime = CURRENT_TIME; | 200 | inode->i_ctime = CURRENT_TIME; |
| 200 | ret = btrfs_update_inode(trans, root, inode); | 201 | ret = btrfs_update_inode(trans, root, inode); |
| 201 | BUG_ON(ret); | 202 | BUG_ON(ret); |
