diff options
Diffstat (limited to 'fs/btrfs')
33 files changed, 2896 insertions, 887 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 89b156d85d63..761e2cd8fed1 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, | |||
227 | if (ret > 0) { | 227 | if (ret > 0) { |
228 | /* we need an acl */ | 228 | /* we need an acl */ |
229 | ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); | 229 | ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); |
230 | } else { | ||
231 | cache_no_acl(inode); | ||
230 | } | 232 | } |
233 | } else { | ||
234 | cache_no_acl(inode); | ||
231 | } | 235 | } |
232 | failed: | 236 | failed: |
233 | posix_acl_release(acl); | 237 | posix_acl_release(acl); |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index bcec06750232..3f75895c919b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -24,22 +24,135 @@ | |||
24 | #include "delayed-ref.h" | 24 | #include "delayed-ref.h" |
25 | #include "locking.h" | 25 | #include "locking.h" |
26 | 26 | ||
27 | struct extent_inode_elem { | ||
28 | u64 inum; | ||
29 | u64 offset; | ||
30 | struct extent_inode_elem *next; | ||
31 | }; | ||
32 | |||
33 | static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, | ||
34 | struct btrfs_file_extent_item *fi, | ||
35 | u64 extent_item_pos, | ||
36 | struct extent_inode_elem **eie) | ||
37 | { | ||
38 | u64 data_offset; | ||
39 | u64 data_len; | ||
40 | struct extent_inode_elem *e; | ||
41 | |||
42 | data_offset = btrfs_file_extent_offset(eb, fi); | ||
43 | data_len = btrfs_file_extent_num_bytes(eb, fi); | ||
44 | |||
45 | if (extent_item_pos < data_offset || | ||
46 | extent_item_pos >= data_offset + data_len) | ||
47 | return 1; | ||
48 | |||
49 | e = kmalloc(sizeof(*e), GFP_NOFS); | ||
50 | if (!e) | ||
51 | return -ENOMEM; | ||
52 | |||
53 | e->next = *eie; | ||
54 | e->inum = key->objectid; | ||
55 | e->offset = key->offset + (extent_item_pos - data_offset); | ||
56 | *eie = e; | ||
57 | |||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, | ||
62 | u64 extent_item_pos, | ||
63 | struct extent_inode_elem **eie) | ||
64 | { | ||
65 | u64 disk_byte; | ||
66 | struct btrfs_key key; | ||
67 | struct btrfs_file_extent_item *fi; | ||
68 | int slot; | ||
69 | int nritems; | ||
70 | int extent_type; | ||
71 | int ret; | ||
72 | |||
73 | /* | ||
74 | * from the shared data ref, we only have the leaf but we need | ||
75 | * the key. thus, we must look into all items and see that we | ||
76 | * find one (some) with a reference to our extent item. | ||
77 | */ | ||
78 | nritems = btrfs_header_nritems(eb); | ||
79 | for (slot = 0; slot < nritems; ++slot) { | ||
80 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
81 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
82 | continue; | ||
83 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
84 | extent_type = btrfs_file_extent_type(eb, fi); | ||
85 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
86 | continue; | ||
87 | /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ | ||
88 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
89 | if (disk_byte != wanted_disk_byte) | ||
90 | continue; | ||
91 | |||
92 | ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie); | ||
93 | if (ret < 0) | ||
94 | return ret; | ||
95 | } | ||
96 | |||
97 | return 0; | ||
98 | } | ||
99 | |||
27 | /* | 100 | /* |
28 | * this structure records all encountered refs on the way up to the root | 101 | * this structure records all encountered refs on the way up to the root |
29 | */ | 102 | */ |
30 | struct __prelim_ref { | 103 | struct __prelim_ref { |
31 | struct list_head list; | 104 | struct list_head list; |
32 | u64 root_id; | 105 | u64 root_id; |
33 | struct btrfs_key key; | 106 | struct btrfs_key key_for_search; |
34 | int level; | 107 | int level; |
35 | int count; | 108 | int count; |
109 | struct extent_inode_elem *inode_list; | ||
36 | u64 parent; | 110 | u64 parent; |
37 | u64 wanted_disk_byte; | 111 | u64 wanted_disk_byte; |
38 | }; | 112 | }; |
39 | 113 | ||
114 | /* | ||
115 | * the rules for all callers of this function are: | ||
116 | * - obtaining the parent is the goal | ||
117 | * - if you add a key, you must know that it is a correct key | ||
118 | * - if you cannot add the parent or a correct key, then we will look into the | ||
119 | * block later to set a correct key | ||
120 | * | ||
121 | * delayed refs | ||
122 | * ============ | ||
123 | * backref type | shared | indirect | shared | indirect | ||
124 | * information | tree | tree | data | data | ||
125 | * --------------------+--------+----------+--------+---------- | ||
126 | * parent logical | y | - | - | - | ||
127 | * key to resolve | - | y | y | y | ||
128 | * tree block logical | - | - | - | - | ||
129 | * root for resolving | y | y | y | y | ||
130 | * | ||
131 | * - column 1: we've the parent -> done | ||
132 | * - column 2, 3, 4: we use the key to find the parent | ||
133 | * | ||
134 | * on disk refs (inline or keyed) | ||
135 | * ============================== | ||
136 | * backref type | shared | indirect | shared | indirect | ||
137 | * information | tree | tree | data | data | ||
138 | * --------------------+--------+----------+--------+---------- | ||
139 | * parent logical | y | - | y | - | ||
140 | * key to resolve | - | - | - | y | ||
141 | * tree block logical | y | y | y | y | ||
142 | * root for resolving | - | y | y | y | ||
143 | * | ||
144 | * - column 1, 3: we've the parent -> done | ||
145 | * - column 2: we take the first key from the block to find the parent | ||
146 | * (see __add_missing_keys) | ||
147 | * - column 4: we use the key to find the parent | ||
148 | * | ||
149 | * additional information that's available but not required to find the parent | ||
150 | * block might help in merging entries to gain some speed. | ||
151 | */ | ||
152 | |||
40 | static int __add_prelim_ref(struct list_head *head, u64 root_id, | 153 | static int __add_prelim_ref(struct list_head *head, u64 root_id, |
41 | struct btrfs_key *key, int level, u64 parent, | 154 | struct btrfs_key *key, int level, |
42 | u64 wanted_disk_byte, int count) | 155 | u64 parent, u64 wanted_disk_byte, int count) |
43 | { | 156 | { |
44 | struct __prelim_ref *ref; | 157 | struct __prelim_ref *ref; |
45 | 158 | ||
@@ -50,10 +163,11 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, | |||
50 | 163 | ||
51 | ref->root_id = root_id; | 164 | ref->root_id = root_id; |
52 | if (key) | 165 | if (key) |
53 | ref->key = *key; | 166 | ref->key_for_search = *key; |
54 | else | 167 | else |
55 | memset(&ref->key, 0, sizeof(ref->key)); | 168 | memset(&ref->key_for_search, 0, sizeof(ref->key_for_search)); |
56 | 169 | ||
170 | ref->inode_list = NULL; | ||
57 | ref->level = level; | 171 | ref->level = level; |
58 | ref->count = count; | 172 | ref->count = count; |
59 | ref->parent = parent; | 173 | ref->parent = parent; |
@@ -64,18 +178,26 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, | |||
64 | } | 178 | } |
65 | 179 | ||
66 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | 180 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, |
67 | struct ulist *parents, | 181 | struct ulist *parents, int level, |
68 | struct extent_buffer *eb, int level, | 182 | struct btrfs_key *key, u64 wanted_disk_byte, |
69 | u64 wanted_objectid, u64 wanted_disk_byte) | 183 | const u64 *extent_item_pos) |
70 | { | 184 | { |
71 | int ret; | 185 | int ret; |
72 | int slot; | 186 | int slot = path->slots[level]; |
187 | struct extent_buffer *eb = path->nodes[level]; | ||
73 | struct btrfs_file_extent_item *fi; | 188 | struct btrfs_file_extent_item *fi; |
74 | struct btrfs_key key; | 189 | struct extent_inode_elem *eie = NULL; |
75 | u64 disk_byte; | 190 | u64 disk_byte; |
191 | u64 wanted_objectid = key->objectid; | ||
76 | 192 | ||
77 | add_parent: | 193 | add_parent: |
78 | ret = ulist_add(parents, eb->start, 0, GFP_NOFS); | 194 | if (level == 0 && extent_item_pos) { |
195 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
196 | ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie); | ||
197 | if (ret < 0) | ||
198 | return ret; | ||
199 | } | ||
200 | ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS); | ||
79 | if (ret < 0) | 201 | if (ret < 0) |
80 | return ret; | 202 | return ret; |
81 | 203 | ||
@@ -89,6 +211,7 @@ add_parent: | |||
89 | * repeat this until we don't find any additional EXTENT_DATA items. | 211 | * repeat this until we don't find any additional EXTENT_DATA items. |
90 | */ | 212 | */ |
91 | while (1) { | 213 | while (1) { |
214 | eie = NULL; | ||
92 | ret = btrfs_next_leaf(root, path); | 215 | ret = btrfs_next_leaf(root, path); |
93 | if (ret < 0) | 216 | if (ret < 0) |
94 | return ret; | 217 | return ret; |
@@ -97,9 +220,9 @@ add_parent: | |||
97 | 220 | ||
98 | eb = path->nodes[0]; | 221 | eb = path->nodes[0]; |
99 | for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { | 222 | for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { |
100 | btrfs_item_key_to_cpu(eb, &key, slot); | 223 | btrfs_item_key_to_cpu(eb, key, slot); |
101 | if (key.objectid != wanted_objectid || | 224 | if (key->objectid != wanted_objectid || |
102 | key.type != BTRFS_EXTENT_DATA_KEY) | 225 | key->type != BTRFS_EXTENT_DATA_KEY) |
103 | return 0; | 226 | return 0; |
104 | fi = btrfs_item_ptr(eb, slot, | 227 | fi = btrfs_item_ptr(eb, slot, |
105 | struct btrfs_file_extent_item); | 228 | struct btrfs_file_extent_item); |
@@ -118,8 +241,10 @@ add_parent: | |||
118 | */ | 241 | */ |
119 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | 242 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, |
120 | int search_commit_root, | 243 | int search_commit_root, |
244 | u64 time_seq, | ||
121 | struct __prelim_ref *ref, | 245 | struct __prelim_ref *ref, |
122 | struct ulist *parents) | 246 | struct ulist *parents, |
247 | const u64 *extent_item_pos) | ||
123 | { | 248 | { |
124 | struct btrfs_path *path; | 249 | struct btrfs_path *path; |
125 | struct btrfs_root *root; | 250 | struct btrfs_root *root; |
@@ -152,12 +277,13 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
152 | goto out; | 277 | goto out; |
153 | 278 | ||
154 | path->lowest_level = level; | 279 | path->lowest_level = level; |
155 | ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0); | 280 | ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); |
156 | pr_debug("search slot in root %llu (level %d, ref count %d) returned " | 281 | pr_debug("search slot in root %llu (level %d, ref count %d) returned " |
157 | "%d for key (%llu %u %llu)\n", | 282 | "%d for key (%llu %u %llu)\n", |
158 | (unsigned long long)ref->root_id, level, ref->count, ret, | 283 | (unsigned long long)ref->root_id, level, ref->count, ret, |
159 | (unsigned long long)ref->key.objectid, ref->key.type, | 284 | (unsigned long long)ref->key_for_search.objectid, |
160 | (unsigned long long)ref->key.offset); | 285 | ref->key_for_search.type, |
286 | (unsigned long long)ref->key_for_search.offset); | ||
161 | if (ret < 0) | 287 | if (ret < 0) |
162 | goto out; | 288 | goto out; |
163 | 289 | ||
@@ -179,9 +305,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
179 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); | 305 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); |
180 | } | 306 | } |
181 | 307 | ||
182 | /* the last two parameters will only be used for level == 0 */ | 308 | ret = add_all_parents(root, path, parents, level, &key, |
183 | ret = add_all_parents(root, path, parents, eb, level, key.objectid, | 309 | ref->wanted_disk_byte, extent_item_pos); |
184 | ref->wanted_disk_byte); | ||
185 | out: | 310 | out: |
186 | btrfs_free_path(path); | 311 | btrfs_free_path(path); |
187 | return ret; | 312 | return ret; |
@@ -191,8 +316,9 @@ out: | |||
191 | * resolve all indirect backrefs from the list | 316 | * resolve all indirect backrefs from the list |
192 | */ | 317 | */ |
193 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | 318 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, |
194 | int search_commit_root, | 319 | int search_commit_root, u64 time_seq, |
195 | struct list_head *head) | 320 | struct list_head *head, |
321 | const u64 *extent_item_pos) | ||
196 | { | 322 | { |
197 | int err; | 323 | int err; |
198 | int ret = 0; | 324 | int ret = 0; |
@@ -201,6 +327,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
201 | struct __prelim_ref *new_ref; | 327 | struct __prelim_ref *new_ref; |
202 | struct ulist *parents; | 328 | struct ulist *parents; |
203 | struct ulist_node *node; | 329 | struct ulist_node *node; |
330 | struct ulist_iterator uiter; | ||
204 | 331 | ||
205 | parents = ulist_alloc(GFP_NOFS); | 332 | parents = ulist_alloc(GFP_NOFS); |
206 | if (!parents) | 333 | if (!parents) |
@@ -217,7 +344,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
217 | if (ref->count == 0) | 344 | if (ref->count == 0) |
218 | continue; | 345 | continue; |
219 | err = __resolve_indirect_ref(fs_info, search_commit_root, | 346 | err = __resolve_indirect_ref(fs_info, search_commit_root, |
220 | ref, parents); | 347 | time_seq, ref, parents, |
348 | extent_item_pos); | ||
221 | if (err) { | 349 | if (err) { |
222 | if (ret == 0) | 350 | if (ret == 0) |
223 | ret = err; | 351 | ret = err; |
@@ -225,11 +353,14 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
225 | } | 353 | } |
226 | 354 | ||
227 | /* we put the first parent into the ref at hand */ | 355 | /* we put the first parent into the ref at hand */ |
228 | node = ulist_next(parents, NULL); | 356 | ULIST_ITER_INIT(&uiter); |
357 | node = ulist_next(parents, &uiter); | ||
229 | ref->parent = node ? node->val : 0; | 358 | ref->parent = node ? node->val : 0; |
359 | ref->inode_list = | ||
360 | node ? (struct extent_inode_elem *)node->aux : 0; | ||
230 | 361 | ||
231 | /* additional parents require new refs being added here */ | 362 | /* additional parents require new refs being added here */ |
232 | while ((node = ulist_next(parents, node))) { | 363 | while ((node = ulist_next(parents, &uiter))) { |
233 | new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); | 364 | new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); |
234 | if (!new_ref) { | 365 | if (!new_ref) { |
235 | ret = -ENOMEM; | 366 | ret = -ENOMEM; |
@@ -237,6 +368,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
237 | } | 368 | } |
238 | memcpy(new_ref, ref, sizeof(*ref)); | 369 | memcpy(new_ref, ref, sizeof(*ref)); |
239 | new_ref->parent = node->val; | 370 | new_ref->parent = node->val; |
371 | new_ref->inode_list = | ||
372 | (struct extent_inode_elem *)node->aux; | ||
240 | list_add(&new_ref->list, &ref->list); | 373 | list_add(&new_ref->list, &ref->list); |
241 | } | 374 | } |
242 | ulist_reinit(parents); | 375 | ulist_reinit(parents); |
@@ -246,10 +379,65 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
246 | return ret; | 379 | return ret; |
247 | } | 380 | } |
248 | 381 | ||
382 | static inline int ref_for_same_block(struct __prelim_ref *ref1, | ||
383 | struct __prelim_ref *ref2) | ||
384 | { | ||
385 | if (ref1->level != ref2->level) | ||
386 | return 0; | ||
387 | if (ref1->root_id != ref2->root_id) | ||
388 | return 0; | ||
389 | if (ref1->key_for_search.type != ref2->key_for_search.type) | ||
390 | return 0; | ||
391 | if (ref1->key_for_search.objectid != ref2->key_for_search.objectid) | ||
392 | return 0; | ||
393 | if (ref1->key_for_search.offset != ref2->key_for_search.offset) | ||
394 | return 0; | ||
395 | if (ref1->parent != ref2->parent) | ||
396 | return 0; | ||
397 | |||
398 | return 1; | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * read tree blocks and add keys where required. | ||
403 | */ | ||
404 | static int __add_missing_keys(struct btrfs_fs_info *fs_info, | ||
405 | struct list_head *head) | ||
406 | { | ||
407 | struct list_head *pos; | ||
408 | struct extent_buffer *eb; | ||
409 | |||
410 | list_for_each(pos, head) { | ||
411 | struct __prelim_ref *ref; | ||
412 | ref = list_entry(pos, struct __prelim_ref, list); | ||
413 | |||
414 | if (ref->parent) | ||
415 | continue; | ||
416 | if (ref->key_for_search.type) | ||
417 | continue; | ||
418 | BUG_ON(!ref->wanted_disk_byte); | ||
419 | eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, | ||
420 | fs_info->tree_root->leafsize, 0); | ||
421 | BUG_ON(!eb); | ||
422 | btrfs_tree_read_lock(eb); | ||
423 | if (btrfs_header_level(eb) == 0) | ||
424 | btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0); | ||
425 | else | ||
426 | btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0); | ||
427 | btrfs_tree_read_unlock(eb); | ||
428 | free_extent_buffer(eb); | ||
429 | } | ||
430 | return 0; | ||
431 | } | ||
432 | |||
249 | /* | 433 | /* |
250 | * merge two lists of backrefs and adjust counts accordingly | 434 | * merge two lists of backrefs and adjust counts accordingly |
251 | * | 435 | * |
252 | * mode = 1: merge identical keys, if key is set | 436 | * mode = 1: merge identical keys, if key is set |
437 | * FIXME: if we add more keys in __add_prelim_ref, we can merge more here. | ||
438 | * additionally, we could even add a key range for the blocks we | ||
439 | * looked into to merge even more (-> replace unresolved refs by those | ||
440 | * having a parent). | ||
253 | * mode = 2: merge identical parents | 441 | * mode = 2: merge identical parents |
254 | */ | 442 | */ |
255 | static int __merge_refs(struct list_head *head, int mode) | 443 | static int __merge_refs(struct list_head *head, int mode) |
@@ -263,20 +451,21 @@ static int __merge_refs(struct list_head *head, int mode) | |||
263 | 451 | ||
264 | ref1 = list_entry(pos1, struct __prelim_ref, list); | 452 | ref1 = list_entry(pos1, struct __prelim_ref, list); |
265 | 453 | ||
266 | if (mode == 1 && ref1->key.type == 0) | ||
267 | continue; | ||
268 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; | 454 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; |
269 | pos2 = n2, n2 = pos2->next) { | 455 | pos2 = n2, n2 = pos2->next) { |
270 | struct __prelim_ref *ref2; | 456 | struct __prelim_ref *ref2; |
457 | struct __prelim_ref *xchg; | ||
271 | 458 | ||
272 | ref2 = list_entry(pos2, struct __prelim_ref, list); | 459 | ref2 = list_entry(pos2, struct __prelim_ref, list); |
273 | 460 | ||
274 | if (mode == 1) { | 461 | if (mode == 1) { |
275 | if (memcmp(&ref1->key, &ref2->key, | 462 | if (!ref_for_same_block(ref1, ref2)) |
276 | sizeof(ref1->key)) || | ||
277 | ref1->level != ref2->level || | ||
278 | ref1->root_id != ref2->root_id) | ||
279 | continue; | 463 | continue; |
464 | if (!ref1->parent && ref2->parent) { | ||
465 | xchg = ref1; | ||
466 | ref1 = ref2; | ||
467 | ref2 = xchg; | ||
468 | } | ||
280 | ref1->count += ref2->count; | 469 | ref1->count += ref2->count; |
281 | } else { | 470 | } else { |
282 | if (ref1->parent != ref2->parent) | 471 | if (ref1->parent != ref2->parent) |
@@ -296,16 +485,17 @@ static int __merge_refs(struct list_head *head, int mode) | |||
296 | * smaller or equal that seq to the list | 485 | * smaller or equal that seq to the list |
297 | */ | 486 | */ |
298 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | 487 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, |
299 | struct btrfs_key *info_key, | ||
300 | struct list_head *prefs) | 488 | struct list_head *prefs) |
301 | { | 489 | { |
302 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | 490 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; |
303 | struct rb_node *n = &head->node.rb_node; | 491 | struct rb_node *n = &head->node.rb_node; |
492 | struct btrfs_key key; | ||
493 | struct btrfs_key op_key = {0}; | ||
304 | int sgn; | 494 | int sgn; |
305 | int ret = 0; | 495 | int ret = 0; |
306 | 496 | ||
307 | if (extent_op && extent_op->update_key) | 497 | if (extent_op && extent_op->update_key) |
308 | btrfs_disk_key_to_cpu(info_key, &extent_op->key); | 498 | btrfs_disk_key_to_cpu(&op_key, &extent_op->key); |
309 | 499 | ||
310 | while ((n = rb_prev(n))) { | 500 | while ((n = rb_prev(n))) { |
311 | struct btrfs_delayed_ref_node *node; | 501 | struct btrfs_delayed_ref_node *node; |
@@ -337,7 +527,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
337 | struct btrfs_delayed_tree_ref *ref; | 527 | struct btrfs_delayed_tree_ref *ref; |
338 | 528 | ||
339 | ref = btrfs_delayed_node_to_tree_ref(node); | 529 | ref = btrfs_delayed_node_to_tree_ref(node); |
340 | ret = __add_prelim_ref(prefs, ref->root, info_key, | 530 | ret = __add_prelim_ref(prefs, ref->root, &op_key, |
341 | ref->level + 1, 0, node->bytenr, | 531 | ref->level + 1, 0, node->bytenr, |
342 | node->ref_mod * sgn); | 532 | node->ref_mod * sgn); |
343 | break; | 533 | break; |
@@ -346,7 +536,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
346 | struct btrfs_delayed_tree_ref *ref; | 536 | struct btrfs_delayed_tree_ref *ref; |
347 | 537 | ||
348 | ref = btrfs_delayed_node_to_tree_ref(node); | 538 | ref = btrfs_delayed_node_to_tree_ref(node); |
349 | ret = __add_prelim_ref(prefs, ref->root, info_key, | 539 | ret = __add_prelim_ref(prefs, ref->root, NULL, |
350 | ref->level + 1, ref->parent, | 540 | ref->level + 1, ref->parent, |
351 | node->bytenr, | 541 | node->bytenr, |
352 | node->ref_mod * sgn); | 542 | node->ref_mod * sgn); |
@@ -354,8 +544,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
354 | } | 544 | } |
355 | case BTRFS_EXTENT_DATA_REF_KEY: { | 545 | case BTRFS_EXTENT_DATA_REF_KEY: { |
356 | struct btrfs_delayed_data_ref *ref; | 546 | struct btrfs_delayed_data_ref *ref; |
357 | struct btrfs_key key; | ||
358 | |||
359 | ref = btrfs_delayed_node_to_data_ref(node); | 547 | ref = btrfs_delayed_node_to_data_ref(node); |
360 | 548 | ||
361 | key.objectid = ref->objectid; | 549 | key.objectid = ref->objectid; |
@@ -368,7 +556,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
368 | } | 556 | } |
369 | case BTRFS_SHARED_DATA_REF_KEY: { | 557 | case BTRFS_SHARED_DATA_REF_KEY: { |
370 | struct btrfs_delayed_data_ref *ref; | 558 | struct btrfs_delayed_data_ref *ref; |
371 | struct btrfs_key key; | ||
372 | 559 | ||
373 | ref = btrfs_delayed_node_to_data_ref(node); | 560 | ref = btrfs_delayed_node_to_data_ref(node); |
374 | 561 | ||
@@ -394,8 +581,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
394 | */ | 581 | */ |
395 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, | 582 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, |
396 | struct btrfs_path *path, u64 bytenr, | 583 | struct btrfs_path *path, u64 bytenr, |
397 | struct btrfs_key *info_key, int *info_level, | 584 | int *info_level, struct list_head *prefs) |
398 | struct list_head *prefs) | ||
399 | { | 585 | { |
400 | int ret = 0; | 586 | int ret = 0; |
401 | int slot; | 587 | int slot; |
@@ -411,7 +597,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
411 | * enumerate all inline refs | 597 | * enumerate all inline refs |
412 | */ | 598 | */ |
413 | leaf = path->nodes[0]; | 599 | leaf = path->nodes[0]; |
414 | slot = path->slots[0] - 1; | 600 | slot = path->slots[0]; |
415 | 601 | ||
416 | item_size = btrfs_item_size_nr(leaf, slot); | 602 | item_size = btrfs_item_size_nr(leaf, slot); |
417 | BUG_ON(item_size < sizeof(*ei)); | 603 | BUG_ON(item_size < sizeof(*ei)); |
@@ -424,12 +610,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
424 | 610 | ||
425 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 611 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
426 | struct btrfs_tree_block_info *info; | 612 | struct btrfs_tree_block_info *info; |
427 | struct btrfs_disk_key disk_key; | ||
428 | 613 | ||
429 | info = (struct btrfs_tree_block_info *)ptr; | 614 | info = (struct btrfs_tree_block_info *)ptr; |
430 | *info_level = btrfs_tree_block_level(leaf, info); | 615 | *info_level = btrfs_tree_block_level(leaf, info); |
431 | btrfs_tree_block_key(leaf, info, &disk_key); | ||
432 | btrfs_disk_key_to_cpu(info_key, &disk_key); | ||
433 | ptr += sizeof(struct btrfs_tree_block_info); | 616 | ptr += sizeof(struct btrfs_tree_block_info); |
434 | BUG_ON(ptr > end); | 617 | BUG_ON(ptr > end); |
435 | } else { | 618 | } else { |
@@ -447,7 +630,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
447 | 630 | ||
448 | switch (type) { | 631 | switch (type) { |
449 | case BTRFS_SHARED_BLOCK_REF_KEY: | 632 | case BTRFS_SHARED_BLOCK_REF_KEY: |
450 | ret = __add_prelim_ref(prefs, 0, info_key, | 633 | ret = __add_prelim_ref(prefs, 0, NULL, |
451 | *info_level + 1, offset, | 634 | *info_level + 1, offset, |
452 | bytenr, 1); | 635 | bytenr, 1); |
453 | break; | 636 | break; |
@@ -462,8 +645,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
462 | break; | 645 | break; |
463 | } | 646 | } |
464 | case BTRFS_TREE_BLOCK_REF_KEY: | 647 | case BTRFS_TREE_BLOCK_REF_KEY: |
465 | ret = __add_prelim_ref(prefs, offset, info_key, | 648 | ret = __add_prelim_ref(prefs, offset, NULL, |
466 | *info_level + 1, 0, bytenr, 1); | 649 | *info_level + 1, 0, |
650 | bytenr, 1); | ||
467 | break; | 651 | break; |
468 | case BTRFS_EXTENT_DATA_REF_KEY: { | 652 | case BTRFS_EXTENT_DATA_REF_KEY: { |
469 | struct btrfs_extent_data_ref *dref; | 653 | struct btrfs_extent_data_ref *dref; |
@@ -477,8 +661,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
477 | key.type = BTRFS_EXTENT_DATA_KEY; | 661 | key.type = BTRFS_EXTENT_DATA_KEY; |
478 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 662 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
479 | root = btrfs_extent_data_ref_root(leaf, dref); | 663 | root = btrfs_extent_data_ref_root(leaf, dref); |
480 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr, | 664 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
481 | count); | 665 | bytenr, count); |
482 | break; | 666 | break; |
483 | } | 667 | } |
484 | default: | 668 | default: |
@@ -496,8 +680,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
496 | */ | 680 | */ |
497 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | 681 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, |
498 | struct btrfs_path *path, u64 bytenr, | 682 | struct btrfs_path *path, u64 bytenr, |
499 | struct btrfs_key *info_key, int info_level, | 683 | int info_level, struct list_head *prefs) |
500 | struct list_head *prefs) | ||
501 | { | 684 | { |
502 | struct btrfs_root *extent_root = fs_info->extent_root; | 685 | struct btrfs_root *extent_root = fs_info->extent_root; |
503 | int ret; | 686 | int ret; |
@@ -527,7 +710,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
527 | 710 | ||
528 | switch (key.type) { | 711 | switch (key.type) { |
529 | case BTRFS_SHARED_BLOCK_REF_KEY: | 712 | case BTRFS_SHARED_BLOCK_REF_KEY: |
530 | ret = __add_prelim_ref(prefs, 0, info_key, | 713 | ret = __add_prelim_ref(prefs, 0, NULL, |
531 | info_level + 1, key.offset, | 714 | info_level + 1, key.offset, |
532 | bytenr, 1); | 715 | bytenr, 1); |
533 | break; | 716 | break; |
@@ -543,8 +726,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
543 | break; | 726 | break; |
544 | } | 727 | } |
545 | case BTRFS_TREE_BLOCK_REF_KEY: | 728 | case BTRFS_TREE_BLOCK_REF_KEY: |
546 | ret = __add_prelim_ref(prefs, key.offset, info_key, | 729 | ret = __add_prelim_ref(prefs, key.offset, NULL, |
547 | info_level + 1, 0, bytenr, 1); | 730 | info_level + 1, 0, |
731 | bytenr, 1); | ||
548 | break; | 732 | break; |
549 | case BTRFS_EXTENT_DATA_REF_KEY: { | 733 | case BTRFS_EXTENT_DATA_REF_KEY: { |
550 | struct btrfs_extent_data_ref *dref; | 734 | struct btrfs_extent_data_ref *dref; |
@@ -560,7 +744,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
560 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 744 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
561 | root = btrfs_extent_data_ref_root(leaf, dref); | 745 | root = btrfs_extent_data_ref_root(leaf, dref); |
562 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, | 746 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
563 | bytenr, count); | 747 | bytenr, count); |
564 | break; | 748 | break; |
565 | } | 749 | } |
566 | default: | 750 | default: |
@@ -582,11 +766,12 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
582 | */ | 766 | */ |
583 | static int find_parent_nodes(struct btrfs_trans_handle *trans, | 767 | static int find_parent_nodes(struct btrfs_trans_handle *trans, |
584 | struct btrfs_fs_info *fs_info, u64 bytenr, | 768 | struct btrfs_fs_info *fs_info, u64 bytenr, |
585 | u64 seq, struct ulist *refs, struct ulist *roots) | 769 | u64 delayed_ref_seq, u64 time_seq, |
770 | struct ulist *refs, struct ulist *roots, | ||
771 | const u64 *extent_item_pos) | ||
586 | { | 772 | { |
587 | struct btrfs_key key; | 773 | struct btrfs_key key; |
588 | struct btrfs_path *path; | 774 | struct btrfs_path *path; |
589 | struct btrfs_key info_key = { 0 }; | ||
590 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | 775 | struct btrfs_delayed_ref_root *delayed_refs = NULL; |
591 | struct btrfs_delayed_ref_head *head; | 776 | struct btrfs_delayed_ref_head *head; |
592 | int info_level = 0; | 777 | int info_level = 0; |
@@ -645,7 +830,7 @@ again: | |||
645 | btrfs_put_delayed_ref(&head->node); | 830 | btrfs_put_delayed_ref(&head->node); |
646 | goto again; | 831 | goto again; |
647 | } | 832 | } |
648 | ret = __add_delayed_refs(head, seq, &info_key, | 833 | ret = __add_delayed_refs(head, delayed_ref_seq, |
649 | &prefs_delayed); | 834 | &prefs_delayed); |
650 | if (ret) { | 835 | if (ret) { |
651 | spin_unlock(&delayed_refs->lock); | 836 | spin_unlock(&delayed_refs->lock); |
@@ -659,16 +844,17 @@ again: | |||
659 | struct extent_buffer *leaf; | 844 | struct extent_buffer *leaf; |
660 | int slot; | 845 | int slot; |
661 | 846 | ||
847 | path->slots[0]--; | ||
662 | leaf = path->nodes[0]; | 848 | leaf = path->nodes[0]; |
663 | slot = path->slots[0] - 1; | 849 | slot = path->slots[0]; |
664 | btrfs_item_key_to_cpu(leaf, &key, slot); | 850 | btrfs_item_key_to_cpu(leaf, &key, slot); |
665 | if (key.objectid == bytenr && | 851 | if (key.objectid == bytenr && |
666 | key.type == BTRFS_EXTENT_ITEM_KEY) { | 852 | key.type == BTRFS_EXTENT_ITEM_KEY) { |
667 | ret = __add_inline_refs(fs_info, path, bytenr, | 853 | ret = __add_inline_refs(fs_info, path, bytenr, |
668 | &info_key, &info_level, &prefs); | 854 | &info_level, &prefs); |
669 | if (ret) | 855 | if (ret) |
670 | goto out; | 856 | goto out; |
671 | ret = __add_keyed_refs(fs_info, path, bytenr, &info_key, | 857 | ret = __add_keyed_refs(fs_info, path, bytenr, |
672 | info_level, &prefs); | 858 | info_level, &prefs); |
673 | if (ret) | 859 | if (ret) |
674 | goto out; | 860 | goto out; |
@@ -676,21 +862,18 @@ again: | |||
676 | } | 862 | } |
677 | btrfs_release_path(path); | 863 | btrfs_release_path(path); |
678 | 864 | ||
679 | /* | ||
680 | * when adding the delayed refs above, the info_key might not have | ||
681 | * been known yet. Go over the list and replace the missing keys | ||
682 | */ | ||
683 | list_for_each_entry(ref, &prefs_delayed, list) { | ||
684 | if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0) | ||
685 | memcpy(&ref->key, &info_key, sizeof(ref->key)); | ||
686 | } | ||
687 | list_splice_init(&prefs_delayed, &prefs); | 865 | list_splice_init(&prefs_delayed, &prefs); |
688 | 866 | ||
867 | ret = __add_missing_keys(fs_info, &prefs); | ||
868 | if (ret) | ||
869 | goto out; | ||
870 | |||
689 | ret = __merge_refs(&prefs, 1); | 871 | ret = __merge_refs(&prefs, 1); |
690 | if (ret) | 872 | if (ret) |
691 | goto out; | 873 | goto out; |
692 | 874 | ||
693 | ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs); | 875 | ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, |
876 | &prefs, extent_item_pos); | ||
694 | if (ret) | 877 | if (ret) |
695 | goto out; | 878 | goto out; |
696 | 879 | ||
@@ -709,7 +892,33 @@ again: | |||
709 | BUG_ON(ret < 0); | 892 | BUG_ON(ret < 0); |
710 | } | 893 | } |
711 | if (ref->count && ref->parent) { | 894 | if (ref->count && ref->parent) { |
712 | ret = ulist_add(refs, ref->parent, 0, GFP_NOFS); | 895 | struct extent_inode_elem *eie = NULL; |
896 | if (extent_item_pos && !ref->inode_list) { | ||
897 | u32 bsz; | ||
898 | struct extent_buffer *eb; | ||
899 | bsz = btrfs_level_size(fs_info->extent_root, | ||
900 | info_level); | ||
901 | eb = read_tree_block(fs_info->extent_root, | ||
902 | ref->parent, bsz, 0); | ||
903 | BUG_ON(!eb); | ||
904 | ret = find_extent_in_eb(eb, bytenr, | ||
905 | *extent_item_pos, &eie); | ||
906 | ref->inode_list = eie; | ||
907 | free_extent_buffer(eb); | ||
908 | } | ||
909 | ret = ulist_add_merge(refs, ref->parent, | ||
910 | (unsigned long)ref->inode_list, | ||
911 | (unsigned long *)&eie, GFP_NOFS); | ||
912 | if (!ret && extent_item_pos) { | ||
913 | /* | ||
914 | * we've recorded that parent, so we must extend | ||
915 | * its inode list here | ||
916 | */ | ||
917 | BUG_ON(!eie); | ||
918 | while (eie->next) | ||
919 | eie = eie->next; | ||
920 | eie->next = ref->inode_list; | ||
921 | } | ||
713 | BUG_ON(ret < 0); | 922 | BUG_ON(ret < 0); |
714 | } | 923 | } |
715 | kfree(ref); | 924 | kfree(ref); |
@@ -734,6 +943,28 @@ out: | |||
734 | return ret; | 943 | return ret; |
735 | } | 944 | } |
736 | 945 | ||
946 | static void free_leaf_list(struct ulist *blocks) | ||
947 | { | ||
948 | struct ulist_node *node = NULL; | ||
949 | struct extent_inode_elem *eie; | ||
950 | struct extent_inode_elem *eie_next; | ||
951 | struct ulist_iterator uiter; | ||
952 | |||
953 | ULIST_ITER_INIT(&uiter); | ||
954 | while ((node = ulist_next(blocks, &uiter))) { | ||
955 | if (!node->aux) | ||
956 | continue; | ||
957 | eie = (struct extent_inode_elem *)node->aux; | ||
958 | for (; eie; eie = eie_next) { | ||
959 | eie_next = eie->next; | ||
960 | kfree(eie); | ||
961 | } | ||
962 | node->aux = 0; | ||
963 | } | ||
964 | |||
965 | ulist_free(blocks); | ||
966 | } | ||
967 | |||
737 | /* | 968 | /* |
738 | * Finds all leafs with a reference to the specified combination of bytenr and | 969 | * Finds all leafs with a reference to the specified combination of bytenr and |
739 | * offset. key_list_head will point to a list of corresponding keys (caller must | 970 | * offset. key_list_head will point to a list of corresponding keys (caller must |
@@ -744,7 +975,9 @@ out: | |||
744 | */ | 975 | */ |
745 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | 976 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, |
746 | struct btrfs_fs_info *fs_info, u64 bytenr, | 977 | struct btrfs_fs_info *fs_info, u64 bytenr, |
747 | u64 num_bytes, u64 seq, struct ulist **leafs) | 978 | u64 delayed_ref_seq, u64 time_seq, |
979 | struct ulist **leafs, | ||
980 | const u64 *extent_item_pos) | ||
748 | { | 981 | { |
749 | struct ulist *tmp; | 982 | struct ulist *tmp; |
750 | int ret; | 983 | int ret; |
@@ -758,11 +991,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
758 | return -ENOMEM; | 991 | return -ENOMEM; |
759 | } | 992 | } |
760 | 993 | ||
761 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp); | 994 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, |
995 | time_seq, *leafs, tmp, extent_item_pos); | ||
762 | ulist_free(tmp); | 996 | ulist_free(tmp); |
763 | 997 | ||
764 | if (ret < 0 && ret != -ENOENT) { | 998 | if (ret < 0 && ret != -ENOENT) { |
765 | ulist_free(*leafs); | 999 | free_leaf_list(*leafs); |
766 | return ret; | 1000 | return ret; |
767 | } | 1001 | } |
768 | 1002 | ||
@@ -784,10 +1018,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
784 | */ | 1018 | */ |
785 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 1019 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
786 | struct btrfs_fs_info *fs_info, u64 bytenr, | 1020 | struct btrfs_fs_info *fs_info, u64 bytenr, |
787 | u64 num_bytes, u64 seq, struct ulist **roots) | 1021 | u64 delayed_ref_seq, u64 time_seq, |
1022 | struct ulist **roots) | ||
788 | { | 1023 | { |
789 | struct ulist *tmp; | 1024 | struct ulist *tmp; |
790 | struct ulist_node *node = NULL; | 1025 | struct ulist_node *node = NULL; |
1026 | struct ulist_iterator uiter; | ||
791 | int ret; | 1027 | int ret; |
792 | 1028 | ||
793 | tmp = ulist_alloc(GFP_NOFS); | 1029 | tmp = ulist_alloc(GFP_NOFS); |
@@ -799,15 +1035,16 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
799 | return -ENOMEM; | 1035 | return -ENOMEM; |
800 | } | 1036 | } |
801 | 1037 | ||
1038 | ULIST_ITER_INIT(&uiter); | ||
802 | while (1) { | 1039 | while (1) { |
803 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, | 1040 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, |
804 | tmp, *roots); | 1041 | time_seq, tmp, *roots, NULL); |
805 | if (ret < 0 && ret != -ENOENT) { | 1042 | if (ret < 0 && ret != -ENOENT) { |
806 | ulist_free(tmp); | 1043 | ulist_free(tmp); |
807 | ulist_free(*roots); | 1044 | ulist_free(*roots); |
808 | return ret; | 1045 | return ret; |
809 | } | 1046 | } |
810 | node = ulist_next(tmp, node); | 1047 | node = ulist_next(tmp, &uiter); |
811 | if (!node) | 1048 | if (!node) |
812 | break; | 1049 | break; |
813 | bytenr = node->val; | 1050 | bytenr = node->val; |
@@ -1093,67 +1330,25 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | |||
1093 | return 0; | 1330 | return 0; |
1094 | } | 1331 | } |
1095 | 1332 | ||
1096 | static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical, | 1333 | static int iterate_leaf_refs(struct extent_inode_elem *inode_list, |
1097 | u64 orig_extent_item_objectid, | 1334 | u64 root, u64 extent_item_objectid, |
1098 | u64 extent_item_pos, u64 root, | ||
1099 | iterate_extent_inodes_t *iterate, void *ctx) | 1335 | iterate_extent_inodes_t *iterate, void *ctx) |
1100 | { | 1336 | { |
1101 | u64 disk_byte; | 1337 | struct extent_inode_elem *eie; |
1102 | struct btrfs_key key; | ||
1103 | struct btrfs_file_extent_item *fi; | ||
1104 | struct extent_buffer *eb; | ||
1105 | int slot; | ||
1106 | int nritems; | ||
1107 | int ret = 0; | 1338 | int ret = 0; |
1108 | int extent_type; | ||
1109 | u64 data_offset; | ||
1110 | u64 data_len; | ||
1111 | |||
1112 | eb = read_tree_block(fs_info->tree_root, logical, | ||
1113 | fs_info->tree_root->leafsize, 0); | ||
1114 | if (!eb) | ||
1115 | return -EIO; | ||
1116 | |||
1117 | /* | ||
1118 | * from the shared data ref, we only have the leaf but we need | ||
1119 | * the key. thus, we must look into all items and see that we | ||
1120 | * find one (some) with a reference to our extent item. | ||
1121 | */ | ||
1122 | nritems = btrfs_header_nritems(eb); | ||
1123 | for (slot = 0; slot < nritems; ++slot) { | ||
1124 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
1125 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
1126 | continue; | ||
1127 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
1128 | extent_type = btrfs_file_extent_type(eb, fi); | ||
1129 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
1130 | continue; | ||
1131 | /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ | ||
1132 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
1133 | if (disk_byte != orig_extent_item_objectid) | ||
1134 | continue; | ||
1135 | |||
1136 | data_offset = btrfs_file_extent_offset(eb, fi); | ||
1137 | data_len = btrfs_file_extent_num_bytes(eb, fi); | ||
1138 | |||
1139 | if (extent_item_pos < data_offset || | ||
1140 | extent_item_pos >= data_offset + data_len) | ||
1141 | continue; | ||
1142 | 1339 | ||
1340 | for (eie = inode_list; eie; eie = eie->next) { | ||
1143 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " | 1341 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " |
1144 | "root %llu\n", orig_extent_item_objectid, | 1342 | "root %llu\n", extent_item_objectid, |
1145 | key.objectid, key.offset, root); | 1343 | eie->inum, eie->offset, root); |
1146 | ret = iterate(key.objectid, | 1344 | ret = iterate(eie->inum, eie->offset, root, ctx); |
1147 | key.offset + (extent_item_pos - data_offset), | ||
1148 | root, ctx); | ||
1149 | if (ret) { | 1345 | if (ret) { |
1150 | pr_debug("stopping iteration because ret=%d\n", ret); | 1346 | pr_debug("stopping iteration for %llu due to ret=%d\n", |
1347 | extent_item_objectid, ret); | ||
1151 | break; | 1348 | break; |
1152 | } | 1349 | } |
1153 | } | 1350 | } |
1154 | 1351 | ||
1155 | free_extent_buffer(eb); | ||
1156 | |||
1157 | return ret; | 1352 | return ret; |
1158 | } | 1353 | } |
1159 | 1354 | ||
@@ -1175,7 +1370,10 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1175 | struct ulist *roots = NULL; | 1370 | struct ulist *roots = NULL; |
1176 | struct ulist_node *ref_node = NULL; | 1371 | struct ulist_node *ref_node = NULL; |
1177 | struct ulist_node *root_node = NULL; | 1372 | struct ulist_node *root_node = NULL; |
1178 | struct seq_list seq_elem; | 1373 | struct seq_list seq_elem = {}; |
1374 | struct seq_list tree_mod_seq_elem = {}; | ||
1375 | struct ulist_iterator ref_uiter; | ||
1376 | struct ulist_iterator root_uiter; | ||
1179 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | 1377 | struct btrfs_delayed_ref_root *delayed_refs = NULL; |
1180 | 1378 | ||
1181 | pr_debug("resolving all inodes for extent %llu\n", | 1379 | pr_debug("resolving all inodes for extent %llu\n", |
@@ -1192,34 +1390,41 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1192 | spin_lock(&delayed_refs->lock); | 1390 | spin_lock(&delayed_refs->lock); |
1193 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); | 1391 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); |
1194 | spin_unlock(&delayed_refs->lock); | 1392 | spin_unlock(&delayed_refs->lock); |
1393 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
1195 | } | 1394 | } |
1196 | 1395 | ||
1197 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, | 1396 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, |
1198 | extent_item_pos, seq_elem.seq, | 1397 | seq_elem.seq, tree_mod_seq_elem.seq, &refs, |
1199 | &refs); | 1398 | &extent_item_pos); |
1200 | |||
1201 | if (ret) | 1399 | if (ret) |
1202 | goto out; | 1400 | goto out; |
1203 | 1401 | ||
1204 | while (!ret && (ref_node = ulist_next(refs, ref_node))) { | 1402 | ULIST_ITER_INIT(&ref_uiter); |
1205 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1, | 1403 | while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { |
1206 | seq_elem.seq, &roots); | 1404 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, |
1405 | seq_elem.seq, | ||
1406 | tree_mod_seq_elem.seq, &roots); | ||
1207 | if (ret) | 1407 | if (ret) |
1208 | break; | 1408 | break; |
1209 | while (!ret && (root_node = ulist_next(roots, root_node))) { | 1409 | ULIST_ITER_INIT(&root_uiter); |
1210 | pr_debug("root %llu references leaf %llu\n", | 1410 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { |
1211 | root_node->val, ref_node->val); | 1411 | pr_debug("root %llu references leaf %llu, data list " |
1212 | ret = iterate_leaf_refs(fs_info, ref_node->val, | 1412 | "%#lx\n", root_node->val, ref_node->val, |
1213 | extent_item_objectid, | 1413 | ref_node->aux); |
1214 | extent_item_pos, root_node->val, | 1414 | ret = iterate_leaf_refs( |
1215 | iterate, ctx); | 1415 | (struct extent_inode_elem *)ref_node->aux, |
1416 | root_node->val, extent_item_objectid, | ||
1417 | iterate, ctx); | ||
1216 | } | 1418 | } |
1419 | ulist_free(roots); | ||
1420 | roots = NULL; | ||
1217 | } | 1421 | } |
1218 | 1422 | ||
1219 | ulist_free(refs); | 1423 | free_leaf_list(refs); |
1220 | ulist_free(roots); | 1424 | ulist_free(roots); |
1221 | out: | 1425 | out: |
1222 | if (!search_commit_root) { | 1426 | if (!search_commit_root) { |
1427 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
1223 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); | 1428 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); |
1224 | btrfs_end_transaction(trans, fs_info->extent_root); | 1429 | btrfs_end_transaction(trans, fs_info->extent_root); |
1225 | } | 1430 | } |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 57ea2e959e4d..c18d8ac7b795 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -58,7 +58,8 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | |||
58 | 58 | ||
59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
60 | struct btrfs_fs_info *fs_info, u64 bytenr, | 60 | struct btrfs_fs_info *fs_info, u64 bytenr, |
61 | u64 num_bytes, u64 seq, struct ulist **roots); | 61 | u64 delayed_ref_seq, u64 time_seq, |
62 | struct ulist **roots); | ||
62 | 63 | ||
63 | struct btrfs_data_container *init_data_container(u32 total_bytes); | 64 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
64 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | 65 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9b9b15fd5204..e616f8872e69 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -24,6 +24,20 @@ | |||
24 | #include "ordered-data.h" | 24 | #include "ordered-data.h" |
25 | #include "delayed-inode.h" | 25 | #include "delayed-inode.h" |
26 | 26 | ||
27 | /* | ||
28 | * ordered_data_close is set by truncate when a file that used | ||
29 | * to have good data has been truncated to zero. When it is set | ||
30 | * the btrfs file release call will add this inode to the | ||
31 | * ordered operations list so that we make sure to flush out any | ||
32 | * new data the application may have written before commit. | ||
33 | */ | ||
34 | #define BTRFS_INODE_ORDERED_DATA_CLOSE 0 | ||
35 | #define BTRFS_INODE_ORPHAN_META_RESERVED 1 | ||
36 | #define BTRFS_INODE_DUMMY 2 | ||
37 | #define BTRFS_INODE_IN_DEFRAG 3 | ||
38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 | ||
39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 | ||
40 | |||
27 | /* in memory btrfs inode */ | 41 | /* in memory btrfs inode */ |
28 | struct btrfs_inode { | 42 | struct btrfs_inode { |
29 | /* which subvolume this inode belongs to */ | 43 | /* which subvolume this inode belongs to */ |
@@ -57,9 +71,6 @@ struct btrfs_inode { | |||
57 | /* used to order data wrt metadata */ | 71 | /* used to order data wrt metadata */ |
58 | struct btrfs_ordered_inode_tree ordered_tree; | 72 | struct btrfs_ordered_inode_tree ordered_tree; |
59 | 73 | ||
60 | /* for keeping track of orphaned inodes */ | ||
61 | struct list_head i_orphan; | ||
62 | |||
63 | /* list of all the delalloc inodes in the FS. There are times we need | 74 | /* list of all the delalloc inodes in the FS. There are times we need |
64 | * to write all the delalloc pages to disk, and this list is used | 75 | * to write all the delalloc pages to disk, and this list is used |
65 | * to walk them all. | 76 | * to walk them all. |
@@ -78,14 +89,13 @@ struct btrfs_inode { | |||
78 | /* the space_info for where this inode's data allocations are done */ | 89 | /* the space_info for where this inode's data allocations are done */ |
79 | struct btrfs_space_info *space_info; | 90 | struct btrfs_space_info *space_info; |
80 | 91 | ||
92 | unsigned long runtime_flags; | ||
93 | |||
81 | /* full 64 bit generation number, struct vfs_inode doesn't have a big | 94 | /* full 64 bit generation number, struct vfs_inode doesn't have a big |
82 | * enough field for this. | 95 | * enough field for this. |
83 | */ | 96 | */ |
84 | u64 generation; | 97 | u64 generation; |
85 | 98 | ||
86 | /* sequence number for NFS changes */ | ||
87 | u64 sequence; | ||
88 | |||
89 | /* | 99 | /* |
90 | * transid of the trans_handle that last modified this inode | 100 | * transid of the trans_handle that last modified this inode |
91 | */ | 101 | */ |
@@ -145,22 +155,9 @@ struct btrfs_inode { | |||
145 | unsigned reserved_extents; | 155 | unsigned reserved_extents; |
146 | 156 | ||
147 | /* | 157 | /* |
148 | * ordered_data_close is set by truncate when a file that used | ||
149 | * to have good data has been truncated to zero. When it is set | ||
150 | * the btrfs file release call will add this inode to the | ||
151 | * ordered operations list so that we make sure to flush out any | ||
152 | * new data the application may have written before commit. | ||
153 | */ | ||
154 | unsigned ordered_data_close:1; | ||
155 | unsigned orphan_meta_reserved:1; | ||
156 | unsigned dummy_inode:1; | ||
157 | unsigned in_defrag:1; | ||
158 | unsigned delalloc_meta_reserved:1; | ||
159 | |||
160 | /* | ||
161 | * always compress this one file | 158 | * always compress this one file |
162 | */ | 159 | */ |
163 | unsigned force_compress:4; | 160 | unsigned force_compress; |
164 | 161 | ||
165 | struct btrfs_delayed_node *delayed_node; | 162 | struct btrfs_delayed_node *delayed_node; |
166 | 163 | ||
@@ -202,4 +199,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, | |||
202 | return false; | 199 | return false; |
203 | } | 200 | } |
204 | 201 | ||
202 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) | ||
203 | { | ||
204 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
205 | int ret = 0; | ||
206 | |||
207 | mutex_lock(&root->log_mutex); | ||
208 | if (BTRFS_I(inode)->logged_trans == generation && | ||
209 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | ||
210 | ret = 1; | ||
211 | mutex_unlock(&root->log_mutex); | ||
212 | return ret; | ||
213 | } | ||
214 | |||
205 | #endif | 215 | #endif |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index c053e90f2006..9cebb1fd6a3c 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
@@ -103,8 +103,6 @@ | |||
103 | #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 | 103 | #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 |
104 | #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, | 104 | #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, |
105 | * excluding " [...]" */ | 105 | * excluding " [...]" */ |
106 | #define BTRFSIC_BLOCK_SIZE PAGE_SIZE | ||
107 | |||
108 | #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) | 106 | #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) |
109 | 107 | ||
110 | /* | 108 | /* |
@@ -210,8 +208,9 @@ struct btrfsic_block_data_ctx { | |||
210 | u64 dev_bytenr; /* physical bytenr on device */ | 208 | u64 dev_bytenr; /* physical bytenr on device */ |
211 | u32 len; | 209 | u32 len; |
212 | struct btrfsic_dev_state *dev; | 210 | struct btrfsic_dev_state *dev; |
213 | char *data; | 211 | char **datav; |
214 | struct buffer_head *bh; /* do not use if set to NULL */ | 212 | struct page **pagev; |
213 | void *mem_to_free; | ||
215 | }; | 214 | }; |
216 | 215 | ||
217 | /* This structure is used to implement recursion without occupying | 216 | /* This structure is used to implement recursion without occupying |
@@ -243,6 +242,8 @@ struct btrfsic_state { | |||
243 | struct btrfs_root *root; | 242 | struct btrfs_root *root; |
244 | u64 max_superblock_generation; | 243 | u64 max_superblock_generation; |
245 | struct btrfsic_block *latest_superblock; | 244 | struct btrfsic_block *latest_superblock; |
245 | u32 metablock_size; | ||
246 | u32 datablock_size; | ||
246 | }; | 247 | }; |
247 | 248 | ||
248 | static void btrfsic_block_init(struct btrfsic_block *b); | 249 | static void btrfsic_block_init(struct btrfsic_block *b); |
@@ -290,8 +291,10 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
290 | static int btrfsic_process_metablock(struct btrfsic_state *state, | 291 | static int btrfsic_process_metablock(struct btrfsic_state *state, |
291 | struct btrfsic_block *block, | 292 | struct btrfsic_block *block, |
292 | struct btrfsic_block_data_ctx *block_ctx, | 293 | struct btrfsic_block_data_ctx *block_ctx, |
293 | struct btrfs_header *hdr, | ||
294 | int limit_nesting, int force_iodone_flag); | 294 | int limit_nesting, int force_iodone_flag); |
295 | static void btrfsic_read_from_block_data( | ||
296 | struct btrfsic_block_data_ctx *block_ctx, | ||
297 | void *dst, u32 offset, size_t len); | ||
295 | static int btrfsic_create_link_to_next_block( | 298 | static int btrfsic_create_link_to_next_block( |
296 | struct btrfsic_state *state, | 299 | struct btrfsic_state *state, |
297 | struct btrfsic_block *block, | 300 | struct btrfsic_block *block, |
@@ -318,12 +321,13 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); | |||
318 | static int btrfsic_read_block(struct btrfsic_state *state, | 321 | static int btrfsic_read_block(struct btrfsic_state *state, |
319 | struct btrfsic_block_data_ctx *block_ctx); | 322 | struct btrfsic_block_data_ctx *block_ctx); |
320 | static void btrfsic_dump_database(struct btrfsic_state *state); | 323 | static void btrfsic_dump_database(struct btrfsic_state *state); |
324 | static void btrfsic_complete_bio_end_io(struct bio *bio, int err); | ||
321 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | 325 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, |
322 | const u8 *data, unsigned int size); | 326 | char **datav, unsigned int num_pages); |
323 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | 327 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, |
324 | u64 dev_bytenr, u8 *mapped_data, | 328 | u64 dev_bytenr, char **mapped_datav, |
325 | unsigned int len, struct bio *bio, | 329 | unsigned int num_pages, |
326 | int *bio_is_patched, | 330 | struct bio *bio, int *bio_is_patched, |
327 | struct buffer_head *bh, | 331 | struct buffer_head *bh, |
328 | int submit_bio_bh_rw); | 332 | int submit_bio_bh_rw); |
329 | static int btrfsic_process_written_superblock( | 333 | static int btrfsic_process_written_superblock( |
@@ -375,7 +379,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup( | |||
375 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | 379 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, |
376 | u64 bytenr, | 380 | u64 bytenr, |
377 | struct btrfsic_dev_state *dev_state, | 381 | struct btrfsic_dev_state *dev_state, |
378 | u64 dev_bytenr, char *data); | 382 | u64 dev_bytenr); |
379 | 383 | ||
380 | static struct mutex btrfsic_mutex; | 384 | static struct mutex btrfsic_mutex; |
381 | static int btrfsic_is_initialized; | 385 | static int btrfsic_is_initialized; |
@@ -651,7 +655,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
651 | int pass; | 655 | int pass; |
652 | 656 | ||
653 | BUG_ON(NULL == state); | 657 | BUG_ON(NULL == state); |
654 | selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS); | 658 | selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); |
655 | if (NULL == selected_super) { | 659 | if (NULL == selected_super) { |
656 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | 660 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); |
657 | return -1; | 661 | return -1; |
@@ -718,7 +722,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
718 | 722 | ||
719 | num_copies = | 723 | num_copies = |
720 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 724 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
721 | next_bytenr, PAGE_SIZE); | 725 | next_bytenr, state->metablock_size); |
722 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 726 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
723 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 727 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
724 | (unsigned long long)next_bytenr, num_copies); | 728 | (unsigned long long)next_bytenr, num_copies); |
@@ -727,9 +731,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
727 | struct btrfsic_block *next_block; | 731 | struct btrfsic_block *next_block; |
728 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | 732 | struct btrfsic_block_data_ctx tmp_next_block_ctx; |
729 | struct btrfsic_block_link *l; | 733 | struct btrfsic_block_link *l; |
730 | struct btrfs_header *hdr; | ||
731 | 734 | ||
732 | ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 735 | ret = btrfsic_map_block(state, next_bytenr, |
736 | state->metablock_size, | ||
733 | &tmp_next_block_ctx, | 737 | &tmp_next_block_ctx, |
734 | mirror_num); | 738 | mirror_num); |
735 | if (ret) { | 739 | if (ret) { |
@@ -758,7 +762,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
758 | BUG_ON(NULL == l); | 762 | BUG_ON(NULL == l); |
759 | 763 | ||
760 | ret = btrfsic_read_block(state, &tmp_next_block_ctx); | 764 | ret = btrfsic_read_block(state, &tmp_next_block_ctx); |
761 | if (ret < (int)BTRFSIC_BLOCK_SIZE) { | 765 | if (ret < (int)PAGE_CACHE_SIZE) { |
762 | printk(KERN_INFO | 766 | printk(KERN_INFO |
763 | "btrfsic: read @logical %llu failed!\n", | 767 | "btrfsic: read @logical %llu failed!\n", |
764 | (unsigned long long) | 768 | (unsigned long long) |
@@ -768,11 +772,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
768 | return -1; | 772 | return -1; |
769 | } | 773 | } |
770 | 774 | ||
771 | hdr = (struct btrfs_header *)tmp_next_block_ctx.data; | ||
772 | ret = btrfsic_process_metablock(state, | 775 | ret = btrfsic_process_metablock(state, |
773 | next_block, | 776 | next_block, |
774 | &tmp_next_block_ctx, | 777 | &tmp_next_block_ctx, |
775 | hdr, | ||
776 | BTRFS_MAX_LEVEL + 3, 1); | 778 | BTRFS_MAX_LEVEL + 3, 1); |
777 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | 779 | btrfsic_release_block_ctx(&tmp_next_block_ctx); |
778 | } | 780 | } |
@@ -799,7 +801,10 @@ static int btrfsic_process_superblock_dev_mirror( | |||
799 | 801 | ||
800 | /* super block bytenr is always the unmapped device bytenr */ | 802 | /* super block bytenr is always the unmapped device bytenr */ |
801 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); | 803 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); |
802 | bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096); | 804 | if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) |
805 | return -1; | ||
806 | bh = __bread(superblock_bdev, dev_bytenr / 4096, | ||
807 | BTRFS_SUPER_INFO_SIZE); | ||
803 | if (NULL == bh) | 808 | if (NULL == bh) |
804 | return -1; | 809 | return -1; |
805 | super_tmp = (struct btrfs_super_block *) | 810 | super_tmp = (struct btrfs_super_block *) |
@@ -808,7 +813,10 @@ static int btrfsic_process_superblock_dev_mirror( | |||
808 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || | 813 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || |
809 | strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, | 814 | strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, |
810 | sizeof(super_tmp->magic)) || | 815 | sizeof(super_tmp->magic)) || |
811 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) { | 816 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || |
817 | btrfs_super_nodesize(super_tmp) != state->metablock_size || | ||
818 | btrfs_super_leafsize(super_tmp) != state->metablock_size || | ||
819 | btrfs_super_sectorsize(super_tmp) != state->datablock_size) { | ||
812 | brelse(bh); | 820 | brelse(bh); |
813 | return 0; | 821 | return 0; |
814 | } | 822 | } |
@@ -893,7 +901,7 @@ static int btrfsic_process_superblock_dev_mirror( | |||
893 | 901 | ||
894 | num_copies = | 902 | num_copies = |
895 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 903 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
896 | next_bytenr, PAGE_SIZE); | 904 | next_bytenr, state->metablock_size); |
897 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 905 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
898 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 906 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
899 | (unsigned long long)next_bytenr, num_copies); | 907 | (unsigned long long)next_bytenr, num_copies); |
@@ -902,7 +910,8 @@ static int btrfsic_process_superblock_dev_mirror( | |||
902 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | 910 | struct btrfsic_block_data_ctx tmp_next_block_ctx; |
903 | struct btrfsic_block_link *l; | 911 | struct btrfsic_block_link *l; |
904 | 912 | ||
905 | if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 913 | if (btrfsic_map_block(state, next_bytenr, |
914 | state->metablock_size, | ||
906 | &tmp_next_block_ctx, | 915 | &tmp_next_block_ctx, |
907 | mirror_num)) { | 916 | mirror_num)) { |
908 | printk(KERN_INFO "btrfsic: btrfsic_map_block(" | 917 | printk(KERN_INFO "btrfsic: btrfsic_map_block(" |
@@ -966,13 +975,15 @@ static int btrfsic_process_metablock( | |||
966 | struct btrfsic_state *state, | 975 | struct btrfsic_state *state, |
967 | struct btrfsic_block *const first_block, | 976 | struct btrfsic_block *const first_block, |
968 | struct btrfsic_block_data_ctx *const first_block_ctx, | 977 | struct btrfsic_block_data_ctx *const first_block_ctx, |
969 | struct btrfs_header *const first_hdr, | ||
970 | int first_limit_nesting, int force_iodone_flag) | 978 | int first_limit_nesting, int force_iodone_flag) |
971 | { | 979 | { |
972 | struct btrfsic_stack_frame initial_stack_frame = { 0 }; | 980 | struct btrfsic_stack_frame initial_stack_frame = { 0 }; |
973 | struct btrfsic_stack_frame *sf; | 981 | struct btrfsic_stack_frame *sf; |
974 | struct btrfsic_stack_frame *next_stack; | 982 | struct btrfsic_stack_frame *next_stack; |
983 | struct btrfs_header *const first_hdr = | ||
984 | (struct btrfs_header *)first_block_ctx->datav[0]; | ||
975 | 985 | ||
986 | BUG_ON(!first_hdr); | ||
976 | sf = &initial_stack_frame; | 987 | sf = &initial_stack_frame; |
977 | sf->error = 0; | 988 | sf->error = 0; |
978 | sf->i = -1; | 989 | sf->i = -1; |
@@ -1012,21 +1023,47 @@ continue_with_current_leaf_stack_frame: | |||
1012 | } | 1023 | } |
1013 | 1024 | ||
1014 | if (sf->i < sf->nr) { | 1025 | if (sf->i < sf->nr) { |
1015 | struct btrfs_item *disk_item = leafhdr->items + sf->i; | 1026 | struct btrfs_item disk_item; |
1016 | struct btrfs_disk_key *disk_key = &disk_item->key; | 1027 | u32 disk_item_offset = |
1028 | (uintptr_t)(leafhdr->items + sf->i) - | ||
1029 | (uintptr_t)leafhdr; | ||
1030 | struct btrfs_disk_key *disk_key; | ||
1017 | u8 type; | 1031 | u8 type; |
1018 | const u32 item_offset = le32_to_cpu(disk_item->offset); | 1032 | u32 item_offset; |
1019 | 1033 | ||
1034 | if (disk_item_offset + sizeof(struct btrfs_item) > | ||
1035 | sf->block_ctx->len) { | ||
1036 | leaf_item_out_of_bounce_error: | ||
1037 | printk(KERN_INFO | ||
1038 | "btrfsic: leaf item out of bounce at logical %llu, dev %s\n", | ||
1039 | sf->block_ctx->start, | ||
1040 | sf->block_ctx->dev->name); | ||
1041 | goto one_stack_frame_backwards; | ||
1042 | } | ||
1043 | btrfsic_read_from_block_data(sf->block_ctx, | ||
1044 | &disk_item, | ||
1045 | disk_item_offset, | ||
1046 | sizeof(struct btrfs_item)); | ||
1047 | item_offset = le32_to_cpu(disk_item.offset); | ||
1048 | disk_key = &disk_item.key; | ||
1020 | type = disk_key->type; | 1049 | type = disk_key->type; |
1021 | 1050 | ||
1022 | if (BTRFS_ROOT_ITEM_KEY == type) { | 1051 | if (BTRFS_ROOT_ITEM_KEY == type) { |
1023 | const struct btrfs_root_item *const root_item = | 1052 | struct btrfs_root_item root_item; |
1024 | (struct btrfs_root_item *) | 1053 | u32 root_item_offset; |
1025 | (sf->block_ctx->data + | 1054 | u64 next_bytenr; |
1026 | offsetof(struct btrfs_leaf, items) + | 1055 | |
1027 | item_offset); | 1056 | root_item_offset = item_offset + |
1028 | const u64 next_bytenr = | 1057 | offsetof(struct btrfs_leaf, items); |
1029 | le64_to_cpu(root_item->bytenr); | 1058 | if (root_item_offset + |
1059 | sizeof(struct btrfs_root_item) > | ||
1060 | sf->block_ctx->len) | ||
1061 | goto leaf_item_out_of_bounce_error; | ||
1062 | btrfsic_read_from_block_data( | ||
1063 | sf->block_ctx, &root_item, | ||
1064 | root_item_offset, | ||
1065 | sizeof(struct btrfs_root_item)); | ||
1066 | next_bytenr = le64_to_cpu(root_item.bytenr); | ||
1030 | 1067 | ||
1031 | sf->error = | 1068 | sf->error = |
1032 | btrfsic_create_link_to_next_block( | 1069 | btrfsic_create_link_to_next_block( |
@@ -1041,7 +1078,7 @@ continue_with_current_leaf_stack_frame: | |||
1041 | &sf->num_copies, | 1078 | &sf->num_copies, |
1042 | &sf->mirror_num, | 1079 | &sf->mirror_num, |
1043 | disk_key, | 1080 | disk_key, |
1044 | le64_to_cpu(root_item-> | 1081 | le64_to_cpu(root_item. |
1045 | generation)); | 1082 | generation)); |
1046 | if (sf->error) | 1083 | if (sf->error) |
1047 | goto one_stack_frame_backwards; | 1084 | goto one_stack_frame_backwards; |
@@ -1049,7 +1086,7 @@ continue_with_current_leaf_stack_frame: | |||
1049 | if (NULL != sf->next_block) { | 1086 | if (NULL != sf->next_block) { |
1050 | struct btrfs_header *const next_hdr = | 1087 | struct btrfs_header *const next_hdr = |
1051 | (struct btrfs_header *) | 1088 | (struct btrfs_header *) |
1052 | sf->next_block_ctx.data; | 1089 | sf->next_block_ctx.datav[0]; |
1053 | 1090 | ||
1054 | next_stack = | 1091 | next_stack = |
1055 | btrfsic_stack_frame_alloc(); | 1092 | btrfsic_stack_frame_alloc(); |
@@ -1111,10 +1148,24 @@ continue_with_current_node_stack_frame: | |||
1111 | } | 1148 | } |
1112 | 1149 | ||
1113 | if (sf->i < sf->nr) { | 1150 | if (sf->i < sf->nr) { |
1114 | struct btrfs_key_ptr *disk_key_ptr = | 1151 | struct btrfs_key_ptr key_ptr; |
1115 | nodehdr->ptrs + sf->i; | 1152 | u32 key_ptr_offset; |
1116 | const u64 next_bytenr = | 1153 | u64 next_bytenr; |
1117 | le64_to_cpu(disk_key_ptr->blockptr); | 1154 | |
1155 | key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) - | ||
1156 | (uintptr_t)nodehdr; | ||
1157 | if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > | ||
1158 | sf->block_ctx->len) { | ||
1159 | printk(KERN_INFO | ||
1160 | "btrfsic: node item out of bounce at logical %llu, dev %s\n", | ||
1161 | sf->block_ctx->start, | ||
1162 | sf->block_ctx->dev->name); | ||
1163 | goto one_stack_frame_backwards; | ||
1164 | } | ||
1165 | btrfsic_read_from_block_data( | ||
1166 | sf->block_ctx, &key_ptr, key_ptr_offset, | ||
1167 | sizeof(struct btrfs_key_ptr)); | ||
1168 | next_bytenr = le64_to_cpu(key_ptr.blockptr); | ||
1118 | 1169 | ||
1119 | sf->error = btrfsic_create_link_to_next_block( | 1170 | sf->error = btrfsic_create_link_to_next_block( |
1120 | state, | 1171 | state, |
@@ -1127,15 +1178,15 @@ continue_with_current_node_stack_frame: | |||
1127 | force_iodone_flag, | 1178 | force_iodone_flag, |
1128 | &sf->num_copies, | 1179 | &sf->num_copies, |
1129 | &sf->mirror_num, | 1180 | &sf->mirror_num, |
1130 | &disk_key_ptr->key, | 1181 | &key_ptr.key, |
1131 | le64_to_cpu(disk_key_ptr->generation)); | 1182 | le64_to_cpu(key_ptr.generation)); |
1132 | if (sf->error) | 1183 | if (sf->error) |
1133 | goto one_stack_frame_backwards; | 1184 | goto one_stack_frame_backwards; |
1134 | 1185 | ||
1135 | if (NULL != sf->next_block) { | 1186 | if (NULL != sf->next_block) { |
1136 | struct btrfs_header *const next_hdr = | 1187 | struct btrfs_header *const next_hdr = |
1137 | (struct btrfs_header *) | 1188 | (struct btrfs_header *) |
1138 | sf->next_block_ctx.data; | 1189 | sf->next_block_ctx.datav[0]; |
1139 | 1190 | ||
1140 | next_stack = btrfsic_stack_frame_alloc(); | 1191 | next_stack = btrfsic_stack_frame_alloc(); |
1141 | if (NULL == next_stack) | 1192 | if (NULL == next_stack) |
@@ -1181,6 +1232,35 @@ one_stack_frame_backwards: | |||
1181 | return sf->error; | 1232 | return sf->error; |
1182 | } | 1233 | } |
1183 | 1234 | ||
1235 | static void btrfsic_read_from_block_data( | ||
1236 | struct btrfsic_block_data_ctx *block_ctx, | ||
1237 | void *dstv, u32 offset, size_t len) | ||
1238 | { | ||
1239 | size_t cur; | ||
1240 | size_t offset_in_page; | ||
1241 | char *kaddr; | ||
1242 | char *dst = (char *)dstv; | ||
1243 | size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
1244 | unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; | ||
1245 | |||
1246 | WARN_ON(offset + len > block_ctx->len); | ||
1247 | offset_in_page = (start_offset + offset) & | ||
1248 | ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
1249 | |||
1250 | while (len > 0) { | ||
1251 | cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); | ||
1252 | BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >> | ||
1253 | PAGE_CACHE_SHIFT); | ||
1254 | kaddr = block_ctx->datav[i]; | ||
1255 | memcpy(dst, kaddr + offset_in_page, cur); | ||
1256 | |||
1257 | dst += cur; | ||
1258 | len -= cur; | ||
1259 | offset_in_page = 0; | ||
1260 | i++; | ||
1261 | } | ||
1262 | } | ||
1263 | |||
1184 | static int btrfsic_create_link_to_next_block( | 1264 | static int btrfsic_create_link_to_next_block( |
1185 | struct btrfsic_state *state, | 1265 | struct btrfsic_state *state, |
1186 | struct btrfsic_block *block, | 1266 | struct btrfsic_block *block, |
@@ -1204,7 +1284,7 @@ static int btrfsic_create_link_to_next_block( | |||
1204 | if (0 == *num_copiesp) { | 1284 | if (0 == *num_copiesp) { |
1205 | *num_copiesp = | 1285 | *num_copiesp = |
1206 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1286 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
1207 | next_bytenr, PAGE_SIZE); | 1287 | next_bytenr, state->metablock_size); |
1208 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1288 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
1209 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1289 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
1210 | (unsigned long long)next_bytenr, *num_copiesp); | 1290 | (unsigned long long)next_bytenr, *num_copiesp); |
@@ -1219,7 +1299,7 @@ static int btrfsic_create_link_to_next_block( | |||
1219 | "btrfsic_create_link_to_next_block(mirror_num=%d)\n", | 1299 | "btrfsic_create_link_to_next_block(mirror_num=%d)\n", |
1220 | *mirror_nump); | 1300 | *mirror_nump); |
1221 | ret = btrfsic_map_block(state, next_bytenr, | 1301 | ret = btrfsic_map_block(state, next_bytenr, |
1222 | BTRFSIC_BLOCK_SIZE, | 1302 | state->metablock_size, |
1223 | next_block_ctx, *mirror_nump); | 1303 | next_block_ctx, *mirror_nump); |
1224 | if (ret) { | 1304 | if (ret) { |
1225 | printk(KERN_INFO | 1305 | printk(KERN_INFO |
@@ -1314,7 +1394,7 @@ static int btrfsic_create_link_to_next_block( | |||
1314 | 1394 | ||
1315 | if (limit_nesting > 0 && did_alloc_block_link) { | 1395 | if (limit_nesting > 0 && did_alloc_block_link) { |
1316 | ret = btrfsic_read_block(state, next_block_ctx); | 1396 | ret = btrfsic_read_block(state, next_block_ctx); |
1317 | if (ret < (int)BTRFSIC_BLOCK_SIZE) { | 1397 | if (ret < (int)next_block_ctx->len) { |
1318 | printk(KERN_INFO | 1398 | printk(KERN_INFO |
1319 | "btrfsic: read block @logical %llu failed!\n", | 1399 | "btrfsic: read block @logical %llu failed!\n", |
1320 | (unsigned long long)next_bytenr); | 1400 | (unsigned long long)next_bytenr); |
@@ -1339,43 +1419,74 @@ static int btrfsic_handle_extent_data( | |||
1339 | u32 item_offset, int force_iodone_flag) | 1419 | u32 item_offset, int force_iodone_flag) |
1340 | { | 1420 | { |
1341 | int ret; | 1421 | int ret; |
1342 | struct btrfs_file_extent_item *file_extent_item = | 1422 | struct btrfs_file_extent_item file_extent_item; |
1343 | (struct btrfs_file_extent_item *)(block_ctx->data + | 1423 | u64 file_extent_item_offset; |
1344 | offsetof(struct btrfs_leaf, | 1424 | u64 next_bytenr; |
1345 | items) + item_offset); | 1425 | u64 num_bytes; |
1346 | u64 next_bytenr = | 1426 | u64 generation; |
1347 | le64_to_cpu(file_extent_item->disk_bytenr) + | ||
1348 | le64_to_cpu(file_extent_item->offset); | ||
1349 | u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes); | ||
1350 | u64 generation = le64_to_cpu(file_extent_item->generation); | ||
1351 | struct btrfsic_block_link *l; | 1427 | struct btrfsic_block_link *l; |
1352 | 1428 | ||
1429 | file_extent_item_offset = offsetof(struct btrfs_leaf, items) + | ||
1430 | item_offset; | ||
1431 | if (file_extent_item_offset + | ||
1432 | offsetof(struct btrfs_file_extent_item, disk_num_bytes) > | ||
1433 | block_ctx->len) { | ||
1434 | printk(KERN_INFO | ||
1435 | "btrfsic: file item out of bounce at logical %llu, dev %s\n", | ||
1436 | block_ctx->start, block_ctx->dev->name); | ||
1437 | return -1; | ||
1438 | } | ||
1439 | |||
1440 | btrfsic_read_from_block_data(block_ctx, &file_extent_item, | ||
1441 | file_extent_item_offset, | ||
1442 | offsetof(struct btrfs_file_extent_item, disk_num_bytes)); | ||
1443 | if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || | ||
1444 | ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { | ||
1445 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) | ||
1446 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", | ||
1447 | file_extent_item.type, | ||
1448 | (unsigned long long) | ||
1449 | le64_to_cpu(file_extent_item.disk_bytenr)); | ||
1450 | return 0; | ||
1451 | } | ||
1452 | |||
1453 | if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > | ||
1454 | block_ctx->len) { | ||
1455 | printk(KERN_INFO | ||
1456 | "btrfsic: file item out of bounce at logical %llu, dev %s\n", | ||
1457 | block_ctx->start, block_ctx->dev->name); | ||
1458 | return -1; | ||
1459 | } | ||
1460 | btrfsic_read_from_block_data(block_ctx, &file_extent_item, | ||
1461 | file_extent_item_offset, | ||
1462 | sizeof(struct btrfs_file_extent_item)); | ||
1463 | next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + | ||
1464 | le64_to_cpu(file_extent_item.offset); | ||
1465 | generation = le64_to_cpu(file_extent_item.generation); | ||
1466 | num_bytes = le64_to_cpu(file_extent_item.num_bytes); | ||
1467 | generation = le64_to_cpu(file_extent_item.generation); | ||
1468 | |||
1353 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) | 1469 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) |
1354 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," | 1470 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," |
1355 | " offset = %llu, num_bytes = %llu\n", | 1471 | " offset = %llu, num_bytes = %llu\n", |
1356 | file_extent_item->type, | 1472 | file_extent_item.type, |
1357 | (unsigned long long) | 1473 | (unsigned long long) |
1358 | le64_to_cpu(file_extent_item->disk_bytenr), | 1474 | le64_to_cpu(file_extent_item.disk_bytenr), |
1359 | (unsigned long long) | 1475 | (unsigned long long)le64_to_cpu(file_extent_item.offset), |
1360 | le64_to_cpu(file_extent_item->offset), | 1476 | (unsigned long long)num_bytes); |
1361 | (unsigned long long) | ||
1362 | le64_to_cpu(file_extent_item->num_bytes)); | ||
1363 | if (BTRFS_FILE_EXTENT_REG != file_extent_item->type || | ||
1364 | ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr)) | ||
1365 | return 0; | ||
1366 | while (num_bytes > 0) { | 1477 | while (num_bytes > 0) { |
1367 | u32 chunk_len; | 1478 | u32 chunk_len; |
1368 | int num_copies; | 1479 | int num_copies; |
1369 | int mirror_num; | 1480 | int mirror_num; |
1370 | 1481 | ||
1371 | if (num_bytes > BTRFSIC_BLOCK_SIZE) | 1482 | if (num_bytes > state->datablock_size) |
1372 | chunk_len = BTRFSIC_BLOCK_SIZE; | 1483 | chunk_len = state->datablock_size; |
1373 | else | 1484 | else |
1374 | chunk_len = num_bytes; | 1485 | chunk_len = num_bytes; |
1375 | 1486 | ||
1376 | num_copies = | 1487 | num_copies = |
1377 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1488 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
1378 | next_bytenr, PAGE_SIZE); | 1489 | next_bytenr, state->datablock_size); |
1379 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1490 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
1380 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1491 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
1381 | (unsigned long long)next_bytenr, num_copies); | 1492 | (unsigned long long)next_bytenr, num_copies); |
@@ -1475,8 +1586,9 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | |||
1475 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; | 1586 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; |
1476 | block_ctx_out->start = bytenr; | 1587 | block_ctx_out->start = bytenr; |
1477 | block_ctx_out->len = len; | 1588 | block_ctx_out->len = len; |
1478 | block_ctx_out->data = NULL; | 1589 | block_ctx_out->datav = NULL; |
1479 | block_ctx_out->bh = NULL; | 1590 | block_ctx_out->pagev = NULL; |
1591 | block_ctx_out->mem_to_free = NULL; | ||
1480 | 1592 | ||
1481 | if (0 == ret) | 1593 | if (0 == ret) |
1482 | kfree(multi); | 1594 | kfree(multi); |
@@ -1496,8 +1608,9 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, | |||
1496 | block_ctx_out->dev_bytenr = bytenr; | 1608 | block_ctx_out->dev_bytenr = bytenr; |
1497 | block_ctx_out->start = bytenr; | 1609 | block_ctx_out->start = bytenr; |
1498 | block_ctx_out->len = len; | 1610 | block_ctx_out->len = len; |
1499 | block_ctx_out->data = NULL; | 1611 | block_ctx_out->datav = NULL; |
1500 | block_ctx_out->bh = NULL; | 1612 | block_ctx_out->pagev = NULL; |
1613 | block_ctx_out->mem_to_free = NULL; | ||
1501 | if (NULL != block_ctx_out->dev) { | 1614 | if (NULL != block_ctx_out->dev) { |
1502 | return 0; | 1615 | return 0; |
1503 | } else { | 1616 | } else { |
@@ -1508,38 +1621,127 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, | |||
1508 | 1621 | ||
1509 | static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) | 1622 | static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) |
1510 | { | 1623 | { |
1511 | if (NULL != block_ctx->bh) { | 1624 | if (block_ctx->mem_to_free) { |
1512 | brelse(block_ctx->bh); | 1625 | unsigned int num_pages; |
1513 | block_ctx->bh = NULL; | 1626 | |
1627 | BUG_ON(!block_ctx->datav); | ||
1628 | BUG_ON(!block_ctx->pagev); | ||
1629 | num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> | ||
1630 | PAGE_CACHE_SHIFT; | ||
1631 | while (num_pages > 0) { | ||
1632 | num_pages--; | ||
1633 | if (block_ctx->datav[num_pages]) { | ||
1634 | kunmap(block_ctx->pagev[num_pages]); | ||
1635 | block_ctx->datav[num_pages] = NULL; | ||
1636 | } | ||
1637 | if (block_ctx->pagev[num_pages]) { | ||
1638 | __free_page(block_ctx->pagev[num_pages]); | ||
1639 | block_ctx->pagev[num_pages] = NULL; | ||
1640 | } | ||
1641 | } | ||
1642 | |||
1643 | kfree(block_ctx->mem_to_free); | ||
1644 | block_ctx->mem_to_free = NULL; | ||
1645 | block_ctx->pagev = NULL; | ||
1646 | block_ctx->datav = NULL; | ||
1514 | } | 1647 | } |
1515 | } | 1648 | } |
1516 | 1649 | ||
1517 | static int btrfsic_read_block(struct btrfsic_state *state, | 1650 | static int btrfsic_read_block(struct btrfsic_state *state, |
1518 | struct btrfsic_block_data_ctx *block_ctx) | 1651 | struct btrfsic_block_data_ctx *block_ctx) |
1519 | { | 1652 | { |
1520 | block_ctx->bh = NULL; | 1653 | unsigned int num_pages; |
1521 | if (block_ctx->dev_bytenr & 4095) { | 1654 | unsigned int i; |
1655 | u64 dev_bytenr; | ||
1656 | int ret; | ||
1657 | |||
1658 | BUG_ON(block_ctx->datav); | ||
1659 | BUG_ON(block_ctx->pagev); | ||
1660 | BUG_ON(block_ctx->mem_to_free); | ||
1661 | if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
1522 | printk(KERN_INFO | 1662 | printk(KERN_INFO |
1523 | "btrfsic: read_block() with unaligned bytenr %llu\n", | 1663 | "btrfsic: read_block() with unaligned bytenr %llu\n", |
1524 | (unsigned long long)block_ctx->dev_bytenr); | 1664 | (unsigned long long)block_ctx->dev_bytenr); |
1525 | return -1; | 1665 | return -1; |
1526 | } | 1666 | } |
1527 | if (block_ctx->len > 4096) { | 1667 | |
1528 | printk(KERN_INFO | 1668 | num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> |
1529 | "btrfsic: read_block() with too huge size %d\n", | 1669 | PAGE_CACHE_SHIFT; |
1530 | block_ctx->len); | 1670 | block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) + |
1671 | sizeof(*block_ctx->pagev)) * | ||
1672 | num_pages, GFP_NOFS); | ||
1673 | if (!block_ctx->mem_to_free) | ||
1531 | return -1; | 1674 | return -1; |
1675 | block_ctx->datav = block_ctx->mem_to_free; | ||
1676 | block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); | ||
1677 | for (i = 0; i < num_pages; i++) { | ||
1678 | block_ctx->pagev[i] = alloc_page(GFP_NOFS); | ||
1679 | if (!block_ctx->pagev[i]) | ||
1680 | return -1; | ||
1532 | } | 1681 | } |
1533 | 1682 | ||
1534 | block_ctx->bh = __bread(block_ctx->dev->bdev, | 1683 | dev_bytenr = block_ctx->dev_bytenr; |
1535 | block_ctx->dev_bytenr >> 12, 4096); | 1684 | for (i = 0; i < num_pages;) { |
1536 | if (NULL == block_ctx->bh) | 1685 | struct bio *bio; |
1537 | return -1; | 1686 | unsigned int j; |
1538 | block_ctx->data = block_ctx->bh->b_data; | 1687 | DECLARE_COMPLETION_ONSTACK(complete); |
1688 | |||
1689 | bio = bio_alloc(GFP_NOFS, num_pages - i); | ||
1690 | if (!bio) { | ||
1691 | printk(KERN_INFO | ||
1692 | "btrfsic: bio_alloc() for %u pages failed!\n", | ||
1693 | num_pages - i); | ||
1694 | return -1; | ||
1695 | } | ||
1696 | bio->bi_bdev = block_ctx->dev->bdev; | ||
1697 | bio->bi_sector = dev_bytenr >> 9; | ||
1698 | bio->bi_end_io = btrfsic_complete_bio_end_io; | ||
1699 | bio->bi_private = &complete; | ||
1700 | |||
1701 | for (j = i; j < num_pages; j++) { | ||
1702 | ret = bio_add_page(bio, block_ctx->pagev[j], | ||
1703 | PAGE_CACHE_SIZE, 0); | ||
1704 | if (PAGE_CACHE_SIZE != ret) | ||
1705 | break; | ||
1706 | } | ||
1707 | if (j == i) { | ||
1708 | printk(KERN_INFO | ||
1709 | "btrfsic: error, failed to add a single page!\n"); | ||
1710 | return -1; | ||
1711 | } | ||
1712 | submit_bio(READ, bio); | ||
1713 | |||
1714 | /* this will also unplug the queue */ | ||
1715 | wait_for_completion(&complete); | ||
1716 | |||
1717 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | ||
1718 | printk(KERN_INFO | ||
1719 | "btrfsic: read error at logical %llu dev %s!\n", | ||
1720 | block_ctx->start, block_ctx->dev->name); | ||
1721 | bio_put(bio); | ||
1722 | return -1; | ||
1723 | } | ||
1724 | bio_put(bio); | ||
1725 | dev_bytenr += (j - i) * PAGE_CACHE_SIZE; | ||
1726 | i = j; | ||
1727 | } | ||
1728 | for (i = 0; i < num_pages; i++) { | ||
1729 | block_ctx->datav[i] = kmap(block_ctx->pagev[i]); | ||
1730 | if (!block_ctx->datav[i]) { | ||
1731 | printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n", | ||
1732 | block_ctx->dev->name); | ||
1733 | return -1; | ||
1734 | } | ||
1735 | } | ||
1539 | 1736 | ||
1540 | return block_ctx->len; | 1737 | return block_ctx->len; |
1541 | } | 1738 | } |
1542 | 1739 | ||
1740 | static void btrfsic_complete_bio_end_io(struct bio *bio, int err) | ||
1741 | { | ||
1742 | complete((struct completion *)bio->bi_private); | ||
1743 | } | ||
1744 | |||
1543 | static void btrfsic_dump_database(struct btrfsic_state *state) | 1745 | static void btrfsic_dump_database(struct btrfsic_state *state) |
1544 | { | 1746 | { |
1545 | struct list_head *elem_all; | 1747 | struct list_head *elem_all; |
@@ -1617,32 +1819,39 @@ static void btrfsic_dump_database(struct btrfsic_state *state) | |||
1617 | * (note that this test fails for the super block) | 1819 | * (note that this test fails for the super block) |
1618 | */ | 1820 | */ |
1619 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | 1821 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, |
1620 | const u8 *data, unsigned int size) | 1822 | char **datav, unsigned int num_pages) |
1621 | { | 1823 | { |
1622 | struct btrfs_header *h; | 1824 | struct btrfs_header *h; |
1623 | u8 csum[BTRFS_CSUM_SIZE]; | 1825 | u8 csum[BTRFS_CSUM_SIZE]; |
1624 | u32 crc = ~(u32)0; | 1826 | u32 crc = ~(u32)0; |
1625 | int fail = 0; | 1827 | unsigned int i; |
1626 | int crc_fail = 0; | ||
1627 | 1828 | ||
1628 | h = (struct btrfs_header *)data; | 1829 | if (num_pages * PAGE_CACHE_SIZE < state->metablock_size) |
1830 | return 1; /* not metadata */ | ||
1831 | num_pages = state->metablock_size >> PAGE_CACHE_SHIFT; | ||
1832 | h = (struct btrfs_header *)datav[0]; | ||
1629 | 1833 | ||
1630 | if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) | 1834 | if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) |
1631 | fail++; | 1835 | return 1; |
1836 | |||
1837 | for (i = 0; i < num_pages; i++) { | ||
1838 | u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); | ||
1839 | size_t sublen = i ? PAGE_CACHE_SIZE : | ||
1840 | (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); | ||
1632 | 1841 | ||
1633 | crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE); | 1842 | crc = crc32c(crc, data, sublen); |
1843 | } | ||
1634 | btrfs_csum_final(crc, csum); | 1844 | btrfs_csum_final(crc, csum); |
1635 | if (memcmp(csum, h->csum, state->csum_size)) | 1845 | if (memcmp(csum, h->csum, state->csum_size)) |
1636 | crc_fail++; | 1846 | return 1; |
1637 | 1847 | ||
1638 | return fail || crc_fail; | 1848 | return 0; /* is metadata */ |
1639 | } | 1849 | } |
1640 | 1850 | ||
1641 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | 1851 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, |
1642 | u64 dev_bytenr, | 1852 | u64 dev_bytenr, char **mapped_datav, |
1643 | u8 *mapped_data, unsigned int len, | 1853 | unsigned int num_pages, |
1644 | struct bio *bio, | 1854 | struct bio *bio, int *bio_is_patched, |
1645 | int *bio_is_patched, | ||
1646 | struct buffer_head *bh, | 1855 | struct buffer_head *bh, |
1647 | int submit_bio_bh_rw) | 1856 | int submit_bio_bh_rw) |
1648 | { | 1857 | { |
@@ -1652,12 +1861,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1652 | int ret; | 1861 | int ret; |
1653 | struct btrfsic_state *state = dev_state->state; | 1862 | struct btrfsic_state *state = dev_state->state; |
1654 | struct block_device *bdev = dev_state->bdev; | 1863 | struct block_device *bdev = dev_state->bdev; |
1864 | unsigned int processed_len; | ||
1655 | 1865 | ||
1656 | WARN_ON(len > PAGE_SIZE); | ||
1657 | is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len)); | ||
1658 | if (NULL != bio_is_patched) | 1866 | if (NULL != bio_is_patched) |
1659 | *bio_is_patched = 0; | 1867 | *bio_is_patched = 0; |
1660 | 1868 | ||
1869 | again: | ||
1870 | if (num_pages == 0) | ||
1871 | return; | ||
1872 | |||
1873 | processed_len = 0; | ||
1874 | is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, | ||
1875 | num_pages)); | ||
1876 | |||
1661 | block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, | 1877 | block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, |
1662 | &state->block_hashtable); | 1878 | &state->block_hashtable); |
1663 | if (NULL != block) { | 1879 | if (NULL != block) { |
@@ -1667,8 +1883,16 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1667 | 1883 | ||
1668 | if (block->is_superblock) { | 1884 | if (block->is_superblock) { |
1669 | bytenr = le64_to_cpu(((struct btrfs_super_block *) | 1885 | bytenr = le64_to_cpu(((struct btrfs_super_block *) |
1670 | mapped_data)->bytenr); | 1886 | mapped_datav[0])->bytenr); |
1887 | if (num_pages * PAGE_CACHE_SIZE < | ||
1888 | BTRFS_SUPER_INFO_SIZE) { | ||
1889 | printk(KERN_INFO | ||
1890 | "btrfsic: cannot work with too short bios!\n"); | ||
1891 | return; | ||
1892 | } | ||
1671 | is_metadata = 1; | 1893 | is_metadata = 1; |
1894 | BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); | ||
1895 | processed_len = BTRFS_SUPER_INFO_SIZE; | ||
1672 | if (state->print_mask & | 1896 | if (state->print_mask & |
1673 | BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { | 1897 | BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { |
1674 | printk(KERN_INFO | 1898 | printk(KERN_INFO |
@@ -1678,12 +1902,18 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1678 | } | 1902 | } |
1679 | if (is_metadata) { | 1903 | if (is_metadata) { |
1680 | if (!block->is_superblock) { | 1904 | if (!block->is_superblock) { |
1905 | if (num_pages * PAGE_CACHE_SIZE < | ||
1906 | state->metablock_size) { | ||
1907 | printk(KERN_INFO | ||
1908 | "btrfsic: cannot work with too short bios!\n"); | ||
1909 | return; | ||
1910 | } | ||
1911 | processed_len = state->metablock_size; | ||
1681 | bytenr = le64_to_cpu(((struct btrfs_header *) | 1912 | bytenr = le64_to_cpu(((struct btrfs_header *) |
1682 | mapped_data)->bytenr); | 1913 | mapped_datav[0])->bytenr); |
1683 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, | 1914 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, |
1684 | dev_state, | 1915 | dev_state, |
1685 | dev_bytenr, | 1916 | dev_bytenr); |
1686 | mapped_data); | ||
1687 | } | 1917 | } |
1688 | if (block->logical_bytenr != bytenr) { | 1918 | if (block->logical_bytenr != bytenr) { |
1689 | printk(KERN_INFO | 1919 | printk(KERN_INFO |
@@ -1710,6 +1940,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1710 | block->mirror_num, | 1940 | block->mirror_num, |
1711 | btrfsic_get_block_type(state, block)); | 1941 | btrfsic_get_block_type(state, block)); |
1712 | } else { | 1942 | } else { |
1943 | if (num_pages * PAGE_CACHE_SIZE < | ||
1944 | state->datablock_size) { | ||
1945 | printk(KERN_INFO | ||
1946 | "btrfsic: cannot work with too short bios!\n"); | ||
1947 | return; | ||
1948 | } | ||
1949 | processed_len = state->datablock_size; | ||
1713 | bytenr = block->logical_bytenr; | 1950 | bytenr = block->logical_bytenr; |
1714 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 1951 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
1715 | printk(KERN_INFO | 1952 | printk(KERN_INFO |
@@ -1747,7 +1984,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1747 | le64_to_cpu(block->disk_key.offset), | 1984 | le64_to_cpu(block->disk_key.offset), |
1748 | (unsigned long long) | 1985 | (unsigned long long) |
1749 | le64_to_cpu(((struct btrfs_header *) | 1986 | le64_to_cpu(((struct btrfs_header *) |
1750 | mapped_data)->generation), | 1987 | mapped_datav[0])->generation), |
1751 | (unsigned long long) | 1988 | (unsigned long long) |
1752 | state->max_superblock_generation); | 1989 | state->max_superblock_generation); |
1753 | btrfsic_dump_tree(state); | 1990 | btrfsic_dump_tree(state); |
@@ -1765,10 +2002,10 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1765 | (unsigned long long)block->generation, | 2002 | (unsigned long long)block->generation, |
1766 | (unsigned long long) | 2003 | (unsigned long long) |
1767 | le64_to_cpu(((struct btrfs_header *) | 2004 | le64_to_cpu(((struct btrfs_header *) |
1768 | mapped_data)->generation)); | 2005 | mapped_datav[0])->generation)); |
1769 | /* it would not be safe to go on */ | 2006 | /* it would not be safe to go on */ |
1770 | btrfsic_dump_tree(state); | 2007 | btrfsic_dump_tree(state); |
1771 | return; | 2008 | goto continue_loop; |
1772 | } | 2009 | } |
1773 | 2010 | ||
1774 | /* | 2011 | /* |
@@ -1796,18 +2033,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1796 | } | 2033 | } |
1797 | 2034 | ||
1798 | if (block->is_superblock) | 2035 | if (block->is_superblock) |
1799 | ret = btrfsic_map_superblock(state, bytenr, len, | 2036 | ret = btrfsic_map_superblock(state, bytenr, |
2037 | processed_len, | ||
1800 | bdev, &block_ctx); | 2038 | bdev, &block_ctx); |
1801 | else | 2039 | else |
1802 | ret = btrfsic_map_block(state, bytenr, len, | 2040 | ret = btrfsic_map_block(state, bytenr, processed_len, |
1803 | &block_ctx, 0); | 2041 | &block_ctx, 0); |
1804 | if (ret) { | 2042 | if (ret) { |
1805 | printk(KERN_INFO | 2043 | printk(KERN_INFO |
1806 | "btrfsic: btrfsic_map_block(root @%llu)" | 2044 | "btrfsic: btrfsic_map_block(root @%llu)" |
1807 | " failed!\n", (unsigned long long)bytenr); | 2045 | " failed!\n", (unsigned long long)bytenr); |
1808 | return; | 2046 | goto continue_loop; |
1809 | } | 2047 | } |
1810 | block_ctx.data = mapped_data; | 2048 | block_ctx.datav = mapped_datav; |
1811 | /* the following is required in case of writes to mirrors, | 2049 | /* the following is required in case of writes to mirrors, |
1812 | * use the same that was used for the lookup */ | 2050 | * use the same that was used for the lookup */ |
1813 | block_ctx.dev = dev_state; | 2051 | block_ctx.dev = dev_state; |
@@ -1863,11 +2101,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1863 | block->logical_bytenr = bytenr; | 2101 | block->logical_bytenr = bytenr; |
1864 | block->is_metadata = 1; | 2102 | block->is_metadata = 1; |
1865 | if (block->is_superblock) { | 2103 | if (block->is_superblock) { |
2104 | BUG_ON(PAGE_CACHE_SIZE != | ||
2105 | BTRFS_SUPER_INFO_SIZE); | ||
1866 | ret = btrfsic_process_written_superblock( | 2106 | ret = btrfsic_process_written_superblock( |
1867 | state, | 2107 | state, |
1868 | block, | 2108 | block, |
1869 | (struct btrfs_super_block *) | 2109 | (struct btrfs_super_block *) |
1870 | mapped_data); | 2110 | mapped_datav[0]); |
1871 | if (state->print_mask & | 2111 | if (state->print_mask & |
1872 | BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { | 2112 | BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { |
1873 | printk(KERN_INFO | 2113 | printk(KERN_INFO |
@@ -1880,8 +2120,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1880 | state, | 2120 | state, |
1881 | block, | 2121 | block, |
1882 | &block_ctx, | 2122 | &block_ctx, |
1883 | (struct btrfs_header *) | ||
1884 | block_ctx.data, | ||
1885 | 0, 0); | 2123 | 0, 0); |
1886 | } | 2124 | } |
1887 | if (ret) | 2125 | if (ret) |
@@ -1912,26 +2150,30 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1912 | u64 bytenr; | 2150 | u64 bytenr; |
1913 | 2151 | ||
1914 | if (!is_metadata) { | 2152 | if (!is_metadata) { |
2153 | processed_len = state->datablock_size; | ||
1915 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 2154 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
1916 | printk(KERN_INFO "Written block (%s/%llu/?)" | 2155 | printk(KERN_INFO "Written block (%s/%llu/?)" |
1917 | " !found in hash table, D.\n", | 2156 | " !found in hash table, D.\n", |
1918 | dev_state->name, | 2157 | dev_state->name, |
1919 | (unsigned long long)dev_bytenr); | 2158 | (unsigned long long)dev_bytenr); |
1920 | if (!state->include_extent_data) | 2159 | if (!state->include_extent_data) { |
1921 | return; /* ignore that written D block */ | 2160 | /* ignore that written D block */ |
2161 | goto continue_loop; | ||
2162 | } | ||
1922 | 2163 | ||
1923 | /* this is getting ugly for the | 2164 | /* this is getting ugly for the |
1924 | * include_extent_data case... */ | 2165 | * include_extent_data case... */ |
1925 | bytenr = 0; /* unknown */ | 2166 | bytenr = 0; /* unknown */ |
1926 | block_ctx.start = bytenr; | 2167 | block_ctx.start = bytenr; |
1927 | block_ctx.len = len; | 2168 | block_ctx.len = processed_len; |
1928 | block_ctx.bh = NULL; | 2169 | block_ctx.mem_to_free = NULL; |
2170 | block_ctx.pagev = NULL; | ||
1929 | } else { | 2171 | } else { |
2172 | processed_len = state->metablock_size; | ||
1930 | bytenr = le64_to_cpu(((struct btrfs_header *) | 2173 | bytenr = le64_to_cpu(((struct btrfs_header *) |
1931 | mapped_data)->bytenr); | 2174 | mapped_datav[0])->bytenr); |
1932 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, | 2175 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, |
1933 | dev_bytenr, | 2176 | dev_bytenr); |
1934 | mapped_data); | ||
1935 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 2177 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
1936 | printk(KERN_INFO | 2178 | printk(KERN_INFO |
1937 | "Written block @%llu (%s/%llu/?)" | 2179 | "Written block @%llu (%s/%llu/?)" |
@@ -1940,17 +2182,17 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1940 | dev_state->name, | 2182 | dev_state->name, |
1941 | (unsigned long long)dev_bytenr); | 2183 | (unsigned long long)dev_bytenr); |
1942 | 2184 | ||
1943 | ret = btrfsic_map_block(state, bytenr, len, &block_ctx, | 2185 | ret = btrfsic_map_block(state, bytenr, processed_len, |
1944 | 0); | 2186 | &block_ctx, 0); |
1945 | if (ret) { | 2187 | if (ret) { |
1946 | printk(KERN_INFO | 2188 | printk(KERN_INFO |
1947 | "btrfsic: btrfsic_map_block(root @%llu)" | 2189 | "btrfsic: btrfsic_map_block(root @%llu)" |
1948 | " failed!\n", | 2190 | " failed!\n", |
1949 | (unsigned long long)dev_bytenr); | 2191 | (unsigned long long)dev_bytenr); |
1950 | return; | 2192 | goto continue_loop; |
1951 | } | 2193 | } |
1952 | } | 2194 | } |
1953 | block_ctx.data = mapped_data; | 2195 | block_ctx.datav = mapped_datav; |
1954 | /* the following is required in case of writes to mirrors, | 2196 | /* the following is required in case of writes to mirrors, |
1955 | * use the same that was used for the lookup */ | 2197 | * use the same that was used for the lookup */ |
1956 | block_ctx.dev = dev_state; | 2198 | block_ctx.dev = dev_state; |
@@ -1960,7 +2202,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1960 | if (NULL == block) { | 2202 | if (NULL == block) { |
1961 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | 2203 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); |
1962 | btrfsic_release_block_ctx(&block_ctx); | 2204 | btrfsic_release_block_ctx(&block_ctx); |
1963 | return; | 2205 | goto continue_loop; |
1964 | } | 2206 | } |
1965 | block->dev_state = dev_state; | 2207 | block->dev_state = dev_state; |
1966 | block->dev_bytenr = dev_bytenr; | 2208 | block->dev_bytenr = dev_bytenr; |
@@ -2020,9 +2262,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
2020 | 2262 | ||
2021 | if (is_metadata) { | 2263 | if (is_metadata) { |
2022 | ret = btrfsic_process_metablock(state, block, | 2264 | ret = btrfsic_process_metablock(state, block, |
2023 | &block_ctx, | 2265 | &block_ctx, 0, 0); |
2024 | (struct btrfs_header *) | ||
2025 | block_ctx.data, 0, 0); | ||
2026 | if (ret) | 2266 | if (ret) |
2027 | printk(KERN_INFO | 2267 | printk(KERN_INFO |
2028 | "btrfsic: process_metablock(root @%llu)" | 2268 | "btrfsic: process_metablock(root @%llu)" |
@@ -2031,6 +2271,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
2031 | } | 2271 | } |
2032 | btrfsic_release_block_ctx(&block_ctx); | 2272 | btrfsic_release_block_ctx(&block_ctx); |
2033 | } | 2273 | } |
2274 | |||
2275 | continue_loop: | ||
2276 | BUG_ON(!processed_len); | ||
2277 | dev_bytenr += processed_len; | ||
2278 | mapped_datav += processed_len >> PAGE_CACHE_SHIFT; | ||
2279 | num_pages -= processed_len >> PAGE_CACHE_SHIFT; | ||
2280 | goto again; | ||
2034 | } | 2281 | } |
2035 | 2282 | ||
2036 | static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) | 2283 | static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) |
@@ -2213,7 +2460,7 @@ static int btrfsic_process_written_superblock( | |||
2213 | 2460 | ||
2214 | num_copies = | 2461 | num_copies = |
2215 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2462 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
2216 | next_bytenr, PAGE_SIZE); | 2463 | next_bytenr, BTRFS_SUPER_INFO_SIZE); |
2217 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 2464 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
2218 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 2465 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
2219 | (unsigned long long)next_bytenr, num_copies); | 2466 | (unsigned long long)next_bytenr, num_copies); |
@@ -2224,7 +2471,8 @@ static int btrfsic_process_written_superblock( | |||
2224 | printk(KERN_INFO | 2471 | printk(KERN_INFO |
2225 | "btrfsic_process_written_superblock(" | 2472 | "btrfsic_process_written_superblock(" |
2226 | "mirror_num=%d)\n", mirror_num); | 2473 | "mirror_num=%d)\n", mirror_num); |
2227 | ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 2474 | ret = btrfsic_map_block(state, next_bytenr, |
2475 | BTRFS_SUPER_INFO_SIZE, | ||
2228 | &tmp_next_block_ctx, | 2476 | &tmp_next_block_ctx, |
2229 | mirror_num); | 2477 | mirror_num); |
2230 | if (ret) { | 2478 | if (ret) { |
@@ -2689,7 +2937,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( | |||
2689 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | 2937 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, |
2690 | u64 bytenr, | 2938 | u64 bytenr, |
2691 | struct btrfsic_dev_state *dev_state, | 2939 | struct btrfsic_dev_state *dev_state, |
2692 | u64 dev_bytenr, char *data) | 2940 | u64 dev_bytenr) |
2693 | { | 2941 | { |
2694 | int num_copies; | 2942 | int num_copies; |
2695 | int mirror_num; | 2943 | int mirror_num; |
@@ -2698,10 +2946,10 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | |||
2698 | int match = 0; | 2946 | int match = 0; |
2699 | 2947 | ||
2700 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2948 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, |
2701 | bytenr, PAGE_SIZE); | 2949 | bytenr, state->metablock_size); |
2702 | 2950 | ||
2703 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | 2951 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { |
2704 | ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, | 2952 | ret = btrfsic_map_block(state, bytenr, state->metablock_size, |
2705 | &block_ctx, mirror_num); | 2953 | &block_ctx, mirror_num); |
2706 | if (ret) { | 2954 | if (ret) { |
2707 | printk(KERN_INFO "btrfsic:" | 2955 | printk(KERN_INFO "btrfsic:" |
@@ -2727,7 +2975,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | |||
2727 | (unsigned long long)bytenr, dev_state->name, | 2975 | (unsigned long long)bytenr, dev_state->name, |
2728 | (unsigned long long)dev_bytenr); | 2976 | (unsigned long long)dev_bytenr); |
2729 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | 2977 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { |
2730 | ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, | 2978 | ret = btrfsic_map_block(state, bytenr, |
2979 | state->metablock_size, | ||
2731 | &block_ctx, mirror_num); | 2980 | &block_ctx, mirror_num); |
2732 | if (ret) | 2981 | if (ret) |
2733 | continue; | 2982 | continue; |
@@ -2781,13 +3030,13 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) | |||
2781 | (unsigned long)bh->b_size, bh->b_data, | 3030 | (unsigned long)bh->b_size, bh->b_data, |
2782 | bh->b_bdev); | 3031 | bh->b_bdev); |
2783 | btrfsic_process_written_block(dev_state, dev_bytenr, | 3032 | btrfsic_process_written_block(dev_state, dev_bytenr, |
2784 | bh->b_data, bh->b_size, NULL, | 3033 | &bh->b_data, 1, NULL, |
2785 | NULL, bh, rw); | 3034 | NULL, bh, rw); |
2786 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { | 3035 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { |
2787 | if (dev_state->state->print_mask & | 3036 | if (dev_state->state->print_mask & |
2788 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | 3037 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) |
2789 | printk(KERN_INFO | 3038 | printk(KERN_INFO |
2790 | "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n", | 3039 | "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", |
2791 | rw, bh->b_bdev); | 3040 | rw, bh->b_bdev); |
2792 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { | 3041 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { |
2793 | if ((dev_state->state->print_mask & | 3042 | if ((dev_state->state->print_mask & |
@@ -2836,6 +3085,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
2836 | unsigned int i; | 3085 | unsigned int i; |
2837 | u64 dev_bytenr; | 3086 | u64 dev_bytenr; |
2838 | int bio_is_patched; | 3087 | int bio_is_patched; |
3088 | char **mapped_datav; | ||
2839 | 3089 | ||
2840 | dev_bytenr = 512 * bio->bi_sector; | 3090 | dev_bytenr = 512 * bio->bi_sector; |
2841 | bio_is_patched = 0; | 3091 | bio_is_patched = 0; |
@@ -2848,35 +3098,46 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
2848 | (unsigned long long)dev_bytenr, | 3098 | (unsigned long long)dev_bytenr, |
2849 | bio->bi_bdev); | 3099 | bio->bi_bdev); |
2850 | 3100 | ||
3101 | mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, | ||
3102 | GFP_NOFS); | ||
3103 | if (!mapped_datav) | ||
3104 | goto leave; | ||
2851 | for (i = 0; i < bio->bi_vcnt; i++) { | 3105 | for (i = 0; i < bio->bi_vcnt; i++) { |
2852 | u8 *mapped_data; | 3106 | BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); |
2853 | 3107 | mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); | |
2854 | mapped_data = kmap(bio->bi_io_vec[i].bv_page); | 3108 | if (!mapped_datav[i]) { |
3109 | while (i > 0) { | ||
3110 | i--; | ||
3111 | kunmap(bio->bi_io_vec[i].bv_page); | ||
3112 | } | ||
3113 | kfree(mapped_datav); | ||
3114 | goto leave; | ||
3115 | } | ||
2855 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | 3116 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | |
2856 | BTRFSIC_PRINT_MASK_VERBOSE) == | 3117 | BTRFSIC_PRINT_MASK_VERBOSE) == |
2857 | (dev_state->state->print_mask & | 3118 | (dev_state->state->print_mask & |
2858 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | 3119 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | |
2859 | BTRFSIC_PRINT_MASK_VERBOSE))) | 3120 | BTRFSIC_PRINT_MASK_VERBOSE))) |
2860 | printk(KERN_INFO | 3121 | printk(KERN_INFO |
2861 | "#%u: page=%p, mapped=%p, len=%u," | 3122 | "#%u: page=%p, len=%u, offset=%u\n", |
2862 | " offset=%u\n", | ||
2863 | i, bio->bi_io_vec[i].bv_page, | 3123 | i, bio->bi_io_vec[i].bv_page, |
2864 | mapped_data, | ||
2865 | bio->bi_io_vec[i].bv_len, | 3124 | bio->bi_io_vec[i].bv_len, |
2866 | bio->bi_io_vec[i].bv_offset); | 3125 | bio->bi_io_vec[i].bv_offset); |
2867 | btrfsic_process_written_block(dev_state, dev_bytenr, | 3126 | } |
2868 | mapped_data, | 3127 | btrfsic_process_written_block(dev_state, dev_bytenr, |
2869 | bio->bi_io_vec[i].bv_len, | 3128 | mapped_datav, bio->bi_vcnt, |
2870 | bio, &bio_is_patched, | 3129 | bio, &bio_is_patched, |
2871 | NULL, rw); | 3130 | NULL, rw); |
3131 | while (i > 0) { | ||
3132 | i--; | ||
2872 | kunmap(bio->bi_io_vec[i].bv_page); | 3133 | kunmap(bio->bi_io_vec[i].bv_page); |
2873 | dev_bytenr += bio->bi_io_vec[i].bv_len; | ||
2874 | } | 3134 | } |
3135 | kfree(mapped_datav); | ||
2875 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { | 3136 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { |
2876 | if (dev_state->state->print_mask & | 3137 | if (dev_state->state->print_mask & |
2877 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | 3138 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) |
2878 | printk(KERN_INFO | 3139 | printk(KERN_INFO |
2879 | "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n", | 3140 | "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", |
2880 | rw, bio->bi_bdev); | 3141 | rw, bio->bi_bdev); |
2881 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { | 3142 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { |
2882 | if ((dev_state->state->print_mask & | 3143 | if ((dev_state->state->print_mask & |
@@ -2903,6 +3164,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
2903 | bio->bi_end_io = btrfsic_bio_end_io; | 3164 | bio->bi_end_io = btrfsic_bio_end_io; |
2904 | } | 3165 | } |
2905 | } | 3166 | } |
3167 | leave: | ||
2906 | mutex_unlock(&btrfsic_mutex); | 3168 | mutex_unlock(&btrfsic_mutex); |
2907 | 3169 | ||
2908 | submit_bio(rw, bio); | 3170 | submit_bio(rw, bio); |
@@ -2917,6 +3179,30 @@ int btrfsic_mount(struct btrfs_root *root, | |||
2917 | struct list_head *dev_head = &fs_devices->devices; | 3179 | struct list_head *dev_head = &fs_devices->devices; |
2918 | struct btrfs_device *device; | 3180 | struct btrfs_device *device; |
2919 | 3181 | ||
3182 | if (root->nodesize != root->leafsize) { | ||
3183 | printk(KERN_INFO | ||
3184 | "btrfsic: cannot handle nodesize %d != leafsize %d!\n", | ||
3185 | root->nodesize, root->leafsize); | ||
3186 | return -1; | ||
3187 | } | ||
3188 | if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
3189 | printk(KERN_INFO | ||
3190 | "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
3191 | root->nodesize, (unsigned long)PAGE_CACHE_SIZE); | ||
3192 | return -1; | ||
3193 | } | ||
3194 | if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
3195 | printk(KERN_INFO | ||
3196 | "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
3197 | root->leafsize, (unsigned long)PAGE_CACHE_SIZE); | ||
3198 | return -1; | ||
3199 | } | ||
3200 | if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
3201 | printk(KERN_INFO | ||
3202 | "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
3203 | root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); | ||
3204 | return -1; | ||
3205 | } | ||
2920 | state = kzalloc(sizeof(*state), GFP_NOFS); | 3206 | state = kzalloc(sizeof(*state), GFP_NOFS); |
2921 | if (NULL == state) { | 3207 | if (NULL == state) { |
2922 | printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); | 3208 | printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); |
@@ -2933,6 +3219,8 @@ int btrfsic_mount(struct btrfs_root *root, | |||
2933 | state->print_mask = print_mask; | 3219 | state->print_mask = print_mask; |
2934 | state->include_extent_data = including_extent_data; | 3220 | state->include_extent_data = including_extent_data; |
2935 | state->csum_size = 0; | 3221 | state->csum_size = 0; |
3222 | state->metablock_size = root->nodesize; | ||
3223 | state->datablock_size = root->sectorsize; | ||
2936 | INIT_LIST_HEAD(&state->all_blocks_list); | 3224 | INIT_LIST_HEAD(&state->all_blocks_list); |
2937 | btrfsic_block_hashtable_init(&state->block_hashtable); | 3225 | btrfsic_block_hashtable_init(&state->block_hashtable); |
2938 | btrfsic_block_link_hashtable_init(&state->block_link_hashtable); | 3226 | btrfsic_block_link_hashtable_init(&state->block_link_hashtable); |
@@ -3049,7 +3337,7 @@ void btrfsic_unmount(struct btrfs_root *root, | |||
3049 | btrfsic_block_link_free(l); | 3337 | btrfsic_block_link_free(l); |
3050 | } | 3338 | } |
3051 | 3339 | ||
3052 | if (b_all->is_iodone) | 3340 | if (b_all->is_iodone || b_all->never_written) |
3053 | btrfsic_block_free(b_all); | 3341 | btrfsic_block_free(b_all); |
3054 | else | 3342 | else |
3055 | printk(KERN_INFO "btrfs: attempt to free %c-block" | 3343 | printk(KERN_INFO "btrfs: attempt to free %c-block" |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e801f226d7e0..d7a96cfdc50a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/rbtree.h> | ||
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
23 | #include "transaction.h" | 24 | #include "transaction.h" |
@@ -37,7 +38,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
37 | struct extent_buffer *dst_buf, | 38 | struct extent_buffer *dst_buf, |
38 | struct extent_buffer *src_buf); | 39 | struct extent_buffer *src_buf); |
39 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
40 | struct btrfs_path *path, int level, int slot); | 41 | struct btrfs_path *path, int level, int slot, |
42 | int tree_mod_log); | ||
43 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
44 | struct extent_buffer *eb); | ||
45 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, | ||
46 | u32 blocksize, u64 parent_transid, | ||
47 | u64 time_seq); | ||
48 | struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root, | ||
49 | u64 bytenr, u32 blocksize, | ||
50 | u64 time_seq); | ||
41 | 51 | ||
42 | struct btrfs_path *btrfs_alloc_path(void) | 52 | struct btrfs_path *btrfs_alloc_path(void) |
43 | { | 53 | { |
@@ -220,10 +230,12 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) | |||
220 | */ | 230 | */ |
221 | static void add_root_to_dirty_list(struct btrfs_root *root) | 231 | static void add_root_to_dirty_list(struct btrfs_root *root) |
222 | { | 232 | { |
233 | spin_lock(&root->fs_info->trans_lock); | ||
223 | if (root->track_dirty && list_empty(&root->dirty_list)) { | 234 | if (root->track_dirty && list_empty(&root->dirty_list)) { |
224 | list_add(&root->dirty_list, | 235 | list_add(&root->dirty_list, |
225 | &root->fs_info->dirty_cowonly_roots); | 236 | &root->fs_info->dirty_cowonly_roots); |
226 | } | 237 | } |
238 | spin_unlock(&root->fs_info->trans_lock); | ||
227 | } | 239 | } |
228 | 240 | ||
229 | /* | 241 | /* |
@@ -253,7 +265,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
253 | 265 | ||
254 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, | 266 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, |
255 | new_root_objectid, &disk_key, level, | 267 | new_root_objectid, &disk_key, level, |
256 | buf->start, 0, 1); | 268 | buf->start, 0); |
257 | if (IS_ERR(cow)) | 269 | if (IS_ERR(cow)) |
258 | return PTR_ERR(cow); | 270 | return PTR_ERR(cow); |
259 | 271 | ||
@@ -286,6 +298,434 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
286 | return 0; | 298 | return 0; |
287 | } | 299 | } |
288 | 300 | ||
301 | enum mod_log_op { | ||
302 | MOD_LOG_KEY_REPLACE, | ||
303 | MOD_LOG_KEY_ADD, | ||
304 | MOD_LOG_KEY_REMOVE, | ||
305 | MOD_LOG_KEY_REMOVE_WHILE_FREEING, | ||
306 | MOD_LOG_KEY_REMOVE_WHILE_MOVING, | ||
307 | MOD_LOG_MOVE_KEYS, | ||
308 | MOD_LOG_ROOT_REPLACE, | ||
309 | }; | ||
310 | |||
311 | struct tree_mod_move { | ||
312 | int dst_slot; | ||
313 | int nr_items; | ||
314 | }; | ||
315 | |||
316 | struct tree_mod_root { | ||
317 | u64 logical; | ||
318 | u8 level; | ||
319 | }; | ||
320 | |||
321 | struct tree_mod_elem { | ||
322 | struct rb_node node; | ||
323 | u64 index; /* shifted logical */ | ||
324 | struct seq_list elem; | ||
325 | enum mod_log_op op; | ||
326 | |||
327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ | ||
328 | int slot; | ||
329 | |||
330 | /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ | ||
331 | u64 generation; | ||
332 | |||
333 | /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ | ||
334 | struct btrfs_disk_key key; | ||
335 | u64 blockptr; | ||
336 | |||
337 | /* this is used for op == MOD_LOG_MOVE_KEYS */ | ||
338 | struct tree_mod_move move; | ||
339 | |||
340 | /* this is used for op == MOD_LOG_ROOT_REPLACE */ | ||
341 | struct tree_mod_root old_root; | ||
342 | }; | ||
343 | |||
344 | static inline void | ||
345 | __get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) | ||
346 | { | ||
347 | elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); | ||
348 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
349 | } | ||
350 | |||
351 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
352 | struct seq_list *elem) | ||
353 | { | ||
354 | elem->flags = 1; | ||
355 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
356 | __get_tree_mod_seq(fs_info, elem); | ||
357 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
358 | } | ||
359 | |||
360 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
361 | struct seq_list *elem) | ||
362 | { | ||
363 | struct rb_root *tm_root; | ||
364 | struct rb_node *node; | ||
365 | struct rb_node *next; | ||
366 | struct seq_list *cur_elem; | ||
367 | struct tree_mod_elem *tm; | ||
368 | u64 min_seq = (u64)-1; | ||
369 | u64 seq_putting = elem->seq; | ||
370 | |||
371 | if (!seq_putting) | ||
372 | return; | ||
373 | |||
374 | BUG_ON(!(elem->flags & 1)); | ||
375 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
376 | list_del(&elem->list); | ||
377 | |||
378 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { | ||
379 | if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { | ||
380 | if (seq_putting > cur_elem->seq) { | ||
381 | /* | ||
382 | * blocker with lower sequence number exists, we | ||
383 | * cannot remove anything from the log | ||
384 | */ | ||
385 | goto out; | ||
386 | } | ||
387 | min_seq = cur_elem->seq; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * anything that's lower than the lowest existing (read: blocked) | ||
393 | * sequence number can be removed from the tree. | ||
394 | */ | ||
395 | write_lock(&fs_info->tree_mod_log_lock); | ||
396 | tm_root = &fs_info->tree_mod_log; | ||
397 | for (node = rb_first(tm_root); node; node = next) { | ||
398 | next = rb_next(node); | ||
399 | tm = container_of(node, struct tree_mod_elem, node); | ||
400 | if (tm->elem.seq > min_seq) | ||
401 | continue; | ||
402 | rb_erase(node, tm_root); | ||
403 | list_del(&tm->elem.list); | ||
404 | kfree(tm); | ||
405 | } | ||
406 | write_unlock(&fs_info->tree_mod_log_lock); | ||
407 | out: | ||
408 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * key order of the log: | ||
413 | * index -> sequence | ||
414 | * | ||
415 | * the index is the shifted logical of the *new* root node for root replace | ||
416 | * operations, or the shifted logical of the affected block for all other | ||
417 | * operations. | ||
418 | */ | ||
419 | static noinline int | ||
420 | __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | ||
421 | { | ||
422 | struct rb_root *tm_root; | ||
423 | struct rb_node **new; | ||
424 | struct rb_node *parent = NULL; | ||
425 | struct tree_mod_elem *cur; | ||
426 | int ret = 0; | ||
427 | |||
428 | BUG_ON(!tm || !tm->elem.seq); | ||
429 | |||
430 | write_lock(&fs_info->tree_mod_log_lock); | ||
431 | tm_root = &fs_info->tree_mod_log; | ||
432 | new = &tm_root->rb_node; | ||
433 | while (*new) { | ||
434 | cur = container_of(*new, struct tree_mod_elem, node); | ||
435 | parent = *new; | ||
436 | if (cur->index < tm->index) | ||
437 | new = &((*new)->rb_left); | ||
438 | else if (cur->index > tm->index) | ||
439 | new = &((*new)->rb_right); | ||
440 | else if (cur->elem.seq < tm->elem.seq) | ||
441 | new = &((*new)->rb_left); | ||
442 | else if (cur->elem.seq > tm->elem.seq) | ||
443 | new = &((*new)->rb_right); | ||
444 | else { | ||
445 | kfree(tm); | ||
446 | ret = -EEXIST; | ||
447 | goto unlock; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | rb_link_node(&tm->node, parent, new); | ||
452 | rb_insert_color(&tm->node, tm_root); | ||
453 | unlock: | ||
454 | write_unlock(&fs_info->tree_mod_log_lock); | ||
455 | return ret; | ||
456 | } | ||
457 | |||
458 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, | ||
459 | struct extent_buffer *eb) { | ||
460 | smp_mb(); | ||
461 | if (list_empty(&(fs_info)->tree_mod_seq_list)) | ||
462 | return 1; | ||
463 | if (!eb) | ||
464 | return 0; | ||
465 | if (btrfs_header_level(eb) == 0) | ||
466 | return 1; | ||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | ||
471 | struct tree_mod_elem **tm_ret) | ||
472 | { | ||
473 | struct tree_mod_elem *tm; | ||
474 | int seq; | ||
475 | |||
476 | if (tree_mod_dont_log(fs_info, NULL)) | ||
477 | return 0; | ||
478 | |||
479 | tm = *tm_ret = kzalloc(sizeof(*tm), flags); | ||
480 | if (!tm) | ||
481 | return -ENOMEM; | ||
482 | |||
483 | tm->elem.flags = 0; | ||
484 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
485 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
486 | /* | ||
487 | * someone emptied the list while we were waiting for the lock. | ||
488 | * we must not add to the list, because no blocker exists. items | ||
489 | * are removed from the list only when the existing blocker is | ||
490 | * removed from the list. | ||
491 | */ | ||
492 | kfree(tm); | ||
493 | seq = 0; | ||
494 | } else { | ||
495 | __get_tree_mod_seq(fs_info, &tm->elem); | ||
496 | seq = tm->elem.seq; | ||
497 | } | ||
498 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
499 | |||
500 | return seq; | ||
501 | } | ||
502 | |||
503 | static noinline int | ||
504 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | ||
505 | struct extent_buffer *eb, int slot, | ||
506 | enum mod_log_op op, gfp_t flags) | ||
507 | { | ||
508 | struct tree_mod_elem *tm; | ||
509 | int ret; | ||
510 | |||
511 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
512 | if (ret <= 0) | ||
513 | return ret; | ||
514 | |||
515 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
516 | if (op != MOD_LOG_KEY_ADD) { | ||
517 | btrfs_node_key(eb, &tm->key, slot); | ||
518 | tm->blockptr = btrfs_node_blockptr(eb, slot); | ||
519 | } | ||
520 | tm->op = op; | ||
521 | tm->slot = slot; | ||
522 | tm->generation = btrfs_node_ptr_generation(eb, slot); | ||
523 | |||
524 | return __tree_mod_log_insert(fs_info, tm); | ||
525 | } | ||
526 | |||
527 | static noinline int | ||
528 | tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
529 | int slot, enum mod_log_op op) | ||
530 | { | ||
531 | return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); | ||
532 | } | ||
533 | |||
534 | static noinline int | ||
535 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | ||
536 | struct extent_buffer *eb, int dst_slot, int src_slot, | ||
537 | int nr_items, gfp_t flags) | ||
538 | { | ||
539 | struct tree_mod_elem *tm; | ||
540 | int ret; | ||
541 | int i; | ||
542 | |||
543 | if (tree_mod_dont_log(fs_info, eb)) | ||
544 | return 0; | ||
545 | |||
546 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | ||
547 | ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, | ||
548 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | ||
549 | BUG_ON(ret < 0); | ||
550 | } | ||
551 | |||
552 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
553 | if (ret <= 0) | ||
554 | return ret; | ||
555 | |||
556 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
557 | tm->slot = src_slot; | ||
558 | tm->move.dst_slot = dst_slot; | ||
559 | tm->move.nr_items = nr_items; | ||
560 | tm->op = MOD_LOG_MOVE_KEYS; | ||
561 | |||
562 | return __tree_mod_log_insert(fs_info, tm); | ||
563 | } | ||
564 | |||
565 | static noinline int | ||
566 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | ||
567 | struct extent_buffer *old_root, | ||
568 | struct extent_buffer *new_root, gfp_t flags) | ||
569 | { | ||
570 | struct tree_mod_elem *tm; | ||
571 | int ret; | ||
572 | |||
573 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
574 | if (ret <= 0) | ||
575 | return ret; | ||
576 | |||
577 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | ||
578 | tm->old_root.logical = old_root->start; | ||
579 | tm->old_root.level = btrfs_header_level(old_root); | ||
580 | tm->generation = btrfs_header_generation(old_root); | ||
581 | tm->op = MOD_LOG_ROOT_REPLACE; | ||
582 | |||
583 | return __tree_mod_log_insert(fs_info, tm); | ||
584 | } | ||
585 | |||
586 | static struct tree_mod_elem * | ||
587 | __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | ||
588 | int smallest) | ||
589 | { | ||
590 | struct rb_root *tm_root; | ||
591 | struct rb_node *node; | ||
592 | struct tree_mod_elem *cur = NULL; | ||
593 | struct tree_mod_elem *found = NULL; | ||
594 | u64 index = start >> PAGE_CACHE_SHIFT; | ||
595 | |||
596 | read_lock(&fs_info->tree_mod_log_lock); | ||
597 | tm_root = &fs_info->tree_mod_log; | ||
598 | node = tm_root->rb_node; | ||
599 | while (node) { | ||
600 | cur = container_of(node, struct tree_mod_elem, node); | ||
601 | if (cur->index < index) { | ||
602 | node = node->rb_left; | ||
603 | } else if (cur->index > index) { | ||
604 | node = node->rb_right; | ||
605 | } else if (cur->elem.seq < min_seq) { | ||
606 | node = node->rb_left; | ||
607 | } else if (!smallest) { | ||
608 | /* we want the node with the highest seq */ | ||
609 | if (found) | ||
610 | BUG_ON(found->elem.seq > cur->elem.seq); | ||
611 | found = cur; | ||
612 | node = node->rb_left; | ||
613 | } else if (cur->elem.seq > min_seq) { | ||
614 | /* we want the node with the smallest seq */ | ||
615 | if (found) | ||
616 | BUG_ON(found->elem.seq < cur->elem.seq); | ||
617 | found = cur; | ||
618 | node = node->rb_right; | ||
619 | } else { | ||
620 | found = cur; | ||
621 | break; | ||
622 | } | ||
623 | } | ||
624 | read_unlock(&fs_info->tree_mod_log_lock); | ||
625 | |||
626 | return found; | ||
627 | } | ||
628 | |||
629 | /* | ||
630 | * this returns the element from the log with the smallest time sequence | ||
631 | * value that's in the log (the oldest log item). any element with a time | ||
632 | * sequence lower than min_seq will be ignored. | ||
633 | */ | ||
634 | static struct tree_mod_elem * | ||
635 | tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, | ||
636 | u64 min_seq) | ||
637 | { | ||
638 | return __tree_mod_log_search(fs_info, start, min_seq, 1); | ||
639 | } | ||
640 | |||
641 | /* | ||
642 | * this returns the element from the log with the largest time sequence | ||
643 | * value that's in the log (the most recent log item). any element with | ||
644 | * a time sequence lower than min_seq will be ignored. | ||
645 | */ | ||
646 | static struct tree_mod_elem * | ||
647 | tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | ||
648 | { | ||
649 | return __tree_mod_log_search(fs_info, start, min_seq, 0); | ||
650 | } | ||
651 | |||
652 | static inline void | ||
653 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
654 | struct extent_buffer *src, unsigned long dst_offset, | ||
655 | unsigned long src_offset, int nr_items) | ||
656 | { | ||
657 | int ret; | ||
658 | int i; | ||
659 | |||
660 | if (tree_mod_dont_log(fs_info, NULL)) | ||
661 | return; | ||
662 | |||
663 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) | ||
664 | return; | ||
665 | |||
666 | /* speed this up by single seq for all operations? */ | ||
667 | for (i = 0; i < nr_items; i++) { | ||
668 | ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, | ||
669 | MOD_LOG_KEY_REMOVE); | ||
670 | BUG_ON(ret < 0); | ||
671 | ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, | ||
672 | MOD_LOG_KEY_ADD); | ||
673 | BUG_ON(ret < 0); | ||
674 | } | ||
675 | } | ||
676 | |||
677 | static inline void | ||
678 | tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
679 | int dst_offset, int src_offset, int nr_items) | ||
680 | { | ||
681 | int ret; | ||
682 | ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, | ||
683 | nr_items, GFP_NOFS); | ||
684 | BUG_ON(ret < 0); | ||
685 | } | ||
686 | |||
687 | static inline void | ||
688 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | ||
689 | struct extent_buffer *eb, | ||
690 | struct btrfs_disk_key *disk_key, int slot, int atomic) | ||
691 | { | ||
692 | int ret; | ||
693 | |||
694 | ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, | ||
695 | MOD_LOG_KEY_REPLACE, | ||
696 | atomic ? GFP_ATOMIC : GFP_NOFS); | ||
697 | BUG_ON(ret < 0); | ||
698 | } | ||
699 | |||
700 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
701 | struct extent_buffer *eb) | ||
702 | { | ||
703 | int i; | ||
704 | int ret; | ||
705 | u32 nritems; | ||
706 | |||
707 | if (tree_mod_dont_log(fs_info, eb)) | ||
708 | return; | ||
709 | |||
710 | nritems = btrfs_header_nritems(eb); | ||
711 | for (i = nritems - 1; i >= 0; i--) { | ||
712 | ret = tree_mod_log_insert_key(fs_info, eb, i, | ||
713 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
714 | BUG_ON(ret < 0); | ||
715 | } | ||
716 | } | ||
717 | |||
718 | static inline void | ||
719 | tree_mod_log_set_root_pointer(struct btrfs_root *root, | ||
720 | struct extent_buffer *new_root_node) | ||
721 | { | ||
722 | int ret; | ||
723 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
724 | ret = tree_mod_log_insert_root(root->fs_info, root->node, | ||
725 | new_root_node, GFP_NOFS); | ||
726 | BUG_ON(ret < 0); | ||
727 | } | ||
728 | |||
289 | /* | 729 | /* |
290 | * check if the tree block can be shared by multiple trees | 730 | * check if the tree block can be shared by multiple trees |
291 | */ | 731 | */ |
@@ -407,6 +847,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
407 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); | 847 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); |
408 | BUG_ON(ret); /* -ENOMEM */ | 848 | BUG_ON(ret); /* -ENOMEM */ |
409 | } | 849 | } |
850 | /* | ||
851 | * don't log freeing in case we're freeing the root node, this | ||
852 | * is done by tree_mod_log_set_root_pointer later | ||
853 | */ | ||
854 | if (buf != root->node && btrfs_header_level(buf) != 0) | ||
855 | tree_mod_log_free_eb(root->fs_info, buf); | ||
410 | clean_tree_block(trans, root, buf); | 856 | clean_tree_block(trans, root, buf); |
411 | *last_ref = 1; | 857 | *last_ref = 1; |
412 | } | 858 | } |
@@ -465,7 +911,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
465 | 911 | ||
466 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | 912 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, |
467 | root->root_key.objectid, &disk_key, | 913 | root->root_key.objectid, &disk_key, |
468 | level, search_start, empty_size, 1); | 914 | level, search_start, empty_size); |
469 | if (IS_ERR(cow)) | 915 | if (IS_ERR(cow)) |
470 | return PTR_ERR(cow); | 916 | return PTR_ERR(cow); |
471 | 917 | ||
@@ -504,10 +950,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
504 | parent_start = 0; | 950 | parent_start = 0; |
505 | 951 | ||
506 | extent_buffer_get(cow); | 952 | extent_buffer_get(cow); |
953 | tree_mod_log_set_root_pointer(root, cow); | ||
507 | rcu_assign_pointer(root->node, cow); | 954 | rcu_assign_pointer(root->node, cow); |
508 | 955 | ||
509 | btrfs_free_tree_block(trans, root, buf, parent_start, | 956 | btrfs_free_tree_block(trans, root, buf, parent_start, |
510 | last_ref, 1); | 957 | last_ref); |
511 | free_extent_buffer(buf); | 958 | free_extent_buffer(buf); |
512 | add_root_to_dirty_list(root); | 959 | add_root_to_dirty_list(root); |
513 | } else { | 960 | } else { |
@@ -517,13 +964,15 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
517 | parent_start = 0; | 964 | parent_start = 0; |
518 | 965 | ||
519 | WARN_ON(trans->transid != btrfs_header_generation(parent)); | 966 | WARN_ON(trans->transid != btrfs_header_generation(parent)); |
967 | tree_mod_log_insert_key(root->fs_info, parent, parent_slot, | ||
968 | MOD_LOG_KEY_REPLACE); | ||
520 | btrfs_set_node_blockptr(parent, parent_slot, | 969 | btrfs_set_node_blockptr(parent, parent_slot, |
521 | cow->start); | 970 | cow->start); |
522 | btrfs_set_node_ptr_generation(parent, parent_slot, | 971 | btrfs_set_node_ptr_generation(parent, parent_slot, |
523 | trans->transid); | 972 | trans->transid); |
524 | btrfs_mark_buffer_dirty(parent); | 973 | btrfs_mark_buffer_dirty(parent); |
525 | btrfs_free_tree_block(trans, root, buf, parent_start, | 974 | btrfs_free_tree_block(trans, root, buf, parent_start, |
526 | last_ref, 1); | 975 | last_ref); |
527 | } | 976 | } |
528 | if (unlock_orig) | 977 | if (unlock_orig) |
529 | btrfs_tree_unlock(buf); | 978 | btrfs_tree_unlock(buf); |
@@ -533,6 +982,210 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
533 | return 0; | 982 | return 0; |
534 | } | 983 | } |
535 | 984 | ||
985 | /* | ||
986 | * returns the logical address of the oldest predecessor of the given root. | ||
987 | * entries older than time_seq are ignored. | ||
988 | */ | ||
989 | static struct tree_mod_elem * | ||
990 | __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | ||
991 | struct btrfs_root *root, u64 time_seq) | ||
992 | { | ||
993 | struct tree_mod_elem *tm; | ||
994 | struct tree_mod_elem *found = NULL; | ||
995 | u64 root_logical = root->node->start; | ||
996 | int looped = 0; | ||
997 | |||
998 | if (!time_seq) | ||
999 | return 0; | ||
1000 | |||
1001 | /* | ||
1002 | * the very last operation that's logged for a root is the replacement | ||
1003 | * operation (if it is replaced at all). this has the index of the *new* | ||
1004 | * root, making it the very first operation that's logged for this root. | ||
1005 | */ | ||
1006 | while (1) { | ||
1007 | tm = tree_mod_log_search_oldest(fs_info, root_logical, | ||
1008 | time_seq); | ||
1009 | if (!looped && !tm) | ||
1010 | return 0; | ||
1011 | /* | ||
1012 | * we must have key remove operations in the log before the | ||
1013 | * replace operation. | ||
1014 | */ | ||
1015 | BUG_ON(!tm); | ||
1016 | |||
1017 | if (tm->op != MOD_LOG_ROOT_REPLACE) | ||
1018 | break; | ||
1019 | |||
1020 | found = tm; | ||
1021 | root_logical = tm->old_root.logical; | ||
1022 | BUG_ON(root_logical == root->node->start); | ||
1023 | looped = 1; | ||
1024 | } | ||
1025 | |||
1026 | return found; | ||
1027 | } | ||
1028 | |||
1029 | /* | ||
1030 | * tm is a pointer to the first operation to rewind within eb. then, all | ||
1031 | * previous operations will be rewinded (until we reach something older than | ||
1032 | * time_seq). | ||
1033 | */ | ||
1034 | static void | ||
1035 | __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | ||
1036 | struct tree_mod_elem *first_tm) | ||
1037 | { | ||
1038 | u32 n; | ||
1039 | struct rb_node *next; | ||
1040 | struct tree_mod_elem *tm = first_tm; | ||
1041 | unsigned long o_dst; | ||
1042 | unsigned long o_src; | ||
1043 | unsigned long p_size = sizeof(struct btrfs_key_ptr); | ||
1044 | |||
1045 | n = btrfs_header_nritems(eb); | ||
1046 | while (tm && tm->elem.seq >= time_seq) { | ||
1047 | /* | ||
1048 | * all the operations are recorded with the operator used for | ||
1049 | * the modification. as we're going backwards, we do the | ||
1050 | * opposite of each operation here. | ||
1051 | */ | ||
1052 | switch (tm->op) { | ||
1053 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: | ||
1054 | BUG_ON(tm->slot < n); | ||
1055 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: | ||
1056 | case MOD_LOG_KEY_REMOVE: | ||
1057 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
1058 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
1059 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
1060 | tm->generation); | ||
1061 | n++; | ||
1062 | break; | ||
1063 | case MOD_LOG_KEY_REPLACE: | ||
1064 | BUG_ON(tm->slot >= n); | ||
1065 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
1066 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
1067 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
1068 | tm->generation); | ||
1069 | break; | ||
1070 | case MOD_LOG_KEY_ADD: | ||
1071 | if (tm->slot != n - 1) { | ||
1072 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1073 | o_src = btrfs_node_key_ptr_offset(tm->slot + 1); | ||
1074 | memmove_extent_buffer(eb, o_dst, o_src, p_size); | ||
1075 | } | ||
1076 | n--; | ||
1077 | break; | ||
1078 | case MOD_LOG_MOVE_KEYS: | ||
1079 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1080 | o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); | ||
1081 | memmove_extent_buffer(eb, o_dst, o_src, | ||
1082 | tm->move.nr_items * p_size); | ||
1083 | break; | ||
1084 | case MOD_LOG_ROOT_REPLACE: | ||
1085 | /* | ||
1086 | * this operation is special. for roots, this must be | ||
1087 | * handled explicitly before rewinding. | ||
1088 | * for non-roots, this operation may exist if the node | ||
1089 | * was a root: root A -> child B; then A gets empty and | ||
1090 | * B is promoted to the new root. in the mod log, we'll | ||
1091 | * have a root-replace operation for B, a tree block | ||
1092 | * that is no root. we simply ignore that operation. | ||
1093 | */ | ||
1094 | break; | ||
1095 | } | ||
1096 | next = rb_next(&tm->node); | ||
1097 | if (!next) | ||
1098 | break; | ||
1099 | tm = container_of(next, struct tree_mod_elem, node); | ||
1100 | if (tm->index != first_tm->index) | ||
1101 | break; | ||
1102 | } | ||
1103 | btrfs_set_header_nritems(eb, n); | ||
1104 | } | ||
1105 | |||
1106 | static struct extent_buffer * | ||
1107 | tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
1108 | u64 time_seq) | ||
1109 | { | ||
1110 | struct extent_buffer *eb_rewin; | ||
1111 | struct tree_mod_elem *tm; | ||
1112 | |||
1113 | if (!time_seq) | ||
1114 | return eb; | ||
1115 | |||
1116 | if (btrfs_header_level(eb) == 0) | ||
1117 | return eb; | ||
1118 | |||
1119 | tm = tree_mod_log_search(fs_info, eb->start, time_seq); | ||
1120 | if (!tm) | ||
1121 | return eb; | ||
1122 | |||
1123 | if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { | ||
1124 | BUG_ON(tm->slot != 0); | ||
1125 | eb_rewin = alloc_dummy_extent_buffer(eb->start, | ||
1126 | fs_info->tree_root->nodesize); | ||
1127 | BUG_ON(!eb_rewin); | ||
1128 | btrfs_set_header_bytenr(eb_rewin, eb->start); | ||
1129 | btrfs_set_header_backref_rev(eb_rewin, | ||
1130 | btrfs_header_backref_rev(eb)); | ||
1131 | btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); | ||
1132 | btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); | ||
1133 | } else { | ||
1134 | eb_rewin = btrfs_clone_extent_buffer(eb); | ||
1135 | BUG_ON(!eb_rewin); | ||
1136 | } | ||
1137 | |||
1138 | extent_buffer_get(eb_rewin); | ||
1139 | free_extent_buffer(eb); | ||
1140 | |||
1141 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); | ||
1142 | |||
1143 | return eb_rewin; | ||
1144 | } | ||
1145 | |||
1146 | static inline struct extent_buffer * | ||
1147 | get_old_root(struct btrfs_root *root, u64 time_seq) | ||
1148 | { | ||
1149 | struct tree_mod_elem *tm; | ||
1150 | struct extent_buffer *eb; | ||
1151 | struct tree_mod_root *old_root; | ||
1152 | u64 old_generation; | ||
1153 | |||
1154 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); | ||
1155 | if (!tm) | ||
1156 | return root->node; | ||
1157 | |||
1158 | old_root = &tm->old_root; | ||
1159 | old_generation = tm->generation; | ||
1160 | |||
1161 | tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); | ||
1162 | /* | ||
1163 | * there was an item in the log when __tree_mod_log_oldest_root | ||
1164 | * returned. this one must not go away, because the time_seq passed to | ||
1165 | * us must be blocking its removal. | ||
1166 | */ | ||
1167 | BUG_ON(!tm); | ||
1168 | |||
1169 | if (old_root->logical == root->node->start) { | ||
1170 | /* there are logged operations for the current root */ | ||
1171 | eb = btrfs_clone_extent_buffer(root->node); | ||
1172 | } else { | ||
1173 | /* there's a root replace operation for the current root */ | ||
1174 | eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, | ||
1175 | root->nodesize); | ||
1176 | btrfs_set_header_bytenr(eb, eb->start); | ||
1177 | btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); | ||
1178 | btrfs_set_header_owner(eb, root->root_key.objectid); | ||
1179 | } | ||
1180 | if (!eb) | ||
1181 | return NULL; | ||
1182 | btrfs_set_header_level(eb, old_root->level); | ||
1183 | btrfs_set_header_generation(eb, old_generation); | ||
1184 | __tree_mod_log_rewind(eb, time_seq, tm); | ||
1185 | |||
1186 | return eb; | ||
1187 | } | ||
1188 | |||
536 | static inline int should_cow_block(struct btrfs_trans_handle *trans, | 1189 | static inline int should_cow_block(struct btrfs_trans_handle *trans, |
537 | struct btrfs_root *root, | 1190 | struct btrfs_root *root, |
538 | struct extent_buffer *buf) | 1191 | struct extent_buffer *buf) |
@@ -723,7 +1376,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
723 | 1376 | ||
724 | cur = btrfs_find_tree_block(root, blocknr, blocksize); | 1377 | cur = btrfs_find_tree_block(root, blocknr, blocksize); |
725 | if (cur) | 1378 | if (cur) |
726 | uptodate = btrfs_buffer_uptodate(cur, gen); | 1379 | uptodate = btrfs_buffer_uptodate(cur, gen, 0); |
727 | else | 1380 | else |
728 | uptodate = 0; | 1381 | uptodate = 0; |
729 | if (!cur || !uptodate) { | 1382 | if (!cur || !uptodate) { |
@@ -737,7 +1390,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
737 | if (!cur) | 1390 | if (!cur) |
738 | return -EIO; | 1391 | return -EIO; |
739 | } else if (!uptodate) { | 1392 | } else if (!uptodate) { |
740 | btrfs_read_buffer(cur, gen); | 1393 | err = btrfs_read_buffer(cur, gen); |
1394 | if (err) { | ||
1395 | free_extent_buffer(cur); | ||
1396 | return err; | ||
1397 | } | ||
741 | } | 1398 | } |
742 | } | 1399 | } |
743 | if (search_start == 0) | 1400 | if (search_start == 0) |
@@ -852,20 +1509,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb, | |||
852 | static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 1509 | static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
853 | int level, int *slot) | 1510 | int level, int *slot) |
854 | { | 1511 | { |
855 | if (level == 0) { | 1512 | if (level == 0) |
856 | return generic_bin_search(eb, | 1513 | return generic_bin_search(eb, |
857 | offsetof(struct btrfs_leaf, items), | 1514 | offsetof(struct btrfs_leaf, items), |
858 | sizeof(struct btrfs_item), | 1515 | sizeof(struct btrfs_item), |
859 | key, btrfs_header_nritems(eb), | 1516 | key, btrfs_header_nritems(eb), |
860 | slot); | 1517 | slot); |
861 | } else { | 1518 | else |
862 | return generic_bin_search(eb, | 1519 | return generic_bin_search(eb, |
863 | offsetof(struct btrfs_node, ptrs), | 1520 | offsetof(struct btrfs_node, ptrs), |
864 | sizeof(struct btrfs_key_ptr), | 1521 | sizeof(struct btrfs_key_ptr), |
865 | key, btrfs_header_nritems(eb), | 1522 | key, btrfs_header_nritems(eb), |
866 | slot); | 1523 | slot); |
867 | } | ||
868 | return -1; | ||
869 | } | 1524 | } |
870 | 1525 | ||
871 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 1526 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
@@ -972,6 +1627,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
972 | goto enospc; | 1627 | goto enospc; |
973 | } | 1628 | } |
974 | 1629 | ||
1630 | tree_mod_log_set_root_pointer(root, child); | ||
975 | rcu_assign_pointer(root->node, child); | 1631 | rcu_assign_pointer(root->node, child); |
976 | 1632 | ||
977 | add_root_to_dirty_list(root); | 1633 | add_root_to_dirty_list(root); |
@@ -985,7 +1641,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
985 | free_extent_buffer(mid); | 1641 | free_extent_buffer(mid); |
986 | 1642 | ||
987 | root_sub_used(root, mid->len); | 1643 | root_sub_used(root, mid->len); |
988 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1644 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
989 | /* once for the root ptr */ | 1645 | /* once for the root ptr */ |
990 | free_extent_buffer_stale(mid); | 1646 | free_extent_buffer_stale(mid); |
991 | return 0; | 1647 | return 0; |
@@ -1038,14 +1694,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1038 | if (btrfs_header_nritems(right) == 0) { | 1694 | if (btrfs_header_nritems(right) == 0) { |
1039 | clean_tree_block(trans, root, right); | 1695 | clean_tree_block(trans, root, right); |
1040 | btrfs_tree_unlock(right); | 1696 | btrfs_tree_unlock(right); |
1041 | del_ptr(trans, root, path, level + 1, pslot + 1); | 1697 | del_ptr(trans, root, path, level + 1, pslot + 1, 1); |
1042 | root_sub_used(root, right->len); | 1698 | root_sub_used(root, right->len); |
1043 | btrfs_free_tree_block(trans, root, right, 0, 1, 0); | 1699 | btrfs_free_tree_block(trans, root, right, 0, 1); |
1044 | free_extent_buffer_stale(right); | 1700 | free_extent_buffer_stale(right); |
1045 | right = NULL; | 1701 | right = NULL; |
1046 | } else { | 1702 | } else { |
1047 | struct btrfs_disk_key right_key; | 1703 | struct btrfs_disk_key right_key; |
1048 | btrfs_node_key(right, &right_key, 0); | 1704 | btrfs_node_key(right, &right_key, 0); |
1705 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1706 | &right_key, pslot + 1, 0); | ||
1049 | btrfs_set_node_key(parent, &right_key, pslot + 1); | 1707 | btrfs_set_node_key(parent, &right_key, pslot + 1); |
1050 | btrfs_mark_buffer_dirty(parent); | 1708 | btrfs_mark_buffer_dirty(parent); |
1051 | } | 1709 | } |
@@ -1080,15 +1738,17 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1080 | if (btrfs_header_nritems(mid) == 0) { | 1738 | if (btrfs_header_nritems(mid) == 0) { |
1081 | clean_tree_block(trans, root, mid); | 1739 | clean_tree_block(trans, root, mid); |
1082 | btrfs_tree_unlock(mid); | 1740 | btrfs_tree_unlock(mid); |
1083 | del_ptr(trans, root, path, level + 1, pslot); | 1741 | del_ptr(trans, root, path, level + 1, pslot, 1); |
1084 | root_sub_used(root, mid->len); | 1742 | root_sub_used(root, mid->len); |
1085 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1743 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
1086 | free_extent_buffer_stale(mid); | 1744 | free_extent_buffer_stale(mid); |
1087 | mid = NULL; | 1745 | mid = NULL; |
1088 | } else { | 1746 | } else { |
1089 | /* update the parent key to reflect our changes */ | 1747 | /* update the parent key to reflect our changes */ |
1090 | struct btrfs_disk_key mid_key; | 1748 | struct btrfs_disk_key mid_key; |
1091 | btrfs_node_key(mid, &mid_key, 0); | 1749 | btrfs_node_key(mid, &mid_key, 0); |
1750 | tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, | ||
1751 | pslot, 0); | ||
1092 | btrfs_set_node_key(parent, &mid_key, pslot); | 1752 | btrfs_set_node_key(parent, &mid_key, pslot); |
1093 | btrfs_mark_buffer_dirty(parent); | 1753 | btrfs_mark_buffer_dirty(parent); |
1094 | } | 1754 | } |
@@ -1186,6 +1846,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1186 | struct btrfs_disk_key disk_key; | 1846 | struct btrfs_disk_key disk_key; |
1187 | orig_slot += left_nr; | 1847 | orig_slot += left_nr; |
1188 | btrfs_node_key(mid, &disk_key, 0); | 1848 | btrfs_node_key(mid, &disk_key, 0); |
1849 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1850 | &disk_key, pslot, 0); | ||
1189 | btrfs_set_node_key(parent, &disk_key, pslot); | 1851 | btrfs_set_node_key(parent, &disk_key, pslot); |
1190 | btrfs_mark_buffer_dirty(parent); | 1852 | btrfs_mark_buffer_dirty(parent); |
1191 | if (btrfs_header_nritems(left) > orig_slot) { | 1853 | if (btrfs_header_nritems(left) > orig_slot) { |
@@ -1237,6 +1899,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1237 | struct btrfs_disk_key disk_key; | 1899 | struct btrfs_disk_key disk_key; |
1238 | 1900 | ||
1239 | btrfs_node_key(right, &disk_key, 0); | 1901 | btrfs_node_key(right, &disk_key, 0); |
1902 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1903 | &disk_key, pslot + 1, 0); | ||
1240 | btrfs_set_node_key(parent, &disk_key, pslot + 1); | 1904 | btrfs_set_node_key(parent, &disk_key, pslot + 1); |
1241 | btrfs_mark_buffer_dirty(parent); | 1905 | btrfs_mark_buffer_dirty(parent); |
1242 | 1906 | ||
@@ -1358,7 +2022,12 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1358 | block1 = btrfs_node_blockptr(parent, slot - 1); | 2022 | block1 = btrfs_node_blockptr(parent, slot - 1); |
1359 | gen = btrfs_node_ptr_generation(parent, slot - 1); | 2023 | gen = btrfs_node_ptr_generation(parent, slot - 1); |
1360 | eb = btrfs_find_tree_block(root, block1, blocksize); | 2024 | eb = btrfs_find_tree_block(root, block1, blocksize); |
1361 | if (eb && btrfs_buffer_uptodate(eb, gen)) | 2025 | /* |
2026 | * if we get -eagain from btrfs_buffer_uptodate, we | ||
2027 | * don't want to return eagain here. That will loop | ||
2028 | * forever | ||
2029 | */ | ||
2030 | if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) | ||
1362 | block1 = 0; | 2031 | block1 = 0; |
1363 | free_extent_buffer(eb); | 2032 | free_extent_buffer(eb); |
1364 | } | 2033 | } |
@@ -1366,7 +2035,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1366 | block2 = btrfs_node_blockptr(parent, slot + 1); | 2035 | block2 = btrfs_node_blockptr(parent, slot + 1); |
1367 | gen = btrfs_node_ptr_generation(parent, slot + 1); | 2036 | gen = btrfs_node_ptr_generation(parent, slot + 1); |
1368 | eb = btrfs_find_tree_block(root, block2, blocksize); | 2037 | eb = btrfs_find_tree_block(root, block2, blocksize); |
1369 | if (eb && btrfs_buffer_uptodate(eb, gen)) | 2038 | if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) |
1370 | block2 = 0; | 2039 | block2 = 0; |
1371 | free_extent_buffer(eb); | 2040 | free_extent_buffer(eb); |
1372 | } | 2041 | } |
@@ -1489,7 +2158,7 @@ static int | |||
1489 | read_block_for_search(struct btrfs_trans_handle *trans, | 2158 | read_block_for_search(struct btrfs_trans_handle *trans, |
1490 | struct btrfs_root *root, struct btrfs_path *p, | 2159 | struct btrfs_root *root, struct btrfs_path *p, |
1491 | struct extent_buffer **eb_ret, int level, int slot, | 2160 | struct extent_buffer **eb_ret, int level, int slot, |
1492 | struct btrfs_key *key) | 2161 | struct btrfs_key *key, u64 time_seq) |
1493 | { | 2162 | { |
1494 | u64 blocknr; | 2163 | u64 blocknr; |
1495 | u64 gen; | 2164 | u64 gen; |
@@ -1504,8 +2173,9 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1504 | 2173 | ||
1505 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | 2174 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); |
1506 | if (tmp) { | 2175 | if (tmp) { |
1507 | if (btrfs_buffer_uptodate(tmp, 0)) { | 2176 | /* first we do an atomic uptodate check */ |
1508 | if (btrfs_buffer_uptodate(tmp, gen)) { | 2177 | if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) { |
2178 | if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { | ||
1509 | /* | 2179 | /* |
1510 | * we found an up to date block without | 2180 | * we found an up to date block without |
1511 | * sleeping, return | 2181 | * sleeping, return |
@@ -1523,8 +2193,9 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1523 | free_extent_buffer(tmp); | 2193 | free_extent_buffer(tmp); |
1524 | btrfs_set_path_blocking(p); | 2194 | btrfs_set_path_blocking(p); |
1525 | 2195 | ||
2196 | /* now we're allowed to do a blocking uptodate check */ | ||
1526 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 2197 | tmp = read_tree_block(root, blocknr, blocksize, gen); |
1527 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | 2198 | if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) { |
1528 | *eb_ret = tmp; | 2199 | *eb_ret = tmp; |
1529 | return 0; | 2200 | return 0; |
1530 | } | 2201 | } |
@@ -1559,7 +2230,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1559 | * and give up so that our caller doesn't loop forever | 2230 | * and give up so that our caller doesn't loop forever |
1560 | * on our EAGAINs. | 2231 | * on our EAGAINs. |
1561 | */ | 2232 | */ |
1562 | if (!btrfs_buffer_uptodate(tmp, 0)) | 2233 | if (!btrfs_buffer_uptodate(tmp, 0, 0)) |
1563 | ret = -EIO; | 2234 | ret = -EIO; |
1564 | free_extent_buffer(tmp); | 2235 | free_extent_buffer(tmp); |
1565 | } | 2236 | } |
@@ -1841,7 +2512,7 @@ cow_done: | |||
1841 | } | 2512 | } |
1842 | 2513 | ||
1843 | err = read_block_for_search(trans, root, p, | 2514 | err = read_block_for_search(trans, root, p, |
1844 | &b, level, slot, key); | 2515 | &b, level, slot, key, 0); |
1845 | if (err == -EAGAIN) | 2516 | if (err == -EAGAIN) |
1846 | goto again; | 2517 | goto again; |
1847 | if (err) { | 2518 | if (err) { |
@@ -1913,6 +2584,115 @@ done: | |||
1913 | } | 2584 | } |
1914 | 2585 | ||
1915 | /* | 2586 | /* |
2587 | * Like btrfs_search_slot, this looks for a key in the given tree. It uses the | ||
2588 | * current state of the tree together with the operations recorded in the tree | ||
2589 | * modification log to search for the key in a previous version of this tree, as | ||
2590 | * denoted by the time_seq parameter. | ||
2591 | * | ||
2592 | * Naturally, there is no support for insert, delete or cow operations. | ||
2593 | * | ||
2594 | * The resulting path and return value will be set up as if we called | ||
2595 | * btrfs_search_slot at that point in time with ins_len and cow both set to 0. | ||
2596 | */ | ||
2597 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
2598 | struct btrfs_path *p, u64 time_seq) | ||
2599 | { | ||
2600 | struct extent_buffer *b; | ||
2601 | int slot; | ||
2602 | int ret; | ||
2603 | int err; | ||
2604 | int level; | ||
2605 | int lowest_unlock = 1; | ||
2606 | u8 lowest_level = 0; | ||
2607 | |||
2608 | lowest_level = p->lowest_level; | ||
2609 | WARN_ON(p->nodes[0] != NULL); | ||
2610 | |||
2611 | if (p->search_commit_root) { | ||
2612 | BUG_ON(time_seq); | ||
2613 | return btrfs_search_slot(NULL, root, key, p, 0, 0); | ||
2614 | } | ||
2615 | |||
2616 | again: | ||
2617 | b = get_old_root(root, time_seq); | ||
2618 | extent_buffer_get(b); | ||
2619 | level = btrfs_header_level(b); | ||
2620 | btrfs_tree_read_lock(b); | ||
2621 | p->locks[level] = BTRFS_READ_LOCK; | ||
2622 | |||
2623 | while (b) { | ||
2624 | level = btrfs_header_level(b); | ||
2625 | p->nodes[level] = b; | ||
2626 | btrfs_clear_path_blocking(p, NULL, 0); | ||
2627 | |||
2628 | /* | ||
2629 | * we have a lock on b and as long as we aren't changing | ||
2630 | * the tree, there is no way to for the items in b to change. | ||
2631 | * It is safe to drop the lock on our parent before we | ||
2632 | * go through the expensive btree search on b. | ||
2633 | */ | ||
2634 | btrfs_unlock_up_safe(p, level + 1); | ||
2635 | |||
2636 | ret = bin_search(b, key, level, &slot); | ||
2637 | |||
2638 | if (level != 0) { | ||
2639 | int dec = 0; | ||
2640 | if (ret && slot > 0) { | ||
2641 | dec = 1; | ||
2642 | slot -= 1; | ||
2643 | } | ||
2644 | p->slots[level] = slot; | ||
2645 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
2646 | |||
2647 | if (level == lowest_level) { | ||
2648 | if (dec) | ||
2649 | p->slots[level]++; | ||
2650 | goto done; | ||
2651 | } | ||
2652 | |||
2653 | err = read_block_for_search(NULL, root, p, &b, level, | ||
2654 | slot, key, time_seq); | ||
2655 | if (err == -EAGAIN) | ||
2656 | goto again; | ||
2657 | if (err) { | ||
2658 | ret = err; | ||
2659 | goto done; | ||
2660 | } | ||
2661 | |||
2662 | level = btrfs_header_level(b); | ||
2663 | err = btrfs_try_tree_read_lock(b); | ||
2664 | if (!err) { | ||
2665 | btrfs_set_path_blocking(p); | ||
2666 | btrfs_tree_read_lock(b); | ||
2667 | btrfs_clear_path_blocking(p, b, | ||
2668 | BTRFS_READ_LOCK); | ||
2669 | } | ||
2670 | p->locks[level] = BTRFS_READ_LOCK; | ||
2671 | p->nodes[level] = b; | ||
2672 | b = tree_mod_log_rewind(root->fs_info, b, time_seq); | ||
2673 | if (b != p->nodes[level]) { | ||
2674 | btrfs_tree_unlock_rw(p->nodes[level], | ||
2675 | p->locks[level]); | ||
2676 | p->locks[level] = 0; | ||
2677 | p->nodes[level] = b; | ||
2678 | } | ||
2679 | } else { | ||
2680 | p->slots[level] = slot; | ||
2681 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
2682 | goto done; | ||
2683 | } | ||
2684 | } | ||
2685 | ret = 1; | ||
2686 | done: | ||
2687 | if (!p->leave_spinning) | ||
2688 | btrfs_set_path_blocking(p); | ||
2689 | if (ret < 0) | ||
2690 | btrfs_release_path(p); | ||
2691 | |||
2692 | return ret; | ||
2693 | } | ||
2694 | |||
2695 | /* | ||
1916 | * adjust the pointers going up the tree, starting at level | 2696 | * adjust the pointers going up the tree, starting at level |
1917 | * making sure the right key of each node is points to 'key'. | 2697 | * making sure the right key of each node is points to 'key'. |
1918 | * This is used after shifting pointers to the left, so it stops | 2698 | * This is used after shifting pointers to the left, so it stops |
@@ -1932,6 +2712,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, | |||
1932 | if (!path->nodes[i]) | 2712 | if (!path->nodes[i]) |
1933 | break; | 2713 | break; |
1934 | t = path->nodes[i]; | 2714 | t = path->nodes[i]; |
2715 | tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); | ||
1935 | btrfs_set_node_key(t, key, tslot); | 2716 | btrfs_set_node_key(t, key, tslot); |
1936 | btrfs_mark_buffer_dirty(path->nodes[i]); | 2717 | btrfs_mark_buffer_dirty(path->nodes[i]); |
1937 | if (tslot != 0) | 2718 | if (tslot != 0) |
@@ -2014,12 +2795,16 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
2014 | } else | 2795 | } else |
2015 | push_items = min(src_nritems - 8, push_items); | 2796 | push_items = min(src_nritems - 8, push_items); |
2016 | 2797 | ||
2798 | tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, | ||
2799 | push_items); | ||
2017 | copy_extent_buffer(dst, src, | 2800 | copy_extent_buffer(dst, src, |
2018 | btrfs_node_key_ptr_offset(dst_nritems), | 2801 | btrfs_node_key_ptr_offset(dst_nritems), |
2019 | btrfs_node_key_ptr_offset(0), | 2802 | btrfs_node_key_ptr_offset(0), |
2020 | push_items * sizeof(struct btrfs_key_ptr)); | 2803 | push_items * sizeof(struct btrfs_key_ptr)); |
2021 | 2804 | ||
2022 | if (push_items < src_nritems) { | 2805 | if (push_items < src_nritems) { |
2806 | tree_mod_log_eb_move(root->fs_info, src, 0, push_items, | ||
2807 | src_nritems - push_items); | ||
2023 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), | 2808 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), |
2024 | btrfs_node_key_ptr_offset(push_items), | 2809 | btrfs_node_key_ptr_offset(push_items), |
2025 | (src_nritems - push_items) * | 2810 | (src_nritems - push_items) * |
@@ -2073,11 +2858,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
2073 | if (max_push < push_items) | 2858 | if (max_push < push_items) |
2074 | push_items = max_push; | 2859 | push_items = max_push; |
2075 | 2860 | ||
2861 | tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems); | ||
2076 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), | 2862 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), |
2077 | btrfs_node_key_ptr_offset(0), | 2863 | btrfs_node_key_ptr_offset(0), |
2078 | (dst_nritems) * | 2864 | (dst_nritems) * |
2079 | sizeof(struct btrfs_key_ptr)); | 2865 | sizeof(struct btrfs_key_ptr)); |
2080 | 2866 | ||
2867 | tree_mod_log_eb_copy(root->fs_info, dst, src, 0, | ||
2868 | src_nritems - push_items, push_items); | ||
2081 | copy_extent_buffer(dst, src, | 2869 | copy_extent_buffer(dst, src, |
2082 | btrfs_node_key_ptr_offset(0), | 2870 | btrfs_node_key_ptr_offset(0), |
2083 | btrfs_node_key_ptr_offset(src_nritems - push_items), | 2871 | btrfs_node_key_ptr_offset(src_nritems - push_items), |
@@ -2120,7 +2908,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2120 | 2908 | ||
2121 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 2909 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2122 | root->root_key.objectid, &lower_key, | 2910 | root->root_key.objectid, &lower_key, |
2123 | level, root->node->start, 0, 0); | 2911 | level, root->node->start, 0); |
2124 | if (IS_ERR(c)) | 2912 | if (IS_ERR(c)) |
2125 | return PTR_ERR(c); | 2913 | return PTR_ERR(c); |
2126 | 2914 | ||
@@ -2152,6 +2940,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2152 | btrfs_mark_buffer_dirty(c); | 2940 | btrfs_mark_buffer_dirty(c); |
2153 | 2941 | ||
2154 | old = root->node; | 2942 | old = root->node; |
2943 | tree_mod_log_set_root_pointer(root, c); | ||
2155 | rcu_assign_pointer(root->node, c); | 2944 | rcu_assign_pointer(root->node, c); |
2156 | 2945 | ||
2157 | /* the super has an extra ref to root->node */ | 2946 | /* the super has an extra ref to root->node */ |
@@ -2175,10 +2964,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2175 | static void insert_ptr(struct btrfs_trans_handle *trans, | 2964 | static void insert_ptr(struct btrfs_trans_handle *trans, |
2176 | struct btrfs_root *root, struct btrfs_path *path, | 2965 | struct btrfs_root *root, struct btrfs_path *path, |
2177 | struct btrfs_disk_key *key, u64 bytenr, | 2966 | struct btrfs_disk_key *key, u64 bytenr, |
2178 | int slot, int level) | 2967 | int slot, int level, int tree_mod_log) |
2179 | { | 2968 | { |
2180 | struct extent_buffer *lower; | 2969 | struct extent_buffer *lower; |
2181 | int nritems; | 2970 | int nritems; |
2971 | int ret; | ||
2182 | 2972 | ||
2183 | BUG_ON(!path->nodes[level]); | 2973 | BUG_ON(!path->nodes[level]); |
2184 | btrfs_assert_tree_locked(path->nodes[level]); | 2974 | btrfs_assert_tree_locked(path->nodes[level]); |
@@ -2187,11 +2977,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans, | |||
2187 | BUG_ON(slot > nritems); | 2977 | BUG_ON(slot > nritems); |
2188 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); | 2978 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); |
2189 | if (slot != nritems) { | 2979 | if (slot != nritems) { |
2980 | if (tree_mod_log && level) | ||
2981 | tree_mod_log_eb_move(root->fs_info, lower, slot + 1, | ||
2982 | slot, nritems - slot); | ||
2190 | memmove_extent_buffer(lower, | 2983 | memmove_extent_buffer(lower, |
2191 | btrfs_node_key_ptr_offset(slot + 1), | 2984 | btrfs_node_key_ptr_offset(slot + 1), |
2192 | btrfs_node_key_ptr_offset(slot), | 2985 | btrfs_node_key_ptr_offset(slot), |
2193 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); | 2986 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); |
2194 | } | 2987 | } |
2988 | if (tree_mod_log && level) { | ||
2989 | ret = tree_mod_log_insert_key(root->fs_info, lower, slot, | ||
2990 | MOD_LOG_KEY_ADD); | ||
2991 | BUG_ON(ret < 0); | ||
2992 | } | ||
2195 | btrfs_set_node_key(lower, key, slot); | 2993 | btrfs_set_node_key(lower, key, slot); |
2196 | btrfs_set_node_blockptr(lower, slot, bytenr); | 2994 | btrfs_set_node_blockptr(lower, slot, bytenr); |
2197 | WARN_ON(trans->transid == 0); | 2995 | WARN_ON(trans->transid == 0); |
@@ -2243,7 +3041,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2243 | 3041 | ||
2244 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3042 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2245 | root->root_key.objectid, | 3043 | root->root_key.objectid, |
2246 | &disk_key, level, c->start, 0, 0); | 3044 | &disk_key, level, c->start, 0); |
2247 | if (IS_ERR(split)) | 3045 | if (IS_ERR(split)) |
2248 | return PTR_ERR(split); | 3046 | return PTR_ERR(split); |
2249 | 3047 | ||
@@ -2262,7 +3060,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2262 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | 3060 | (unsigned long)btrfs_header_chunk_tree_uuid(split), |
2263 | BTRFS_UUID_SIZE); | 3061 | BTRFS_UUID_SIZE); |
2264 | 3062 | ||
2265 | 3063 | tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); | |
2266 | copy_extent_buffer(split, c, | 3064 | copy_extent_buffer(split, c, |
2267 | btrfs_node_key_ptr_offset(0), | 3065 | btrfs_node_key_ptr_offset(0), |
2268 | btrfs_node_key_ptr_offset(mid), | 3066 | btrfs_node_key_ptr_offset(mid), |
@@ -2275,7 +3073,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2275 | btrfs_mark_buffer_dirty(split); | 3073 | btrfs_mark_buffer_dirty(split); |
2276 | 3074 | ||
2277 | insert_ptr(trans, root, path, &disk_key, split->start, | 3075 | insert_ptr(trans, root, path, &disk_key, split->start, |
2278 | path->slots[level + 1] + 1, level + 1); | 3076 | path->slots[level + 1] + 1, level + 1, 1); |
2279 | 3077 | ||
2280 | if (path->slots[level] >= mid) { | 3078 | if (path->slots[level] >= mid) { |
2281 | path->slots[level] -= mid; | 3079 | path->slots[level] -= mid; |
@@ -2812,7 +3610,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, | |||
2812 | btrfs_set_header_nritems(l, mid); | 3610 | btrfs_set_header_nritems(l, mid); |
2813 | btrfs_item_key(right, &disk_key, 0); | 3611 | btrfs_item_key(right, &disk_key, 0); |
2814 | insert_ptr(trans, root, path, &disk_key, right->start, | 3612 | insert_ptr(trans, root, path, &disk_key, right->start, |
2815 | path->slots[1] + 1, 1); | 3613 | path->slots[1] + 1, 1, 0); |
2816 | 3614 | ||
2817 | btrfs_mark_buffer_dirty(right); | 3615 | btrfs_mark_buffer_dirty(right); |
2818 | btrfs_mark_buffer_dirty(l); | 3616 | btrfs_mark_buffer_dirty(l); |
@@ -2995,7 +3793,7 @@ again: | |||
2995 | 3793 | ||
2996 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 3794 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
2997 | root->root_key.objectid, | 3795 | root->root_key.objectid, |
2998 | &disk_key, 0, l->start, 0, 0); | 3796 | &disk_key, 0, l->start, 0); |
2999 | if (IS_ERR(right)) | 3797 | if (IS_ERR(right)) |
3000 | return PTR_ERR(right); | 3798 | return PTR_ERR(right); |
3001 | 3799 | ||
@@ -3019,7 +3817,7 @@ again: | |||
3019 | if (mid <= slot) { | 3817 | if (mid <= slot) { |
3020 | btrfs_set_header_nritems(right, 0); | 3818 | btrfs_set_header_nritems(right, 0); |
3021 | insert_ptr(trans, root, path, &disk_key, right->start, | 3819 | insert_ptr(trans, root, path, &disk_key, right->start, |
3022 | path->slots[1] + 1, 1); | 3820 | path->slots[1] + 1, 1, 0); |
3023 | btrfs_tree_unlock(path->nodes[0]); | 3821 | btrfs_tree_unlock(path->nodes[0]); |
3024 | free_extent_buffer(path->nodes[0]); | 3822 | free_extent_buffer(path->nodes[0]); |
3025 | path->nodes[0] = right; | 3823 | path->nodes[0] = right; |
@@ -3028,7 +3826,7 @@ again: | |||
3028 | } else { | 3826 | } else { |
3029 | btrfs_set_header_nritems(right, 0); | 3827 | btrfs_set_header_nritems(right, 0); |
3030 | insert_ptr(trans, root, path, &disk_key, right->start, | 3828 | insert_ptr(trans, root, path, &disk_key, right->start, |
3031 | path->slots[1], 1); | 3829 | path->slots[1], 1, 0); |
3032 | btrfs_tree_unlock(path->nodes[0]); | 3830 | btrfs_tree_unlock(path->nodes[0]); |
3033 | free_extent_buffer(path->nodes[0]); | 3831 | free_extent_buffer(path->nodes[0]); |
3034 | path->nodes[0] = right; | 3832 | path->nodes[0] = right; |
@@ -3740,19 +4538,29 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3740 | * empty a node. | 4538 | * empty a node. |
3741 | */ | 4539 | */ |
3742 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 4540 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3743 | struct btrfs_path *path, int level, int slot) | 4541 | struct btrfs_path *path, int level, int slot, |
4542 | int tree_mod_log) | ||
3744 | { | 4543 | { |
3745 | struct extent_buffer *parent = path->nodes[level]; | 4544 | struct extent_buffer *parent = path->nodes[level]; |
3746 | u32 nritems; | 4545 | u32 nritems; |
4546 | int ret; | ||
3747 | 4547 | ||
3748 | nritems = btrfs_header_nritems(parent); | 4548 | nritems = btrfs_header_nritems(parent); |
3749 | if (slot != nritems - 1) { | 4549 | if (slot != nritems - 1) { |
4550 | if (tree_mod_log && level) | ||
4551 | tree_mod_log_eb_move(root->fs_info, parent, slot, | ||
4552 | slot + 1, nritems - slot - 1); | ||
3750 | memmove_extent_buffer(parent, | 4553 | memmove_extent_buffer(parent, |
3751 | btrfs_node_key_ptr_offset(slot), | 4554 | btrfs_node_key_ptr_offset(slot), |
3752 | btrfs_node_key_ptr_offset(slot + 1), | 4555 | btrfs_node_key_ptr_offset(slot + 1), |
3753 | sizeof(struct btrfs_key_ptr) * | 4556 | sizeof(struct btrfs_key_ptr) * |
3754 | (nritems - slot - 1)); | 4557 | (nritems - slot - 1)); |
4558 | } else if (tree_mod_log && level) { | ||
4559 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, | ||
4560 | MOD_LOG_KEY_REMOVE); | ||
4561 | BUG_ON(ret < 0); | ||
3755 | } | 4562 | } |
4563 | |||
3756 | nritems--; | 4564 | nritems--; |
3757 | btrfs_set_header_nritems(parent, nritems); | 4565 | btrfs_set_header_nritems(parent, nritems); |
3758 | if (nritems == 0 && parent == root->node) { | 4566 | if (nritems == 0 && parent == root->node) { |
@@ -3784,7 +4592,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3784 | struct extent_buffer *leaf) | 4592 | struct extent_buffer *leaf) |
3785 | { | 4593 | { |
3786 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); | 4594 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); |
3787 | del_ptr(trans, root, path, 1, path->slots[1]); | 4595 | del_ptr(trans, root, path, 1, path->slots[1], 1); |
3788 | 4596 | ||
3789 | /* | 4597 | /* |
3790 | * btrfs_free_extent is expensive, we want to make sure we | 4598 | * btrfs_free_extent is expensive, we want to make sure we |
@@ -3795,7 +4603,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3795 | root_sub_used(root, leaf->len); | 4603 | root_sub_used(root, leaf->len); |
3796 | 4604 | ||
3797 | extent_buffer_get(leaf); | 4605 | extent_buffer_get(leaf); |
3798 | btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); | 4606 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
3799 | free_extent_buffer_stale(leaf); | 4607 | free_extent_buffer_stale(leaf); |
3800 | } | 4608 | } |
3801 | /* | 4609 | /* |
@@ -4043,7 +4851,7 @@ again: | |||
4043 | tmp = btrfs_find_tree_block(root, blockptr, | 4851 | tmp = btrfs_find_tree_block(root, blockptr, |
4044 | btrfs_level_size(root, level - 1)); | 4852 | btrfs_level_size(root, level - 1)); |
4045 | 4853 | ||
4046 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | 4854 | if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) { |
4047 | free_extent_buffer(tmp); | 4855 | free_extent_buffer(tmp); |
4048 | break; | 4856 | break; |
4049 | } | 4857 | } |
@@ -4166,7 +4974,8 @@ next: | |||
4166 | struct extent_buffer *cur; | 4974 | struct extent_buffer *cur; |
4167 | cur = btrfs_find_tree_block(root, blockptr, | 4975 | cur = btrfs_find_tree_block(root, blockptr, |
4168 | btrfs_level_size(root, level - 1)); | 4976 | btrfs_level_size(root, level - 1)); |
4169 | if (!cur || !btrfs_buffer_uptodate(cur, gen)) { | 4977 | if (!cur || |
4978 | btrfs_buffer_uptodate(cur, gen, 1) <= 0) { | ||
4170 | slot++; | 4979 | slot++; |
4171 | if (cur) | 4980 | if (cur) |
4172 | free_extent_buffer(cur); | 4981 | free_extent_buffer(cur); |
@@ -4261,7 +5070,7 @@ again: | |||
4261 | next = c; | 5070 | next = c; |
4262 | next_rw_lock = path->locks[level]; | 5071 | next_rw_lock = path->locks[level]; |
4263 | ret = read_block_for_search(NULL, root, path, &next, level, | 5072 | ret = read_block_for_search(NULL, root, path, &next, level, |
4264 | slot, &key); | 5073 | slot, &key, 0); |
4265 | if (ret == -EAGAIN) | 5074 | if (ret == -EAGAIN) |
4266 | goto again; | 5075 | goto again; |
4267 | 5076 | ||
@@ -4298,7 +5107,7 @@ again: | |||
4298 | break; | 5107 | break; |
4299 | 5108 | ||
4300 | ret = read_block_for_search(NULL, root, path, &next, level, | 5109 | ret = read_block_for_search(NULL, root, path, &next, level, |
4301 | 0, &key); | 5110 | 0, &key, 0); |
4302 | if (ret == -EAGAIN) | 5111 | if (ret == -EAGAIN) |
4303 | goto again; | 5112 | goto again; |
4304 | 5113 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8fd72331d600..0151ca1ac657 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -173,6 +173,9 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
173 | #define BTRFS_FT_XATTR 8 | 173 | #define BTRFS_FT_XATTR 8 |
174 | #define BTRFS_FT_MAX 9 | 174 | #define BTRFS_FT_MAX 9 |
175 | 175 | ||
176 | /* ioprio of readahead is set to idle */ | ||
177 | #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) | ||
178 | |||
176 | /* | 179 | /* |
177 | * The key defines the order in the tree, and so it also defines (optimal) | 180 | * The key defines the order in the tree, and so it also defines (optimal) |
178 | * block layout. | 181 | * block layout. |
@@ -823,6 +826,14 @@ struct btrfs_csum_item { | |||
823 | u8 csum; | 826 | u8 csum; |
824 | } __attribute__ ((__packed__)); | 827 | } __attribute__ ((__packed__)); |
825 | 828 | ||
829 | struct btrfs_dev_stats_item { | ||
830 | /* | ||
831 | * grow this item struct at the end for future enhancements and keep | ||
832 | * the existing values unchanged | ||
833 | */ | ||
834 | __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
835 | } __attribute__ ((__packed__)); | ||
836 | |||
826 | /* different types of block groups (and chunks) */ | 837 | /* different types of block groups (and chunks) */ |
827 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) | 838 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) |
828 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) | 839 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) |
@@ -1129,6 +1140,15 @@ struct btrfs_fs_info { | |||
1129 | spinlock_t delayed_iput_lock; | 1140 | spinlock_t delayed_iput_lock; |
1130 | struct list_head delayed_iputs; | 1141 | struct list_head delayed_iputs; |
1131 | 1142 | ||
1143 | /* this protects tree_mod_seq_list */ | ||
1144 | spinlock_t tree_mod_seq_lock; | ||
1145 | atomic_t tree_mod_seq; | ||
1146 | struct list_head tree_mod_seq_list; | ||
1147 | |||
1148 | /* this protects tree_mod_log */ | ||
1149 | rwlock_t tree_mod_log_lock; | ||
1150 | struct rb_root tree_mod_log; | ||
1151 | |||
1132 | atomic_t nr_async_submits; | 1152 | atomic_t nr_async_submits; |
1133 | atomic_t async_submit_draining; | 1153 | atomic_t async_submit_draining; |
1134 | atomic_t nr_async_bios; | 1154 | atomic_t nr_async_bios; |
@@ -1375,7 +1395,7 @@ struct btrfs_root { | |||
1375 | struct list_head root_list; | 1395 | struct list_head root_list; |
1376 | 1396 | ||
1377 | spinlock_t orphan_lock; | 1397 | spinlock_t orphan_lock; |
1378 | struct list_head orphan_list; | 1398 | atomic_t orphan_inodes; |
1379 | struct btrfs_block_rsv *orphan_block_rsv; | 1399 | struct btrfs_block_rsv *orphan_block_rsv; |
1380 | int orphan_item_inserted; | 1400 | int orphan_item_inserted; |
1381 | int orphan_cleanup_state; | 1401 | int orphan_cleanup_state; |
@@ -1508,6 +1528,12 @@ struct btrfs_ioctl_defrag_range_args { | |||
1508 | #define BTRFS_BALANCE_ITEM_KEY 248 | 1528 | #define BTRFS_BALANCE_ITEM_KEY 248 |
1509 | 1529 | ||
1510 | /* | 1530 | /* |
1531 | * Persistantly stores the io stats in the device tree. | ||
1532 | * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). | ||
1533 | */ | ||
1534 | #define BTRFS_DEV_STATS_KEY 249 | ||
1535 | |||
1536 | /* | ||
1511 | * string items are for debugging. They just store a short string of | 1537 | * string items are for debugging. They just store a short string of |
1512 | * data in the FS | 1538 | * data in the FS |
1513 | */ | 1539 | */ |
@@ -2415,6 +2441,30 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, | |||
2415 | return btrfs_item_size(eb, e) - offset; | 2441 | return btrfs_item_size(eb, e) - offset; |
2416 | } | 2442 | } |
2417 | 2443 | ||
2444 | /* btrfs_dev_stats_item */ | ||
2445 | static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, | ||
2446 | struct btrfs_dev_stats_item *ptr, | ||
2447 | int index) | ||
2448 | { | ||
2449 | u64 val; | ||
2450 | |||
2451 | read_extent_buffer(eb, &val, | ||
2452 | offsetof(struct btrfs_dev_stats_item, values) + | ||
2453 | ((unsigned long)ptr) + (index * sizeof(u64)), | ||
2454 | sizeof(val)); | ||
2455 | return val; | ||
2456 | } | ||
2457 | |||
2458 | static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb, | ||
2459 | struct btrfs_dev_stats_item *ptr, | ||
2460 | int index, u64 val) | ||
2461 | { | ||
2462 | write_extent_buffer(eb, &val, | ||
2463 | offsetof(struct btrfs_dev_stats_item, values) + | ||
2464 | ((unsigned long)ptr) + (index * sizeof(u64)), | ||
2465 | sizeof(val)); | ||
2466 | } | ||
2467 | |||
2418 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | 2468 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
2419 | { | 2469 | { |
2420 | return sb->s_fs_info; | 2470 | return sb->s_fs_info; |
@@ -2496,11 +2546,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
2496 | struct btrfs_root *root, u32 blocksize, | 2546 | struct btrfs_root *root, u32 blocksize, |
2497 | u64 parent, u64 root_objectid, | 2547 | u64 parent, u64 root_objectid, |
2498 | struct btrfs_disk_key *key, int level, | 2548 | struct btrfs_disk_key *key, int level, |
2499 | u64 hint, u64 empty_size, int for_cow); | 2549 | u64 hint, u64 empty_size); |
2500 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2550 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
2501 | struct btrfs_root *root, | 2551 | struct btrfs_root *root, |
2502 | struct extent_buffer *buf, | 2552 | struct extent_buffer *buf, |
2503 | u64 parent, int last_ref, int for_cow); | 2553 | u64 parent, int last_ref); |
2504 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2554 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
2505 | struct btrfs_root *root, | 2555 | struct btrfs_root *root, |
2506 | u64 bytenr, u32 blocksize, | 2556 | u64 bytenr, u32 blocksize, |
@@ -2659,6 +2709,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, | |||
2659 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | 2709 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root |
2660 | *root, struct btrfs_key *key, struct btrfs_path *p, int | 2710 | *root, struct btrfs_key *key, struct btrfs_path *p, int |
2661 | ins_len, int cow); | 2711 | ins_len, int cow); |
2712 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
2713 | struct btrfs_path *p, u64 time_seq); | ||
2662 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 2714 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
2663 | struct btrfs_root *root, struct extent_buffer *parent, | 2715 | struct btrfs_root *root, struct extent_buffer *parent, |
2664 | int start_slot, int cache_only, u64 *last_ret, | 2716 | int start_slot, int cache_only, u64 *last_ret, |
@@ -3098,4 +3150,23 @@ void btrfs_reada_detach(void *handle); | |||
3098 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 3150 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, |
3099 | u64 start, int err); | 3151 | u64 start, int err); |
3100 | 3152 | ||
3153 | /* delayed seq elem */ | ||
3154 | struct seq_list { | ||
3155 | struct list_head list; | ||
3156 | u64 seq; | ||
3157 | u32 flags; | ||
3158 | }; | ||
3159 | |||
3160 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3161 | struct seq_list *elem); | ||
3162 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3163 | struct seq_list *elem); | ||
3164 | |||
3165 | static inline int is_fstree(u64 rootid) | ||
3166 | { | ||
3167 | if (rootid == BTRFS_FS_TREE_OBJECTID || | ||
3168 | (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
3169 | return 1; | ||
3170 | return 0; | ||
3171 | } | ||
3101 | #endif | 3172 | #endif |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 03e3748d84d0..c18d0442ae6d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
669 | return ret; | 669 | return ret; |
670 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { | 670 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { |
671 | spin_lock(&BTRFS_I(inode)->lock); | 671 | spin_lock(&BTRFS_I(inode)->lock); |
672 | if (BTRFS_I(inode)->delalloc_meta_reserved) { | 672 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
673 | BTRFS_I(inode)->delalloc_meta_reserved = 0; | 673 | &BTRFS_I(inode)->runtime_flags)) { |
674 | spin_unlock(&BTRFS_I(inode)->lock); | 674 | spin_unlock(&BTRFS_I(inode)->lock); |
675 | release = true; | 675 | release = true; |
676 | goto migrate; | 676 | goto migrate; |
@@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | |||
1706 | btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); | 1706 | btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); |
1707 | btrfs_set_stack_inode_generation(inode_item, | 1707 | btrfs_set_stack_inode_generation(inode_item, |
1708 | BTRFS_I(inode)->generation); | 1708 | BTRFS_I(inode)->generation); |
1709 | btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence); | 1709 | btrfs_set_stack_inode_sequence(inode_item, inode->i_version); |
1710 | btrfs_set_stack_inode_transid(inode_item, trans->transid); | 1710 | btrfs_set_stack_inode_transid(inode_item, trans->transid); |
1711 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); | 1711 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); |
1712 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); | 1712 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); |
@@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) | |||
1754 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); | 1754 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); |
1755 | inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); | 1755 | inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); |
1756 | BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); | 1756 | BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); |
1757 | BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); | 1757 | inode->i_version = btrfs_stack_inode_sequence(inode_item); |
1758 | inode->i_rdev = 0; | 1758 | inode->i_rdev = 0; |
1759 | *rdev = btrfs_stack_inode_rdev(inode_item); | 1759 | *rdev = btrfs_stack_inode_rdev(inode_item); |
1760 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); | 1760 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 69f22e3ab3bc..13ae7b04790e 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -525,7 +525,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
525 | ref->is_head = 0; | 525 | ref->is_head = 0; |
526 | ref->in_tree = 1; | 526 | ref->in_tree = 1; |
527 | 527 | ||
528 | if (need_ref_seq(for_cow, ref_root)) | 528 | if (is_fstree(ref_root)) |
529 | seq = inc_delayed_seq(delayed_refs); | 529 | seq = inc_delayed_seq(delayed_refs); |
530 | ref->seq = seq; | 530 | ref->seq = seq; |
531 | 531 | ||
@@ -584,7 +584,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
584 | ref->is_head = 0; | 584 | ref->is_head = 0; |
585 | ref->in_tree = 1; | 585 | ref->in_tree = 1; |
586 | 586 | ||
587 | if (need_ref_seq(for_cow, ref_root)) | 587 | if (is_fstree(ref_root)) |
588 | seq = inc_delayed_seq(delayed_refs); | 588 | seq = inc_delayed_seq(delayed_refs); |
589 | ref->seq = seq; | 589 | ref->seq = seq; |
590 | 590 | ||
@@ -658,10 +658,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, | 658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, |
659 | num_bytes, parent, ref_root, level, action, | 659 | num_bytes, parent, ref_root, level, action, |
660 | for_cow); | 660 | for_cow); |
661 | if (!need_ref_seq(for_cow, ref_root) && | 661 | if (!is_fstree(ref_root) && |
662 | waitqueue_active(&delayed_refs->seq_wait)) | 662 | waitqueue_active(&delayed_refs->seq_wait)) |
663 | wake_up(&delayed_refs->seq_wait); | 663 | wake_up(&delayed_refs->seq_wait); |
664 | spin_unlock(&delayed_refs->lock); | 664 | spin_unlock(&delayed_refs->lock); |
665 | |||
665 | return 0; | 666 | return 0; |
666 | } | 667 | } |
667 | 668 | ||
@@ -706,10 +707,11 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
706 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, | 707 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, |
707 | num_bytes, parent, ref_root, owner, offset, | 708 | num_bytes, parent, ref_root, owner, offset, |
708 | action, for_cow); | 709 | action, for_cow); |
709 | if (!need_ref_seq(for_cow, ref_root) && | 710 | if (!is_fstree(ref_root) && |
710 | waitqueue_active(&delayed_refs->seq_wait)) | 711 | waitqueue_active(&delayed_refs->seq_wait)) |
711 | wake_up(&delayed_refs->seq_wait); | 712 | wake_up(&delayed_refs->seq_wait); |
712 | spin_unlock(&delayed_refs->lock); | 713 | spin_unlock(&delayed_refs->lock); |
714 | |||
713 | return 0; | 715 | return 0; |
714 | } | 716 | } |
715 | 717 | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index d8f244d94925..413927fb9957 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -195,11 +195,6 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
196 | struct list_head *cluster, u64 search_start); | 196 | struct list_head *cluster, u64 search_start); |
197 | 197 | ||
198 | struct seq_list { | ||
199 | struct list_head list; | ||
200 | u64 seq; | ||
201 | }; | ||
202 | |||
203 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) | 198 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) |
204 | { | 199 | { |
205 | assert_spin_locked(&delayed_refs->lock); | 200 | assert_spin_locked(&delayed_refs->lock); |
@@ -230,25 +225,6 @@ int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | |||
230 | u64 seq); | 225 | u64 seq); |
231 | 226 | ||
232 | /* | 227 | /* |
233 | * delayed refs with a ref_seq > 0 must be held back during backref walking. | ||
234 | * this only applies to items in one of the fs-trees. for_cow items never need | ||
235 | * to be held back, so they won't get a ref_seq number. | ||
236 | */ | ||
237 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
238 | { | ||
239 | if (for_cow) | ||
240 | return 0; | ||
241 | |||
242 | if (rootid == BTRFS_FS_TREE_OBJECTID) | ||
243 | return 1; | ||
244 | |||
245 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
246 | return 1; | ||
247 | |||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * a node might live in a head or a regular ref, this lets you | 228 | * a node might live in a head or a regular ref, this lets you |
253 | * test for the proper type to use. | 229 | * test for the proper type to use. |
254 | */ | 230 | */ |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0c969beaad4..7ae51decf6d3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -323,7 +323,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
323 | * in the wrong place. | 323 | * in the wrong place. |
324 | */ | 324 | */ |
325 | static int verify_parent_transid(struct extent_io_tree *io_tree, | 325 | static int verify_parent_transid(struct extent_io_tree *io_tree, |
326 | struct extent_buffer *eb, u64 parent_transid) | 326 | struct extent_buffer *eb, u64 parent_transid, |
327 | int atomic) | ||
327 | { | 328 | { |
328 | struct extent_state *cached_state = NULL; | 329 | struct extent_state *cached_state = NULL; |
329 | int ret; | 330 | int ret; |
@@ -331,6 +332,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
331 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | 332 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) |
332 | return 0; | 333 | return 0; |
333 | 334 | ||
335 | if (atomic) | ||
336 | return -EAGAIN; | ||
337 | |||
334 | lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, | 338 | lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, |
335 | 0, &cached_state); | 339 | 0, &cached_state); |
336 | if (extent_buffer_uptodate(eb) && | 340 | if (extent_buffer_uptodate(eb) && |
@@ -372,7 +376,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
372 | ret = read_extent_buffer_pages(io_tree, eb, start, | 376 | ret = read_extent_buffer_pages(io_tree, eb, start, |
373 | WAIT_COMPLETE, | 377 | WAIT_COMPLETE, |
374 | btree_get_extent, mirror_num); | 378 | btree_get_extent, mirror_num); |
375 | if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) | 379 | if (!ret && !verify_parent_transid(io_tree, eb, |
380 | parent_transid, 0)) | ||
376 | break; | 381 | break; |
377 | 382 | ||
378 | /* | 383 | /* |
@@ -1148,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1148 | root->orphan_block_rsv = NULL; | 1153 | root->orphan_block_rsv = NULL; |
1149 | 1154 | ||
1150 | INIT_LIST_HEAD(&root->dirty_list); | 1155 | INIT_LIST_HEAD(&root->dirty_list); |
1151 | INIT_LIST_HEAD(&root->orphan_list); | ||
1152 | INIT_LIST_HEAD(&root->root_list); | 1156 | INIT_LIST_HEAD(&root->root_list); |
1153 | spin_lock_init(&root->orphan_lock); | 1157 | spin_lock_init(&root->orphan_lock); |
1154 | spin_lock_init(&root->inode_lock); | 1158 | spin_lock_init(&root->inode_lock); |
@@ -1161,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1161 | atomic_set(&root->log_commit[0], 0); | 1165 | atomic_set(&root->log_commit[0], 0); |
1162 | atomic_set(&root->log_commit[1], 0); | 1166 | atomic_set(&root->log_commit[1], 0); |
1163 | atomic_set(&root->log_writers, 0); | 1167 | atomic_set(&root->log_writers, 0); |
1168 | atomic_set(&root->orphan_inodes, 0); | ||
1164 | root->log_batch = 0; | 1169 | root->log_batch = 0; |
1165 | root->log_transid = 0; | 1170 | root->log_transid = 0; |
1166 | root->last_log_commit = 0; | 1171 | root->last_log_commit = 0; |
@@ -1202,7 +1207,7 @@ static int __must_check find_and_setup_root(struct btrfs_root *tree_root, | |||
1202 | root->commit_root = NULL; | 1207 | root->commit_root = NULL; |
1203 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1208 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1204 | blocksize, generation); | 1209 | blocksize, generation); |
1205 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { | 1210 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) { |
1206 | free_extent_buffer(root->node); | 1211 | free_extent_buffer(root->node); |
1207 | root->node = NULL; | 1212 | root->node = NULL; |
1208 | return -EIO; | 1213 | return -EIO; |
@@ -1247,7 +1252,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1247 | 1252 | ||
1248 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1253 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
1249 | BTRFS_TREE_LOG_OBJECTID, NULL, | 1254 | BTRFS_TREE_LOG_OBJECTID, NULL, |
1250 | 0, 0, 0, 0); | 1255 | 0, 0, 0); |
1251 | if (IS_ERR(leaf)) { | 1256 | if (IS_ERR(leaf)) { |
1252 | kfree(root); | 1257 | kfree(root); |
1253 | return ERR_CAST(leaf); | 1258 | return ERR_CAST(leaf); |
@@ -1909,11 +1914,14 @@ int open_ctree(struct super_block *sb, | |||
1909 | spin_lock_init(&fs_info->delayed_iput_lock); | 1914 | spin_lock_init(&fs_info->delayed_iput_lock); |
1910 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1915 | spin_lock_init(&fs_info->defrag_inodes_lock); |
1911 | spin_lock_init(&fs_info->free_chunk_lock); | 1916 | spin_lock_init(&fs_info->free_chunk_lock); |
1917 | spin_lock_init(&fs_info->tree_mod_seq_lock); | ||
1918 | rwlock_init(&fs_info->tree_mod_log_lock); | ||
1912 | mutex_init(&fs_info->reloc_mutex); | 1919 | mutex_init(&fs_info->reloc_mutex); |
1913 | 1920 | ||
1914 | init_completion(&fs_info->kobj_unregister); | 1921 | init_completion(&fs_info->kobj_unregister); |
1915 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1922 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1916 | INIT_LIST_HEAD(&fs_info->space_info); | 1923 | INIT_LIST_HEAD(&fs_info->space_info); |
1924 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | ||
1917 | btrfs_mapping_init(&fs_info->mapping_tree); | 1925 | btrfs_mapping_init(&fs_info->mapping_tree); |
1918 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | 1926 | btrfs_init_block_rsv(&fs_info->global_block_rsv); |
1919 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | 1927 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); |
@@ -1926,12 +1934,14 @@ int open_ctree(struct super_block *sb, | |||
1926 | atomic_set(&fs_info->async_submit_draining, 0); | 1934 | atomic_set(&fs_info->async_submit_draining, 0); |
1927 | atomic_set(&fs_info->nr_async_bios, 0); | 1935 | atomic_set(&fs_info->nr_async_bios, 0); |
1928 | atomic_set(&fs_info->defrag_running, 0); | 1936 | atomic_set(&fs_info->defrag_running, 0); |
1937 | atomic_set(&fs_info->tree_mod_seq, 0); | ||
1929 | fs_info->sb = sb; | 1938 | fs_info->sb = sb; |
1930 | fs_info->max_inline = 8192 * 1024; | 1939 | fs_info->max_inline = 8192 * 1024; |
1931 | fs_info->metadata_ratio = 0; | 1940 | fs_info->metadata_ratio = 0; |
1932 | fs_info->defrag_inodes = RB_ROOT; | 1941 | fs_info->defrag_inodes = RB_ROOT; |
1933 | fs_info->trans_no_join = 0; | 1942 | fs_info->trans_no_join = 0; |
1934 | fs_info->free_chunk_space = 0; | 1943 | fs_info->free_chunk_space = 0; |
1944 | fs_info->tree_mod_log = RB_ROOT; | ||
1935 | 1945 | ||
1936 | /* readahead state */ | 1946 | /* readahead state */ |
1937 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | 1947 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); |
@@ -1996,7 +2006,8 @@ int open_ctree(struct super_block *sb, | |||
1996 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | 2006 | BTRFS_I(fs_info->btree_inode)->root = tree_root; |
1997 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | 2007 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, |
1998 | sizeof(struct btrfs_key)); | 2008 | sizeof(struct btrfs_key)); |
1999 | BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; | 2009 | set_bit(BTRFS_INODE_DUMMY, |
2010 | &BTRFS_I(fs_info->btree_inode)->runtime_flags); | ||
2000 | insert_inode_hash(fs_info->btree_inode); | 2011 | insert_inode_hash(fs_info->btree_inode); |
2001 | 2012 | ||
2002 | spin_lock_init(&fs_info->block_group_cache_lock); | 2013 | spin_lock_init(&fs_info->block_group_cache_lock); |
@@ -2348,6 +2359,13 @@ retry_root_backup: | |||
2348 | fs_info->generation = generation; | 2359 | fs_info->generation = generation; |
2349 | fs_info->last_trans_committed = generation; | 2360 | fs_info->last_trans_committed = generation; |
2350 | 2361 | ||
2362 | ret = btrfs_init_dev_stats(fs_info); | ||
2363 | if (ret) { | ||
2364 | printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", | ||
2365 | ret); | ||
2366 | goto fail_block_groups; | ||
2367 | } | ||
2368 | |||
2351 | ret = btrfs_init_space_info(fs_info); | 2369 | ret = btrfs_init_space_info(fs_info); |
2352 | if (ret) { | 2370 | if (ret) { |
2353 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | 2371 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); |
@@ -2551,18 +2569,19 @@ recovery_tree_root: | |||
2551 | 2569 | ||
2552 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 2570 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
2553 | { | 2571 | { |
2554 | char b[BDEVNAME_SIZE]; | ||
2555 | |||
2556 | if (uptodate) { | 2572 | if (uptodate) { |
2557 | set_buffer_uptodate(bh); | 2573 | set_buffer_uptodate(bh); |
2558 | } else { | 2574 | } else { |
2575 | struct btrfs_device *device = (struct btrfs_device *) | ||
2576 | bh->b_private; | ||
2577 | |||
2559 | printk_ratelimited(KERN_WARNING "lost page write due to " | 2578 | printk_ratelimited(KERN_WARNING "lost page write due to " |
2560 | "I/O error on %s\n", | 2579 | "I/O error on %s\n", device->name); |
2561 | bdevname(bh->b_bdev, b)); | ||
2562 | /* note, we don't set_buffer_write_io_error because we have | 2580 | /* note, we don't set_buffer_write_io_error because we have |
2563 | * our own ways of dealing with the IO errors | 2581 | * our own ways of dealing with the IO errors |
2564 | */ | 2582 | */ |
2565 | clear_buffer_uptodate(bh); | 2583 | clear_buffer_uptodate(bh); |
2584 | btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); | ||
2566 | } | 2585 | } |
2567 | unlock_buffer(bh); | 2586 | unlock_buffer(bh); |
2568 | put_bh(bh); | 2587 | put_bh(bh); |
@@ -2677,6 +2696,7 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2677 | set_buffer_uptodate(bh); | 2696 | set_buffer_uptodate(bh); |
2678 | lock_buffer(bh); | 2697 | lock_buffer(bh); |
2679 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2698 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2699 | bh->b_private = device; | ||
2680 | } | 2700 | } |
2681 | 2701 | ||
2682 | /* | 2702 | /* |
@@ -2735,6 +2755,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
2735 | } | 2755 | } |
2736 | if (!bio_flagged(bio, BIO_UPTODATE)) { | 2756 | if (!bio_flagged(bio, BIO_UPTODATE)) { |
2737 | ret = -EIO; | 2757 | ret = -EIO; |
2758 | if (!bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
2759 | btrfs_dev_stat_inc_and_print(device, | ||
2760 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
2738 | } | 2761 | } |
2739 | 2762 | ||
2740 | /* drop the reference from the wait == 0 run */ | 2763 | /* drop the reference from the wait == 0 run */ |
@@ -2748,7 +2771,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
2748 | * one reference for us, and we leave it for the | 2771 | * one reference for us, and we leave it for the |
2749 | * caller | 2772 | * caller |
2750 | */ | 2773 | */ |
2751 | device->flush_bio = NULL;; | 2774 | device->flush_bio = NULL; |
2752 | bio = bio_alloc(GFP_NOFS, 0); | 2775 | bio = bio_alloc(GFP_NOFS, 0); |
2753 | if (!bio) | 2776 | if (!bio) |
2754 | return -ENOMEM; | 2777 | return -ENOMEM; |
@@ -2897,19 +2920,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
2897 | return ret; | 2920 | return ret; |
2898 | } | 2921 | } |
2899 | 2922 | ||
2900 | /* Kill all outstanding I/O */ | ||
2901 | void btrfs_abort_devices(struct btrfs_root *root) | ||
2902 | { | ||
2903 | struct list_head *head; | ||
2904 | struct btrfs_device *dev; | ||
2905 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
2906 | head = &root->fs_info->fs_devices->devices; | ||
2907 | list_for_each_entry_rcu(dev, head, dev_list) { | ||
2908 | blk_abort_queue(dev->bdev->bd_disk->queue); | ||
2909 | } | ||
2910 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
2911 | } | ||
2912 | |||
2913 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2923 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
2914 | { | 2924 | { |
2915 | spin_lock(&fs_info->fs_roots_radix_lock); | 2925 | spin_lock(&fs_info->fs_roots_radix_lock); |
@@ -3143,7 +3153,8 @@ int close_ctree(struct btrfs_root *root) | |||
3143 | return 0; | 3153 | return 0; |
3144 | } | 3154 | } |
3145 | 3155 | ||
3146 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | 3156 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
3157 | int atomic) | ||
3147 | { | 3158 | { |
3148 | int ret; | 3159 | int ret; |
3149 | struct inode *btree_inode = buf->pages[0]->mapping->host; | 3160 | struct inode *btree_inode = buf->pages[0]->mapping->host; |
@@ -3153,7 +3164,9 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | |||
3153 | return ret; | 3164 | return ret; |
3154 | 3165 | ||
3155 | ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, | 3166 | ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, |
3156 | parent_transid); | 3167 | parent_transid, atomic); |
3168 | if (ret == -EAGAIN) | ||
3169 | return ret; | ||
3157 | return !ret; | 3170 | return !ret; |
3158 | } | 3171 | } |
3159 | 3172 | ||
@@ -3663,17 +3676,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3663 | return 0; | 3676 | return 0; |
3664 | } | 3677 | } |
3665 | 3678 | ||
3666 | static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page, | ||
3667 | u64 start, u64 end, | ||
3668 | struct extent_state *state) | ||
3669 | { | ||
3670 | struct super_block *sb = page->mapping->host->i_sb; | ||
3671 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
3672 | btrfs_error(fs_info, -EIO, | ||
3673 | "Error occured while writing out btree at %llu", start); | ||
3674 | return -EIO; | ||
3675 | } | ||
3676 | |||
3677 | static struct extent_io_ops btree_extent_io_ops = { | 3679 | static struct extent_io_ops btree_extent_io_ops = { |
3678 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3680 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
3679 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3681 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
@@ -3681,5 +3683,4 @@ static struct extent_io_ops btree_extent_io_ops = { | |||
3681 | .submit_bio_hook = btree_submit_bio_hook, | 3683 | .submit_bio_hook = btree_submit_bio_hook, |
3682 | /* note we're sharing with inode.c for the merge bio hook */ | 3684 | /* note we're sharing with inode.c for the merge bio hook */ |
3683 | .merge_bio_hook = btrfs_merge_bio_hook, | 3685 | .merge_bio_hook = btrfs_merge_bio_hook, |
3684 | .writepage_io_failed_hook = btree_writepage_io_failed_hook, | ||
3685 | }; | 3686 | }; |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a7ace1a2dd12..05b3fab39f7e 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -66,7 +66,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | |||
66 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 66 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); |
67 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 67 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); |
68 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 68 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
69 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); | 69 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
70 | int atomic); | ||
70 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); | 71 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); |
71 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); | 72 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); |
72 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); | 73 | u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); |
@@ -88,7 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
88 | int btrfs_cleanup_transaction(struct btrfs_root *root); | 89 | int btrfs_cleanup_transaction(struct btrfs_root *root); |
89 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, | 90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, |
90 | struct btrfs_root *root); | 91 | struct btrfs_root *root); |
91 | void btrfs_abort_devices(struct btrfs_root *root); | ||
92 | 92 | ||
93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
94 | void btrfs_init_lockdep(void); | 94 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6fc2e6f5aab8..4b5a1e1bdefb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3578,7 +3578,7 @@ again: | |||
3578 | space_info->chunk_alloc = 0; | 3578 | space_info->chunk_alloc = 0; |
3579 | spin_unlock(&space_info->lock); | 3579 | spin_unlock(&space_info->lock); |
3580 | out: | 3580 | out: |
3581 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3581 | mutex_unlock(&fs_info->chunk_mutex); |
3582 | return ret; | 3582 | return ret; |
3583 | } | 3583 | } |
3584 | 3584 | ||
@@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode) | |||
4355 | BTRFS_I(inode)->outstanding_extents--; | 4355 | BTRFS_I(inode)->outstanding_extents--; |
4356 | 4356 | ||
4357 | if (BTRFS_I(inode)->outstanding_extents == 0 && | 4357 | if (BTRFS_I(inode)->outstanding_extents == 0 && |
4358 | BTRFS_I(inode)->delalloc_meta_reserved) { | 4358 | test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
4359 | &BTRFS_I(inode)->runtime_flags)) | ||
4359 | drop_inode_space = 1; | 4360 | drop_inode_space = 1; |
4360 | BTRFS_I(inode)->delalloc_meta_reserved = 0; | ||
4361 | } | ||
4362 | 4361 | ||
4363 | /* | 4362 | /* |
4364 | * If we have more or the same amount of outstanding extents than we have | 4363 | * If we have more or the same amount of outstanding extents than we have |
@@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4465 | * Add an item to reserve for updating the inode when we complete the | 4464 | * Add an item to reserve for updating the inode when we complete the |
4466 | * delalloc io. | 4465 | * delalloc io. |
4467 | */ | 4466 | */ |
4468 | if (!BTRFS_I(inode)->delalloc_meta_reserved) { | 4467 | if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
4468 | &BTRFS_I(inode)->runtime_flags)) { | ||
4469 | nr_extents++; | 4469 | nr_extents++; |
4470 | extra_reserve = 1; | 4470 | extra_reserve = 1; |
4471 | } | 4471 | } |
@@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4511 | 4511 | ||
4512 | spin_lock(&BTRFS_I(inode)->lock); | 4512 | spin_lock(&BTRFS_I(inode)->lock); |
4513 | if (extra_reserve) { | 4513 | if (extra_reserve) { |
4514 | BTRFS_I(inode)->delalloc_meta_reserved = 1; | 4514 | set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
4515 | &BTRFS_I(inode)->runtime_flags); | ||
4515 | nr_extents--; | 4516 | nr_extents--; |
4516 | } | 4517 | } |
4517 | BTRFS_I(inode)->reserved_extents += nr_extents; | 4518 | BTRFS_I(inode)->reserved_extents += nr_extents; |
@@ -5217,7 +5218,7 @@ out: | |||
5217 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 5218 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
5218 | struct btrfs_root *root, | 5219 | struct btrfs_root *root, |
5219 | struct extent_buffer *buf, | 5220 | struct extent_buffer *buf, |
5220 | u64 parent, int last_ref, int for_cow) | 5221 | u64 parent, int last_ref) |
5221 | { | 5222 | { |
5222 | struct btrfs_block_group_cache *cache = NULL; | 5223 | struct btrfs_block_group_cache *cache = NULL; |
5223 | int ret; | 5224 | int ret; |
@@ -5227,7 +5228,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5227 | buf->start, buf->len, | 5228 | buf->start, buf->len, |
5228 | parent, root->root_key.objectid, | 5229 | parent, root->root_key.objectid, |
5229 | btrfs_header_level(buf), | 5230 | btrfs_header_level(buf), |
5230 | BTRFS_DROP_DELAYED_REF, NULL, for_cow); | 5231 | BTRFS_DROP_DELAYED_REF, NULL, 0); |
5231 | BUG_ON(ret); /* -ENOMEM */ | 5232 | BUG_ON(ret); /* -ENOMEM */ |
5232 | } | 5233 | } |
5233 | 5234 | ||
@@ -6249,7 +6250,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6249 | struct btrfs_root *root, u32 blocksize, | 6250 | struct btrfs_root *root, u32 blocksize, |
6250 | u64 parent, u64 root_objectid, | 6251 | u64 parent, u64 root_objectid, |
6251 | struct btrfs_disk_key *key, int level, | 6252 | struct btrfs_disk_key *key, int level, |
6252 | u64 hint, u64 empty_size, int for_cow) | 6253 | u64 hint, u64 empty_size) |
6253 | { | 6254 | { |
6254 | struct btrfs_key ins; | 6255 | struct btrfs_key ins; |
6255 | struct btrfs_block_rsv *block_rsv; | 6256 | struct btrfs_block_rsv *block_rsv; |
@@ -6297,7 +6298,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6297 | ins.objectid, | 6298 | ins.objectid, |
6298 | ins.offset, parent, root_objectid, | 6299 | ins.offset, parent, root_objectid, |
6299 | level, BTRFS_ADD_DELAYED_EXTENT, | 6300 | level, BTRFS_ADD_DELAYED_EXTENT, |
6300 | extent_op, for_cow); | 6301 | extent_op, 0); |
6301 | BUG_ON(ret); /* -ENOMEM */ | 6302 | BUG_ON(ret); /* -ENOMEM */ |
6302 | } | 6303 | } |
6303 | return buf; | 6304 | return buf; |
@@ -6568,7 +6569,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
6568 | goto skip; | 6569 | goto skip; |
6569 | } | 6570 | } |
6570 | 6571 | ||
6571 | if (!btrfs_buffer_uptodate(next, generation)) { | 6572 | if (!btrfs_buffer_uptodate(next, generation, 0)) { |
6572 | btrfs_tree_unlock(next); | 6573 | btrfs_tree_unlock(next); |
6573 | free_extent_buffer(next); | 6574 | free_extent_buffer(next); |
6574 | next = NULL; | 6575 | next = NULL; |
@@ -6715,7 +6716,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6715 | btrfs_header_owner(path->nodes[level + 1])); | 6716 | btrfs_header_owner(path->nodes[level + 1])); |
6716 | } | 6717 | } |
6717 | 6718 | ||
6718 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0); | 6719 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
6719 | out: | 6720 | out: |
6720 | wc->refs[level] = 0; | 6721 | wc->refs[level] = 0; |
6721 | wc->flags[level] = 0; | 6722 | wc->flags[level] = 0; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 198c2ba2fa40..2c8f7b204617 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
186 | return parent; | 186 | return parent; |
187 | } | 187 | } |
188 | 188 | ||
189 | entry = rb_entry(node, struct tree_entry, rb_node); | ||
190 | rb_link_node(node, parent, p); | 189 | rb_link_node(node, parent, p); |
191 | rb_insert_color(node, root); | 190 | rb_insert_color(node, root); |
192 | return NULL; | 191 | return NULL; |
@@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state) | |||
413 | 412 | ||
414 | /* | 413 | /* |
415 | * utility function to clear some bits in an extent state struct. | 414 | * utility function to clear some bits in an extent state struct. |
416 | * it will optionally wake up any one waiting on this state (wake == 1) | 415 | * it will optionally wake up any one waiting on this state (wake == 1). |
417 | * | 416 | * |
418 | * If no bits are set on the state struct after clearing things, the | 417 | * If no bits are set on the state struct after clearing things, the |
419 | * struct is freed and removed from the tree | 418 | * struct is freed and removed from the tree |
@@ -570,10 +569,8 @@ hit_next: | |||
570 | if (err) | 569 | if (err) |
571 | goto out; | 570 | goto out; |
572 | if (state->end <= end) { | 571 | if (state->end <= end) { |
573 | clear_state_bit(tree, state, &bits, wake); | 572 | state = clear_state_bit(tree, state, &bits, wake); |
574 | if (last_end == (u64)-1) | 573 | goto next; |
575 | goto out; | ||
576 | start = last_end + 1; | ||
577 | } | 574 | } |
578 | goto search_again; | 575 | goto search_again; |
579 | } | 576 | } |
@@ -781,7 +778,6 @@ hit_next: | |||
781 | * Just lock what we found and keep going | 778 | * Just lock what we found and keep going |
782 | */ | 779 | */ |
783 | if (state->start == start && state->end <= end) { | 780 | if (state->start == start && state->end <= end) { |
784 | struct rb_node *next_node; | ||
785 | if (state->state & exclusive_bits) { | 781 | if (state->state & exclusive_bits) { |
786 | *failed_start = state->start; | 782 | *failed_start = state->start; |
787 | err = -EEXIST; | 783 | err = -EEXIST; |
@@ -789,20 +785,15 @@ hit_next: | |||
789 | } | 785 | } |
790 | 786 | ||
791 | set_state_bits(tree, state, &bits); | 787 | set_state_bits(tree, state, &bits); |
792 | |||
793 | cache_state(state, cached_state); | 788 | cache_state(state, cached_state); |
794 | merge_state(tree, state); | 789 | merge_state(tree, state); |
795 | if (last_end == (u64)-1) | 790 | if (last_end == (u64)-1) |
796 | goto out; | 791 | goto out; |
797 | |||
798 | start = last_end + 1; | 792 | start = last_end + 1; |
799 | next_node = rb_next(&state->rb_node); | 793 | state = next_state(state); |
800 | if (next_node && start < end && prealloc && !need_resched()) { | 794 | if (start < end && state && state->start == start && |
801 | state = rb_entry(next_node, struct extent_state, | 795 | !need_resched()) |
802 | rb_node); | 796 | goto hit_next; |
803 | if (state->start == start) | ||
804 | goto hit_next; | ||
805 | } | ||
806 | goto search_again; | 797 | goto search_again; |
807 | } | 798 | } |
808 | 799 | ||
@@ -845,6 +836,10 @@ hit_next: | |||
845 | if (last_end == (u64)-1) | 836 | if (last_end == (u64)-1) |
846 | goto out; | 837 | goto out; |
847 | start = last_end + 1; | 838 | start = last_end + 1; |
839 | state = next_state(state); | ||
840 | if (start < end && state && state->start == start && | ||
841 | !need_resched()) | ||
842 | goto hit_next; | ||
848 | } | 843 | } |
849 | goto search_again; | 844 | goto search_again; |
850 | } | 845 | } |
@@ -994,21 +989,14 @@ hit_next: | |||
994 | * Just lock what we found and keep going | 989 | * Just lock what we found and keep going |
995 | */ | 990 | */ |
996 | if (state->start == start && state->end <= end) { | 991 | if (state->start == start && state->end <= end) { |
997 | struct rb_node *next_node; | ||
998 | |||
999 | set_state_bits(tree, state, &bits); | 992 | set_state_bits(tree, state, &bits); |
1000 | clear_state_bit(tree, state, &clear_bits, 0); | 993 | state = clear_state_bit(tree, state, &clear_bits, 0); |
1001 | if (last_end == (u64)-1) | 994 | if (last_end == (u64)-1) |
1002 | goto out; | 995 | goto out; |
1003 | |||
1004 | start = last_end + 1; | 996 | start = last_end + 1; |
1005 | next_node = rb_next(&state->rb_node); | 997 | if (start < end && state && state->start == start && |
1006 | if (next_node && start < end && prealloc && !need_resched()) { | 998 | !need_resched()) |
1007 | state = rb_entry(next_node, struct extent_state, | 999 | goto hit_next; |
1008 | rb_node); | ||
1009 | if (state->start == start) | ||
1010 | goto hit_next; | ||
1011 | } | ||
1012 | goto search_again; | 1000 | goto search_again; |
1013 | } | 1001 | } |
1014 | 1002 | ||
@@ -1042,10 +1030,13 @@ hit_next: | |||
1042 | goto out; | 1030 | goto out; |
1043 | if (state->end <= end) { | 1031 | if (state->end <= end) { |
1044 | set_state_bits(tree, state, &bits); | 1032 | set_state_bits(tree, state, &bits); |
1045 | clear_state_bit(tree, state, &clear_bits, 0); | 1033 | state = clear_state_bit(tree, state, &clear_bits, 0); |
1046 | if (last_end == (u64)-1) | 1034 | if (last_end == (u64)-1) |
1047 | goto out; | 1035 | goto out; |
1048 | start = last_end + 1; | 1036 | start = last_end + 1; |
1037 | if (start < end && state && state->start == start && | ||
1038 | !need_resched()) | ||
1039 | goto hit_next; | ||
1049 | } | 1040 | } |
1050 | goto search_again; | 1041 | goto search_again; |
1051 | } | 1042 | } |
@@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | |||
1173 | cached_state, mask); | 1164 | cached_state, mask); |
1174 | } | 1165 | } |
1175 | 1166 | ||
1176 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 1167 | int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
1177 | u64 end, struct extent_state **cached_state, | 1168 | struct extent_state **cached_state, gfp_t mask) |
1178 | gfp_t mask) | ||
1179 | { | 1169 | { |
1180 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, | 1170 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
1181 | cached_state, mask); | 1171 | cached_state, mask); |
@@ -1293,7 +1283,7 @@ out: | |||
1293 | * returned if we find something, and *start_ret and *end_ret are | 1283 | * returned if we find something, and *start_ret and *end_ret are |
1294 | * set to reflect the state struct that was found. | 1284 | * set to reflect the state struct that was found. |
1295 | * | 1285 | * |
1296 | * If nothing was found, 1 is returned, < 0 on error | 1286 | * If nothing was found, 1 is returned. If something was found, 0 is returned. |
1297 | */ | 1287 | */ |
1298 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 1288 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
1299 | u64 *start_ret, u64 *end_ret, int bits) | 1289 | u64 *start_ret, u64 *end_ret, int bits) |
@@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
1923 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 1913 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
1924 | /* try to remap that extent elsewhere? */ | 1914 | /* try to remap that extent elsewhere? */ |
1925 | bio_put(bio); | 1915 | bio_put(bio); |
1916 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); | ||
1926 | return -EIO; | 1917 | return -EIO; |
1927 | } | 1918 | } |
1928 | 1919 | ||
@@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) | |||
2222 | uptodate = 0; | 2213 | uptodate = 0; |
2223 | } | 2214 | } |
2224 | 2215 | ||
2225 | if (!uptodate && tree->ops && | ||
2226 | tree->ops->writepage_io_failed_hook) { | ||
2227 | ret = tree->ops->writepage_io_failed_hook(NULL, page, | ||
2228 | start, end, NULL); | ||
2229 | /* Writeback already completed */ | ||
2230 | if (ret == 0) | ||
2231 | return 1; | ||
2232 | } | ||
2233 | |||
2234 | if (!uptodate) { | 2216 | if (!uptodate) { |
2235 | clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); | ||
2236 | ClearPageUptodate(page); | 2217 | ClearPageUptodate(page); |
2237 | SetPageError(page); | 2218 | SetPageError(page); |
2238 | } | 2219 | } |
@@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2347 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 2328 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
2348 | ret = tree->ops->readpage_end_io_hook(page, start, end, | 2329 | ret = tree->ops->readpage_end_io_hook(page, start, end, |
2349 | state, mirror); | 2330 | state, mirror); |
2350 | if (ret) | 2331 | if (ret) { |
2332 | /* no IO error indicated but software detected errors | ||
2333 | * in the block, either checksum errors or | ||
2334 | * issues with the contents */ | ||
2335 | struct btrfs_root *root = | ||
2336 | BTRFS_I(page->mapping->host)->root; | ||
2337 | struct btrfs_device *device; | ||
2338 | |||
2351 | uptodate = 0; | 2339 | uptodate = 0; |
2352 | else | 2340 | device = btrfs_find_device_for_logical( |
2341 | root, start, mirror); | ||
2342 | if (device) | ||
2343 | btrfs_dev_stat_inc_and_print(device, | ||
2344 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
2345 | } else { | ||
2353 | clean_io_failure(start, page); | 2346 | clean_io_failure(start, page); |
2347 | } | ||
2354 | } | 2348 | } |
2355 | 2349 | ||
2356 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { | 2350 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { |
@@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb, | |||
3164 | u64 offset = eb->start; | 3158 | u64 offset = eb->start; |
3165 | unsigned long i, num_pages; | 3159 | unsigned long i, num_pages; |
3166 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); | 3160 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); |
3167 | int ret; | 3161 | int ret = 0; |
3168 | 3162 | ||
3169 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3163 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
3170 | num_pages = num_extent_pages(eb->start, eb->len); | 3164 | num_pages = num_extent_pages(eb->start, eb->len); |
@@ -3930,6 +3924,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3930 | eb->start = start; | 3924 | eb->start = start; |
3931 | eb->len = len; | 3925 | eb->len = len; |
3932 | eb->tree = tree; | 3926 | eb->tree = tree; |
3927 | eb->bflags = 0; | ||
3933 | rwlock_init(&eb->lock); | 3928 | rwlock_init(&eb->lock); |
3934 | atomic_set(&eb->write_locks, 0); | 3929 | atomic_set(&eb->write_locks, 0); |
3935 | atomic_set(&eb->read_locks, 0); | 3930 | atomic_set(&eb->read_locks, 0); |
@@ -3967,6 +3962,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3967 | return eb; | 3962 | return eb; |
3968 | } | 3963 | } |
3969 | 3964 | ||
3965 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) | ||
3966 | { | ||
3967 | unsigned long i; | ||
3968 | struct page *p; | ||
3969 | struct extent_buffer *new; | ||
3970 | unsigned long num_pages = num_extent_pages(src->start, src->len); | ||
3971 | |||
3972 | new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); | ||
3973 | if (new == NULL) | ||
3974 | return NULL; | ||
3975 | |||
3976 | for (i = 0; i < num_pages; i++) { | ||
3977 | p = alloc_page(GFP_ATOMIC); | ||
3978 | BUG_ON(!p); | ||
3979 | attach_extent_buffer_page(new, p); | ||
3980 | WARN_ON(PageDirty(p)); | ||
3981 | SetPageUptodate(p); | ||
3982 | new->pages[i] = p; | ||
3983 | } | ||
3984 | |||
3985 | copy_extent_buffer(new, src, 0, 0, src->len); | ||
3986 | set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); | ||
3987 | set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); | ||
3988 | |||
3989 | return new; | ||
3990 | } | ||
3991 | |||
3992 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) | ||
3993 | { | ||
3994 | struct extent_buffer *eb; | ||
3995 | unsigned long num_pages = num_extent_pages(0, len); | ||
3996 | unsigned long i; | ||
3997 | |||
3998 | eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); | ||
3999 | if (!eb) | ||
4000 | return NULL; | ||
4001 | |||
4002 | for (i = 0; i < num_pages; i++) { | ||
4003 | eb->pages[i] = alloc_page(GFP_ATOMIC); | ||
4004 | if (!eb->pages[i]) | ||
4005 | goto err; | ||
4006 | } | ||
4007 | set_extent_buffer_uptodate(eb); | ||
4008 | btrfs_set_header_nritems(eb, 0); | ||
4009 | set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | ||
4010 | |||
4011 | return eb; | ||
4012 | err: | ||
4013 | for (i--; i > 0; i--) | ||
4014 | __free_page(eb->pages[i]); | ||
4015 | __free_extent_buffer(eb); | ||
4016 | return NULL; | ||
4017 | } | ||
4018 | |||
3970 | static int extent_buffer_under_io(struct extent_buffer *eb) | 4019 | static int extent_buffer_under_io(struct extent_buffer *eb) |
3971 | { | 4020 | { |
3972 | return (atomic_read(&eb->io_pages) || | 4021 | return (atomic_read(&eb->io_pages) || |
@@ -3981,18 +4030,21 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
3981 | unsigned long start_idx) | 4030 | unsigned long start_idx) |
3982 | { | 4031 | { |
3983 | unsigned long index; | 4032 | unsigned long index; |
4033 | unsigned long num_pages; | ||
3984 | struct page *page; | 4034 | struct page *page; |
4035 | int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | ||
3985 | 4036 | ||
3986 | BUG_ON(extent_buffer_under_io(eb)); | 4037 | BUG_ON(extent_buffer_under_io(eb)); |
3987 | 4038 | ||
3988 | index = num_extent_pages(eb->start, eb->len); | 4039 | num_pages = num_extent_pages(eb->start, eb->len); |
4040 | index = start_idx + num_pages; | ||
3989 | if (start_idx >= index) | 4041 | if (start_idx >= index) |
3990 | return; | 4042 | return; |
3991 | 4043 | ||
3992 | do { | 4044 | do { |
3993 | index--; | 4045 | index--; |
3994 | page = extent_buffer_page(eb, index); | 4046 | page = extent_buffer_page(eb, index); |
3995 | if (page) { | 4047 | if (page && mapped) { |
3996 | spin_lock(&page->mapping->private_lock); | 4048 | spin_lock(&page->mapping->private_lock); |
3997 | /* | 4049 | /* |
3998 | * We do this since we'll remove the pages after we've | 4050 | * We do this since we'll remove the pages after we've |
@@ -4017,6 +4069,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
4017 | } | 4069 | } |
4018 | spin_unlock(&page->mapping->private_lock); | 4070 | spin_unlock(&page->mapping->private_lock); |
4019 | 4071 | ||
4072 | } | ||
4073 | if (page) { | ||
4020 | /* One for when we alloced the page */ | 4074 | /* One for when we alloced the page */ |
4021 | page_cache_release(page); | 4075 | page_cache_release(page); |
4022 | } | 4076 | } |
@@ -4120,6 +4174,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
4120 | if (atomic_inc_not_zero(&exists->refs)) { | 4174 | if (atomic_inc_not_zero(&exists->refs)) { |
4121 | spin_unlock(&mapping->private_lock); | 4175 | spin_unlock(&mapping->private_lock); |
4122 | unlock_page(p); | 4176 | unlock_page(p); |
4177 | page_cache_release(p); | ||
4123 | mark_extent_buffer_accessed(exists); | 4178 | mark_extent_buffer_accessed(exists); |
4124 | goto free_eb; | 4179 | goto free_eb; |
4125 | } | 4180 | } |
@@ -4199,8 +4254,7 @@ free_eb: | |||
4199 | unlock_page(eb->pages[i]); | 4254 | unlock_page(eb->pages[i]); |
4200 | } | 4255 | } |
4201 | 4256 | ||
4202 | if (!atomic_dec_and_test(&eb->refs)) | 4257 | WARN_ON(!atomic_dec_and_test(&eb->refs)); |
4203 | return exists; | ||
4204 | btrfs_release_extent_buffer(eb); | 4258 | btrfs_release_extent_buffer(eb); |
4205 | return exists; | 4259 | return exists; |
4206 | } | 4260 | } |
@@ -4235,14 +4289,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | |||
4235 | { | 4289 | { |
4236 | WARN_ON(atomic_read(&eb->refs) == 0); | 4290 | WARN_ON(atomic_read(&eb->refs) == 0); |
4237 | if (atomic_dec_and_test(&eb->refs)) { | 4291 | if (atomic_dec_and_test(&eb->refs)) { |
4238 | struct extent_io_tree *tree = eb->tree; | 4292 | if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) { |
4293 | spin_unlock(&eb->refs_lock); | ||
4294 | } else { | ||
4295 | struct extent_io_tree *tree = eb->tree; | ||
4239 | 4296 | ||
4240 | spin_unlock(&eb->refs_lock); | 4297 | spin_unlock(&eb->refs_lock); |
4241 | 4298 | ||
4242 | spin_lock(&tree->buffer_lock); | 4299 | spin_lock(&tree->buffer_lock); |
4243 | radix_tree_delete(&tree->buffer, | 4300 | radix_tree_delete(&tree->buffer, |
4244 | eb->start >> PAGE_CACHE_SHIFT); | 4301 | eb->start >> PAGE_CACHE_SHIFT); |
4245 | spin_unlock(&tree->buffer_lock); | 4302 | spin_unlock(&tree->buffer_lock); |
4303 | } | ||
4246 | 4304 | ||
4247 | /* Should be safe to release our pages at this point */ | 4305 | /* Should be safe to release our pages at this point */ |
4248 | btrfs_release_extent_buffer_page(eb, 0); | 4306 | btrfs_release_extent_buffer_page(eb, 0); |
@@ -4260,6 +4318,10 @@ void free_extent_buffer(struct extent_buffer *eb) | |||
4260 | 4318 | ||
4261 | spin_lock(&eb->refs_lock); | 4319 | spin_lock(&eb->refs_lock); |
4262 | if (atomic_read(&eb->refs) == 2 && | 4320 | if (atomic_read(&eb->refs) == 2 && |
4321 | test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) | ||
4322 | atomic_dec(&eb->refs); | ||
4323 | |||
4324 | if (atomic_read(&eb->refs) == 2 && | ||
4263 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && | 4325 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && |
4264 | !extent_buffer_under_io(eb) && | 4326 | !extent_buffer_under_io(eb) && |
4265 | test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) | 4327 | test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b516c3b8dec6..25900af5b15d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -39,6 +39,7 @@ | |||
39 | #define EXTENT_BUFFER_STALE 6 | 39 | #define EXTENT_BUFFER_STALE 6 |
40 | #define EXTENT_BUFFER_WRITEBACK 7 | 40 | #define EXTENT_BUFFER_WRITEBACK 7 |
41 | #define EXTENT_BUFFER_IOERR 8 | 41 | #define EXTENT_BUFFER_IOERR 8 |
42 | #define EXTENT_BUFFER_DUMMY 9 | ||
42 | 43 | ||
43 | /* these are flags for extent_clear_unlock_delalloc */ | 44 | /* these are flags for extent_clear_unlock_delalloc */ |
44 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 45 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
@@ -75,9 +76,6 @@ struct extent_io_ops { | |||
75 | unsigned long bio_flags); | 76 | unsigned long bio_flags); |
76 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | 77 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); |
77 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); | 78 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); |
78 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, | ||
79 | u64 start, u64 end, | ||
80 | struct extent_state *state); | ||
81 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, | 79 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, |
82 | struct extent_state *state, int mirror); | 80 | struct extent_state *state, int mirror); |
83 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 81 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
@@ -225,6 +223,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
225 | struct extent_state **cached_state, gfp_t mask); | 223 | struct extent_state **cached_state, gfp_t mask); |
226 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 224 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
227 | struct extent_state **cached_state, gfp_t mask); | 225 | struct extent_state **cached_state, gfp_t mask); |
226 | int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | ||
227 | struct extent_state **cached_state, gfp_t mask); | ||
228 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 228 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
229 | gfp_t mask); | 229 | gfp_t mask); |
230 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 230 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -265,6 +265,8 @@ void set_page_extent_mapped(struct page *page); | |||
265 | 265 | ||
266 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | 266 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, |
267 | u64 start, unsigned long len); | 267 | u64 start, unsigned long len); |
268 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); | ||
269 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); | ||
268 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | 270 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, |
269 | u64 start, unsigned long len); | 271 | u64 start, unsigned long len); |
270 | void free_extent_buffer(struct extent_buffer *eb); | 272 | void free_extent_buffer(struct extent_buffer *eb); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 53bf2d764bbc..876cddd6b2f0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -65,6 +65,21 @@ struct inode_defrag { | |||
65 | int cycled; | 65 | int cycled; |
66 | }; | 66 | }; |
67 | 67 | ||
68 | static int __compare_inode_defrag(struct inode_defrag *defrag1, | ||
69 | struct inode_defrag *defrag2) | ||
70 | { | ||
71 | if (defrag1->root > defrag2->root) | ||
72 | return 1; | ||
73 | else if (defrag1->root < defrag2->root) | ||
74 | return -1; | ||
75 | else if (defrag1->ino > defrag2->ino) | ||
76 | return 1; | ||
77 | else if (defrag1->ino < defrag2->ino) | ||
78 | return -1; | ||
79 | else | ||
80 | return 0; | ||
81 | } | ||
82 | |||
68 | /* pop a record for an inode into the defrag tree. The lock | 83 | /* pop a record for an inode into the defrag tree. The lock |
69 | * must be held already | 84 | * must be held already |
70 | * | 85 | * |
@@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode, | |||
81 | struct inode_defrag *entry; | 96 | struct inode_defrag *entry; |
82 | struct rb_node **p; | 97 | struct rb_node **p; |
83 | struct rb_node *parent = NULL; | 98 | struct rb_node *parent = NULL; |
99 | int ret; | ||
84 | 100 | ||
85 | p = &root->fs_info->defrag_inodes.rb_node; | 101 | p = &root->fs_info->defrag_inodes.rb_node; |
86 | while (*p) { | 102 | while (*p) { |
87 | parent = *p; | 103 | parent = *p; |
88 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 104 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
89 | 105 | ||
90 | if (defrag->ino < entry->ino) | 106 | ret = __compare_inode_defrag(defrag, entry); |
107 | if (ret < 0) | ||
91 | p = &parent->rb_left; | 108 | p = &parent->rb_left; |
92 | else if (defrag->ino > entry->ino) | 109 | else if (ret > 0) |
93 | p = &parent->rb_right; | 110 | p = &parent->rb_right; |
94 | else { | 111 | else { |
95 | /* if we're reinserting an entry for | 112 | /* if we're reinserting an entry for |
@@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode, | |||
103 | goto exists; | 120 | goto exists; |
104 | } | 121 | } |
105 | } | 122 | } |
106 | BTRFS_I(inode)->in_defrag = 1; | 123 | set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
107 | rb_link_node(&defrag->rb_node, parent, p); | 124 | rb_link_node(&defrag->rb_node, parent, p); |
108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | 125 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); |
109 | return; | 126 | return; |
@@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
131 | if (btrfs_fs_closing(root->fs_info)) | 148 | if (btrfs_fs_closing(root->fs_info)) |
132 | return 0; | 149 | return 0; |
133 | 150 | ||
134 | if (BTRFS_I(inode)->in_defrag) | 151 | if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) |
135 | return 0; | 152 | return 0; |
136 | 153 | ||
137 | if (trans) | 154 | if (trans) |
@@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
148 | defrag->root = root->root_key.objectid; | 165 | defrag->root = root->root_key.objectid; |
149 | 166 | ||
150 | spin_lock(&root->fs_info->defrag_inodes_lock); | 167 | spin_lock(&root->fs_info->defrag_inodes_lock); |
151 | if (!BTRFS_I(inode)->in_defrag) | 168 | if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) |
152 | __btrfs_add_inode_defrag(inode, defrag); | 169 | __btrfs_add_inode_defrag(inode, defrag); |
153 | else | 170 | else |
154 | kfree(defrag); | 171 | kfree(defrag); |
@@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
159 | /* | 176 | /* |
160 | * must be called with the defrag_inodes lock held | 177 | * must be called with the defrag_inodes lock held |
161 | */ | 178 | */ |
162 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, | 179 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, |
180 | u64 root, u64 ino, | ||
163 | struct rb_node **next) | 181 | struct rb_node **next) |
164 | { | 182 | { |
165 | struct inode_defrag *entry = NULL; | 183 | struct inode_defrag *entry = NULL; |
184 | struct inode_defrag tmp; | ||
166 | struct rb_node *p; | 185 | struct rb_node *p; |
167 | struct rb_node *parent = NULL; | 186 | struct rb_node *parent = NULL; |
187 | int ret; | ||
188 | |||
189 | tmp.ino = ino; | ||
190 | tmp.root = root; | ||
168 | 191 | ||
169 | p = info->defrag_inodes.rb_node; | 192 | p = info->defrag_inodes.rb_node; |
170 | while (p) { | 193 | while (p) { |
171 | parent = p; | 194 | parent = p; |
172 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 195 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
173 | 196 | ||
174 | if (ino < entry->ino) | 197 | ret = __compare_inode_defrag(&tmp, entry); |
198 | if (ret < 0) | ||
175 | p = parent->rb_left; | 199 | p = parent->rb_left; |
176 | else if (ino > entry->ino) | 200 | else if (ret > 0) |
177 | p = parent->rb_right; | 201 | p = parent->rb_right; |
178 | else | 202 | else |
179 | return entry; | 203 | return entry; |
180 | } | 204 | } |
181 | 205 | ||
182 | if (next) { | 206 | if (next) { |
183 | while (parent && ino > entry->ino) { | 207 | while (parent && __compare_inode_defrag(&tmp, entry) > 0) { |
184 | parent = rb_next(parent); | 208 | parent = rb_next(parent); |
185 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 209 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
186 | } | 210 | } |
@@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
202 | struct btrfs_key key; | 226 | struct btrfs_key key; |
203 | struct btrfs_ioctl_defrag_range_args range; | 227 | struct btrfs_ioctl_defrag_range_args range; |
204 | u64 first_ino = 0; | 228 | u64 first_ino = 0; |
229 | u64 root_objectid = 0; | ||
205 | int num_defrag; | 230 | int num_defrag; |
206 | int defrag_batch = 1024; | 231 | int defrag_batch = 1024; |
207 | 232 | ||
@@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
214 | n = NULL; | 239 | n = NULL; |
215 | 240 | ||
216 | /* find an inode to defrag */ | 241 | /* find an inode to defrag */ |
217 | defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); | 242 | defrag = btrfs_find_defrag_inode(fs_info, root_objectid, |
243 | first_ino, &n); | ||
218 | if (!defrag) { | 244 | if (!defrag) { |
219 | if (n) | 245 | if (n) { |
220 | defrag = rb_entry(n, struct inode_defrag, rb_node); | 246 | defrag = rb_entry(n, struct inode_defrag, |
221 | else if (first_ino) { | 247 | rb_node); |
248 | } else if (root_objectid || first_ino) { | ||
249 | root_objectid = 0; | ||
222 | first_ino = 0; | 250 | first_ino = 0; |
223 | continue; | 251 | continue; |
224 | } else { | 252 | } else { |
@@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
228 | 256 | ||
229 | /* remove it from the rbtree */ | 257 | /* remove it from the rbtree */ |
230 | first_ino = defrag->ino + 1; | 258 | first_ino = defrag->ino + 1; |
259 | root_objectid = defrag->root; | ||
231 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | 260 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); |
232 | 261 | ||
233 | if (btrfs_fs_closing(fs_info)) | 262 | if (btrfs_fs_closing(fs_info)) |
@@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
252 | goto next; | 281 | goto next; |
253 | 282 | ||
254 | /* do a chunk of defrag */ | 283 | /* do a chunk of defrag */ |
255 | BTRFS_I(inode)->in_defrag = 0; | 284 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
256 | range.start = defrag->last_offset; | 285 | range.start = defrag->last_offset; |
257 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | 286 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, |
258 | defrag_batch); | 287 | defrag_batch); |
@@ -1409,7 +1438,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1409 | mutex_unlock(&inode->i_mutex); | 1438 | mutex_unlock(&inode->i_mutex); |
1410 | goto out; | 1439 | goto out; |
1411 | } | 1440 | } |
1412 | BTRFS_I(inode)->sequence++; | ||
1413 | 1441 | ||
1414 | start_pos = round_down(pos, root->sectorsize); | 1442 | start_pos = round_down(pos, root->sectorsize); |
1415 | if (start_pos > i_size_read(inode)) { | 1443 | if (start_pos > i_size_read(inode)) { |
@@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1466 | * flush down new bytes that may have been written if the | 1494 | * flush down new bytes that may have been written if the |
1467 | * application were using truncate to replace a file in place. | 1495 | * application were using truncate to replace a file in place. |
1468 | */ | 1496 | */ |
1469 | if (BTRFS_I(inode)->ordered_data_close) { | 1497 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
1470 | BTRFS_I(inode)->ordered_data_close = 0; | 1498 | &BTRFS_I(inode)->runtime_flags)) { |
1471 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | 1499 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); |
1472 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 1500 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
1473 | filemap_flush(inode->i_mapping); | 1501 | filemap_flush(inode->i_mapping); |
@@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1498 | 1526 | ||
1499 | trace_btrfs_sync_file(file, datasync); | 1527 | trace_btrfs_sync_file(file, datasync); |
1500 | 1528 | ||
1501 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
1502 | if (ret) | ||
1503 | return ret; | ||
1504 | mutex_lock(&inode->i_mutex); | 1529 | mutex_lock(&inode->i_mutex); |
1505 | 1530 | ||
1506 | /* we wait first, since the writeback may change the inode */ | 1531 | /* |
1532 | * we wait first, since the writeback may change the inode, also wait | ||
1533 | * ordered range does a filemape_write_and_wait_range which is why we | ||
1534 | * don't do it above like other file systems. | ||
1535 | */ | ||
1507 | root->log_batch++; | 1536 | root->log_batch++; |
1508 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1537 | btrfs_wait_ordered_range(inode, start, end); |
1509 | root->log_batch++; | 1538 | root->log_batch++; |
1510 | 1539 | ||
1511 | /* | 1540 | /* |
@@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1523 | * syncing | 1552 | * syncing |
1524 | */ | 1553 | */ |
1525 | smp_mb(); | 1554 | smp_mb(); |
1526 | if (BTRFS_I(inode)->last_trans <= | 1555 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || |
1556 | BTRFS_I(inode)->last_trans <= | ||
1527 | root->fs_info->last_trans_committed) { | 1557 | root->fs_info->last_trans_committed) { |
1528 | BTRFS_I(inode)->last_trans = 0; | 1558 | BTRFS_I(inode)->last_trans = 0; |
1529 | mutex_unlock(&inode->i_mutex); | 1559 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 202008ec367d..19a0d85b451c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -33,6 +33,8 @@ | |||
33 | 33 | ||
34 | static int link_free_space(struct btrfs_free_space_ctl *ctl, | 34 | static int link_free_space(struct btrfs_free_space_ctl *ctl, |
35 | struct btrfs_free_space *info); | 35 | struct btrfs_free_space *info); |
36 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl, | ||
37 | struct btrfs_free_space *info); | ||
36 | 38 | ||
37 | static struct inode *__lookup_free_space_inode(struct btrfs_root *root, | 39 | static struct inode *__lookup_free_space_inode(struct btrfs_root *root, |
38 | struct btrfs_path *path, | 40 | struct btrfs_path *path, |
@@ -584,6 +586,44 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl, | |||
584 | return 0; | 586 | return 0; |
585 | } | 587 | } |
586 | 588 | ||
589 | /* | ||
590 | * Since we attach pinned extents after the fact we can have contiguous sections | ||
591 | * of free space that are split up in entries. This poses a problem with the | ||
592 | * tree logging stuff since it could have allocated across what appears to be 2 | ||
593 | * entries since we would have merged the entries when adding the pinned extents | ||
594 | * back to the free space cache. So run through the space cache that we just | ||
595 | * loaded and merge contiguous entries. This will make the log replay stuff not | ||
596 | * blow up and it will make for nicer allocator behavior. | ||
597 | */ | ||
598 | static void merge_space_tree(struct btrfs_free_space_ctl *ctl) | ||
599 | { | ||
600 | struct btrfs_free_space *e, *prev = NULL; | ||
601 | struct rb_node *n; | ||
602 | |||
603 | again: | ||
604 | spin_lock(&ctl->tree_lock); | ||
605 | for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { | ||
606 | e = rb_entry(n, struct btrfs_free_space, offset_index); | ||
607 | if (!prev) | ||
608 | goto next; | ||
609 | if (e->bitmap || prev->bitmap) | ||
610 | goto next; | ||
611 | if (prev->offset + prev->bytes == e->offset) { | ||
612 | unlink_free_space(ctl, prev); | ||
613 | unlink_free_space(ctl, e); | ||
614 | prev->bytes += e->bytes; | ||
615 | kmem_cache_free(btrfs_free_space_cachep, e); | ||
616 | link_free_space(ctl, prev); | ||
617 | prev = NULL; | ||
618 | spin_unlock(&ctl->tree_lock); | ||
619 | goto again; | ||
620 | } | ||
621 | next: | ||
622 | prev = e; | ||
623 | } | ||
624 | spin_unlock(&ctl->tree_lock); | ||
625 | } | ||
626 | |||
587 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | 627 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, |
588 | struct btrfs_free_space_ctl *ctl, | 628 | struct btrfs_free_space_ctl *ctl, |
589 | struct btrfs_path *path, u64 offset) | 629 | struct btrfs_path *path, u64 offset) |
@@ -726,6 +766,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
726 | } | 766 | } |
727 | 767 | ||
728 | io_ctl_drop_pages(&io_ctl); | 768 | io_ctl_drop_pages(&io_ctl); |
769 | merge_space_tree(ctl); | ||
729 | ret = 1; | 770 | ret = 1; |
730 | out: | 771 | out: |
731 | io_ctl_free(&io_ctl); | 772 | io_ctl_free(&io_ctl); |
@@ -972,9 +1013,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
972 | goto out; | 1013 | goto out; |
973 | 1014 | ||
974 | 1015 | ||
975 | ret = filemap_write_and_wait(inode->i_mapping); | 1016 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
976 | if (ret) | ||
977 | goto out; | ||
978 | 1017 | ||
979 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 1018 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
980 | key.offset = offset; | 1019 | key.offset = offset; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 61b16c641ce0..e9991adc0960 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
89 | 89 | ||
90 | static int btrfs_setsize(struct inode *inode, loff_t newsize); | 90 | static int btrfs_setsize(struct inode *inode, loff_t newsize); |
91 | static int btrfs_truncate(struct inode *inode); | 91 | static int btrfs_truncate(struct inode *inode); |
92 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | 92 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); |
93 | static noinline int cow_file_range(struct inode *inode, | 93 | static noinline int cow_file_range(struct inode *inode, |
94 | struct page *locked_page, | 94 | struct page *locked_page, |
95 | u64 start, u64 end, int *page_started, | 95 | u64 start, u64 end, int *page_started, |
@@ -257,10 +257,13 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
257 | ret = insert_inline_extent(trans, root, inode, start, | 257 | ret = insert_inline_extent(trans, root, inode, start, |
258 | inline_len, compressed_size, | 258 | inline_len, compressed_size, |
259 | compress_type, compressed_pages); | 259 | compress_type, compressed_pages); |
260 | if (ret) { | 260 | if (ret && ret != -ENOSPC) { |
261 | btrfs_abort_transaction(trans, root, ret); | 261 | btrfs_abort_transaction(trans, root, ret); |
262 | return ret; | 262 | return ret; |
263 | } else if (ret == -ENOSPC) { | ||
264 | return 1; | ||
263 | } | 265 | } |
266 | |||
264 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | 267 | btrfs_delalloc_release_metadata(inode, end + 1 - start); |
265 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 268 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
266 | return 0; | 269 | return 0; |
@@ -1572,11 +1575,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1572 | if (btrfs_is_free_space_inode(root, inode)) | 1575 | if (btrfs_is_free_space_inode(root, inode)) |
1573 | metadata = 2; | 1576 | metadata = 2; |
1574 | 1577 | ||
1575 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); | ||
1576 | if (ret) | ||
1577 | return ret; | ||
1578 | |||
1579 | if (!(rw & REQ_WRITE)) { | 1578 | if (!(rw & REQ_WRITE)) { |
1579 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); | ||
1580 | if (ret) | ||
1581 | return ret; | ||
1582 | |||
1580 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1583 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
1581 | return btrfs_submit_compressed_read(inode, bio, | 1584 | return btrfs_submit_compressed_read(inode, bio, |
1582 | mirror_num, bio_flags); | 1585 | mirror_num, bio_flags); |
@@ -1815,25 +1818,24 @@ out: | |||
1815 | * an ordered extent if the range of bytes in the file it covers are | 1818 | * an ordered extent if the range of bytes in the file it covers are |
1816 | * fully written. | 1819 | * fully written. |
1817 | */ | 1820 | */ |
1818 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1821 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) |
1819 | { | 1822 | { |
1823 | struct inode *inode = ordered_extent->inode; | ||
1820 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1824 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1821 | struct btrfs_trans_handle *trans = NULL; | 1825 | struct btrfs_trans_handle *trans = NULL; |
1822 | struct btrfs_ordered_extent *ordered_extent = NULL; | ||
1823 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1826 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1824 | struct extent_state *cached_state = NULL; | 1827 | struct extent_state *cached_state = NULL; |
1825 | int compress_type = 0; | 1828 | int compress_type = 0; |
1826 | int ret; | 1829 | int ret; |
1827 | bool nolock; | 1830 | bool nolock; |
1828 | 1831 | ||
1829 | ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, | ||
1830 | end - start + 1); | ||
1831 | if (!ret) | ||
1832 | return 0; | ||
1833 | BUG_ON(!ordered_extent); /* Logic error */ | ||
1834 | |||
1835 | nolock = btrfs_is_free_space_inode(root, inode); | 1832 | nolock = btrfs_is_free_space_inode(root, inode); |
1836 | 1833 | ||
1834 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { | ||
1835 | ret = -EIO; | ||
1836 | goto out; | ||
1837 | } | ||
1838 | |||
1837 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1839 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
1838 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ | 1840 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ |
1839 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1841 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
@@ -1889,12 +1891,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1889 | ordered_extent->file_offset, | 1891 | ordered_extent->file_offset, |
1890 | ordered_extent->len); | 1892 | ordered_extent->len); |
1891 | } | 1893 | } |
1892 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | 1894 | |
1893 | ordered_extent->file_offset + | ||
1894 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | ||
1895 | if (ret < 0) { | 1895 | if (ret < 0) { |
1896 | btrfs_abort_transaction(trans, root, ret); | 1896 | btrfs_abort_transaction(trans, root, ret); |
1897 | goto out; | 1897 | goto out_unlock; |
1898 | } | 1898 | } |
1899 | 1899 | ||
1900 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1900 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
@@ -1905,10 +1905,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1905 | ret = btrfs_update_inode_fallback(trans, root, inode); | 1905 | ret = btrfs_update_inode_fallback(trans, root, inode); |
1906 | if (ret) { /* -ENOMEM or corruption */ | 1906 | if (ret) { /* -ENOMEM or corruption */ |
1907 | btrfs_abort_transaction(trans, root, ret); | 1907 | btrfs_abort_transaction(trans, root, ret); |
1908 | goto out; | 1908 | goto out_unlock; |
1909 | } | 1909 | } |
1910 | } | 1910 | } |
1911 | ret = 0; | 1911 | ret = 0; |
1912 | out_unlock: | ||
1913 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | ||
1914 | ordered_extent->file_offset + | ||
1915 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | ||
1912 | out: | 1916 | out: |
1913 | if (root != root->fs_info->tree_root) | 1917 | if (root != root->fs_info->tree_root) |
1914 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1918 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
@@ -1919,26 +1923,57 @@ out: | |||
1919 | btrfs_end_transaction(trans, root); | 1923 | btrfs_end_transaction(trans, root); |
1920 | } | 1924 | } |
1921 | 1925 | ||
1926 | if (ret) | ||
1927 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, | ||
1928 | ordered_extent->file_offset + | ||
1929 | ordered_extent->len - 1, NULL, GFP_NOFS); | ||
1930 | |||
1931 | /* | ||
1932 | * This needs to be dont to make sure anybody waiting knows we are done | ||
1933 | * upating everything for this ordered extent. | ||
1934 | */ | ||
1935 | btrfs_remove_ordered_extent(inode, ordered_extent); | ||
1936 | |||
1922 | /* once for us */ | 1937 | /* once for us */ |
1923 | btrfs_put_ordered_extent(ordered_extent); | 1938 | btrfs_put_ordered_extent(ordered_extent); |
1924 | /* once for the tree */ | 1939 | /* once for the tree */ |
1925 | btrfs_put_ordered_extent(ordered_extent); | 1940 | btrfs_put_ordered_extent(ordered_extent); |
1926 | 1941 | ||
1927 | return 0; | 1942 | return ret; |
1928 | out_unlock: | 1943 | } |
1929 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | 1944 | |
1930 | ordered_extent->file_offset + | 1945 | static void finish_ordered_fn(struct btrfs_work *work) |
1931 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | 1946 | { |
1932 | goto out; | 1947 | struct btrfs_ordered_extent *ordered_extent; |
1948 | ordered_extent = container_of(work, struct btrfs_ordered_extent, work); | ||
1949 | btrfs_finish_ordered_io(ordered_extent); | ||
1933 | } | 1950 | } |
1934 | 1951 | ||
1935 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1952 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
1936 | struct extent_state *state, int uptodate) | 1953 | struct extent_state *state, int uptodate) |
1937 | { | 1954 | { |
1955 | struct inode *inode = page->mapping->host; | ||
1956 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1957 | struct btrfs_ordered_extent *ordered_extent = NULL; | ||
1958 | struct btrfs_workers *workers; | ||
1959 | |||
1938 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 1960 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
1939 | 1961 | ||
1940 | ClearPagePrivate2(page); | 1962 | ClearPagePrivate2(page); |
1941 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1963 | if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, |
1964 | end - start + 1, uptodate)) | ||
1965 | return 0; | ||
1966 | |||
1967 | ordered_extent->work.func = finish_ordered_fn; | ||
1968 | ordered_extent->work.flags = 0; | ||
1969 | |||
1970 | if (btrfs_is_free_space_inode(root, inode)) | ||
1971 | workers = &root->fs_info->endio_freespace_worker; | ||
1972 | else | ||
1973 | workers = &root->fs_info->endio_write_workers; | ||
1974 | btrfs_queue_worker(workers, &ordered_extent->work); | ||
1975 | |||
1976 | return 0; | ||
1942 | } | 1977 | } |
1943 | 1978 | ||
1944 | /* | 1979 | /* |
@@ -2072,12 +2107,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | |||
2072 | struct btrfs_block_rsv *block_rsv; | 2107 | struct btrfs_block_rsv *block_rsv; |
2073 | int ret; | 2108 | int ret; |
2074 | 2109 | ||
2075 | if (!list_empty(&root->orphan_list) || | 2110 | if (atomic_read(&root->orphan_inodes) || |
2076 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | 2111 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) |
2077 | return; | 2112 | return; |
2078 | 2113 | ||
2079 | spin_lock(&root->orphan_lock); | 2114 | spin_lock(&root->orphan_lock); |
2080 | if (!list_empty(&root->orphan_list)) { | 2115 | if (atomic_read(&root->orphan_inodes)) { |
2081 | spin_unlock(&root->orphan_lock); | 2116 | spin_unlock(&root->orphan_lock); |
2082 | return; | 2117 | return; |
2083 | } | 2118 | } |
@@ -2134,8 +2169,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2134 | block_rsv = NULL; | 2169 | block_rsv = NULL; |
2135 | } | 2170 | } |
2136 | 2171 | ||
2137 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2172 | if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
2138 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2173 | &BTRFS_I(inode)->runtime_flags)) { |
2139 | #if 0 | 2174 | #if 0 |
2140 | /* | 2175 | /* |
2141 | * For proper ENOSPC handling, we should do orphan | 2176 | * For proper ENOSPC handling, we should do orphan |
@@ -2148,12 +2183,12 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2148 | insert = 1; | 2183 | insert = 1; |
2149 | #endif | 2184 | #endif |
2150 | insert = 1; | 2185 | insert = 1; |
2186 | atomic_dec(&root->orphan_inodes); | ||
2151 | } | 2187 | } |
2152 | 2188 | ||
2153 | if (!BTRFS_I(inode)->orphan_meta_reserved) { | 2189 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
2154 | BTRFS_I(inode)->orphan_meta_reserved = 1; | 2190 | &BTRFS_I(inode)->runtime_flags)) |
2155 | reserve = 1; | 2191 | reserve = 1; |
2156 | } | ||
2157 | spin_unlock(&root->orphan_lock); | 2192 | spin_unlock(&root->orphan_lock); |
2158 | 2193 | ||
2159 | /* grab metadata reservation from transaction handle */ | 2194 | /* grab metadata reservation from transaction handle */ |
@@ -2166,6 +2201,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2166 | if (insert >= 1) { | 2201 | if (insert >= 1) { |
2167 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); | 2202 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); |
2168 | if (ret && ret != -EEXIST) { | 2203 | if (ret && ret != -EEXIST) { |
2204 | clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, | ||
2205 | &BTRFS_I(inode)->runtime_flags); | ||
2169 | btrfs_abort_transaction(trans, root, ret); | 2206 | btrfs_abort_transaction(trans, root, ret); |
2170 | return ret; | 2207 | return ret; |
2171 | } | 2208 | } |
@@ -2196,15 +2233,13 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2196 | int ret = 0; | 2233 | int ret = 0; |
2197 | 2234 | ||
2198 | spin_lock(&root->orphan_lock); | 2235 | spin_lock(&root->orphan_lock); |
2199 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2236 | if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
2200 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2237 | &BTRFS_I(inode)->runtime_flags)) |
2201 | delete_item = 1; | 2238 | delete_item = 1; |
2202 | } | ||
2203 | 2239 | ||
2204 | if (BTRFS_I(inode)->orphan_meta_reserved) { | 2240 | if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
2205 | BTRFS_I(inode)->orphan_meta_reserved = 0; | 2241 | &BTRFS_I(inode)->runtime_flags)) |
2206 | release_rsv = 1; | 2242 | release_rsv = 1; |
2207 | } | ||
2208 | spin_unlock(&root->orphan_lock); | 2243 | spin_unlock(&root->orphan_lock); |
2209 | 2244 | ||
2210 | if (trans && delete_item) { | 2245 | if (trans && delete_item) { |
@@ -2212,8 +2247,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2212 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ | 2247 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ |
2213 | } | 2248 | } |
2214 | 2249 | ||
2215 | if (release_rsv) | 2250 | if (release_rsv) { |
2216 | btrfs_orphan_release_metadata(inode); | 2251 | btrfs_orphan_release_metadata(inode); |
2252 | atomic_dec(&root->orphan_inodes); | ||
2253 | } | ||
2217 | 2254 | ||
2218 | return 0; | 2255 | return 0; |
2219 | } | 2256 | } |
@@ -2341,6 +2378,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2341 | ret = PTR_ERR(trans); | 2378 | ret = PTR_ERR(trans); |
2342 | goto out; | 2379 | goto out; |
2343 | } | 2380 | } |
2381 | printk(KERN_ERR "auto deleting %Lu\n", | ||
2382 | found_key.objectid); | ||
2344 | ret = btrfs_del_orphan_item(trans, root, | 2383 | ret = btrfs_del_orphan_item(trans, root, |
2345 | found_key.objectid); | 2384 | found_key.objectid); |
2346 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ | 2385 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ |
@@ -2352,9 +2391,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2352 | * add this inode to the orphan list so btrfs_orphan_del does | 2391 | * add this inode to the orphan list so btrfs_orphan_del does |
2353 | * the proper thing when we hit it | 2392 | * the proper thing when we hit it |
2354 | */ | 2393 | */ |
2355 | spin_lock(&root->orphan_lock); | 2394 | set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
2356 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2395 | &BTRFS_I(inode)->runtime_flags); |
2357 | spin_unlock(&root->orphan_lock); | ||
2358 | 2396 | ||
2359 | /* if we have links, this was a truncate, lets do that */ | 2397 | /* if we have links, this was a truncate, lets do that */ |
2360 | if (inode->i_nlink) { | 2398 | if (inode->i_nlink) { |
@@ -2510,7 +2548,7 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2510 | 2548 | ||
2511 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); | 2549 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); |
2512 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); | 2550 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); |
2513 | BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item); | 2551 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); |
2514 | inode->i_generation = BTRFS_I(inode)->generation; | 2552 | inode->i_generation = BTRFS_I(inode)->generation; |
2515 | inode->i_rdev = 0; | 2553 | inode->i_rdev = 0; |
2516 | rdev = btrfs_inode_rdev(leaf, inode_item); | 2554 | rdev = btrfs_inode_rdev(leaf, inode_item); |
@@ -2594,7 +2632,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2594 | 2632 | ||
2595 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | 2633 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); |
2596 | btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); | 2634 | btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); |
2597 | btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence); | 2635 | btrfs_set_inode_sequence(leaf, item, inode->i_version); |
2598 | btrfs_set_inode_transid(leaf, item, trans->transid); | 2636 | btrfs_set_inode_transid(leaf, item, trans->transid); |
2599 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2637 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2600 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2638 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
@@ -2752,6 +2790,8 @@ err: | |||
2752 | goto out; | 2790 | goto out; |
2753 | 2791 | ||
2754 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 2792 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
2793 | inode_inc_iversion(inode); | ||
2794 | inode_inc_iversion(dir); | ||
2755 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2795 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2756 | btrfs_update_inode(trans, root, dir); | 2796 | btrfs_update_inode(trans, root, dir); |
2757 | out: | 2797 | out: |
@@ -3089,6 +3129,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
3089 | } | 3129 | } |
3090 | 3130 | ||
3091 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 3131 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
3132 | inode_inc_iversion(dir); | ||
3092 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 3133 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
3093 | ret = btrfs_update_inode(trans, root, dir); | 3134 | ret = btrfs_update_inode(trans, root, dir); |
3094 | if (ret) | 3135 | if (ret) |
@@ -3607,7 +3648,8 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) | |||
3607 | * any new writes get down to disk quickly. | 3648 | * any new writes get down to disk quickly. |
3608 | */ | 3649 | */ |
3609 | if (newsize == 0) | 3650 | if (newsize == 0) |
3610 | BTRFS_I(inode)->ordered_data_close = 1; | 3651 | set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
3652 | &BTRFS_I(inode)->runtime_flags); | ||
3611 | 3653 | ||
3612 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | 3654 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
3613 | truncate_setsize(inode, newsize); | 3655 | truncate_setsize(inode, newsize); |
@@ -3638,6 +3680,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3638 | 3680 | ||
3639 | if (attr->ia_valid) { | 3681 | if (attr->ia_valid) { |
3640 | setattr_copy(inode, attr); | 3682 | setattr_copy(inode, attr); |
3683 | inode_inc_iversion(inode); | ||
3641 | err = btrfs_dirty_inode(inode); | 3684 | err = btrfs_dirty_inode(inode); |
3642 | 3685 | ||
3643 | if (!err && attr->ia_valid & ATTR_MODE) | 3686 | if (!err && attr->ia_valid & ATTR_MODE) |
@@ -3671,7 +3714,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3671 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3714 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
3672 | 3715 | ||
3673 | if (root->fs_info->log_root_recovering) { | 3716 | if (root->fs_info->log_root_recovering) { |
3674 | BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); | 3717 | BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
3718 | &BTRFS_I(inode)->runtime_flags)); | ||
3675 | goto no_delete; | 3719 | goto no_delete; |
3676 | } | 3720 | } |
3677 | 3721 | ||
@@ -3756,7 +3800,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3756 | btrfs_end_transaction(trans, root); | 3800 | btrfs_end_transaction(trans, root); |
3757 | btrfs_btree_balance_dirty(root, nr); | 3801 | btrfs_btree_balance_dirty(root, nr); |
3758 | no_delete: | 3802 | no_delete: |
3759 | end_writeback(inode); | 3803 | clear_inode(inode); |
3760 | return; | 3804 | return; |
3761 | } | 3805 | } |
3762 | 3806 | ||
@@ -4066,7 +4110,7 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
4066 | 4110 | ||
4067 | BTRFS_I(inode)->root = root; | 4111 | BTRFS_I(inode)->root = root; |
4068 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4112 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
4069 | BTRFS_I(inode)->dummy_inode = 1; | 4113 | set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); |
4070 | 4114 | ||
4071 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; | 4115 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; |
4072 | inode->i_op = &btrfs_dir_ro_inode_operations; | 4116 | inode->i_op = &btrfs_dir_ro_inode_operations; |
@@ -4370,7 +4414,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4370 | int ret = 0; | 4414 | int ret = 0; |
4371 | bool nolock = false; | 4415 | bool nolock = false; |
4372 | 4416 | ||
4373 | if (BTRFS_I(inode)->dummy_inode) | 4417 | if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) |
4374 | return 0; | 4418 | return 0; |
4375 | 4419 | ||
4376 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) | 4420 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) |
@@ -4403,7 +4447,7 @@ int btrfs_dirty_inode(struct inode *inode) | |||
4403 | struct btrfs_trans_handle *trans; | 4447 | struct btrfs_trans_handle *trans; |
4404 | int ret; | 4448 | int ret; |
4405 | 4449 | ||
4406 | if (BTRFS_I(inode)->dummy_inode) | 4450 | if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) |
4407 | return 0; | 4451 | return 0; |
4408 | 4452 | ||
4409 | trans = btrfs_join_transaction(root); | 4453 | trans = btrfs_join_transaction(root); |
@@ -4730,6 +4774,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
4730 | 4774 | ||
4731 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 4775 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
4732 | name_len * 2); | 4776 | name_len * 2); |
4777 | inode_inc_iversion(parent_inode); | ||
4733 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 4778 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; |
4734 | ret = btrfs_update_inode(trans, root, parent_inode); | 4779 | ret = btrfs_update_inode(trans, root, parent_inode); |
4735 | if (ret) | 4780 | if (ret) |
@@ -4937,6 +4982,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4937 | } | 4982 | } |
4938 | 4983 | ||
4939 | btrfs_inc_nlink(inode); | 4984 | btrfs_inc_nlink(inode); |
4985 | inode_inc_iversion(inode); | ||
4940 | inode->i_ctime = CURRENT_TIME; | 4986 | inode->i_ctime = CURRENT_TIME; |
4941 | ihold(inode); | 4987 | ihold(inode); |
4942 | 4988 | ||
@@ -5903,9 +5949,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
5903 | struct btrfs_dio_private *dip = bio->bi_private; | 5949 | struct btrfs_dio_private *dip = bio->bi_private; |
5904 | struct inode *inode = dip->inode; | 5950 | struct inode *inode = dip->inode; |
5905 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5951 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5906 | struct btrfs_trans_handle *trans; | ||
5907 | struct btrfs_ordered_extent *ordered = NULL; | 5952 | struct btrfs_ordered_extent *ordered = NULL; |
5908 | struct extent_state *cached_state = NULL; | ||
5909 | u64 ordered_offset = dip->logical_offset; | 5953 | u64 ordered_offset = dip->logical_offset; |
5910 | u64 ordered_bytes = dip->bytes; | 5954 | u64 ordered_bytes = dip->bytes; |
5911 | int ret; | 5955 | int ret; |
@@ -5915,73 +5959,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
5915 | again: | 5959 | again: |
5916 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, | 5960 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, |
5917 | &ordered_offset, | 5961 | &ordered_offset, |
5918 | ordered_bytes); | 5962 | ordered_bytes, !err); |
5919 | if (!ret) | 5963 | if (!ret) |
5920 | goto out_test; | 5964 | goto out_test; |
5921 | 5965 | ||
5922 | BUG_ON(!ordered); | 5966 | ordered->work.func = finish_ordered_fn; |
5923 | 5967 | ordered->work.flags = 0; | |
5924 | trans = btrfs_join_transaction(root); | 5968 | btrfs_queue_worker(&root->fs_info->endio_write_workers, |
5925 | if (IS_ERR(trans)) { | 5969 | &ordered->work); |
5926 | err = -ENOMEM; | ||
5927 | goto out; | ||
5928 | } | ||
5929 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5930 | |||
5931 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
5932 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5933 | if (!ret) | ||
5934 | err = btrfs_update_inode_fallback(trans, root, inode); | ||
5935 | goto out; | ||
5936 | } | ||
5937 | |||
5938 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5939 | ordered->file_offset + ordered->len - 1, 0, | ||
5940 | &cached_state); | ||
5941 | |||
5942 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
5943 | ret = btrfs_mark_extent_written(trans, inode, | ||
5944 | ordered->file_offset, | ||
5945 | ordered->file_offset + | ||
5946 | ordered->len); | ||
5947 | if (ret) { | ||
5948 | err = ret; | ||
5949 | goto out_unlock; | ||
5950 | } | ||
5951 | } else { | ||
5952 | ret = insert_reserved_file_extent(trans, inode, | ||
5953 | ordered->file_offset, | ||
5954 | ordered->start, | ||
5955 | ordered->disk_len, | ||
5956 | ordered->len, | ||
5957 | ordered->len, | ||
5958 | 0, 0, 0, | ||
5959 | BTRFS_FILE_EXTENT_REG); | ||
5960 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
5961 | ordered->file_offset, ordered->len); | ||
5962 | if (ret) { | ||
5963 | err = ret; | ||
5964 | WARN_ON(1); | ||
5965 | goto out_unlock; | ||
5966 | } | ||
5967 | } | ||
5968 | |||
5969 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
5970 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5971 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) | ||
5972 | btrfs_update_inode_fallback(trans, root, inode); | ||
5973 | ret = 0; | ||
5974 | out_unlock: | ||
5975 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5976 | ordered->file_offset + ordered->len - 1, | ||
5977 | &cached_state, GFP_NOFS); | ||
5978 | out: | ||
5979 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
5980 | btrfs_end_transaction(trans, root); | ||
5981 | ordered_offset = ordered->file_offset + ordered->len; | ||
5982 | btrfs_put_ordered_extent(ordered); | ||
5983 | btrfs_put_ordered_extent(ordered); | ||
5984 | |||
5985 | out_test: | 5970 | out_test: |
5986 | /* | 5971 | /* |
5987 | * our bio might span multiple ordered extents. If we haven't | 5972 | * our bio might span multiple ordered extents. If we haven't |
@@ -5990,12 +5975,12 @@ out_test: | |||
5990 | if (ordered_offset < dip->logical_offset + dip->bytes) { | 5975 | if (ordered_offset < dip->logical_offset + dip->bytes) { |
5991 | ordered_bytes = dip->logical_offset + dip->bytes - | 5976 | ordered_bytes = dip->logical_offset + dip->bytes - |
5992 | ordered_offset; | 5977 | ordered_offset; |
5978 | ordered = NULL; | ||
5993 | goto again; | 5979 | goto again; |
5994 | } | 5980 | } |
5995 | out_done: | 5981 | out_done: |
5996 | bio->bi_private = dip->private; | 5982 | bio->bi_private = dip->private; |
5997 | 5983 | ||
5998 | kfree(dip->csums); | ||
5999 | kfree(dip); | 5984 | kfree(dip); |
6000 | 5985 | ||
6001 | /* If we had an error make sure to clear the uptodate flag */ | 5986 | /* If we had an error make sure to clear the uptodate flag */ |
@@ -6063,9 +6048,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
6063 | int ret; | 6048 | int ret; |
6064 | 6049 | ||
6065 | bio_get(bio); | 6050 | bio_get(bio); |
6066 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 6051 | |
6067 | if (ret) | 6052 | if (!write) { |
6068 | goto err; | 6053 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
6054 | if (ret) | ||
6055 | goto err; | ||
6056 | } | ||
6069 | 6057 | ||
6070 | if (skip_sum) | 6058 | if (skip_sum) |
6071 | goto map; | 6059 | goto map; |
@@ -6485,13 +6473,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) | |||
6485 | 6473 | ||
6486 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) | 6474 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) |
6487 | { | 6475 | { |
6476 | struct inode *inode = page->mapping->host; | ||
6488 | struct extent_io_tree *tree; | 6477 | struct extent_io_tree *tree; |
6489 | struct btrfs_ordered_extent *ordered; | 6478 | struct btrfs_ordered_extent *ordered; |
6490 | struct extent_state *cached_state = NULL; | 6479 | struct extent_state *cached_state = NULL; |
6491 | u64 page_start = page_offset(page); | 6480 | u64 page_start = page_offset(page); |
6492 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 6481 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
6493 | 6482 | ||
6494 | |||
6495 | /* | 6483 | /* |
6496 | * we have the page locked, so new writeback can't start, | 6484 | * we have the page locked, so new writeback can't start, |
6497 | * and the dirty bit won't be cleared while we are here. | 6485 | * and the dirty bit won't be cleared while we are here. |
@@ -6501,13 +6489,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
6501 | */ | 6489 | */ |
6502 | wait_on_page_writeback(page); | 6490 | wait_on_page_writeback(page); |
6503 | 6491 | ||
6504 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 6492 | tree = &BTRFS_I(inode)->io_tree; |
6505 | if (offset) { | 6493 | if (offset) { |
6506 | btrfs_releasepage(page, GFP_NOFS); | 6494 | btrfs_releasepage(page, GFP_NOFS); |
6507 | return; | 6495 | return; |
6508 | } | 6496 | } |
6509 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state); | 6497 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state); |
6510 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 6498 | ordered = btrfs_lookup_ordered_extent(inode, |
6511 | page_offset(page)); | 6499 | page_offset(page)); |
6512 | if (ordered) { | 6500 | if (ordered) { |
6513 | /* | 6501 | /* |
@@ -6522,9 +6510,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
6522 | * whoever cleared the private bit is responsible | 6510 | * whoever cleared the private bit is responsible |
6523 | * for the finish_ordered_io | 6511 | * for the finish_ordered_io |
6524 | */ | 6512 | */ |
6525 | if (TestClearPagePrivate2(page)) { | 6513 | if (TestClearPagePrivate2(page) && |
6526 | btrfs_finish_ordered_io(page->mapping->host, | 6514 | btrfs_dec_test_ordered_pending(inode, &ordered, page_start, |
6527 | page_start, page_end); | 6515 | PAGE_CACHE_SIZE, 1)) { |
6516 | btrfs_finish_ordered_io(ordered); | ||
6528 | } | 6517 | } |
6529 | btrfs_put_ordered_extent(ordered); | 6518 | btrfs_put_ordered_extent(ordered); |
6530 | cached_state = NULL; | 6519 | cached_state = NULL; |
@@ -6771,7 +6760,8 @@ static int btrfs_truncate(struct inode *inode) | |||
6771 | * using truncate to replace the contents of the file will | 6760 | * using truncate to replace the contents of the file will |
6772 | * end up with a zero length file after a crash. | 6761 | * end up with a zero length file after a crash. |
6773 | */ | 6762 | */ |
6774 | if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) | 6763 | if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
6764 | &BTRFS_I(inode)->runtime_flags)) | ||
6775 | btrfs_add_ordered_operation(trans, root, inode); | 6765 | btrfs_add_ordered_operation(trans, root, inode); |
6776 | 6766 | ||
6777 | while (1) { | 6767 | while (1) { |
@@ -6894,7 +6884,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6894 | ei->root = NULL; | 6884 | ei->root = NULL; |
6895 | ei->space_info = NULL; | 6885 | ei->space_info = NULL; |
6896 | ei->generation = 0; | 6886 | ei->generation = 0; |
6897 | ei->sequence = 0; | ||
6898 | ei->last_trans = 0; | 6887 | ei->last_trans = 0; |
6899 | ei->last_sub_trans = 0; | 6888 | ei->last_sub_trans = 0; |
6900 | ei->logged_trans = 0; | 6889 | ei->logged_trans = 0; |
@@ -6909,11 +6898,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6909 | ei->outstanding_extents = 0; | 6898 | ei->outstanding_extents = 0; |
6910 | ei->reserved_extents = 0; | 6899 | ei->reserved_extents = 0; |
6911 | 6900 | ||
6912 | ei->ordered_data_close = 0; | 6901 | ei->runtime_flags = 0; |
6913 | ei->orphan_meta_reserved = 0; | ||
6914 | ei->dummy_inode = 0; | ||
6915 | ei->in_defrag = 0; | ||
6916 | ei->delalloc_meta_reserved = 0; | ||
6917 | ei->force_compress = BTRFS_COMPRESS_NONE; | 6902 | ei->force_compress = BTRFS_COMPRESS_NONE; |
6918 | 6903 | ||
6919 | ei->delayed_node = NULL; | 6904 | ei->delayed_node = NULL; |
@@ -6927,7 +6912,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6927 | mutex_init(&ei->log_mutex); | 6912 | mutex_init(&ei->log_mutex); |
6928 | mutex_init(&ei->delalloc_mutex); | 6913 | mutex_init(&ei->delalloc_mutex); |
6929 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6914 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
6930 | INIT_LIST_HEAD(&ei->i_orphan); | ||
6931 | INIT_LIST_HEAD(&ei->delalloc_inodes); | 6915 | INIT_LIST_HEAD(&ei->delalloc_inodes); |
6932 | INIT_LIST_HEAD(&ei->ordered_operations); | 6916 | INIT_LIST_HEAD(&ei->ordered_operations); |
6933 | RB_CLEAR_NODE(&ei->rb_node); | 6917 | RB_CLEAR_NODE(&ei->rb_node); |
@@ -6972,13 +6956,12 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6972 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6956 | spin_unlock(&root->fs_info->ordered_extent_lock); |
6973 | } | 6957 | } |
6974 | 6958 | ||
6975 | spin_lock(&root->orphan_lock); | 6959 | if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
6976 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6960 | &BTRFS_I(inode)->runtime_flags)) { |
6977 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", | 6961 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", |
6978 | (unsigned long long)btrfs_ino(inode)); | 6962 | (unsigned long long)btrfs_ino(inode)); |
6979 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6963 | atomic_dec(&root->orphan_inodes); |
6980 | } | 6964 | } |
6981 | spin_unlock(&root->orphan_lock); | ||
6982 | 6965 | ||
6983 | while (1) { | 6966 | while (1) { |
6984 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6967 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -7193,6 +7176,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
7193 | if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) | 7176 | if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) |
7194 | btrfs_add_ordered_operation(trans, root, old_inode); | 7177 | btrfs_add_ordered_operation(trans, root, old_inode); |
7195 | 7178 | ||
7179 | inode_inc_iversion(old_dir); | ||
7180 | inode_inc_iversion(new_dir); | ||
7181 | inode_inc_iversion(old_inode); | ||
7196 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 7182 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
7197 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 7183 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
7198 | old_inode->i_ctime = ctime; | 7184 | old_inode->i_ctime = ctime; |
@@ -7219,6 +7205,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
7219 | } | 7205 | } |
7220 | 7206 | ||
7221 | if (new_inode) { | 7207 | if (new_inode) { |
7208 | inode_inc_iversion(new_inode); | ||
7222 | new_inode->i_ctime = CURRENT_TIME; | 7209 | new_inode->i_ctime = CURRENT_TIME; |
7223 | if (unlikely(btrfs_ino(new_inode) == | 7210 | if (unlikely(btrfs_ino(new_inode) == |
7224 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | 7211 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
@@ -7490,6 +7477,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
7490 | cur_offset += ins.offset; | 7477 | cur_offset += ins.offset; |
7491 | *alloc_hint = ins.objectid + ins.offset; | 7478 | *alloc_hint = ins.objectid + ins.offset; |
7492 | 7479 | ||
7480 | inode_inc_iversion(inode); | ||
7493 | inode->i_ctime = CURRENT_TIME; | 7481 | inode->i_ctime = CURRENT_TIME; |
7494 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 7482 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
7495 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 7483 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14f8e1faa46e..24b776c08d99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -261,6 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
261 | } | 261 | } |
262 | 262 | ||
263 | btrfs_update_iflags(inode); | 263 | btrfs_update_iflags(inode); |
264 | inode_inc_iversion(inode); | ||
264 | inode->i_ctime = CURRENT_TIME; | 265 | inode->i_ctime = CURRENT_TIME; |
265 | ret = btrfs_update_inode(trans, root, inode); | 266 | ret = btrfs_update_inode(trans, root, inode); |
266 | 267 | ||
@@ -367,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
367 | return PTR_ERR(trans); | 368 | return PTR_ERR(trans); |
368 | 369 | ||
369 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 370 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
370 | 0, objectid, NULL, 0, 0, 0, 0); | 371 | 0, objectid, NULL, 0, 0, 0); |
371 | if (IS_ERR(leaf)) { | 372 | if (IS_ERR(leaf)) { |
372 | ret = PTR_ERR(leaf); | 373 | ret = PTR_ERR(leaf); |
373 | goto fail; | 374 | goto fail; |
@@ -2262,10 +2263,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
2262 | di_args->bytes_used = dev->bytes_used; | 2263 | di_args->bytes_used = dev->bytes_used; |
2263 | di_args->total_bytes = dev->total_bytes; | 2264 | di_args->total_bytes = dev->total_bytes; |
2264 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); | 2265 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); |
2265 | if (dev->name) | 2266 | if (dev->name) { |
2266 | strncpy(di_args->path, dev->name, sizeof(di_args->path)); | 2267 | strncpy(di_args->path, dev->name, sizeof(di_args->path)); |
2267 | else | 2268 | di_args->path[sizeof(di_args->path) - 1] = 0; |
2269 | } else { | ||
2268 | di_args->path[0] = '\0'; | 2270 | di_args->path[0] = '\0'; |
2271 | } | ||
2269 | 2272 | ||
2270 | out: | 2273 | out: |
2271 | if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) | 2274 | if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) |
@@ -2622,6 +2625,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2622 | btrfs_mark_buffer_dirty(leaf); | 2625 | btrfs_mark_buffer_dirty(leaf); |
2623 | btrfs_release_path(path); | 2626 | btrfs_release_path(path); |
2624 | 2627 | ||
2628 | inode_inc_iversion(inode); | ||
2625 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2629 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
2626 | 2630 | ||
2627 | /* | 2631 | /* |
@@ -2914,7 +2918,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2914 | up_read(&info->groups_sem); | 2918 | up_read(&info->groups_sem); |
2915 | } | 2919 | } |
2916 | 2920 | ||
2917 | user_dest = (struct btrfs_ioctl_space_info *) | 2921 | user_dest = (struct btrfs_ioctl_space_info __user *) |
2918 | (arg + sizeof(struct btrfs_ioctl_space_args)); | 2922 | (arg + sizeof(struct btrfs_ioctl_space_args)); |
2919 | 2923 | ||
2920 | if (copy_to_user(user_dest, dest_orig, alloc_size)) | 2924 | if (copy_to_user(user_dest, dest_orig, alloc_size)) |
@@ -3042,6 +3046,28 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, | |||
3042 | return ret; | 3046 | return ret; |
3043 | } | 3047 | } |
3044 | 3048 | ||
3049 | static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, | ||
3050 | void __user *arg, int reset_after_read) | ||
3051 | { | ||
3052 | struct btrfs_ioctl_get_dev_stats *sa; | ||
3053 | int ret; | ||
3054 | |||
3055 | if (reset_after_read && !capable(CAP_SYS_ADMIN)) | ||
3056 | return -EPERM; | ||
3057 | |||
3058 | sa = memdup_user(arg, sizeof(*sa)); | ||
3059 | if (IS_ERR(sa)) | ||
3060 | return PTR_ERR(sa); | ||
3061 | |||
3062 | ret = btrfs_get_dev_stats(root, sa, reset_after_read); | ||
3063 | |||
3064 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
3065 | ret = -EFAULT; | ||
3066 | |||
3067 | kfree(sa); | ||
3068 | return ret; | ||
3069 | } | ||
3070 | |||
3045 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | 3071 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) |
3046 | { | 3072 | { |
3047 | int ret = 0; | 3073 | int ret = 0; |
@@ -3212,8 +3238,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, | |||
3212 | } | 3238 | } |
3213 | } | 3239 | } |
3214 | 3240 | ||
3215 | static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) | 3241 | static long btrfs_ioctl_balance(struct file *file, void __user *arg) |
3216 | { | 3242 | { |
3243 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
3217 | struct btrfs_fs_info *fs_info = root->fs_info; | 3244 | struct btrfs_fs_info *fs_info = root->fs_info; |
3218 | struct btrfs_ioctl_balance_args *bargs; | 3245 | struct btrfs_ioctl_balance_args *bargs; |
3219 | struct btrfs_balance_control *bctl; | 3246 | struct btrfs_balance_control *bctl; |
@@ -3225,6 +3252,10 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) | |||
3225 | if (fs_info->sb->s_flags & MS_RDONLY) | 3252 | if (fs_info->sb->s_flags & MS_RDONLY) |
3226 | return -EROFS; | 3253 | return -EROFS; |
3227 | 3254 | ||
3255 | ret = mnt_want_write(file->f_path.mnt); | ||
3256 | if (ret) | ||
3257 | return ret; | ||
3258 | |||
3228 | mutex_lock(&fs_info->volume_mutex); | 3259 | mutex_lock(&fs_info->volume_mutex); |
3229 | mutex_lock(&fs_info->balance_mutex); | 3260 | mutex_lock(&fs_info->balance_mutex); |
3230 | 3261 | ||
@@ -3291,6 +3322,7 @@ out_bargs: | |||
3291 | out: | 3322 | out: |
3292 | mutex_unlock(&fs_info->balance_mutex); | 3323 | mutex_unlock(&fs_info->balance_mutex); |
3293 | mutex_unlock(&fs_info->volume_mutex); | 3324 | mutex_unlock(&fs_info->volume_mutex); |
3325 | mnt_drop_write(file->f_path.mnt); | ||
3294 | return ret; | 3326 | return ret; |
3295 | } | 3327 | } |
3296 | 3328 | ||
@@ -3386,7 +3418,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3386 | case BTRFS_IOC_DEV_INFO: | 3418 | case BTRFS_IOC_DEV_INFO: |
3387 | return btrfs_ioctl_dev_info(root, argp); | 3419 | return btrfs_ioctl_dev_info(root, argp); |
3388 | case BTRFS_IOC_BALANCE: | 3420 | case BTRFS_IOC_BALANCE: |
3389 | return btrfs_ioctl_balance(root, NULL); | 3421 | return btrfs_ioctl_balance(file, NULL); |
3390 | case BTRFS_IOC_CLONE: | 3422 | case BTRFS_IOC_CLONE: |
3391 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); | 3423 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); |
3392 | case BTRFS_IOC_CLONE_RANGE: | 3424 | case BTRFS_IOC_CLONE_RANGE: |
@@ -3419,11 +3451,15 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3419 | case BTRFS_IOC_SCRUB_PROGRESS: | 3451 | case BTRFS_IOC_SCRUB_PROGRESS: |
3420 | return btrfs_ioctl_scrub_progress(root, argp); | 3452 | return btrfs_ioctl_scrub_progress(root, argp); |
3421 | case BTRFS_IOC_BALANCE_V2: | 3453 | case BTRFS_IOC_BALANCE_V2: |
3422 | return btrfs_ioctl_balance(root, argp); | 3454 | return btrfs_ioctl_balance(file, argp); |
3423 | case BTRFS_IOC_BALANCE_CTL: | 3455 | case BTRFS_IOC_BALANCE_CTL: |
3424 | return btrfs_ioctl_balance_ctl(root, arg); | 3456 | return btrfs_ioctl_balance_ctl(root, arg); |
3425 | case BTRFS_IOC_BALANCE_PROGRESS: | 3457 | case BTRFS_IOC_BALANCE_PROGRESS: |
3426 | return btrfs_ioctl_balance_progress(root, argp); | 3458 | return btrfs_ioctl_balance_progress(root, argp); |
3459 | case BTRFS_IOC_GET_DEV_STATS: | ||
3460 | return btrfs_ioctl_get_dev_stats(root, argp, 0); | ||
3461 | case BTRFS_IOC_GET_AND_RESET_DEV_STATS: | ||
3462 | return btrfs_ioctl_get_dev_stats(root, argp, 1); | ||
3427 | } | 3463 | } |
3428 | 3464 | ||
3429 | return -ENOTTY; | 3465 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 4f69028a68c4..497c530724cf 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -252,7 +252,7 @@ struct btrfs_data_container { | |||
252 | 252 | ||
253 | struct btrfs_ioctl_ino_path_args { | 253 | struct btrfs_ioctl_ino_path_args { |
254 | __u64 inum; /* in */ | 254 | __u64 inum; /* in */ |
255 | __u32 size; /* in */ | 255 | __u64 size; /* in */ |
256 | __u64 reserved[4]; | 256 | __u64 reserved[4]; |
257 | /* struct btrfs_data_container *fspath; out */ | 257 | /* struct btrfs_data_container *fspath; out */ |
258 | __u64 fspath; /* out */ | 258 | __u64 fspath; /* out */ |
@@ -260,12 +260,41 @@ struct btrfs_ioctl_ino_path_args { | |||
260 | 260 | ||
261 | struct btrfs_ioctl_logical_ino_args { | 261 | struct btrfs_ioctl_logical_ino_args { |
262 | __u64 logical; /* in */ | 262 | __u64 logical; /* in */ |
263 | __u32 size; /* in */ | 263 | __u64 size; /* in */ |
264 | __u64 reserved[4]; | 264 | __u64 reserved[4]; |
265 | /* struct btrfs_data_container *inodes; out */ | 265 | /* struct btrfs_data_container *inodes; out */ |
266 | __u64 inodes; | 266 | __u64 inodes; |
267 | }; | 267 | }; |
268 | 268 | ||
269 | enum btrfs_dev_stat_values { | ||
270 | /* disk I/O failure stats */ | ||
271 | BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
272 | BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
273 | BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
274 | |||
275 | /* stats for indirect indications for I/O failures */ | ||
276 | BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or | ||
277 | * contents is illegal: this is an | ||
278 | * indication that the block was damaged | ||
279 | * during read or write, or written to | ||
280 | * wrong location or read from wrong | ||
281 | * location */ | ||
282 | BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not | ||
283 | * been written */ | ||
284 | |||
285 | BTRFS_DEV_STAT_VALUES_MAX | ||
286 | }; | ||
287 | |||
288 | struct btrfs_ioctl_get_dev_stats { | ||
289 | __u64 devid; /* in */ | ||
290 | __u64 nr_items; /* in/out */ | ||
291 | |||
292 | /* out values: */ | ||
293 | __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
294 | |||
295 | __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ | ||
296 | }; | ||
297 | |||
269 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ | 298 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ |
270 | struct btrfs_ioctl_vol_args) | 299 | struct btrfs_ioctl_vol_args) |
271 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ | 300 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ |
@@ -330,5 +359,9 @@ struct btrfs_ioctl_logical_ino_args { | |||
330 | struct btrfs_ioctl_ino_path_args) | 359 | struct btrfs_ioctl_ino_path_args) |
331 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ | 360 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ |
332 | struct btrfs_ioctl_ino_path_args) | 361 | struct btrfs_ioctl_ino_path_args) |
362 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ | ||
363 | struct btrfs_ioctl_get_dev_stats) | ||
364 | #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ | ||
365 | struct btrfs_ioctl_get_dev_stats) | ||
333 | 366 | ||
334 | #endif | 367 | #endif |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index bbf6d0d9aebe..9e138cdc36c5 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
196 | entry->len = len; | 196 | entry->len = len; |
197 | entry->disk_len = disk_len; | 197 | entry->disk_len = disk_len; |
198 | entry->bytes_left = len; | 198 | entry->bytes_left = len; |
199 | entry->inode = inode; | 199 | entry->inode = igrab(inode); |
200 | entry->compress_type = compress_type; | 200 | entry->compress_type = compress_type; |
201 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 201 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
202 | set_bit(type, &entry->flags); | 202 | set_bit(type, &entry->flags); |
@@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
212 | 212 | ||
213 | trace_btrfs_ordered_extent_add(inode, entry); | 213 | trace_btrfs_ordered_extent_add(inode, entry); |
214 | 214 | ||
215 | spin_lock(&tree->lock); | 215 | spin_lock_irq(&tree->lock); |
216 | node = tree_insert(&tree->tree, file_offset, | 216 | node = tree_insert(&tree->tree, file_offset, |
217 | &entry->rb_node); | 217 | &entry->rb_node); |
218 | if (node) | 218 | if (node) |
219 | ordered_data_tree_panic(inode, -EEXIST, file_offset); | 219 | ordered_data_tree_panic(inode, -EEXIST, file_offset); |
220 | spin_unlock(&tree->lock); | 220 | spin_unlock_irq(&tree->lock); |
221 | 221 | ||
222 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 222 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
223 | list_add_tail(&entry->root_extent_list, | 223 | list_add_tail(&entry->root_extent_list, |
@@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode, | |||
264 | struct btrfs_ordered_inode_tree *tree; | 264 | struct btrfs_ordered_inode_tree *tree; |
265 | 265 | ||
266 | tree = &BTRFS_I(inode)->ordered_tree; | 266 | tree = &BTRFS_I(inode)->ordered_tree; |
267 | spin_lock(&tree->lock); | 267 | spin_lock_irq(&tree->lock); |
268 | list_add_tail(&sum->list, &entry->list); | 268 | list_add_tail(&sum->list, &entry->list); |
269 | spin_unlock(&tree->lock); | 269 | spin_unlock_irq(&tree->lock); |
270 | } | 270 | } |
271 | 271 | ||
272 | /* | 272 | /* |
@@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode, | |||
283 | */ | 283 | */ |
284 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | 284 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, |
285 | struct btrfs_ordered_extent **cached, | 285 | struct btrfs_ordered_extent **cached, |
286 | u64 *file_offset, u64 io_size) | 286 | u64 *file_offset, u64 io_size, int uptodate) |
287 | { | 287 | { |
288 | struct btrfs_ordered_inode_tree *tree; | 288 | struct btrfs_ordered_inode_tree *tree; |
289 | struct rb_node *node; | 289 | struct rb_node *node; |
290 | struct btrfs_ordered_extent *entry = NULL; | 290 | struct btrfs_ordered_extent *entry = NULL; |
291 | int ret; | 291 | int ret; |
292 | unsigned long flags; | ||
292 | u64 dec_end; | 293 | u64 dec_end; |
293 | u64 dec_start; | 294 | u64 dec_start; |
294 | u64 to_dec; | 295 | u64 to_dec; |
295 | 296 | ||
296 | tree = &BTRFS_I(inode)->ordered_tree; | 297 | tree = &BTRFS_I(inode)->ordered_tree; |
297 | spin_lock(&tree->lock); | 298 | spin_lock_irqsave(&tree->lock, flags); |
298 | node = tree_search(tree, *file_offset); | 299 | node = tree_search(tree, *file_offset); |
299 | if (!node) { | 300 | if (!node) { |
300 | ret = 1; | 301 | ret = 1; |
@@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, | |||
323 | (unsigned long long)to_dec); | 324 | (unsigned long long)to_dec); |
324 | } | 325 | } |
325 | entry->bytes_left -= to_dec; | 326 | entry->bytes_left -= to_dec; |
327 | if (!uptodate) | ||
328 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | ||
329 | |||
326 | if (entry->bytes_left == 0) | 330 | if (entry->bytes_left == 0) |
327 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 331 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
328 | else | 332 | else |
@@ -332,7 +336,7 @@ out: | |||
332 | *cached = entry; | 336 | *cached = entry; |
333 | atomic_inc(&entry->refs); | 337 | atomic_inc(&entry->refs); |
334 | } | 338 | } |
335 | spin_unlock(&tree->lock); | 339 | spin_unlock_irqrestore(&tree->lock, flags); |
336 | return ret == 0; | 340 | return ret == 0; |
337 | } | 341 | } |
338 | 342 | ||
@@ -347,15 +351,21 @@ out: | |||
347 | */ | 351 | */ |
348 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 352 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
349 | struct btrfs_ordered_extent **cached, | 353 | struct btrfs_ordered_extent **cached, |
350 | u64 file_offset, u64 io_size) | 354 | u64 file_offset, u64 io_size, int uptodate) |
351 | { | 355 | { |
352 | struct btrfs_ordered_inode_tree *tree; | 356 | struct btrfs_ordered_inode_tree *tree; |
353 | struct rb_node *node; | 357 | struct rb_node *node; |
354 | struct btrfs_ordered_extent *entry = NULL; | 358 | struct btrfs_ordered_extent *entry = NULL; |
359 | unsigned long flags; | ||
355 | int ret; | 360 | int ret; |
356 | 361 | ||
357 | tree = &BTRFS_I(inode)->ordered_tree; | 362 | tree = &BTRFS_I(inode)->ordered_tree; |
358 | spin_lock(&tree->lock); | 363 | spin_lock_irqsave(&tree->lock, flags); |
364 | if (cached && *cached) { | ||
365 | entry = *cached; | ||
366 | goto have_entry; | ||
367 | } | ||
368 | |||
359 | node = tree_search(tree, file_offset); | 369 | node = tree_search(tree, file_offset); |
360 | if (!node) { | 370 | if (!node) { |
361 | ret = 1; | 371 | ret = 1; |
@@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
363 | } | 373 | } |
364 | 374 | ||
365 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 375 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
376 | have_entry: | ||
366 | if (!offset_in_entry(entry, file_offset)) { | 377 | if (!offset_in_entry(entry, file_offset)) { |
367 | ret = 1; | 378 | ret = 1; |
368 | goto out; | 379 | goto out; |
@@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
374 | (unsigned long long)io_size); | 385 | (unsigned long long)io_size); |
375 | } | 386 | } |
376 | entry->bytes_left -= io_size; | 387 | entry->bytes_left -= io_size; |
388 | if (!uptodate) | ||
389 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | ||
390 | |||
377 | if (entry->bytes_left == 0) | 391 | if (entry->bytes_left == 0) |
378 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 392 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
379 | else | 393 | else |
@@ -383,7 +397,7 @@ out: | |||
383 | *cached = entry; | 397 | *cached = entry; |
384 | atomic_inc(&entry->refs); | 398 | atomic_inc(&entry->refs); |
385 | } | 399 | } |
386 | spin_unlock(&tree->lock); | 400 | spin_unlock_irqrestore(&tree->lock, flags); |
387 | return ret == 0; | 401 | return ret == 0; |
388 | } | 402 | } |
389 | 403 | ||
@@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
399 | trace_btrfs_ordered_extent_put(entry->inode, entry); | 413 | trace_btrfs_ordered_extent_put(entry->inode, entry); |
400 | 414 | ||
401 | if (atomic_dec_and_test(&entry->refs)) { | 415 | if (atomic_dec_and_test(&entry->refs)) { |
416 | if (entry->inode) | ||
417 | btrfs_add_delayed_iput(entry->inode); | ||
402 | while (!list_empty(&entry->list)) { | 418 | while (!list_empty(&entry->list)) { |
403 | cur = entry->list.next; | 419 | cur = entry->list.next; |
404 | sum = list_entry(cur, struct btrfs_ordered_sum, list); | 420 | sum = list_entry(cur, struct btrfs_ordered_sum, list); |
@@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
411 | 427 | ||
412 | /* | 428 | /* |
413 | * remove an ordered extent from the tree. No references are dropped | 429 | * remove an ordered extent from the tree. No references are dropped |
414 | * and you must wake_up entry->wait. You must hold the tree lock | 430 | * and waiters are woken up. |
415 | * while you call this function. | ||
416 | */ | 431 | */ |
417 | static void __btrfs_remove_ordered_extent(struct inode *inode, | 432 | void btrfs_remove_ordered_extent(struct inode *inode, |
418 | struct btrfs_ordered_extent *entry) | 433 | struct btrfs_ordered_extent *entry) |
419 | { | 434 | { |
420 | struct btrfs_ordered_inode_tree *tree; | 435 | struct btrfs_ordered_inode_tree *tree; |
421 | struct btrfs_root *root = BTRFS_I(inode)->root; | 436 | struct btrfs_root *root = BTRFS_I(inode)->root; |
422 | struct rb_node *node; | 437 | struct rb_node *node; |
423 | 438 | ||
424 | tree = &BTRFS_I(inode)->ordered_tree; | 439 | tree = &BTRFS_I(inode)->ordered_tree; |
440 | spin_lock_irq(&tree->lock); | ||
425 | node = &entry->rb_node; | 441 | node = &entry->rb_node; |
426 | rb_erase(node, &tree->tree); | 442 | rb_erase(node, &tree->tree); |
427 | tree->last = NULL; | 443 | tree->last = NULL; |
428 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 444 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
445 | spin_unlock_irq(&tree->lock); | ||
429 | 446 | ||
430 | spin_lock(&root->fs_info->ordered_extent_lock); | 447 | spin_lock(&root->fs_info->ordered_extent_lock); |
431 | list_del_init(&entry->root_extent_list); | 448 | list_del_init(&entry->root_extent_list); |
@@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode, | |||
442 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 459 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
443 | } | 460 | } |
444 | spin_unlock(&root->fs_info->ordered_extent_lock); | 461 | spin_unlock(&root->fs_info->ordered_extent_lock); |
445 | } | ||
446 | |||
447 | /* | ||
448 | * remove an ordered extent from the tree. No references are dropped | ||
449 | * but any waiters are woken. | ||
450 | */ | ||
451 | void btrfs_remove_ordered_extent(struct inode *inode, | ||
452 | struct btrfs_ordered_extent *entry) | ||
453 | { | ||
454 | struct btrfs_ordered_inode_tree *tree; | ||
455 | |||
456 | tree = &BTRFS_I(inode)->ordered_tree; | ||
457 | spin_lock(&tree->lock); | ||
458 | __btrfs_remove_ordered_extent(inode, entry); | ||
459 | spin_unlock(&tree->lock); | ||
460 | wake_up(&entry->wait); | 462 | wake_up(&entry->wait); |
461 | } | 463 | } |
462 | 464 | ||
@@ -621,19 +623,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
621 | if (orig_end > INT_LIMIT(loff_t)) | 623 | if (orig_end > INT_LIMIT(loff_t)) |
622 | orig_end = INT_LIMIT(loff_t); | 624 | orig_end = INT_LIMIT(loff_t); |
623 | } | 625 | } |
624 | again: | 626 | |
625 | /* start IO across the range first to instantiate any delalloc | 627 | /* start IO across the range first to instantiate any delalloc |
626 | * extents | 628 | * extents |
627 | */ | 629 | */ |
628 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); | 630 | filemap_write_and_wait_range(inode->i_mapping, start, orig_end); |
629 | |||
630 | /* The compression code will leave pages locked but return from | ||
631 | * writepage without setting the page writeback. Starting again | ||
632 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | ||
633 | */ | ||
634 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); | ||
635 | |||
636 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); | ||
637 | 631 | ||
638 | end = orig_end; | 632 | end = orig_end; |
639 | found = 0; | 633 | found = 0; |
@@ -657,11 +651,6 @@ again: | |||
657 | break; | 651 | break; |
658 | end--; | 652 | end--; |
659 | } | 653 | } |
660 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | ||
661 | EXTENT_DELALLOC, 0, NULL)) { | ||
662 | schedule_timeout(1); | ||
663 | goto again; | ||
664 | } | ||
665 | } | 654 | } |
666 | 655 | ||
667 | /* | 656 | /* |
@@ -676,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
676 | struct btrfs_ordered_extent *entry = NULL; | 665 | struct btrfs_ordered_extent *entry = NULL; |
677 | 666 | ||
678 | tree = &BTRFS_I(inode)->ordered_tree; | 667 | tree = &BTRFS_I(inode)->ordered_tree; |
679 | spin_lock(&tree->lock); | 668 | spin_lock_irq(&tree->lock); |
680 | node = tree_search(tree, file_offset); | 669 | node = tree_search(tree, file_offset); |
681 | if (!node) | 670 | if (!node) |
682 | goto out; | 671 | goto out; |
@@ -687,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
687 | if (entry) | 676 | if (entry) |
688 | atomic_inc(&entry->refs); | 677 | atomic_inc(&entry->refs); |
689 | out: | 678 | out: |
690 | spin_unlock(&tree->lock); | 679 | spin_unlock_irq(&tree->lock); |
691 | return entry; | 680 | return entry; |
692 | } | 681 | } |
693 | 682 | ||
@@ -703,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
703 | struct btrfs_ordered_extent *entry = NULL; | 692 | struct btrfs_ordered_extent *entry = NULL; |
704 | 693 | ||
705 | tree = &BTRFS_I(inode)->ordered_tree; | 694 | tree = &BTRFS_I(inode)->ordered_tree; |
706 | spin_lock(&tree->lock); | 695 | spin_lock_irq(&tree->lock); |
707 | node = tree_search(tree, file_offset); | 696 | node = tree_search(tree, file_offset); |
708 | if (!node) { | 697 | if (!node) { |
709 | node = tree_search(tree, file_offset + len); | 698 | node = tree_search(tree, file_offset + len); |
@@ -728,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
728 | out: | 717 | out: |
729 | if (entry) | 718 | if (entry) |
730 | atomic_inc(&entry->refs); | 719 | atomic_inc(&entry->refs); |
731 | spin_unlock(&tree->lock); | 720 | spin_unlock_irq(&tree->lock); |
732 | return entry; | 721 | return entry; |
733 | } | 722 | } |
734 | 723 | ||
@@ -744,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | |||
744 | struct btrfs_ordered_extent *entry = NULL; | 733 | struct btrfs_ordered_extent *entry = NULL; |
745 | 734 | ||
746 | tree = &BTRFS_I(inode)->ordered_tree; | 735 | tree = &BTRFS_I(inode)->ordered_tree; |
747 | spin_lock(&tree->lock); | 736 | spin_lock_irq(&tree->lock); |
748 | node = tree_search(tree, file_offset); | 737 | node = tree_search(tree, file_offset); |
749 | if (!node) | 738 | if (!node) |
750 | goto out; | 739 | goto out; |
@@ -752,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | |||
752 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 741 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
753 | atomic_inc(&entry->refs); | 742 | atomic_inc(&entry->refs); |
754 | out: | 743 | out: |
755 | spin_unlock(&tree->lock); | 744 | spin_unlock_irq(&tree->lock); |
756 | return entry; | 745 | return entry; |
757 | } | 746 | } |
758 | 747 | ||
@@ -764,7 +753,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
764 | struct btrfs_ordered_extent *ordered) | 753 | struct btrfs_ordered_extent *ordered) |
765 | { | 754 | { |
766 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 755 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
767 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
768 | u64 disk_i_size; | 756 | u64 disk_i_size; |
769 | u64 new_i_size; | 757 | u64 new_i_size; |
770 | u64 i_size_test; | 758 | u64 i_size_test; |
@@ -779,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
779 | else | 767 | else |
780 | offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); | 768 | offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); |
781 | 769 | ||
782 | spin_lock(&tree->lock); | 770 | spin_lock_irq(&tree->lock); |
783 | disk_i_size = BTRFS_I(inode)->disk_i_size; | 771 | disk_i_size = BTRFS_I(inode)->disk_i_size; |
784 | 772 | ||
785 | /* truncate file */ | 773 | /* truncate file */ |
@@ -798,14 +786,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
798 | } | 786 | } |
799 | 787 | ||
800 | /* | 788 | /* |
801 | * we can't update the disk_isize if there are delalloc bytes | ||
802 | * between disk_i_size and this ordered extent | ||
803 | */ | ||
804 | if (test_range_bit(io_tree, disk_i_size, offset - 1, | ||
805 | EXTENT_DELALLOC, 0, NULL)) { | ||
806 | goto out; | ||
807 | } | ||
808 | /* | ||
809 | * walk backward from this ordered extent to disk_i_size. | 789 | * walk backward from this ordered extent to disk_i_size. |
810 | * if we find an ordered extent then we can't update disk i_size | 790 | * if we find an ordered extent then we can't update disk i_size |
811 | * yet | 791 | * yet |
@@ -825,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
825 | } | 805 | } |
826 | node = prev; | 806 | node = prev; |
827 | } | 807 | } |
828 | while (node) { | 808 | for (; node; node = rb_prev(node)) { |
829 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 809 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
810 | |||
811 | /* We treat this entry as if it doesn't exist */ | |
812 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
813 | continue; | ||
830 | if (test->file_offset + test->len <= disk_i_size) | 814 | if (test->file_offset + test->len <= disk_i_size) |
831 | break; | 815 | break; |
832 | if (test->file_offset >= i_size) | 816 | if (test->file_offset >= i_size) |
833 | break; | 817 | break; |
834 | if (test->file_offset >= disk_i_size) | 818 | if (test->file_offset >= disk_i_size) |
835 | goto out; | 819 | goto out; |
836 | node = rb_prev(node); | ||
837 | } | 820 | } |
838 | new_i_size = min_t(u64, offset, i_size); | 821 | new_i_size = min_t(u64, offset, i_size); |
839 | 822 | ||
@@ -851,43 +834,49 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
851 | else | 834 | else |
852 | node = rb_first(&tree->tree); | 835 | node = rb_first(&tree->tree); |
853 | } | 836 | } |
854 | i_size_test = 0; | 837 | |
855 | if (node) { | 838 | /* |
856 | /* | 839 | * We are looking for an area between our current extent and the next |
857 | * do we have an area where IO might have finished | 840 | * ordered extent to update the i_size to. There are 3 cases here |
858 | * between our ordered extent and the next one. | 841 | * |
859 | */ | 842 | * 1) We don't actually have anything and we can update to i_size. |
843 | * 2) We have stuff but they already did their i_size update so again we | ||
844 | * can just update to i_size. | ||
845 | * 3) We have an outstanding ordered extent so the most we can update | ||
846 | * our disk_i_size to is the start of the next offset. | ||
847 | */ | ||
848 | i_size_test = i_size; | ||
849 | for (; node; node = rb_next(node)) { | ||
860 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 850 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
861 | if (test->file_offset > offset) | 851 | |
852 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
853 | continue; | ||
854 | if (test->file_offset > offset) { | ||
862 | i_size_test = test->file_offset; | 855 | i_size_test = test->file_offset; |
863 | } else { | 856 | break; |
864 | i_size_test = i_size; | 857 | } |
865 | } | 858 | } |
866 | 859 | ||
867 | /* | 860 | /* |
868 | * i_size_test is the end of a region after this ordered | 861 | * i_size_test is the end of a region after this ordered |
869 | * extent where there are no ordered extents. As long as there | 862 | * extent where there are no ordered extents, we can safely set |
870 | * are no delalloc bytes in this area, it is safe to update | 863 | * disk_i_size to this. |
871 | * disk_i_size to the end of the region. | ||
872 | */ | 864 | */ |
873 | if (i_size_test > offset && | 865 | if (i_size_test > offset) |
874 | !test_range_bit(io_tree, offset, i_size_test - 1, | ||
875 | EXTENT_DELALLOC, 0, NULL)) { | ||
876 | new_i_size = min_t(u64, i_size_test, i_size); | 866 | new_i_size = min_t(u64, i_size_test, i_size); |
877 | } | ||
878 | BTRFS_I(inode)->disk_i_size = new_i_size; | 867 | BTRFS_I(inode)->disk_i_size = new_i_size; |
879 | ret = 0; | 868 | ret = 0; |
880 | out: | 869 | out: |
881 | /* | 870 | /* |
882 | * we need to remove the ordered extent with the tree lock held | 871 | * We need to do this because we can't remove ordered extents until |
883 | * so that other people calling this function don't find our fully | 872 | * after the i_disk_size has been updated and then the inode has been |
884 | * processed ordered entry and skip updating the i_size | 873 | * updated to reflect the change, so we need to tell anybody who finds |
874 | * this ordered extent that we've already done all the real work, we | ||
875 | * just haven't completed all the other work. | ||
885 | */ | 876 | */ |
886 | if (ordered) | 877 | if (ordered) |
887 | __btrfs_remove_ordered_extent(inode, ordered); | 878 | set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags); |
888 | spin_unlock(&tree->lock); | 879 | spin_unlock_irq(&tree->lock); |
889 | if (ordered) | ||
890 | wake_up(&ordered->wait); | ||
891 | return ret; | 880 | return ret; |
892 | } | 881 | } |
893 | 882 | ||
@@ -912,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
912 | if (!ordered) | 901 | if (!ordered) |
913 | return 1; | 902 | return 1; |
914 | 903 | ||
915 | spin_lock(&tree->lock); | 904 | spin_lock_irq(&tree->lock); |
916 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { | 905 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { |
917 | if (disk_bytenr >= ordered_sum->bytenr) { | 906 | if (disk_bytenr >= ordered_sum->bytenr) { |
918 | num_sectors = ordered_sum->len / sectorsize; | 907 | num_sectors = ordered_sum->len / sectorsize; |
@@ -927,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
927 | } | 916 | } |
928 | } | 917 | } |
929 | out: | 918 | out: |
930 | spin_unlock(&tree->lock); | 919 | spin_unlock_irq(&tree->lock); |
931 | btrfs_put_ordered_extent(ordered); | 920 | btrfs_put_ordered_extent(ordered); |
932 | return ret; | 921 | return ret; |
933 | } | 922 | } |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c355ad4dc1a6..e03c560d2997 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -74,6 +74,12 @@ struct btrfs_ordered_sum { | |||
74 | 74 | ||
75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | 75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ |
76 | 76 | ||
77 | #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ | ||
78 | |||
79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent | |
80 | * has done its due diligence in updating | ||
81 | * the isize. */ | ||
82 | |||
77 | struct btrfs_ordered_extent { | 83 | struct btrfs_ordered_extent { |
78 | /* logical offset in the file */ | 84 | /* logical offset in the file */ |
79 | u64 file_offset; | 85 | u64 file_offset; |
@@ -113,6 +119,8 @@ struct btrfs_ordered_extent { | |||
113 | 119 | ||
114 | /* a per root list of all the pending ordered extents */ | 120 | /* a per root list of all the pending ordered extents */ |
115 | struct list_head root_extent_list; | 121 | struct list_head root_extent_list; |
122 | |||
123 | struct btrfs_work work; | ||
116 | }; | 124 | }; |
117 | 125 | ||
118 | 126 | ||
@@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
143 | struct btrfs_ordered_extent *entry); | 151 | struct btrfs_ordered_extent *entry); |
144 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 152 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
145 | struct btrfs_ordered_extent **cached, | 153 | struct btrfs_ordered_extent **cached, |
146 | u64 file_offset, u64 io_size); | 154 | u64 file_offset, u64 io_size, int uptodate); |
147 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | 155 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, |
148 | struct btrfs_ordered_extent **cached, | 156 | struct btrfs_ordered_extent **cached, |
149 | u64 *file_offset, u64 io_size); | 157 | u64 *file_offset, u64 io_size, |
158 | int uptodate); | ||
150 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 159 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
151 | u64 start, u64 len, u64 disk_len, int type); | 160 | u64 start, u64 len, u64 disk_len, int type); |
152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 161 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f38e452486b8..5e23684887eb 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -294,6 +294,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
294 | btrfs_dev_extent_chunk_offset(l, dev_extent), | 294 | btrfs_dev_extent_chunk_offset(l, dev_extent), |
295 | (unsigned long long) | 295 | (unsigned long long) |
296 | btrfs_dev_extent_length(l, dev_extent)); | 296 | btrfs_dev_extent_length(l, dev_extent)); |
297 | case BTRFS_DEV_STATS_KEY: | ||
298 | printk(KERN_INFO "\t\tdevice stats\n"); | ||
299 | break; | ||
297 | }; | 300 | }; |
298 | } | 301 | } |
299 | } | 302 | } |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index ac5d01085884..48a4882d8ad5 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -718,13 +718,18 @@ static void reada_start_machine_worker(struct btrfs_work *work) | |||
718 | { | 718 | { |
719 | struct reada_machine_work *rmw; | 719 | struct reada_machine_work *rmw; |
720 | struct btrfs_fs_info *fs_info; | 720 | struct btrfs_fs_info *fs_info; |
721 | int old_ioprio; | ||
721 | 722 | ||
722 | rmw = container_of(work, struct reada_machine_work, work); | 723 | rmw = container_of(work, struct reada_machine_work, work); |
723 | fs_info = rmw->fs_info; | 724 | fs_info = rmw->fs_info; |
724 | 725 | ||
725 | kfree(rmw); | 726 | kfree(rmw); |
726 | 727 | ||
728 | old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), | ||
729 | task_nice_ioprio(current)); | ||
730 | set_task_ioprio(current, BTRFS_IOPRIO_READA); | ||
727 | __reada_start_machine(fs_info); | 731 | __reada_start_machine(fs_info); |
732 | set_task_ioprio(current, old_ioprio); | ||
728 | } | 733 | } |
729 | 734 | ||
730 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) | 735 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4f76fc3f8e89..a38cfa4f251e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -50,7 +50,7 @@ struct scrub_dev; | |||
50 | struct scrub_page { | 50 | struct scrub_page { |
51 | struct scrub_block *sblock; | 51 | struct scrub_block *sblock; |
52 | struct page *page; | 52 | struct page *page; |
53 | struct block_device *bdev; | 53 | struct btrfs_device *dev; |
54 | u64 flags; /* extent flags */ | 54 | u64 flags; /* extent flags */ |
55 | u64 generation; | 55 | u64 generation; |
56 | u64 logical; | 56 | u64 logical; |
@@ -86,6 +86,7 @@ struct scrub_block { | |||
86 | unsigned int header_error:1; | 86 | unsigned int header_error:1; |
87 | unsigned int checksum_error:1; | 87 | unsigned int checksum_error:1; |
88 | unsigned int no_io_error_seen:1; | 88 | unsigned int no_io_error_seen:1; |
89 | unsigned int generation_error:1; /* also sets header_error */ | ||
89 | }; | 90 | }; |
90 | }; | 91 | }; |
91 | 92 | ||
@@ -675,6 +676,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
675 | sdev->stat.read_errors++; | 676 | sdev->stat.read_errors++; |
676 | sdev->stat.uncorrectable_errors++; | 677 | sdev->stat.uncorrectable_errors++; |
677 | spin_unlock(&sdev->stat_lock); | 678 | spin_unlock(&sdev->stat_lock); |
679 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
680 | BTRFS_DEV_STAT_READ_ERRS); | ||
678 | goto out; | 681 | goto out; |
679 | } | 682 | } |
680 | 683 | ||
@@ -686,6 +689,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
686 | sdev->stat.read_errors++; | 689 | sdev->stat.read_errors++; |
687 | sdev->stat.uncorrectable_errors++; | 690 | sdev->stat.uncorrectable_errors++; |
688 | spin_unlock(&sdev->stat_lock); | 691 | spin_unlock(&sdev->stat_lock); |
692 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
693 | BTRFS_DEV_STAT_READ_ERRS); | ||
689 | goto out; | 694 | goto out; |
690 | } | 695 | } |
691 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); | 696 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); |
@@ -699,6 +704,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
699 | sdev->stat.read_errors++; | 704 | sdev->stat.read_errors++; |
700 | sdev->stat.uncorrectable_errors++; | 705 | sdev->stat.uncorrectable_errors++; |
701 | spin_unlock(&sdev->stat_lock); | 706 | spin_unlock(&sdev->stat_lock); |
707 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
708 | BTRFS_DEV_STAT_READ_ERRS); | ||
702 | goto out; | 709 | goto out; |
703 | } | 710 | } |
704 | 711 | ||
@@ -725,12 +732,16 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
725 | spin_unlock(&sdev->stat_lock); | 732 | spin_unlock(&sdev->stat_lock); |
726 | if (__ratelimit(&_rs)) | 733 | if (__ratelimit(&_rs)) |
727 | scrub_print_warning("i/o error", sblock_to_check); | 734 | scrub_print_warning("i/o error", sblock_to_check); |
735 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
736 | BTRFS_DEV_STAT_READ_ERRS); | ||
728 | } else if (sblock_bad->checksum_error) { | 737 | } else if (sblock_bad->checksum_error) { |
729 | spin_lock(&sdev->stat_lock); | 738 | spin_lock(&sdev->stat_lock); |
730 | sdev->stat.csum_errors++; | 739 | sdev->stat.csum_errors++; |
731 | spin_unlock(&sdev->stat_lock); | 740 | spin_unlock(&sdev->stat_lock); |
732 | if (__ratelimit(&_rs)) | 741 | if (__ratelimit(&_rs)) |
733 | scrub_print_warning("checksum error", sblock_to_check); | 742 | scrub_print_warning("checksum error", sblock_to_check); |
743 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
744 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
734 | } else if (sblock_bad->header_error) { | 745 | } else if (sblock_bad->header_error) { |
735 | spin_lock(&sdev->stat_lock); | 746 | spin_lock(&sdev->stat_lock); |
736 | sdev->stat.verify_errors++; | 747 | sdev->stat.verify_errors++; |
@@ -738,6 +749,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
738 | if (__ratelimit(&_rs)) | 749 | if (__ratelimit(&_rs)) |
739 | scrub_print_warning("checksum/header error", | 750 | scrub_print_warning("checksum/header error", |
740 | sblock_to_check); | 751 | sblock_to_check); |
752 | if (sblock_bad->generation_error) | ||
753 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
754 | BTRFS_DEV_STAT_GENERATION_ERRS); | ||
755 | else | ||
756 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
757 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
741 | } | 758 | } |
742 | 759 | ||
743 | if (sdev->readonly) | 760 | if (sdev->readonly) |
@@ -998,7 +1015,8 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
998 | page = sblock->pagev + page_index; | 1015 | page = sblock->pagev + page_index; |
999 | page->logical = logical; | 1016 | page->logical = logical; |
1000 | page->physical = bbio->stripes[mirror_index].physical; | 1017 | page->physical = bbio->stripes[mirror_index].physical; |
1001 | page->bdev = bbio->stripes[mirror_index].dev->bdev; | 1018 | /* for missing devices, dev->bdev is NULL */ |
1019 | page->dev = bbio->stripes[mirror_index].dev; | ||
1002 | page->mirror_num = mirror_index + 1; | 1020 | page->mirror_num = mirror_index + 1; |
1003 | page->page = alloc_page(GFP_NOFS); | 1021 | page->page = alloc_page(GFP_NOFS); |
1004 | if (!page->page) { | 1022 | if (!page->page) { |
@@ -1042,11 +1060,17 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
1042 | struct scrub_page *page = sblock->pagev + page_num; | 1060 | struct scrub_page *page = sblock->pagev + page_num; |
1043 | DECLARE_COMPLETION_ONSTACK(complete); | 1061 | DECLARE_COMPLETION_ONSTACK(complete); |
1044 | 1062 | ||
1063 | if (page->dev->bdev == NULL) { | ||
1064 | page->io_error = 1; | ||
1065 | sblock->no_io_error_seen = 0; | ||
1066 | continue; | ||
1067 | } | ||
1068 | |||
1045 | BUG_ON(!page->page); | 1069 | BUG_ON(!page->page); |
1046 | bio = bio_alloc(GFP_NOFS, 1); | 1070 | bio = bio_alloc(GFP_NOFS, 1); |
1047 | if (!bio) | 1071 | if (!bio) |
1048 | return -EIO; | 1072 | return -EIO; |
1049 | bio->bi_bdev = page->bdev; | 1073 | bio->bi_bdev = page->dev->bdev; |
1050 | bio->bi_sector = page->physical >> 9; | 1074 | bio->bi_sector = page->physical >> 9; |
1051 | bio->bi_end_io = scrub_complete_bio_end_io; | 1075 | bio->bi_end_io = scrub_complete_bio_end_io; |
1052 | bio->bi_private = &complete; | 1076 | bio->bi_private = &complete; |
@@ -1095,11 +1119,14 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
1095 | h = (struct btrfs_header *)mapped_buffer; | 1119 | h = (struct btrfs_header *)mapped_buffer; |
1096 | 1120 | ||
1097 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || | 1121 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || |
1098 | generation != le64_to_cpu(h->generation) || | ||
1099 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || | 1122 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || |
1100 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1123 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
1101 | BTRFS_UUID_SIZE)) | 1124 | BTRFS_UUID_SIZE)) { |
1125 | sblock->header_error = 1; | ||
1126 | } else if (generation != le64_to_cpu(h->generation)) { | ||
1102 | sblock->header_error = 1; | 1127 | sblock->header_error = 1; |
1128 | sblock->generation_error = 1; | ||
1129 | } | ||
1103 | csum = h->csum; | 1130 | csum = h->csum; |
1104 | } else { | 1131 | } else { |
1105 | if (!have_csum) | 1132 | if (!have_csum) |
@@ -1175,7 +1202,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
1175 | bio = bio_alloc(GFP_NOFS, 1); | 1202 | bio = bio_alloc(GFP_NOFS, 1); |
1176 | if (!bio) | 1203 | if (!bio) |
1177 | return -EIO; | 1204 | return -EIO; |
1178 | bio->bi_bdev = page_bad->bdev; | 1205 | bio->bi_bdev = page_bad->dev->bdev; |
1179 | bio->bi_sector = page_bad->physical >> 9; | 1206 | bio->bi_sector = page_bad->physical >> 9; |
1180 | bio->bi_end_io = scrub_complete_bio_end_io; | 1207 | bio->bi_end_io = scrub_complete_bio_end_io; |
1181 | bio->bi_private = &complete; | 1208 | bio->bi_private = &complete; |
@@ -1189,6 +1216,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
1189 | 1216 | ||
1190 | /* this will also unplug the queue */ | 1217 | /* this will also unplug the queue */ |
1191 | wait_for_completion(&complete); | 1218 | wait_for_completion(&complete); |
1219 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
1220 | btrfs_dev_stat_inc_and_print(page_bad->dev, | ||
1221 | BTRFS_DEV_STAT_WRITE_ERRS); | ||
1222 | bio_put(bio); | ||
1223 | return -EIO; | ||
1224 | } | ||
1192 | bio_put(bio); | 1225 | bio_put(bio); |
1193 | } | 1226 | } |
1194 | 1227 | ||
@@ -1345,7 +1378,8 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1345 | u64 mapped_size; | 1378 | u64 mapped_size; |
1346 | void *p; | 1379 | void *p; |
1347 | u32 crc = ~(u32)0; | 1380 | u32 crc = ~(u32)0; |
1348 | int fail = 0; | 1381 | int fail_gen = 0; |
1382 | int fail_cor = 0; | ||
1349 | u64 len; | 1383 | u64 len; |
1350 | int index; | 1384 | int index; |
1351 | 1385 | ||
@@ -1356,13 +1390,13 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1356 | memcpy(on_disk_csum, s->csum, sdev->csum_size); | 1390 | memcpy(on_disk_csum, s->csum, sdev->csum_size); |
1357 | 1391 | ||
1358 | if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) | 1392 | if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) |
1359 | ++fail; | 1393 | ++fail_cor; |
1360 | 1394 | ||
1361 | if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) | 1395 | if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) |
1362 | ++fail; | 1396 | ++fail_gen; |
1363 | 1397 | ||
1364 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1398 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) |
1365 | ++fail; | 1399 | ++fail_cor; |
1366 | 1400 | ||
1367 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; | 1401 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; |
1368 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 1402 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
@@ -1387,9 +1421,9 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1387 | 1421 | ||
1388 | btrfs_csum_final(crc, calculated_csum); | 1422 | btrfs_csum_final(crc, calculated_csum); |
1389 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) | 1423 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) |
1390 | ++fail; | 1424 | ++fail_cor; |
1391 | 1425 | ||
1392 | if (fail) { | 1426 | if (fail_cor + fail_gen) { |
1393 | /* | 1427 | /* |
1394 | * if we find an error in a super block, we just report it. | 1428 | * if we find an error in a super block, we just report it. |
1395 | * They will get written with the next transaction commit | 1429 | * They will get written with the next transaction commit |
@@ -1398,9 +1432,15 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1398 | spin_lock(&sdev->stat_lock); | 1432 | spin_lock(&sdev->stat_lock); |
1399 | ++sdev->stat.super_errors; | 1433 | ++sdev->stat.super_errors; |
1400 | spin_unlock(&sdev->stat_lock); | 1434 | spin_unlock(&sdev->stat_lock); |
1435 | if (fail_cor) | ||
1436 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
1437 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
1438 | else | ||
1439 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
1440 | BTRFS_DEV_STAT_GENERATION_ERRS); | ||
1401 | } | 1441 | } |
1402 | 1442 | ||
1403 | return fail; | 1443 | return fail_cor + fail_gen; |
1404 | } | 1444 | } |
1405 | 1445 | ||
1406 | static void scrub_block_get(struct scrub_block *sblock) | 1446 | static void scrub_block_get(struct scrub_block *sblock) |
@@ -1544,7 +1584,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | |||
1544 | return -ENOMEM; | 1584 | return -ENOMEM; |
1545 | } | 1585 | } |
1546 | spage->sblock = sblock; | 1586 | spage->sblock = sblock; |
1547 | spage->bdev = sdev->dev->bdev; | 1587 | spage->dev = sdev->dev; |
1548 | spage->flags = flags; | 1588 | spage->flags = flags; |
1549 | spage->generation = gen; | 1589 | spage->generation = gen; |
1550 | spage->logical = logical; | 1590 | spage->logical = logical; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c5f8fca4195f..96eb9fef7bd2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -188,7 +188,8 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) | |||
188 | va_start(args, fmt); | 188 | va_start(args, fmt); |
189 | 189 | ||
190 | if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { | 190 | if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { |
191 | strncpy(lvl, fmt, 3); | 191 | memcpy(lvl, fmt, 3); |
192 | lvl[3] = '\0'; | ||
192 | fmt += 3; | 193 | fmt += 3; |
193 | type = logtypes[fmt[1] - '0']; | 194 | type = logtypes[fmt[1] - '0']; |
194 | } else | 195 | } else |
@@ -435,11 +436,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
435 | case Opt_thread_pool: | 436 | case Opt_thread_pool: |
436 | intarg = 0; | 437 | intarg = 0; |
437 | match_int(&args[0], &intarg); | 438 | match_int(&args[0], &intarg); |
438 | if (intarg) { | 439 | if (intarg) |
439 | info->thread_pool_size = intarg; | 440 | info->thread_pool_size = intarg; |
440 | printk(KERN_INFO "btrfs: thread pool %d\n", | ||
441 | info->thread_pool_size); | ||
442 | } | ||
443 | break; | 441 | break; |
444 | case Opt_max_inline: | 442 | case Opt_max_inline: |
445 | num = match_strdup(&args[0]); | 443 | num = match_strdup(&args[0]); |
@@ -769,7 +767,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
769 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 767 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
770 | sb->s_flags |= MS_POSIXACL; | 768 | sb->s_flags |= MS_POSIXACL; |
771 | #endif | 769 | #endif |
772 | 770 | sb->s_flags |= MS_I_VERSION; | |
773 | err = open_ctree(sb, fs_devices, (char *)data); | 771 | err = open_ctree(sb, fs_devices, (char *)data); |
774 | if (err) { | 772 | if (err) { |
775 | printk("btrfs: open_ctree failed\n"); | 773 | printk("btrfs: open_ctree failed\n"); |
@@ -925,63 +923,48 @@ static inline int is_subvolume_inode(struct inode *inode) | |||
925 | */ | 923 | */ |
926 | static char *setup_root_args(char *args) | 924 | static char *setup_root_args(char *args) |
927 | { | 925 | { |
928 | unsigned copied = 0; | 926 | unsigned len = strlen(args) + 2 + 1; |
929 | unsigned len = strlen(args) + 2; | 927 | char *src, *dst, *buf; |
930 | char *pos; | ||
931 | char *ret; | ||
932 | 928 | ||
933 | /* | 929 | /* |
934 | * We need the same args as before, but minus | 930 | * We need the same args as before, but with this substitution: |
935 | * | 931 | * s!subvol=[^,]+!subvolid=0! |
936 | * subvol=a | ||
937 | * | ||
938 | * and add | ||
939 | * | ||
940 | * subvolid=0 | ||
941 | * | 932 | * |
942 | * which is a difference of 2 characters, so we allocate strlen(args) + | 933 | * Since the replacement string is up to 2 bytes longer than the |
943 | * 2 characters. | 934 | * original, allocate strlen(args) + 2 + 1 bytes. |
944 | */ | 935 | */ |
945 | ret = kzalloc(len * sizeof(char), GFP_NOFS); | ||
946 | if (!ret) | ||
947 | return NULL; | ||
948 | pos = strstr(args, "subvol="); | ||
949 | 936 | ||
937 | src = strstr(args, "subvol="); | ||
950 | /* This shouldn't happen, but just in case.. */ | 938 | /* This shouldn't happen, but just in case.. */ |
951 | if (!pos) { | 939 | if (!src) |
952 | kfree(ret); | 940 | return NULL; |
941 | |||
942 | buf = dst = kmalloc(len, GFP_NOFS); | ||
943 | if (!buf) | ||
953 | return NULL; | 944 | return NULL; |
954 | } | ||
955 | 945 | ||
956 | /* | 946 | /* |
957 | * The subvol=<> arg is not at the front of the string, copy everybody | 947 | * If the subvol= arg is not at the start of the string, |
958 | * up to that into ret. | 948 | * copy whatever precedes it into buf. |
959 | */ | 949 | */ |
960 | if (pos != args) { | 950 | if (src != args) { |
961 | *pos = '\0'; | 951 | *src++ = '\0'; |
962 | strcpy(ret, args); | 952 | strcpy(buf, args); |
963 | copied += strlen(args); | 953 | dst += strlen(args); |
964 | pos++; | ||
965 | } | 954 | } |
966 | 955 | ||
967 | strncpy(ret + copied, "subvolid=0", len - copied); | 956 | strcpy(dst, "subvolid=0"); |
968 | 957 | dst += strlen("subvolid=0"); | |
969 | /* Length of subvolid=0 */ | ||
970 | copied += 10; | ||
971 | 958 | ||
972 | /* | 959 | /* |
973 | * If there is no , after the subvol= option then we know there's no | 960 | * If there is a "," after the original subvol=... string, |
974 | * other options and we can just return. | 961 | * copy that suffix into our buffer. Otherwise, we're done. |
975 | */ | 962 | */ |
976 | pos = strchr(pos, ','); | 963 | src = strchr(src, ','); |
977 | if (!pos) | 964 | if (src) |
978 | return ret; | 965 | strcpy(dst, src); |
979 | 966 | ||
980 | /* Copy the rest of the arguments into our buffer */ | 967 | return buf; |
981 | strncpy(ret + copied, pos, len - copied); | ||
982 | copied += strlen(pos); | ||
983 | |||
984 | return ret; | ||
985 | } | 968 | } |
986 | 969 | ||
987 | static struct dentry *mount_subvol(const char *subvol_name, int flags, | 970 | static struct dentry *mount_subvol(const char *subvol_name, int flags, |
@@ -1118,6 +1101,40 @@ error_fs_info: | |||
1118 | return ERR_PTR(error); | 1101 | return ERR_PTR(error); |
1119 | } | 1102 | } |
1120 | 1103 | ||
1104 | static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) | ||
1105 | { | ||
1106 | spin_lock_irq(&workers->lock); | ||
1107 | workers->max_workers = new_limit; | ||
1108 | spin_unlock_irq(&workers->lock); | ||
1109 | } | ||
1110 | |||
1111 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | ||
1112 | int new_pool_size, int old_pool_size) | ||
1113 | { | ||
1114 | if (new_pool_size == old_pool_size) | ||
1115 | return; | ||
1116 | |||
1117 | fs_info->thread_pool_size = new_pool_size; | ||
1118 | |||
1119 | printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n", | ||
1120 | old_pool_size, new_pool_size); | ||
1121 | |||
1122 | btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); | ||
1123 | btrfs_set_max_workers(&fs_info->workers, new_pool_size); | ||
1124 | btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); | ||
1125 | btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); | ||
1126 | btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); | ||
1127 | btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); | ||
1128 | btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); | ||
1129 | btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); | ||
1130 | btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); | ||
1131 | btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); | ||
1132 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | ||
1133 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | ||
1134 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | ||
1135 | btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size); | ||
1136 | } | ||
1137 | |||
1121 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) | 1138 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) |
1122 | { | 1139 | { |
1123 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 1140 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
@@ -1137,6 +1154,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1137 | goto restore; | 1154 | goto restore; |
1138 | } | 1155 | } |
1139 | 1156 | ||
1157 | btrfs_resize_thread_pool(fs_info, | ||
1158 | fs_info->thread_pool_size, old_thread_pool_size); | ||
1159 | |||
1140 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 1160 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
1141 | return 0; | 1161 | return 0; |
1142 | 1162 | ||
@@ -1180,7 +1200,8 @@ restore: | |||
1180 | fs_info->compress_type = old_compress_type; | 1200 | fs_info->compress_type = old_compress_type; |
1181 | fs_info->max_inline = old_max_inline; | 1201 | fs_info->max_inline = old_max_inline; |
1182 | fs_info->alloc_start = old_alloc_start; | 1202 | fs_info->alloc_start = old_alloc_start; |
1183 | fs_info->thread_pool_size = old_thread_pool_size; | 1203 | btrfs_resize_thread_pool(fs_info, |
1204 | old_thread_pool_size, fs_info->thread_pool_size); | ||
1184 | fs_info->metadata_ratio = old_metadata_ratio; | 1205 | fs_info->metadata_ratio = old_metadata_ratio; |
1185 | return ret; | 1206 | return ret; |
1186 | } | 1207 | } |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 36422254ef67..1791c6e3d834 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "locking.h" | 28 | #include "locking.h" |
29 | #include "tree-log.h" | 29 | #include "tree-log.h" |
30 | #include "inode-map.h" | 30 | #include "inode-map.h" |
31 | #include "volumes.h" | ||
31 | 32 | ||
32 | #define BTRFS_ROOT_TRANS_TAG 0 | 33 | #define BTRFS_ROOT_TRANS_TAG 0 |
33 | 34 | ||
@@ -55,48 +56,49 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
55 | static noinline int join_transaction(struct btrfs_root *root, int nofail) | 56 | static noinline int join_transaction(struct btrfs_root *root, int nofail) |
56 | { | 57 | { |
57 | struct btrfs_transaction *cur_trans; | 58 | struct btrfs_transaction *cur_trans; |
59 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
58 | 60 | ||
59 | spin_lock(&root->fs_info->trans_lock); | 61 | spin_lock(&fs_info->trans_lock); |
60 | loop: | 62 | loop: |
61 | /* The file system has been taken offline. No new transactions. */ | 63 | /* The file system has been taken offline. No new transactions. */ |
62 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 64 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
63 | spin_unlock(&root->fs_info->trans_lock); | 65 | spin_unlock(&fs_info->trans_lock); |
64 | return -EROFS; | 66 | return -EROFS; |
65 | } | 67 | } |
66 | 68 | ||
67 | if (root->fs_info->trans_no_join) { | 69 | if (fs_info->trans_no_join) { |
68 | if (!nofail) { | 70 | if (!nofail) { |
69 | spin_unlock(&root->fs_info->trans_lock); | 71 | spin_unlock(&fs_info->trans_lock); |
70 | return -EBUSY; | 72 | return -EBUSY; |
71 | } | 73 | } |
72 | } | 74 | } |
73 | 75 | ||
74 | cur_trans = root->fs_info->running_transaction; | 76 | cur_trans = fs_info->running_transaction; |
75 | if (cur_trans) { | 77 | if (cur_trans) { |
76 | if (cur_trans->aborted) { | 78 | if (cur_trans->aborted) { |
77 | spin_unlock(&root->fs_info->trans_lock); | 79 | spin_unlock(&fs_info->trans_lock); |
78 | return cur_trans->aborted; | 80 | return cur_trans->aborted; |
79 | } | 81 | } |
80 | atomic_inc(&cur_trans->use_count); | 82 | atomic_inc(&cur_trans->use_count); |
81 | atomic_inc(&cur_trans->num_writers); | 83 | atomic_inc(&cur_trans->num_writers); |
82 | cur_trans->num_joined++; | 84 | cur_trans->num_joined++; |
83 | spin_unlock(&root->fs_info->trans_lock); | 85 | spin_unlock(&fs_info->trans_lock); |
84 | return 0; | 86 | return 0; |
85 | } | 87 | } |
86 | spin_unlock(&root->fs_info->trans_lock); | 88 | spin_unlock(&fs_info->trans_lock); |
87 | 89 | ||
88 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 90 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
89 | if (!cur_trans) | 91 | if (!cur_trans) |
90 | return -ENOMEM; | 92 | return -ENOMEM; |
91 | 93 | ||
92 | spin_lock(&root->fs_info->trans_lock); | 94 | spin_lock(&fs_info->trans_lock); |
93 | if (root->fs_info->running_transaction) { | 95 | if (fs_info->running_transaction) { |
94 | /* | 96 | /* |
95 | * someone started a transaction after we unlocked. Make sure | 97 | * someone started a transaction after we unlocked. Make sure |
96 | * to redo the trans_no_join checks above | 98 | * to redo the trans_no_join checks above |
97 | */ | 99 | */ |
98 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 100 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
99 | cur_trans = root->fs_info->running_transaction; | 101 | cur_trans = fs_info->running_transaction; |
100 | goto loop; | 102 | goto loop; |
101 | } | 103 | } |
102 | 104 | ||
@@ -121,20 +123,38 @@ loop: | |||
121 | cur_trans->delayed_refs.flushing = 0; | 123 | cur_trans->delayed_refs.flushing = 0; |
122 | cur_trans->delayed_refs.run_delayed_start = 0; | 124 | cur_trans->delayed_refs.run_delayed_start = 0; |
123 | cur_trans->delayed_refs.seq = 1; | 125 | cur_trans->delayed_refs.seq = 1; |
126 | |||
127 | /* | ||
128 | * although the tree mod log is per file system and not per transaction, | ||
129 | * the log must never go across transaction boundaries. | ||
130 | */ | ||
131 | smp_mb(); | ||
132 | if (!list_empty(&fs_info->tree_mod_seq_list)) { | ||
133 | printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when " | ||
134 | "creating a fresh transaction\n"); | ||
135 | WARN_ON(1); | ||
136 | } | ||
137 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) { | ||
138 | printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when " | ||
139 | "creating a fresh transaction\n"); | ||
140 | WARN_ON(1); | ||
141 | } | ||
142 | atomic_set(&fs_info->tree_mod_seq, 0); | ||
143 | |||
124 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); | 144 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); |
125 | spin_lock_init(&cur_trans->commit_lock); | 145 | spin_lock_init(&cur_trans->commit_lock); |
126 | spin_lock_init(&cur_trans->delayed_refs.lock); | 146 | spin_lock_init(&cur_trans->delayed_refs.lock); |
127 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); | 147 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); |
128 | 148 | ||
129 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 149 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
130 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 150 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
131 | extent_io_tree_init(&cur_trans->dirty_pages, | 151 | extent_io_tree_init(&cur_trans->dirty_pages, |
132 | root->fs_info->btree_inode->i_mapping); | 152 | fs_info->btree_inode->i_mapping); |
133 | root->fs_info->generation++; | 153 | fs_info->generation++; |
134 | cur_trans->transid = root->fs_info->generation; | 154 | cur_trans->transid = fs_info->generation; |
135 | root->fs_info->running_transaction = cur_trans; | 155 | fs_info->running_transaction = cur_trans; |
136 | cur_trans->aborted = 0; | 156 | cur_trans->aborted = 0; |
137 | spin_unlock(&root->fs_info->trans_lock); | 157 | spin_unlock(&fs_info->trans_lock); |
138 | 158 | ||
139 | return 0; | 159 | return 0; |
140 | } | 160 | } |
@@ -758,6 +778,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
758 | if (ret) | 778 | if (ret) |
759 | return ret; | 779 | return ret; |
760 | 780 | ||
781 | ret = btrfs_run_dev_stats(trans, root->fs_info); | ||
782 | BUG_ON(ret); | ||
783 | |||
761 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 784 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
762 | next = fs_info->dirty_cowonly_roots.next; | 785 | next = fs_info->dirty_cowonly_roots.next; |
763 | list_del_init(next); | 786 | list_del_init(next); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d017283ae6f5..2017d0ff511c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -279,7 +279,7 @@ static int process_one_buffer(struct btrfs_root *log, | |||
279 | log->fs_info->extent_root, | 279 | log->fs_info->extent_root, |
280 | eb->start, eb->len); | 280 | eb->start, eb->len); |
281 | 281 | ||
282 | if (btrfs_buffer_uptodate(eb, gen)) { | 282 | if (btrfs_buffer_uptodate(eb, gen, 0)) { |
283 | if (wc->write) | 283 | if (wc->write) |
284 | btrfs_write_tree_block(eb); | 284 | btrfs_write_tree_block(eb); |
285 | if (wc->wait) | 285 | if (wc->wait) |
@@ -1628,7 +1628,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1628 | int i; | 1628 | int i; |
1629 | int ret; | 1629 | int ret; |
1630 | 1630 | ||
1631 | btrfs_read_buffer(eb, gen); | 1631 | ret = btrfs_read_buffer(eb, gen); |
1632 | if (ret) | ||
1633 | return ret; | ||
1632 | 1634 | ||
1633 | level = btrfs_header_level(eb); | 1635 | level = btrfs_header_level(eb); |
1634 | 1636 | ||
@@ -1749,7 +1751,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1749 | 1751 | ||
1750 | path->slots[*level]++; | 1752 | path->slots[*level]++; |
1751 | if (wc->free) { | 1753 | if (wc->free) { |
1752 | btrfs_read_buffer(next, ptr_gen); | 1754 | ret = btrfs_read_buffer(next, ptr_gen); |
1755 | if (ret) { | ||
1756 | free_extent_buffer(next); | ||
1757 | return ret; | ||
1758 | } | ||
1753 | 1759 | ||
1754 | btrfs_tree_lock(next); | 1760 | btrfs_tree_lock(next); |
1755 | btrfs_set_lock_blocking(next); | 1761 | btrfs_set_lock_blocking(next); |
@@ -1766,7 +1772,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1766 | free_extent_buffer(next); | 1772 | free_extent_buffer(next); |
1767 | continue; | 1773 | continue; |
1768 | } | 1774 | } |
1769 | btrfs_read_buffer(next, ptr_gen); | 1775 | ret = btrfs_read_buffer(next, ptr_gen); |
1776 | if (ret) { | ||
1777 | free_extent_buffer(next); | ||
1778 | return ret; | ||
1779 | } | ||
1770 | 1780 | ||
1771 | WARN_ON(*level <= 0); | 1781 | WARN_ON(*level <= 0); |
1772 | if (path->nodes[*level-1]) | 1782 | if (path->nodes[*level-1]) |
@@ -2657,6 +2667,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2657 | btrfs_release_path(path); | 2667 | btrfs_release_path(path); |
2658 | } | 2668 | } |
2659 | btrfs_release_path(path); | 2669 | btrfs_release_path(path); |
2670 | if (ret > 0) | ||
2671 | ret = 0; | ||
2660 | return ret; | 2672 | return ret; |
2661 | } | 2673 | } |
2662 | 2674 | ||
@@ -3028,21 +3040,6 @@ out: | |||
3028 | return ret; | 3040 | return ret; |
3029 | } | 3041 | } |
3030 | 3042 | ||
3031 | static int inode_in_log(struct btrfs_trans_handle *trans, | ||
3032 | struct inode *inode) | ||
3033 | { | ||
3034 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3035 | int ret = 0; | ||
3036 | |||
3037 | mutex_lock(&root->log_mutex); | ||
3038 | if (BTRFS_I(inode)->logged_trans == trans->transid && | ||
3039 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | ||
3040 | ret = 1; | ||
3041 | mutex_unlock(&root->log_mutex); | ||
3042 | return ret; | ||
3043 | } | ||
3044 | |||
3045 | |||
3046 | /* | 3043 | /* |
3047 | * helper function around btrfs_log_inode to make sure newly created | 3044 | * helper function around btrfs_log_inode to make sure newly created |
3048 | * parent directories also end up in the log. A minimal inode and backref | 3045 | * parent directories also end up in the log. A minimal inode and backref |
@@ -3083,7 +3080,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3083 | if (ret) | 3080 | if (ret) |
3084 | goto end_no_trans; | 3081 | goto end_no_trans; |
3085 | 3082 | ||
3086 | if (inode_in_log(trans, inode)) { | 3083 | if (btrfs_inode_in_log(inode, trans->transid)) { |
3087 | ret = BTRFS_NO_LOG_SYNC; | 3084 | ret = BTRFS_NO_LOG_SYNC; |
3088 | goto end_no_trans; | 3085 | goto end_no_trans; |
3089 | } | 3086 | } |
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 12f5147bd2b1..ab942f46b3dd 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
@@ -23,9 +23,9 @@ | |||
23 | * | 23 | * |
24 | * ulist = ulist_alloc(); | 24 | * ulist = ulist_alloc(); |
25 | * ulist_add(ulist, root); | 25 | * ulist_add(ulist, root); |
26 | * elem = NULL; | 26 | * ULIST_ITER_INIT(&uiter); |
27 | * | 27 | * |
28 | * while ((elem = ulist_next(ulist, elem)) { | 28 | * while ((elem = ulist_next(ulist, &uiter)) { |
29 | * for (all child nodes n in elem) | 29 | * for (all child nodes n in elem) |
30 | * ulist_add(ulist, n); | 30 | * ulist_add(ulist, n); |
31 | * do something useful with the node; | 31 | * do something useful with the node; |
@@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit); | |||
95 | * | 95 | * |
96 | * The allocated ulist will be returned in an initialized state. | 96 | * The allocated ulist will be returned in an initialized state. |
97 | */ | 97 | */ |
98 | struct ulist *ulist_alloc(unsigned long gfp_mask) | 98 | struct ulist *ulist_alloc(gfp_t gfp_mask) |
99 | { | 99 | { |
100 | struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); | 100 | struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); |
101 | 101 | ||
@@ -144,13 +144,22 @@ EXPORT_SYMBOL(ulist_free); | |||
144 | * unaltered. | 144 | * unaltered. |
145 | */ | 145 | */ |
146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, |
147 | unsigned long gfp_mask) | 147 | gfp_t gfp_mask) |
148 | { | ||
149 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); | ||
150 | } | ||
151 | |||
152 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | ||
153 | unsigned long *old_aux, gfp_t gfp_mask) | ||
148 | { | 154 | { |
149 | int i; | 155 | int i; |
150 | 156 | ||
151 | for (i = 0; i < ulist->nnodes; ++i) { | 157 | for (i = 0; i < ulist->nnodes; ++i) { |
152 | if (ulist->nodes[i].val == val) | 158 | if (ulist->nodes[i].val == val) { |
159 | if (old_aux) | ||
160 | *old_aux = ulist->nodes[i].aux; | ||
153 | return 0; | 161 | return 0; |
162 | } | ||
154 | } | 163 | } |
155 | 164 | ||
156 | if (ulist->nnodes >= ulist->nodes_alloced) { | 165 | if (ulist->nnodes >= ulist->nodes_alloced) { |
@@ -188,33 +197,26 @@ EXPORT_SYMBOL(ulist_add); | |||
188 | /** | 197 | /** |
189 | * ulist_next - iterate ulist | 198 | * ulist_next - iterate ulist |
190 | * @ulist: ulist to iterate | 199 | * @ulist: ulist to iterate |
191 | * @prev: previously returned element or %NULL to start iteration | 200 | * @uiter: iterator variable, initialized with ULIST_ITER_INIT(&iterator) |
192 | * | 201 | * |
193 | * Note: locking must be provided by the caller. In case of rwlocks only read | 202 | * Note: locking must be provided by the caller. In case of rwlocks only read |
194 | * locking is needed | 203 | * locking is needed |
195 | * | 204 | * |
196 | * This function is used to iterate an ulist. The iteration is started with | 205 | * This function is used to iterate an ulist. |
197 | * @prev = %NULL. It returns the next element from the ulist or %NULL when the | 206 | * It returns the next element from the ulist or %NULL when the |
198 | * end is reached. No guarantee is made with respect to the order in which | 207 | * end is reached. No guarantee is made with respect to the order in which |
199 | * the elements are returned. They might neither be returned in order of | 208 | * the elements are returned. They might neither be returned in order of |
200 | * addition nor in ascending order. | 209 | * addition nor in ascending order. |
201 | * It is allowed to call ulist_add during an enumeration. Newly added items | 210 | * It is allowed to call ulist_add during an enumeration. Newly added items |
202 | * are guaranteed to show up in the running enumeration. | 211 | * are guaranteed to show up in the running enumeration. |
203 | */ | 212 | */ |
204 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev) | 213 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter) |
205 | { | 214 | { |
206 | int next; | ||
207 | |||
208 | if (ulist->nnodes == 0) | 215 | if (ulist->nnodes == 0) |
209 | return NULL; | 216 | return NULL; |
210 | 217 | if (uiter->i < 0 || uiter->i >= ulist->nnodes) | |
211 | if (!prev) | ||
212 | return &ulist->nodes[0]; | ||
213 | |||
214 | next = (prev - ulist->nodes) + 1; | ||
215 | if (next < 0 || next >= ulist->nnodes) | ||
216 | return NULL; | 218 | return NULL; |
217 | 219 | ||
218 | return &ulist->nodes[next]; | 220 | return &ulist->nodes[uiter->i++]; |
219 | } | 221 | } |
220 | EXPORT_SYMBOL(ulist_next); | 222 | EXPORT_SYMBOL(ulist_next); |
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 2e25dec58ec0..21bdc8ec8130 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h | |||
@@ -24,6 +24,10 @@ | |||
24 | */ | 24 | */ |
25 | #define ULIST_SIZE 16 | 25 | #define ULIST_SIZE 16 |
26 | 26 | ||
/*
 * Iteration cursor for ulist_next(). Initialise it with ULIST_ITER_INIT()
 * before the first call.
 */
struct ulist_iterator {
	int i;	/* index into the ulist's node array of the next node to return */
};
30 | |||
27 | /* | 31 | /* |
28 | * element of the list | 32 | * element of the list |
29 | */ | 33 | */ |
@@ -59,10 +63,15 @@ struct ulist { | |||
59 | void ulist_init(struct ulist *ulist); | 63 | void ulist_init(struct ulist *ulist); |
60 | void ulist_fini(struct ulist *ulist); | 64 | void ulist_fini(struct ulist *ulist); |
61 | void ulist_reinit(struct ulist *ulist); | 65 | void ulist_reinit(struct ulist *ulist); |
62 | struct ulist *ulist_alloc(unsigned long gfp_mask); | 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); |
63 | void ulist_free(struct ulist *ulist); | 67 | void ulist_free(struct ulist *ulist); |
64 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 68 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, |
65 | unsigned long gfp_mask); | 69 | gfp_t gfp_mask); |
66 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev); | 70 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, |
71 | unsigned long *old_aux, gfp_t gfp_mask); | ||
72 | struct ulist_node *ulist_next(struct ulist *ulist, | ||
73 | struct ulist_iterator *uiter); | ||
74 | |||
75 | #define ULIST_ITER_INIT(uiter) ((uiter)->i = 0) | ||
67 | 76 | ||
68 | #endif | 77 | #endif |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1411b99555a4..7782020996fe 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
26 | #include <linux/ratelimit.h> | ||
26 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
27 | #include <asm/div64.h> | 28 | #include <asm/div64.h> |
28 | #include "compat.h" | 29 | #include "compat.h" |
@@ -39,6 +40,8 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
39 | struct btrfs_root *root, | 40 | struct btrfs_root *root, |
40 | struct btrfs_device *device); | 41 | struct btrfs_device *device); |
41 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root); | 42 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root); |
43 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev); | ||
44 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); | ||
42 | 45 | ||
43 | static DEFINE_MUTEX(uuid_mutex); | 46 | static DEFINE_MUTEX(uuid_mutex); |
44 | static LIST_HEAD(fs_uuids); | 47 | static LIST_HEAD(fs_uuids); |
@@ -361,6 +364,7 @@ static noinline int device_list_add(const char *path, | |||
361 | return -ENOMEM; | 364 | return -ENOMEM; |
362 | } | 365 | } |
363 | device->devid = devid; | 366 | device->devid = devid; |
367 | device->dev_stats_valid = 0; | ||
364 | device->work.func = pending_bios_fn; | 368 | device->work.func = pending_bios_fn; |
365 | memcpy(device->uuid, disk_super->dev_item.uuid, | 369 | memcpy(device->uuid, disk_super->dev_item.uuid, |
366 | BTRFS_UUID_SIZE); | 370 | BTRFS_UUID_SIZE); |
@@ -1633,7 +1637,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1633 | int ret = 0; | 1637 | int ret = 0; |
1634 | 1638 | ||
1635 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) | 1639 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) |
1636 | return -EINVAL; | 1640 | return -EROFS; |
1637 | 1641 | ||
1638 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, | 1642 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, |
1639 | root->fs_info->bdev_holder); | 1643 | root->fs_info->bdev_holder); |
@@ -4001,13 +4005,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
4001 | return 0; | 4005 | return 0; |
4002 | } | 4006 | } |
4003 | 4007 | ||
/*
 * Pack a stripe index into the two low bits of a bio private pointer.
 *
 * @bi_private:   pointer to tag; its two low bits must be zero
 * @stripe_index: value to store, must not exceed 3
 *
 * Returns @bi_private with @stripe_index OR-ed into bits 0-1. Both
 * preconditions are enforced with BUG_ON().
 *
 * The alternative to stealing bits from the pointer would be to allocate
 * an intermediate structure that contains the old private pointer plus
 * the stripe_index.
 *
 * NOTE(review): the original comment states that with single, dup, RAID0,
 * RAID1 and RAID10 the stripe_index is at most 1, while the check below
 * tolerates values up to 3 -- confirm the real upper bound.
 */
static void *merge_stripe_index_into_bio_private(void *bi_private,
						 unsigned int stripe_index)
{
	const uintptr_t tag_mask = 3;
	uintptr_t addr = (uintptr_t)bi_private;

	BUG_ON((addr & tag_mask) != 0);
	BUG_ON(stripe_index > tag_mask);
	return (void *)(addr | stripe_index);
}
4022 | |||
/*
 * Recover the btrfs_bio pointer from a bio private value by clearing its
 * two low bits (the bits merge_stripe_index_into_bio_private() writes).
 */
static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
{
	uintptr_t addr = (uintptr_t)bi_private;

	addr &= ~(uintptr_t)3;
	return (struct btrfs_bio *)addr;
}
4027 | |||
/*
 * Read back the stripe index kept in the two low bits of a bio private
 * value (the counterpart of merge_stripe_index_into_bio_private()).
 */
static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
{
	uintptr_t addr = (uintptr_t)bi_private;

	return (unsigned int)(addr & 3);
}
4032 | |||
4004 | static void btrfs_end_bio(struct bio *bio, int err) | 4033 | static void btrfs_end_bio(struct bio *bio, int err) |
4005 | { | 4034 | { |
4006 | struct btrfs_bio *bbio = bio->bi_private; | 4035 | struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); |
4007 | int is_orig_bio = 0; | 4036 | int is_orig_bio = 0; |
4008 | 4037 | ||
4009 | if (err) | 4038 | if (err) { |
4010 | atomic_inc(&bbio->error); | 4039 | atomic_inc(&bbio->error); |
4040 | if (err == -EIO || err == -EREMOTEIO) { | ||
4041 | unsigned int stripe_index = | ||
4042 | extract_stripe_index_from_bio_private( | ||
4043 | bio->bi_private); | ||
4044 | struct btrfs_device *dev; | ||
4045 | |||
4046 | BUG_ON(stripe_index >= bbio->num_stripes); | ||
4047 | dev = bbio->stripes[stripe_index].dev; | ||
4048 | if (bio->bi_rw & WRITE) | ||
4049 | btrfs_dev_stat_inc(dev, | ||
4050 | BTRFS_DEV_STAT_WRITE_ERRS); | ||
4051 | else | ||
4052 | btrfs_dev_stat_inc(dev, | ||
4053 | BTRFS_DEV_STAT_READ_ERRS); | ||
4054 | if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) | ||
4055 | btrfs_dev_stat_inc(dev, | ||
4056 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
4057 | btrfs_dev_stat_print_on_error(dev); | ||
4058 | } | ||
4059 | } | ||
4011 | 4060 | ||
4012 | if (bio == bbio->orig_bio) | 4061 | if (bio == bbio->orig_bio) |
4013 | is_orig_bio = 1; | 4062 | is_orig_bio = 1; |
@@ -4149,6 +4198,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
4149 | bio = first_bio; | 4198 | bio = first_bio; |
4150 | } | 4199 | } |
4151 | bio->bi_private = bbio; | 4200 | bio->bi_private = bbio; |
4201 | bio->bi_private = merge_stripe_index_into_bio_private( | ||
4202 | bio->bi_private, (unsigned int)dev_nr); | ||
4152 | bio->bi_end_io = btrfs_end_bio; | 4203 | bio->bi_end_io = btrfs_end_bio; |
4153 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; | 4204 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; |
4154 | dev = bbio->stripes[dev_nr].dev; | 4205 | dev = bbio->stripes[dev_nr].dev; |
@@ -4509,6 +4560,28 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
4509 | return ret; | 4560 | return ret; |
4510 | } | 4561 | } |
4511 | 4562 | ||
4563 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
4564 | u64 logical, int mirror_num) | ||
4565 | { | ||
4566 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
4567 | int ret; | ||
4568 | u64 map_length = 0; | ||
4569 | struct btrfs_bio *bbio = NULL; | ||
4570 | struct btrfs_device *device; | ||
4571 | |||
4572 | BUG_ON(mirror_num == 0); | ||
4573 | ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, | ||
4574 | mirror_num); | ||
4575 | if (ret) { | ||
4576 | BUG_ON(bbio != NULL); | ||
4577 | return NULL; | ||
4578 | } | ||
4579 | BUG_ON(mirror_num != bbio->mirror_num); | ||
4580 | device = bbio->stripes[mirror_num - 1].dev; | ||
4581 | kfree(bbio); | ||
4582 | return device; | ||
4583 | } | ||
4584 | |||
4512 | int btrfs_read_chunk_tree(struct btrfs_root *root) | 4585 | int btrfs_read_chunk_tree(struct btrfs_root *root) |
4513 | { | 4586 | { |
4514 | struct btrfs_path *path; | 4587 | struct btrfs_path *path; |
@@ -4583,3 +4656,230 @@ error: | |||
4583 | btrfs_free_path(path); | 4656 | btrfs_free_path(path); |
4584 | return ret; | 4657 | return ret; |
4585 | } | 4658 | } |
4659 | |||
4660 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) | ||
4661 | { | ||
4662 | int i; | ||
4663 | |||
4664 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
4665 | btrfs_dev_stat_reset(dev, i); | ||
4666 | } | ||
4667 | |||
4668 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) | ||
4669 | { | ||
4670 | struct btrfs_key key; | ||
4671 | struct btrfs_key found_key; | ||
4672 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
4673 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
4674 | struct extent_buffer *eb; | ||
4675 | int slot; | ||
4676 | int ret = 0; | ||
4677 | struct btrfs_device *device; | ||
4678 | struct btrfs_path *path = NULL; | ||
4679 | int i; | ||
4680 | |||
4681 | path = btrfs_alloc_path(); | ||
4682 | if (!path) { | ||
4683 | ret = -ENOMEM; | ||
4684 | goto out; | ||
4685 | } | ||
4686 | |||
4687 | mutex_lock(&fs_devices->device_list_mutex); | ||
4688 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
4689 | int item_size; | ||
4690 | struct btrfs_dev_stats_item *ptr; | ||
4691 | |||
4692 | key.objectid = 0; | ||
4693 | key.type = BTRFS_DEV_STATS_KEY; | ||
4694 | key.offset = device->devid; | ||
4695 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); | ||
4696 | if (ret) { | ||
4697 | printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", | ||
4698 | device->name, (unsigned long long)device->devid); | ||
4699 | __btrfs_reset_dev_stats(device); | ||
4700 | device->dev_stats_valid = 1; | ||
4701 | btrfs_release_path(path); | ||
4702 | continue; | ||
4703 | } | ||
4704 | slot = path->slots[0]; | ||
4705 | eb = path->nodes[0]; | ||
4706 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
4707 | item_size = btrfs_item_size_nr(eb, slot); | ||
4708 | |||
4709 | ptr = btrfs_item_ptr(eb, slot, | ||
4710 | struct btrfs_dev_stats_item); | ||
4711 | |||
4712 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { | ||
4713 | if (item_size >= (1 + i) * sizeof(__le64)) | ||
4714 | btrfs_dev_stat_set(device, i, | ||
4715 | btrfs_dev_stats_value(eb, ptr, i)); | ||
4716 | else | ||
4717 | btrfs_dev_stat_reset(device, i); | ||
4718 | } | ||
4719 | |||
4720 | device->dev_stats_valid = 1; | ||
4721 | btrfs_dev_stat_print_on_load(device); | ||
4722 | btrfs_release_path(path); | ||
4723 | } | ||
4724 | mutex_unlock(&fs_devices->device_list_mutex); | ||
4725 | |||
4726 | out: | ||
4727 | btrfs_free_path(path); | ||
4728 | return ret < 0 ? ret : 0; | ||
4729 | } | ||
4730 | |||
4731 | static int update_dev_stat_item(struct btrfs_trans_handle *trans, | ||
4732 | struct btrfs_root *dev_root, | ||
4733 | struct btrfs_device *device) | ||
4734 | { | ||
4735 | struct btrfs_path *path; | ||
4736 | struct btrfs_key key; | ||
4737 | struct extent_buffer *eb; | ||
4738 | struct btrfs_dev_stats_item *ptr; | ||
4739 | int ret; | ||
4740 | int i; | ||
4741 | |||
4742 | key.objectid = 0; | ||
4743 | key.type = BTRFS_DEV_STATS_KEY; | ||
4744 | key.offset = device->devid; | ||
4745 | |||
4746 | path = btrfs_alloc_path(); | ||
4747 | BUG_ON(!path); | ||
4748 | ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); | ||
4749 | if (ret < 0) { | ||
4750 | printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", | ||
4751 | ret, device->name); | ||
4752 | goto out; | ||
4753 | } | ||
4754 | |||
4755 | if (ret == 0 && | ||
4756 | btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) { | ||
4757 | /* need to delete old one and insert a new one */ | ||
4758 | ret = btrfs_del_item(trans, dev_root, path); | ||
4759 | if (ret != 0) { | ||
4760 | printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", | ||
4761 | device->name, ret); | ||
4762 | goto out; | ||
4763 | } | ||
4764 | ret = 1; | ||
4765 | } | ||
4766 | |||
4767 | if (ret == 1) { | ||
4768 | /* need to insert a new item */ | ||
4769 | btrfs_release_path(path); | ||
4770 | ret = btrfs_insert_empty_item(trans, dev_root, path, | ||
4771 | &key, sizeof(*ptr)); | ||
4772 | if (ret < 0) { | ||
4773 | printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", | ||
4774 | device->name, ret); | ||
4775 | goto out; | ||
4776 | } | ||
4777 | } | ||
4778 | |||
4779 | eb = path->nodes[0]; | ||
4780 | ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item); | ||
4781 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
4782 | btrfs_set_dev_stats_value(eb, ptr, i, | ||
4783 | btrfs_dev_stat_read(device, i)); | ||
4784 | btrfs_mark_buffer_dirty(eb); | ||
4785 | |||
4786 | out: | ||
4787 | btrfs_free_path(path); | ||
4788 | return ret; | ||
4789 | } | ||
4790 | |||
4791 | /* | ||
4792 | * called from commit_transaction. Writes all changed device stats to disk. | ||
4793 | */ | ||
4794 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | ||
4795 | struct btrfs_fs_info *fs_info) | ||
4796 | { | ||
4797 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
4798 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
4799 | struct btrfs_device *device; | ||
4800 | int ret = 0; | ||
4801 | |||
4802 | mutex_lock(&fs_devices->device_list_mutex); | ||
4803 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
4804 | if (!device->dev_stats_valid || !device->dev_stats_dirty) | ||
4805 | continue; | ||
4806 | |||
4807 | ret = update_dev_stat_item(trans, dev_root, device); | ||
4808 | if (!ret) | ||
4809 | device->dev_stats_dirty = 0; | ||
4810 | } | ||
4811 | mutex_unlock(&fs_devices->device_list_mutex); | ||
4812 | |||
4813 | return ret; | ||
4814 | } | ||
4815 | |||
/*
 * Increment error counter @index of @dev and then report the device's
 * counters through btrfs_dev_stat_print_on_error().
 */
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
	btrfs_dev_stat_inc(dev, index);
	btrfs_dev_stat_print_on_error(dev);
}
4821 | |||
4822 | void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) | ||
4823 | { | ||
4824 | if (!dev->dev_stats_valid) | ||
4825 | return; | ||
4826 | printk_ratelimited(KERN_ERR | ||
4827 | "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", | ||
4828 | dev->name, | ||
4829 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), | ||
4830 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), | ||
4831 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), | ||
4832 | btrfs_dev_stat_read(dev, | ||
4833 | BTRFS_DEV_STAT_CORRUPTION_ERRS), | ||
4834 | btrfs_dev_stat_read(dev, | ||
4835 | BTRFS_DEV_STAT_GENERATION_ERRS)); | ||
4836 | } | ||
4837 | |||
4838 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) | ||
4839 | { | ||
4840 | printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", | ||
4841 | dev->name, | ||
4842 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), | ||
4843 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), | ||
4844 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), | ||
4845 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS), | ||
4846 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS)); | ||
4847 | } | ||
4848 | |||
4849 | int btrfs_get_dev_stats(struct btrfs_root *root, | ||
4850 | struct btrfs_ioctl_get_dev_stats *stats, | ||
4851 | int reset_after_read) | ||
4852 | { | ||
4853 | struct btrfs_device *dev; | ||
4854 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
4855 | int i; | ||
4856 | |||
4857 | mutex_lock(&fs_devices->device_list_mutex); | ||
4858 | dev = btrfs_find_device(root, stats->devid, NULL, NULL); | ||
4859 | mutex_unlock(&fs_devices->device_list_mutex); | ||
4860 | |||
4861 | if (!dev) { | ||
4862 | printk(KERN_WARNING | ||
4863 | "btrfs: get dev_stats failed, device not found\n"); | ||
4864 | return -ENODEV; | ||
4865 | } else if (!dev->dev_stats_valid) { | ||
4866 | printk(KERN_WARNING | ||
4867 | "btrfs: get dev_stats failed, not yet valid\n"); | ||
4868 | return -ENODEV; | ||
4869 | } else if (reset_after_read) { | ||
4870 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { | ||
4871 | if (stats->nr_items > i) | ||
4872 | stats->values[i] = | ||
4873 | btrfs_dev_stat_read_and_reset(dev, i); | ||
4874 | else | ||
4875 | btrfs_dev_stat_reset(dev, i); | ||
4876 | } | ||
4877 | } else { | ||
4878 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
4879 | if (stats->nr_items > i) | ||
4880 | stats->values[i] = btrfs_dev_stat_read(dev, i); | ||
4881 | } | ||
4882 | if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX) | ||
4883 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; | ||
4884 | return 0; | ||
4885 | } | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bb6b03f97aaa..3406a88ca83e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | 23 | #include <linux/sort.h> |
24 | #include "async-thread.h" | 24 | #include "async-thread.h" |
25 | #include "ioctl.h" | ||
25 | 26 | ||
26 | #define BTRFS_STRIPE_LEN (64 * 1024) | 27 | #define BTRFS_STRIPE_LEN (64 * 1024) |
27 | 28 | ||
@@ -106,6 +107,11 @@ struct btrfs_device { | |||
106 | struct completion flush_wait; | 107 | struct completion flush_wait; |
107 | int nobarriers; | 108 | int nobarriers; |
108 | 109 | ||
110 | /* disk I/O failure stats. For detailed description refer to | ||
111 | * enum btrfs_dev_stat_values in ioctl.h */ | ||
112 | int dev_stats_valid; | ||
113 | int dev_stats_dirty; /* counters need to be written to disk */ | ||
114 | atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
109 | }; | 115 | }; |
110 | 116 | ||
111 | struct btrfs_fs_devices { | 117 | struct btrfs_fs_devices { |
@@ -281,4 +287,50 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); | |||
281 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 287 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
282 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | 288 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, |
283 | u64 *start, u64 *max_avail); | 289 | u64 *start, u64 *max_avail); |
290 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
291 | u64 logical, int mirror_num); | ||
292 | void btrfs_dev_stat_print_on_error(struct btrfs_device *device); | ||
293 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); | ||
294 | int btrfs_get_dev_stats(struct btrfs_root *root, | ||
295 | struct btrfs_ioctl_get_dev_stats *stats, | ||
296 | int reset_after_read); | ||
297 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | ||
298 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | ||
299 | struct btrfs_fs_info *fs_info); | ||
300 | |||
301 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | ||
302 | int index) | ||
303 | { | ||
304 | atomic_inc(dev->dev_stat_values + index); | ||
305 | dev->dev_stats_dirty = 1; | ||
306 | } | ||
307 | |||
308 | static inline int btrfs_dev_stat_read(struct btrfs_device *dev, | ||
309 | int index) | ||
310 | { | ||
311 | return atomic_read(dev->dev_stat_values + index); | ||
312 | } | ||
313 | |||
314 | static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, | ||
315 | int index) | ||
316 | { | ||
317 | int ret; | ||
318 | |||
319 | ret = atomic_xchg(dev->dev_stat_values + index, 0); | ||
320 | dev->dev_stats_dirty = 1; | ||
321 | return ret; | ||
322 | } | ||
323 | |||
324 | static inline void btrfs_dev_stat_set(struct btrfs_device *dev, | ||
325 | int index, unsigned long val) | ||
326 | { | ||
327 | atomic_set(dev->dev_stat_values + index, val); | ||
328 | dev->dev_stats_dirty = 1; | ||
329 | } | ||
330 | |||
/* Set counter @index of @dev to zero via btrfs_dev_stat_set(). */
static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
					int index)
{
	btrfs_dev_stat_set(dev, index, 0);
}
284 | #endif | 336 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index e7a5659087e6..3f4e2d69e83a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -196,6 +196,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
196 | if (ret) | 196 | if (ret) |
197 | goto out; | 197 | goto out; |
198 | 198 | ||
199 | inode_inc_iversion(inode); | ||
199 | inode->i_ctime = CURRENT_TIME; | 200 | inode->i_ctime = CURRENT_TIME; |
200 | ret = btrfs_update_inode(trans, root, inode); | 201 | ret = btrfs_update_inode(trans, root, inode); |
201 | BUG_ON(ret); | 202 | BUG_ON(ret); |