diff options
author | Chris Mason <chris.mason@oracle.com> | 2012-05-31 16:50:28 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2012-05-31 16:49:53 -0400 |
commit | 1e20932a23578bb1ec59107843574e259b96193f (patch) | |
tree | 844ae54293c4414fc4c232a36d0e4d4939dc35aa | |
parent | cfc442b69696b593cb442f09997dcb4cb5748171 (diff) | |
parent | c31931088fd6cf953bd0868a2647b6c3928e6c96 (diff) |
Merge branch 'for-chris' of git://git.jan-o-sch.net/btrfs-unstable into for-linus
Conflicts:
fs/btrfs/ulist.h
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/backref.c | 495 | ||||
-rw-r--r-- | fs/btrfs/backref.h | 3 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 849 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 34 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 10 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.h | 24 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 7 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 10 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 80 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 3 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 2 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 55 | ||||
-rw-r--r-- | fs/btrfs/ulist.c | 34 | ||||
-rw-r--r-- | fs/btrfs/ulist.h | 14 |
14 files changed, 1368 insertions, 252 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index bcec06750232..3f75895c919b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -24,22 +24,135 @@ | |||
24 | #include "delayed-ref.h" | 24 | #include "delayed-ref.h" |
25 | #include "locking.h" | 25 | #include "locking.h" |
26 | 26 | ||
27 | struct extent_inode_elem { | ||
28 | u64 inum; | ||
29 | u64 offset; | ||
30 | struct extent_inode_elem *next; | ||
31 | }; | ||
32 | |||
33 | static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, | ||
34 | struct btrfs_file_extent_item *fi, | ||
35 | u64 extent_item_pos, | ||
36 | struct extent_inode_elem **eie) | ||
37 | { | ||
38 | u64 data_offset; | ||
39 | u64 data_len; | ||
40 | struct extent_inode_elem *e; | ||
41 | |||
42 | data_offset = btrfs_file_extent_offset(eb, fi); | ||
43 | data_len = btrfs_file_extent_num_bytes(eb, fi); | ||
44 | |||
45 | if (extent_item_pos < data_offset || | ||
46 | extent_item_pos >= data_offset + data_len) | ||
47 | return 1; | ||
48 | |||
49 | e = kmalloc(sizeof(*e), GFP_NOFS); | ||
50 | if (!e) | ||
51 | return -ENOMEM; | ||
52 | |||
53 | e->next = *eie; | ||
54 | e->inum = key->objectid; | ||
55 | e->offset = key->offset + (extent_item_pos - data_offset); | ||
56 | *eie = e; | ||
57 | |||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, | ||
62 | u64 extent_item_pos, | ||
63 | struct extent_inode_elem **eie) | ||
64 | { | ||
65 | u64 disk_byte; | ||
66 | struct btrfs_key key; | ||
67 | struct btrfs_file_extent_item *fi; | ||
68 | int slot; | ||
69 | int nritems; | ||
70 | int extent_type; | ||
71 | int ret; | ||
72 | |||
73 | /* | ||
74 | * from the shared data ref, we only have the leaf but we need | ||
75 | * the key. thus, we must look into all items and see that we | ||
76 | * find one (some) with a reference to our extent item. | ||
77 | */ | ||
78 | nritems = btrfs_header_nritems(eb); | ||
79 | for (slot = 0; slot < nritems; ++slot) { | ||
80 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
81 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
82 | continue; | ||
83 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
84 | extent_type = btrfs_file_extent_type(eb, fi); | ||
85 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
86 | continue; | ||
87 | /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ | ||
88 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
89 | if (disk_byte != wanted_disk_byte) | ||
90 | continue; | ||
91 | |||
92 | ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie); | ||
93 | if (ret < 0) | ||
94 | return ret; | ||
95 | } | ||
96 | |||
97 | return 0; | ||
98 | } | ||
99 | |||
27 | /* | 100 | /* |
28 | * this structure records all encountered refs on the way up to the root | 101 | * this structure records all encountered refs on the way up to the root |
29 | */ | 102 | */ |
30 | struct __prelim_ref { | 103 | struct __prelim_ref { |
31 | struct list_head list; | 104 | struct list_head list; |
32 | u64 root_id; | 105 | u64 root_id; |
33 | struct btrfs_key key; | 106 | struct btrfs_key key_for_search; |
34 | int level; | 107 | int level; |
35 | int count; | 108 | int count; |
109 | struct extent_inode_elem *inode_list; | ||
36 | u64 parent; | 110 | u64 parent; |
37 | u64 wanted_disk_byte; | 111 | u64 wanted_disk_byte; |
38 | }; | 112 | }; |
39 | 113 | ||
114 | /* | ||
115 | * the rules for all callers of this function are: | ||
116 | * - obtaining the parent is the goal | ||
117 | * - if you add a key, you must know that it is a correct key | ||
118 | * - if you cannot add the parent or a correct key, then we will look into the | ||
119 | * block later to set a correct key | ||
120 | * | ||
121 | * delayed refs | ||
122 | * ============ | ||
123 | * backref type | shared | indirect | shared | indirect | ||
124 | * information | tree | tree | data | data | ||
125 | * --------------------+--------+----------+--------+---------- | ||
126 | * parent logical | y | - | - | - | ||
127 | * key to resolve | - | y | y | y | ||
128 | * tree block logical | - | - | - | - | ||
129 | * root for resolving | y | y | y | y | ||
130 | * | ||
131 | * - column 1: we've the parent -> done | ||
132 | * - column 2, 3, 4: we use the key to find the parent | ||
133 | * | ||
134 | * on disk refs (inline or keyed) | ||
135 | * ============================== | ||
136 | * backref type | shared | indirect | shared | indirect | ||
137 | * information | tree | tree | data | data | ||
138 | * --------------------+--------+----------+--------+---------- | ||
139 | * parent logical | y | - | y | - | ||
140 | * key to resolve | - | - | - | y | ||
141 | * tree block logical | y | y | y | y | ||
142 | * root for resolving | - | y | y | y | ||
143 | * | ||
144 | * - column 1, 3: we've the parent -> done | ||
145 | * - column 2: we take the first key from the block to find the parent | ||
146 | * (see __add_missing_keys) | ||
147 | * - column 4: we use the key to find the parent | ||
148 | * | ||
149 | * additional information that's available but not required to find the parent | ||
150 | * block might help in merging entries to gain some speed. | ||
151 | */ | ||
152 | |||
40 | static int __add_prelim_ref(struct list_head *head, u64 root_id, | 153 | static int __add_prelim_ref(struct list_head *head, u64 root_id, |
41 | struct btrfs_key *key, int level, u64 parent, | 154 | struct btrfs_key *key, int level, |
42 | u64 wanted_disk_byte, int count) | 155 | u64 parent, u64 wanted_disk_byte, int count) |
43 | { | 156 | { |
44 | struct __prelim_ref *ref; | 157 | struct __prelim_ref *ref; |
45 | 158 | ||
@@ -50,10 +163,11 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, | |||
50 | 163 | ||
51 | ref->root_id = root_id; | 164 | ref->root_id = root_id; |
52 | if (key) | 165 | if (key) |
53 | ref->key = *key; | 166 | ref->key_for_search = *key; |
54 | else | 167 | else |
55 | memset(&ref->key, 0, sizeof(ref->key)); | 168 | memset(&ref->key_for_search, 0, sizeof(ref->key_for_search)); |
56 | 169 | ||
170 | ref->inode_list = NULL; | ||
57 | ref->level = level; | 171 | ref->level = level; |
58 | ref->count = count; | 172 | ref->count = count; |
59 | ref->parent = parent; | 173 | ref->parent = parent; |
@@ -64,18 +178,26 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, | |||
64 | } | 178 | } |
65 | 179 | ||
66 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | 180 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, |
67 | struct ulist *parents, | 181 | struct ulist *parents, int level, |
68 | struct extent_buffer *eb, int level, | 182 | struct btrfs_key *key, u64 wanted_disk_byte, |
69 | u64 wanted_objectid, u64 wanted_disk_byte) | 183 | const u64 *extent_item_pos) |
70 | { | 184 | { |
71 | int ret; | 185 | int ret; |
72 | int slot; | 186 | int slot = path->slots[level]; |
187 | struct extent_buffer *eb = path->nodes[level]; | ||
73 | struct btrfs_file_extent_item *fi; | 188 | struct btrfs_file_extent_item *fi; |
74 | struct btrfs_key key; | 189 | struct extent_inode_elem *eie = NULL; |
75 | u64 disk_byte; | 190 | u64 disk_byte; |
191 | u64 wanted_objectid = key->objectid; | ||
76 | 192 | ||
77 | add_parent: | 193 | add_parent: |
78 | ret = ulist_add(parents, eb->start, 0, GFP_NOFS); | 194 | if (level == 0 && extent_item_pos) { |
195 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
196 | ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie); | ||
197 | if (ret < 0) | ||
198 | return ret; | ||
199 | } | ||
200 | ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS); | ||
79 | if (ret < 0) | 201 | if (ret < 0) |
80 | return ret; | 202 | return ret; |
81 | 203 | ||
@@ -89,6 +211,7 @@ add_parent: | |||
89 | * repeat this until we don't find any additional EXTENT_DATA items. | 211 | * repeat this until we don't find any additional EXTENT_DATA items. |
90 | */ | 212 | */ |
91 | while (1) { | 213 | while (1) { |
214 | eie = NULL; | ||
92 | ret = btrfs_next_leaf(root, path); | 215 | ret = btrfs_next_leaf(root, path); |
93 | if (ret < 0) | 216 | if (ret < 0) |
94 | return ret; | 217 | return ret; |
@@ -97,9 +220,9 @@ add_parent: | |||
97 | 220 | ||
98 | eb = path->nodes[0]; | 221 | eb = path->nodes[0]; |
99 | for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { | 222 | for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { |
100 | btrfs_item_key_to_cpu(eb, &key, slot); | 223 | btrfs_item_key_to_cpu(eb, key, slot); |
101 | if (key.objectid != wanted_objectid || | 224 | if (key->objectid != wanted_objectid || |
102 | key.type != BTRFS_EXTENT_DATA_KEY) | 225 | key->type != BTRFS_EXTENT_DATA_KEY) |
103 | return 0; | 226 | return 0; |
104 | fi = btrfs_item_ptr(eb, slot, | 227 | fi = btrfs_item_ptr(eb, slot, |
105 | struct btrfs_file_extent_item); | 228 | struct btrfs_file_extent_item); |
@@ -118,8 +241,10 @@ add_parent: | |||
118 | */ | 241 | */ |
119 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | 242 | static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, |
120 | int search_commit_root, | 243 | int search_commit_root, |
244 | u64 time_seq, | ||
121 | struct __prelim_ref *ref, | 245 | struct __prelim_ref *ref, |
122 | struct ulist *parents) | 246 | struct ulist *parents, |
247 | const u64 *extent_item_pos) | ||
123 | { | 248 | { |
124 | struct btrfs_path *path; | 249 | struct btrfs_path *path; |
125 | struct btrfs_root *root; | 250 | struct btrfs_root *root; |
@@ -152,12 +277,13 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
152 | goto out; | 277 | goto out; |
153 | 278 | ||
154 | path->lowest_level = level; | 279 | path->lowest_level = level; |
155 | ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0); | 280 | ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); |
156 | pr_debug("search slot in root %llu (level %d, ref count %d) returned " | 281 | pr_debug("search slot in root %llu (level %d, ref count %d) returned " |
157 | "%d for key (%llu %u %llu)\n", | 282 | "%d for key (%llu %u %llu)\n", |
158 | (unsigned long long)ref->root_id, level, ref->count, ret, | 283 | (unsigned long long)ref->root_id, level, ref->count, ret, |
159 | (unsigned long long)ref->key.objectid, ref->key.type, | 284 | (unsigned long long)ref->key_for_search.objectid, |
160 | (unsigned long long)ref->key.offset); | 285 | ref->key_for_search.type, |
286 | (unsigned long long)ref->key_for_search.offset); | ||
161 | if (ret < 0) | 287 | if (ret < 0) |
162 | goto out; | 288 | goto out; |
163 | 289 | ||
@@ -179,9 +305,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
179 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); | 305 | btrfs_item_key_to_cpu(eb, &key, path->slots[0]); |
180 | } | 306 | } |
181 | 307 | ||
182 | /* the last two parameters will only be used for level == 0 */ | 308 | ret = add_all_parents(root, path, parents, level, &key, |
183 | ret = add_all_parents(root, path, parents, eb, level, key.objectid, | 309 | ref->wanted_disk_byte, extent_item_pos); |
184 | ref->wanted_disk_byte); | ||
185 | out: | 310 | out: |
186 | btrfs_free_path(path); | 311 | btrfs_free_path(path); |
187 | return ret; | 312 | return ret; |
@@ -191,8 +316,9 @@ out: | |||
191 | * resolve all indirect backrefs from the list | 316 | * resolve all indirect backrefs from the list |
192 | */ | 317 | */ |
193 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | 318 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, |
194 | int search_commit_root, | 319 | int search_commit_root, u64 time_seq, |
195 | struct list_head *head) | 320 | struct list_head *head, |
321 | const u64 *extent_item_pos) | ||
196 | { | 322 | { |
197 | int err; | 323 | int err; |
198 | int ret = 0; | 324 | int ret = 0; |
@@ -201,6 +327,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
201 | struct __prelim_ref *new_ref; | 327 | struct __prelim_ref *new_ref; |
202 | struct ulist *parents; | 328 | struct ulist *parents; |
203 | struct ulist_node *node; | 329 | struct ulist_node *node; |
330 | struct ulist_iterator uiter; | ||
204 | 331 | ||
205 | parents = ulist_alloc(GFP_NOFS); | 332 | parents = ulist_alloc(GFP_NOFS); |
206 | if (!parents) | 333 | if (!parents) |
@@ -217,7 +344,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
217 | if (ref->count == 0) | 344 | if (ref->count == 0) |
218 | continue; | 345 | continue; |
219 | err = __resolve_indirect_ref(fs_info, search_commit_root, | 346 | err = __resolve_indirect_ref(fs_info, search_commit_root, |
220 | ref, parents); | 347 | time_seq, ref, parents, |
348 | extent_item_pos); | ||
221 | if (err) { | 349 | if (err) { |
222 | if (ret == 0) | 350 | if (ret == 0) |
223 | ret = err; | 351 | ret = err; |
@@ -225,11 +353,14 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
225 | } | 353 | } |
226 | 354 | ||
227 | /* we put the first parent into the ref at hand */ | 355 | /* we put the first parent into the ref at hand */ |
228 | node = ulist_next(parents, NULL); | 356 | ULIST_ITER_INIT(&uiter); |
357 | node = ulist_next(parents, &uiter); | ||
229 | ref->parent = node ? node->val : 0; | 358 | ref->parent = node ? node->val : 0; |
359 | ref->inode_list = | ||
360 | node ? (struct extent_inode_elem *)node->aux : 0; | ||
230 | 361 | ||
231 | /* additional parents require new refs being added here */ | 362 | /* additional parents require new refs being added here */ |
232 | while ((node = ulist_next(parents, node))) { | 363 | while ((node = ulist_next(parents, &uiter))) { |
233 | new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); | 364 | new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); |
234 | if (!new_ref) { | 365 | if (!new_ref) { |
235 | ret = -ENOMEM; | 366 | ret = -ENOMEM; |
@@ -237,6 +368,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
237 | } | 368 | } |
238 | memcpy(new_ref, ref, sizeof(*ref)); | 369 | memcpy(new_ref, ref, sizeof(*ref)); |
239 | new_ref->parent = node->val; | 370 | new_ref->parent = node->val; |
371 | new_ref->inode_list = | ||
372 | (struct extent_inode_elem *)node->aux; | ||
240 | list_add(&new_ref->list, &ref->list); | 373 | list_add(&new_ref->list, &ref->list); |
241 | } | 374 | } |
242 | ulist_reinit(parents); | 375 | ulist_reinit(parents); |
@@ -246,10 +379,65 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
246 | return ret; | 379 | return ret; |
247 | } | 380 | } |
248 | 381 | ||
382 | static inline int ref_for_same_block(struct __prelim_ref *ref1, | ||
383 | struct __prelim_ref *ref2) | ||
384 | { | ||
385 | if (ref1->level != ref2->level) | ||
386 | return 0; | ||
387 | if (ref1->root_id != ref2->root_id) | ||
388 | return 0; | ||
389 | if (ref1->key_for_search.type != ref2->key_for_search.type) | ||
390 | return 0; | ||
391 | if (ref1->key_for_search.objectid != ref2->key_for_search.objectid) | ||
392 | return 0; | ||
393 | if (ref1->key_for_search.offset != ref2->key_for_search.offset) | ||
394 | return 0; | ||
395 | if (ref1->parent != ref2->parent) | ||
396 | return 0; | ||
397 | |||
398 | return 1; | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * read tree blocks and add keys where required. | ||
403 | */ | ||
404 | static int __add_missing_keys(struct btrfs_fs_info *fs_info, | ||
405 | struct list_head *head) | ||
406 | { | ||
407 | struct list_head *pos; | ||
408 | struct extent_buffer *eb; | ||
409 | |||
410 | list_for_each(pos, head) { | ||
411 | struct __prelim_ref *ref; | ||
412 | ref = list_entry(pos, struct __prelim_ref, list); | ||
413 | |||
414 | if (ref->parent) | ||
415 | continue; | ||
416 | if (ref->key_for_search.type) | ||
417 | continue; | ||
418 | BUG_ON(!ref->wanted_disk_byte); | ||
419 | eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, | ||
420 | fs_info->tree_root->leafsize, 0); | ||
421 | BUG_ON(!eb); | ||
422 | btrfs_tree_read_lock(eb); | ||
423 | if (btrfs_header_level(eb) == 0) | ||
424 | btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0); | ||
425 | else | ||
426 | btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0); | ||
427 | btrfs_tree_read_unlock(eb); | ||
428 | free_extent_buffer(eb); | ||
429 | } | ||
430 | return 0; | ||
431 | } | ||
432 | |||
249 | /* | 433 | /* |
250 | * merge two lists of backrefs and adjust counts accordingly | 434 | * merge two lists of backrefs and adjust counts accordingly |
251 | * | 435 | * |
252 | * mode = 1: merge identical keys, if key is set | 436 | * mode = 1: merge identical keys, if key is set |
437 | * FIXME: if we add more keys in __add_prelim_ref, we can merge more here. | ||
438 | * additionally, we could even add a key range for the blocks we | ||
439 | * looked into to merge even more (-> replace unresolved refs by those | ||
440 | * having a parent). | ||
253 | * mode = 2: merge identical parents | 441 | * mode = 2: merge identical parents |
254 | */ | 442 | */ |
255 | static int __merge_refs(struct list_head *head, int mode) | 443 | static int __merge_refs(struct list_head *head, int mode) |
@@ -263,20 +451,21 @@ static int __merge_refs(struct list_head *head, int mode) | |||
263 | 451 | ||
264 | ref1 = list_entry(pos1, struct __prelim_ref, list); | 452 | ref1 = list_entry(pos1, struct __prelim_ref, list); |
265 | 453 | ||
266 | if (mode == 1 && ref1->key.type == 0) | ||
267 | continue; | ||
268 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; | 454 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; |
269 | pos2 = n2, n2 = pos2->next) { | 455 | pos2 = n2, n2 = pos2->next) { |
270 | struct __prelim_ref *ref2; | 456 | struct __prelim_ref *ref2; |
457 | struct __prelim_ref *xchg; | ||
271 | 458 | ||
272 | ref2 = list_entry(pos2, struct __prelim_ref, list); | 459 | ref2 = list_entry(pos2, struct __prelim_ref, list); |
273 | 460 | ||
274 | if (mode == 1) { | 461 | if (mode == 1) { |
275 | if (memcmp(&ref1->key, &ref2->key, | 462 | if (!ref_for_same_block(ref1, ref2)) |
276 | sizeof(ref1->key)) || | ||
277 | ref1->level != ref2->level || | ||
278 | ref1->root_id != ref2->root_id) | ||
279 | continue; | 463 | continue; |
464 | if (!ref1->parent && ref2->parent) { | ||
465 | xchg = ref1; | ||
466 | ref1 = ref2; | ||
467 | ref2 = xchg; | ||
468 | } | ||
280 | ref1->count += ref2->count; | 469 | ref1->count += ref2->count; |
281 | } else { | 470 | } else { |
282 | if (ref1->parent != ref2->parent) | 471 | if (ref1->parent != ref2->parent) |
@@ -296,16 +485,17 @@ static int __merge_refs(struct list_head *head, int mode) | |||
296 | * smaller or equal that seq to the list | 485 | * smaller or equal that seq to the list |
297 | */ | 486 | */ |
298 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | 487 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, |
299 | struct btrfs_key *info_key, | ||
300 | struct list_head *prefs) | 488 | struct list_head *prefs) |
301 | { | 489 | { |
302 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | 490 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; |
303 | struct rb_node *n = &head->node.rb_node; | 491 | struct rb_node *n = &head->node.rb_node; |
492 | struct btrfs_key key; | ||
493 | struct btrfs_key op_key = {0}; | ||
304 | int sgn; | 494 | int sgn; |
305 | int ret = 0; | 495 | int ret = 0; |
306 | 496 | ||
307 | if (extent_op && extent_op->update_key) | 497 | if (extent_op && extent_op->update_key) |
308 | btrfs_disk_key_to_cpu(info_key, &extent_op->key); | 498 | btrfs_disk_key_to_cpu(&op_key, &extent_op->key); |
309 | 499 | ||
310 | while ((n = rb_prev(n))) { | 500 | while ((n = rb_prev(n))) { |
311 | struct btrfs_delayed_ref_node *node; | 501 | struct btrfs_delayed_ref_node *node; |
@@ -337,7 +527,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
337 | struct btrfs_delayed_tree_ref *ref; | 527 | struct btrfs_delayed_tree_ref *ref; |
338 | 528 | ||
339 | ref = btrfs_delayed_node_to_tree_ref(node); | 529 | ref = btrfs_delayed_node_to_tree_ref(node); |
340 | ret = __add_prelim_ref(prefs, ref->root, info_key, | 530 | ret = __add_prelim_ref(prefs, ref->root, &op_key, |
341 | ref->level + 1, 0, node->bytenr, | 531 | ref->level + 1, 0, node->bytenr, |
342 | node->ref_mod * sgn); | 532 | node->ref_mod * sgn); |
343 | break; | 533 | break; |
@@ -346,7 +536,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
346 | struct btrfs_delayed_tree_ref *ref; | 536 | struct btrfs_delayed_tree_ref *ref; |
347 | 537 | ||
348 | ref = btrfs_delayed_node_to_tree_ref(node); | 538 | ref = btrfs_delayed_node_to_tree_ref(node); |
349 | ret = __add_prelim_ref(prefs, ref->root, info_key, | 539 | ret = __add_prelim_ref(prefs, ref->root, NULL, |
350 | ref->level + 1, ref->parent, | 540 | ref->level + 1, ref->parent, |
351 | node->bytenr, | 541 | node->bytenr, |
352 | node->ref_mod * sgn); | 542 | node->ref_mod * sgn); |
@@ -354,8 +544,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
354 | } | 544 | } |
355 | case BTRFS_EXTENT_DATA_REF_KEY: { | 545 | case BTRFS_EXTENT_DATA_REF_KEY: { |
356 | struct btrfs_delayed_data_ref *ref; | 546 | struct btrfs_delayed_data_ref *ref; |
357 | struct btrfs_key key; | ||
358 | |||
359 | ref = btrfs_delayed_node_to_data_ref(node); | 547 | ref = btrfs_delayed_node_to_data_ref(node); |
360 | 548 | ||
361 | key.objectid = ref->objectid; | 549 | key.objectid = ref->objectid; |
@@ -368,7 +556,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
368 | } | 556 | } |
369 | case BTRFS_SHARED_DATA_REF_KEY: { | 557 | case BTRFS_SHARED_DATA_REF_KEY: { |
370 | struct btrfs_delayed_data_ref *ref; | 558 | struct btrfs_delayed_data_ref *ref; |
371 | struct btrfs_key key; | ||
372 | 559 | ||
373 | ref = btrfs_delayed_node_to_data_ref(node); | 560 | ref = btrfs_delayed_node_to_data_ref(node); |
374 | 561 | ||
@@ -394,8 +581,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
394 | */ | 581 | */ |
395 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, | 582 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, |
396 | struct btrfs_path *path, u64 bytenr, | 583 | struct btrfs_path *path, u64 bytenr, |
397 | struct btrfs_key *info_key, int *info_level, | 584 | int *info_level, struct list_head *prefs) |
398 | struct list_head *prefs) | ||
399 | { | 585 | { |
400 | int ret = 0; | 586 | int ret = 0; |
401 | int slot; | 587 | int slot; |
@@ -411,7 +597,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
411 | * enumerate all inline refs | 597 | * enumerate all inline refs |
412 | */ | 598 | */ |
413 | leaf = path->nodes[0]; | 599 | leaf = path->nodes[0]; |
414 | slot = path->slots[0] - 1; | 600 | slot = path->slots[0]; |
415 | 601 | ||
416 | item_size = btrfs_item_size_nr(leaf, slot); | 602 | item_size = btrfs_item_size_nr(leaf, slot); |
417 | BUG_ON(item_size < sizeof(*ei)); | 603 | BUG_ON(item_size < sizeof(*ei)); |
@@ -424,12 +610,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
424 | 610 | ||
425 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 611 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
426 | struct btrfs_tree_block_info *info; | 612 | struct btrfs_tree_block_info *info; |
427 | struct btrfs_disk_key disk_key; | ||
428 | 613 | ||
429 | info = (struct btrfs_tree_block_info *)ptr; | 614 | info = (struct btrfs_tree_block_info *)ptr; |
430 | *info_level = btrfs_tree_block_level(leaf, info); | 615 | *info_level = btrfs_tree_block_level(leaf, info); |
431 | btrfs_tree_block_key(leaf, info, &disk_key); | ||
432 | btrfs_disk_key_to_cpu(info_key, &disk_key); | ||
433 | ptr += sizeof(struct btrfs_tree_block_info); | 616 | ptr += sizeof(struct btrfs_tree_block_info); |
434 | BUG_ON(ptr > end); | 617 | BUG_ON(ptr > end); |
435 | } else { | 618 | } else { |
@@ -447,7 +630,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
447 | 630 | ||
448 | switch (type) { | 631 | switch (type) { |
449 | case BTRFS_SHARED_BLOCK_REF_KEY: | 632 | case BTRFS_SHARED_BLOCK_REF_KEY: |
450 | ret = __add_prelim_ref(prefs, 0, info_key, | 633 | ret = __add_prelim_ref(prefs, 0, NULL, |
451 | *info_level + 1, offset, | 634 | *info_level + 1, offset, |
452 | bytenr, 1); | 635 | bytenr, 1); |
453 | break; | 636 | break; |
@@ -462,8 +645,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
462 | break; | 645 | break; |
463 | } | 646 | } |
464 | case BTRFS_TREE_BLOCK_REF_KEY: | 647 | case BTRFS_TREE_BLOCK_REF_KEY: |
465 | ret = __add_prelim_ref(prefs, offset, info_key, | 648 | ret = __add_prelim_ref(prefs, offset, NULL, |
466 | *info_level + 1, 0, bytenr, 1); | 649 | *info_level + 1, 0, |
650 | bytenr, 1); | ||
467 | break; | 651 | break; |
468 | case BTRFS_EXTENT_DATA_REF_KEY: { | 652 | case BTRFS_EXTENT_DATA_REF_KEY: { |
469 | struct btrfs_extent_data_ref *dref; | 653 | struct btrfs_extent_data_ref *dref; |
@@ -477,8 +661,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
477 | key.type = BTRFS_EXTENT_DATA_KEY; | 661 | key.type = BTRFS_EXTENT_DATA_KEY; |
478 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 662 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
479 | root = btrfs_extent_data_ref_root(leaf, dref); | 663 | root = btrfs_extent_data_ref_root(leaf, dref); |
480 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr, | 664 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
481 | count); | 665 | bytenr, count); |
482 | break; | 666 | break; |
483 | } | 667 | } |
484 | default: | 668 | default: |
@@ -496,8 +680,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
496 | */ | 680 | */ |
497 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | 681 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, |
498 | struct btrfs_path *path, u64 bytenr, | 682 | struct btrfs_path *path, u64 bytenr, |
499 | struct btrfs_key *info_key, int info_level, | 683 | int info_level, struct list_head *prefs) |
500 | struct list_head *prefs) | ||
501 | { | 684 | { |
502 | struct btrfs_root *extent_root = fs_info->extent_root; | 685 | struct btrfs_root *extent_root = fs_info->extent_root; |
503 | int ret; | 686 | int ret; |
@@ -527,7 +710,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
527 | 710 | ||
528 | switch (key.type) { | 711 | switch (key.type) { |
529 | case BTRFS_SHARED_BLOCK_REF_KEY: | 712 | case BTRFS_SHARED_BLOCK_REF_KEY: |
530 | ret = __add_prelim_ref(prefs, 0, info_key, | 713 | ret = __add_prelim_ref(prefs, 0, NULL, |
531 | info_level + 1, key.offset, | 714 | info_level + 1, key.offset, |
532 | bytenr, 1); | 715 | bytenr, 1); |
533 | break; | 716 | break; |
@@ -543,8 +726,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
543 | break; | 726 | break; |
544 | } | 727 | } |
545 | case BTRFS_TREE_BLOCK_REF_KEY: | 728 | case BTRFS_TREE_BLOCK_REF_KEY: |
546 | ret = __add_prelim_ref(prefs, key.offset, info_key, | 729 | ret = __add_prelim_ref(prefs, key.offset, NULL, |
547 | info_level + 1, 0, bytenr, 1); | 730 | info_level + 1, 0, |
731 | bytenr, 1); | ||
548 | break; | 732 | break; |
549 | case BTRFS_EXTENT_DATA_REF_KEY: { | 733 | case BTRFS_EXTENT_DATA_REF_KEY: { |
550 | struct btrfs_extent_data_ref *dref; | 734 | struct btrfs_extent_data_ref *dref; |
@@ -560,7 +744,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
560 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 744 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
561 | root = btrfs_extent_data_ref_root(leaf, dref); | 745 | root = btrfs_extent_data_ref_root(leaf, dref); |
562 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, | 746 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
563 | bytenr, count); | 747 | bytenr, count); |
564 | break; | 748 | break; |
565 | } | 749 | } |
566 | default: | 750 | default: |
@@ -582,11 +766,12 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
582 | */ | 766 | */ |
583 | static int find_parent_nodes(struct btrfs_trans_handle *trans, | 767 | static int find_parent_nodes(struct btrfs_trans_handle *trans, |
584 | struct btrfs_fs_info *fs_info, u64 bytenr, | 768 | struct btrfs_fs_info *fs_info, u64 bytenr, |
585 | u64 seq, struct ulist *refs, struct ulist *roots) | 769 | u64 delayed_ref_seq, u64 time_seq, |
770 | struct ulist *refs, struct ulist *roots, | ||
771 | const u64 *extent_item_pos) | ||
586 | { | 772 | { |
587 | struct btrfs_key key; | 773 | struct btrfs_key key; |
588 | struct btrfs_path *path; | 774 | struct btrfs_path *path; |
589 | struct btrfs_key info_key = { 0 }; | ||
590 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | 775 | struct btrfs_delayed_ref_root *delayed_refs = NULL; |
591 | struct btrfs_delayed_ref_head *head; | 776 | struct btrfs_delayed_ref_head *head; |
592 | int info_level = 0; | 777 | int info_level = 0; |
@@ -645,7 +830,7 @@ again: | |||
645 | btrfs_put_delayed_ref(&head->node); | 830 | btrfs_put_delayed_ref(&head->node); |
646 | goto again; | 831 | goto again; |
647 | } | 832 | } |
648 | ret = __add_delayed_refs(head, seq, &info_key, | 833 | ret = __add_delayed_refs(head, delayed_ref_seq, |
649 | &prefs_delayed); | 834 | &prefs_delayed); |
650 | if (ret) { | 835 | if (ret) { |
651 | spin_unlock(&delayed_refs->lock); | 836 | spin_unlock(&delayed_refs->lock); |
@@ -659,16 +844,17 @@ again: | |||
659 | struct extent_buffer *leaf; | 844 | struct extent_buffer *leaf; |
660 | int slot; | 845 | int slot; |
661 | 846 | ||
847 | path->slots[0]--; | ||
662 | leaf = path->nodes[0]; | 848 | leaf = path->nodes[0]; |
663 | slot = path->slots[0] - 1; | 849 | slot = path->slots[0]; |
664 | btrfs_item_key_to_cpu(leaf, &key, slot); | 850 | btrfs_item_key_to_cpu(leaf, &key, slot); |
665 | if (key.objectid == bytenr && | 851 | if (key.objectid == bytenr && |
666 | key.type == BTRFS_EXTENT_ITEM_KEY) { | 852 | key.type == BTRFS_EXTENT_ITEM_KEY) { |
667 | ret = __add_inline_refs(fs_info, path, bytenr, | 853 | ret = __add_inline_refs(fs_info, path, bytenr, |
668 | &info_key, &info_level, &prefs); | 854 | &info_level, &prefs); |
669 | if (ret) | 855 | if (ret) |
670 | goto out; | 856 | goto out; |
671 | ret = __add_keyed_refs(fs_info, path, bytenr, &info_key, | 857 | ret = __add_keyed_refs(fs_info, path, bytenr, |
672 | info_level, &prefs); | 858 | info_level, &prefs); |
673 | if (ret) | 859 | if (ret) |
674 | goto out; | 860 | goto out; |
@@ -676,21 +862,18 @@ again: | |||
676 | } | 862 | } |
677 | btrfs_release_path(path); | 863 | btrfs_release_path(path); |
678 | 864 | ||
679 | /* | ||
680 | * when adding the delayed refs above, the info_key might not have | ||
681 | * been known yet. Go over the list and replace the missing keys | ||
682 | */ | ||
683 | list_for_each_entry(ref, &prefs_delayed, list) { | ||
684 | if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0) | ||
685 | memcpy(&ref->key, &info_key, sizeof(ref->key)); | ||
686 | } | ||
687 | list_splice_init(&prefs_delayed, &prefs); | 865 | list_splice_init(&prefs_delayed, &prefs); |
688 | 866 | ||
867 | ret = __add_missing_keys(fs_info, &prefs); | ||
868 | if (ret) | ||
869 | goto out; | ||
870 | |||
689 | ret = __merge_refs(&prefs, 1); | 871 | ret = __merge_refs(&prefs, 1); |
690 | if (ret) | 872 | if (ret) |
691 | goto out; | 873 | goto out; |
692 | 874 | ||
693 | ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs); | 875 | ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, |
876 | &prefs, extent_item_pos); | ||
694 | if (ret) | 877 | if (ret) |
695 | goto out; | 878 | goto out; |
696 | 879 | ||
@@ -709,7 +892,33 @@ again: | |||
709 | BUG_ON(ret < 0); | 892 | BUG_ON(ret < 0); |
710 | } | 893 | } |
711 | if (ref->count && ref->parent) { | 894 | if (ref->count && ref->parent) { |
712 | ret = ulist_add(refs, ref->parent, 0, GFP_NOFS); | 895 | struct extent_inode_elem *eie = NULL; |
896 | if (extent_item_pos && !ref->inode_list) { | ||
897 | u32 bsz; | ||
898 | struct extent_buffer *eb; | ||
899 | bsz = btrfs_level_size(fs_info->extent_root, | ||
900 | info_level); | ||
901 | eb = read_tree_block(fs_info->extent_root, | ||
902 | ref->parent, bsz, 0); | ||
903 | BUG_ON(!eb); | ||
904 | ret = find_extent_in_eb(eb, bytenr, | ||
905 | *extent_item_pos, &eie); | ||
906 | ref->inode_list = eie; | ||
907 | free_extent_buffer(eb); | ||
908 | } | ||
909 | ret = ulist_add_merge(refs, ref->parent, | ||
910 | (unsigned long)ref->inode_list, | ||
911 | (unsigned long *)&eie, GFP_NOFS); | ||
912 | if (!ret && extent_item_pos) { | ||
913 | /* | ||
914 | * we've recorded that parent, so we must extend | ||
915 | * its inode list here | ||
916 | */ | ||
917 | BUG_ON(!eie); | ||
918 | while (eie->next) | ||
919 | eie = eie->next; | ||
920 | eie->next = ref->inode_list; | ||
921 | } | ||
713 | BUG_ON(ret < 0); | 922 | BUG_ON(ret < 0); |
714 | } | 923 | } |
715 | kfree(ref); | 924 | kfree(ref); |
@@ -734,6 +943,28 @@ out: | |||
734 | return ret; | 943 | return ret; |
735 | } | 944 | } |
736 | 945 | ||
946 | static void free_leaf_list(struct ulist *blocks) | ||
947 | { | ||
948 | struct ulist_node *node = NULL; | ||
949 | struct extent_inode_elem *eie; | ||
950 | struct extent_inode_elem *eie_next; | ||
951 | struct ulist_iterator uiter; | ||
952 | |||
953 | ULIST_ITER_INIT(&uiter); | ||
954 | while ((node = ulist_next(blocks, &uiter))) { | ||
955 | if (!node->aux) | ||
956 | continue; | ||
957 | eie = (struct extent_inode_elem *)node->aux; | ||
958 | for (; eie; eie = eie_next) { | ||
959 | eie_next = eie->next; | ||
960 | kfree(eie); | ||
961 | } | ||
962 | node->aux = 0; | ||
963 | } | ||
964 | |||
965 | ulist_free(blocks); | ||
966 | } | ||
967 | |||
737 | /* | 968 | /* |
738 | * Finds all leafs with a reference to the specified combination of bytenr and | 969 | * Finds all leafs with a reference to the specified combination of bytenr and |
739 | * offset. key_list_head will point to a list of corresponding keys (caller must | 970 | * offset. key_list_head will point to a list of corresponding keys (caller must |
@@ -744,7 +975,9 @@ out: | |||
744 | */ | 975 | */ |
745 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | 976 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, |
746 | struct btrfs_fs_info *fs_info, u64 bytenr, | 977 | struct btrfs_fs_info *fs_info, u64 bytenr, |
747 | u64 num_bytes, u64 seq, struct ulist **leafs) | 978 | u64 delayed_ref_seq, u64 time_seq, |
979 | struct ulist **leafs, | ||
980 | const u64 *extent_item_pos) | ||
748 | { | 981 | { |
749 | struct ulist *tmp; | 982 | struct ulist *tmp; |
750 | int ret; | 983 | int ret; |
@@ -758,11 +991,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
758 | return -ENOMEM; | 991 | return -ENOMEM; |
759 | } | 992 | } |
760 | 993 | ||
761 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp); | 994 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, |
995 | time_seq, *leafs, tmp, extent_item_pos); | ||
762 | ulist_free(tmp); | 996 | ulist_free(tmp); |
763 | 997 | ||
764 | if (ret < 0 && ret != -ENOENT) { | 998 | if (ret < 0 && ret != -ENOENT) { |
765 | ulist_free(*leafs); | 999 | free_leaf_list(*leafs); |
766 | return ret; | 1000 | return ret; |
767 | } | 1001 | } |
768 | 1002 | ||
@@ -784,10 +1018,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
784 | */ | 1018 | */ |
785 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 1019 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
786 | struct btrfs_fs_info *fs_info, u64 bytenr, | 1020 | struct btrfs_fs_info *fs_info, u64 bytenr, |
787 | u64 num_bytes, u64 seq, struct ulist **roots) | 1021 | u64 delayed_ref_seq, u64 time_seq, |
1022 | struct ulist **roots) | ||
788 | { | 1023 | { |
789 | struct ulist *tmp; | 1024 | struct ulist *tmp; |
790 | struct ulist_node *node = NULL; | 1025 | struct ulist_node *node = NULL; |
1026 | struct ulist_iterator uiter; | ||
791 | int ret; | 1027 | int ret; |
792 | 1028 | ||
793 | tmp = ulist_alloc(GFP_NOFS); | 1029 | tmp = ulist_alloc(GFP_NOFS); |
@@ -799,15 +1035,16 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
799 | return -ENOMEM; | 1035 | return -ENOMEM; |
800 | } | 1036 | } |
801 | 1037 | ||
1038 | ULIST_ITER_INIT(&uiter); | ||
802 | while (1) { | 1039 | while (1) { |
803 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, | 1040 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, |
804 | tmp, *roots); | 1041 | time_seq, tmp, *roots, NULL); |
805 | if (ret < 0 && ret != -ENOENT) { | 1042 | if (ret < 0 && ret != -ENOENT) { |
806 | ulist_free(tmp); | 1043 | ulist_free(tmp); |
807 | ulist_free(*roots); | 1044 | ulist_free(*roots); |
808 | return ret; | 1045 | return ret; |
809 | } | 1046 | } |
810 | node = ulist_next(tmp, node); | 1047 | node = ulist_next(tmp, &uiter); |
811 | if (!node) | 1048 | if (!node) |
812 | break; | 1049 | break; |
813 | bytenr = node->val; | 1050 | bytenr = node->val; |
@@ -1093,67 +1330,25 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | |||
1093 | return 0; | 1330 | return 0; |
1094 | } | 1331 | } |
1095 | 1332 | ||
1096 | static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical, | 1333 | static int iterate_leaf_refs(struct extent_inode_elem *inode_list, |
1097 | u64 orig_extent_item_objectid, | 1334 | u64 root, u64 extent_item_objectid, |
1098 | u64 extent_item_pos, u64 root, | ||
1099 | iterate_extent_inodes_t *iterate, void *ctx) | 1335 | iterate_extent_inodes_t *iterate, void *ctx) |
1100 | { | 1336 | { |
1101 | u64 disk_byte; | 1337 | struct extent_inode_elem *eie; |
1102 | struct btrfs_key key; | ||
1103 | struct btrfs_file_extent_item *fi; | ||
1104 | struct extent_buffer *eb; | ||
1105 | int slot; | ||
1106 | int nritems; | ||
1107 | int ret = 0; | 1338 | int ret = 0; |
1108 | int extent_type; | ||
1109 | u64 data_offset; | ||
1110 | u64 data_len; | ||
1111 | |||
1112 | eb = read_tree_block(fs_info->tree_root, logical, | ||
1113 | fs_info->tree_root->leafsize, 0); | ||
1114 | if (!eb) | ||
1115 | return -EIO; | ||
1116 | |||
1117 | /* | ||
1118 | * from the shared data ref, we only have the leaf but we need | ||
1119 | * the key. thus, we must look into all items and see that we | ||
1120 | * find one (some) with a reference to our extent item. | ||
1121 | */ | ||
1122 | nritems = btrfs_header_nritems(eb); | ||
1123 | for (slot = 0; slot < nritems; ++slot) { | ||
1124 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
1125 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
1126 | continue; | ||
1127 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
1128 | extent_type = btrfs_file_extent_type(eb, fi); | ||
1129 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
1130 | continue; | ||
1131 | /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ | ||
1132 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
1133 | if (disk_byte != orig_extent_item_objectid) | ||
1134 | continue; | ||
1135 | |||
1136 | data_offset = btrfs_file_extent_offset(eb, fi); | ||
1137 | data_len = btrfs_file_extent_num_bytes(eb, fi); | ||
1138 | |||
1139 | if (extent_item_pos < data_offset || | ||
1140 | extent_item_pos >= data_offset + data_len) | ||
1141 | continue; | ||
1142 | 1339 | ||
1340 | for (eie = inode_list; eie; eie = eie->next) { | ||
1143 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " | 1341 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " |
1144 | "root %llu\n", orig_extent_item_objectid, | 1342 | "root %llu\n", extent_item_objectid, |
1145 | key.objectid, key.offset, root); | 1343 | eie->inum, eie->offset, root); |
1146 | ret = iterate(key.objectid, | 1344 | ret = iterate(eie->inum, eie->offset, root, ctx); |
1147 | key.offset + (extent_item_pos - data_offset), | ||
1148 | root, ctx); | ||
1149 | if (ret) { | 1345 | if (ret) { |
1150 | pr_debug("stopping iteration because ret=%d\n", ret); | 1346 | pr_debug("stopping iteration for %llu due to ret=%d\n", |
1347 | extent_item_objectid, ret); | ||
1151 | break; | 1348 | break; |
1152 | } | 1349 | } |
1153 | } | 1350 | } |
1154 | 1351 | ||
1155 | free_extent_buffer(eb); | ||
1156 | |||
1157 | return ret; | 1352 | return ret; |
1158 | } | 1353 | } |
1159 | 1354 | ||
@@ -1175,7 +1370,10 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1175 | struct ulist *roots = NULL; | 1370 | struct ulist *roots = NULL; |
1176 | struct ulist_node *ref_node = NULL; | 1371 | struct ulist_node *ref_node = NULL; |
1177 | struct ulist_node *root_node = NULL; | 1372 | struct ulist_node *root_node = NULL; |
1178 | struct seq_list seq_elem; | 1373 | struct seq_list seq_elem = {}; |
1374 | struct seq_list tree_mod_seq_elem = {}; | ||
1375 | struct ulist_iterator ref_uiter; | ||
1376 | struct ulist_iterator root_uiter; | ||
1179 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | 1377 | struct btrfs_delayed_ref_root *delayed_refs = NULL; |
1180 | 1378 | ||
1181 | pr_debug("resolving all inodes for extent %llu\n", | 1379 | pr_debug("resolving all inodes for extent %llu\n", |
@@ -1192,34 +1390,41 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1192 | spin_lock(&delayed_refs->lock); | 1390 | spin_lock(&delayed_refs->lock); |
1193 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); | 1391 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); |
1194 | spin_unlock(&delayed_refs->lock); | 1392 | spin_unlock(&delayed_refs->lock); |
1393 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
1195 | } | 1394 | } |
1196 | 1395 | ||
1197 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, | 1396 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, |
1198 | extent_item_pos, seq_elem.seq, | 1397 | seq_elem.seq, tree_mod_seq_elem.seq, &refs, |
1199 | &refs); | 1398 | &extent_item_pos); |
1200 | |||
1201 | if (ret) | 1399 | if (ret) |
1202 | goto out; | 1400 | goto out; |
1203 | 1401 | ||
1204 | while (!ret && (ref_node = ulist_next(refs, ref_node))) { | 1402 | ULIST_ITER_INIT(&ref_uiter); |
1205 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1, | 1403 | while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { |
1206 | seq_elem.seq, &roots); | 1404 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, |
1405 | seq_elem.seq, | ||
1406 | tree_mod_seq_elem.seq, &roots); | ||
1207 | if (ret) | 1407 | if (ret) |
1208 | break; | 1408 | break; |
1209 | while (!ret && (root_node = ulist_next(roots, root_node))) { | 1409 | ULIST_ITER_INIT(&root_uiter); |
1210 | pr_debug("root %llu references leaf %llu\n", | 1410 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { |
1211 | root_node->val, ref_node->val); | 1411 | pr_debug("root %llu references leaf %llu, data list " |
1212 | ret = iterate_leaf_refs(fs_info, ref_node->val, | 1412 | "%#lx\n", root_node->val, ref_node->val, |
1213 | extent_item_objectid, | 1413 | ref_node->aux); |
1214 | extent_item_pos, root_node->val, | 1414 | ret = iterate_leaf_refs( |
1215 | iterate, ctx); | 1415 | (struct extent_inode_elem *)ref_node->aux, |
1416 | root_node->val, extent_item_objectid, | ||
1417 | iterate, ctx); | ||
1216 | } | 1418 | } |
1419 | ulist_free(roots); | ||
1420 | roots = NULL; | ||
1217 | } | 1421 | } |
1218 | 1422 | ||
1219 | ulist_free(refs); | 1423 | free_leaf_list(refs); |
1220 | ulist_free(roots); | 1424 | ulist_free(roots); |
1221 | out: | 1425 | out: |
1222 | if (!search_commit_root) { | 1426 | if (!search_commit_root) { |
1427 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
1223 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); | 1428 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); |
1224 | btrfs_end_transaction(trans, fs_info->extent_root); | 1429 | btrfs_end_transaction(trans, fs_info->extent_root); |
1225 | } | 1430 | } |
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 57ea2e959e4d..c18d8ac7b795 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -58,7 +58,8 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | |||
58 | 58 | ||
59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
60 | struct btrfs_fs_info *fs_info, u64 bytenr, | 60 | struct btrfs_fs_info *fs_info, u64 bytenr, |
61 | u64 num_bytes, u64 seq, struct ulist **roots); | 61 | u64 delayed_ref_seq, u64 time_seq, |
62 | struct ulist **roots); | ||
62 | 63 | ||
63 | struct btrfs_data_container *init_data_container(u32 total_bytes); | 64 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
64 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | 65 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 99fcad631a21..d7a96cfdc50a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/rbtree.h> | ||
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
23 | #include "transaction.h" | 24 | #include "transaction.h" |
@@ -37,7 +38,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
37 | struct extent_buffer *dst_buf, | 38 | struct extent_buffer *dst_buf, |
38 | struct extent_buffer *src_buf); | 39 | struct extent_buffer *src_buf); |
39 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
40 | struct btrfs_path *path, int level, int slot); | 41 | struct btrfs_path *path, int level, int slot, |
42 | int tree_mod_log); | ||
43 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
44 | struct extent_buffer *eb); | ||
45 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, | ||
46 | u32 blocksize, u64 parent_transid, | ||
47 | u64 time_seq); | ||
48 | struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root, | ||
49 | u64 bytenr, u32 blocksize, | ||
50 | u64 time_seq); | ||
41 | 51 | ||
42 | struct btrfs_path *btrfs_alloc_path(void) | 52 | struct btrfs_path *btrfs_alloc_path(void) |
43 | { | 53 | { |
@@ -255,7 +265,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
255 | 265 | ||
256 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, | 266 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, |
257 | new_root_objectid, &disk_key, level, | 267 | new_root_objectid, &disk_key, level, |
258 | buf->start, 0, 1); | 268 | buf->start, 0); |
259 | if (IS_ERR(cow)) | 269 | if (IS_ERR(cow)) |
260 | return PTR_ERR(cow); | 270 | return PTR_ERR(cow); |
261 | 271 | ||
@@ -288,6 +298,434 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
288 | return 0; | 298 | return 0; |
289 | } | 299 | } |
290 | 300 | ||
301 | enum mod_log_op { | ||
302 | MOD_LOG_KEY_REPLACE, | ||
303 | MOD_LOG_KEY_ADD, | ||
304 | MOD_LOG_KEY_REMOVE, | ||
305 | MOD_LOG_KEY_REMOVE_WHILE_FREEING, | ||
306 | MOD_LOG_KEY_REMOVE_WHILE_MOVING, | ||
307 | MOD_LOG_MOVE_KEYS, | ||
308 | MOD_LOG_ROOT_REPLACE, | ||
309 | }; | ||
310 | |||
311 | struct tree_mod_move { | ||
312 | int dst_slot; | ||
313 | int nr_items; | ||
314 | }; | ||
315 | |||
316 | struct tree_mod_root { | ||
317 | u64 logical; | ||
318 | u8 level; | ||
319 | }; | ||
320 | |||
321 | struct tree_mod_elem { | ||
322 | struct rb_node node; | ||
323 | u64 index; /* shifted logical */ | ||
324 | struct seq_list elem; | ||
325 | enum mod_log_op op; | ||
326 | |||
327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ | ||
328 | int slot; | ||
329 | |||
330 | /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ | ||
331 | u64 generation; | ||
332 | |||
333 | /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ | ||
334 | struct btrfs_disk_key key; | ||
335 | u64 blockptr; | ||
336 | |||
337 | /* this is used for op == MOD_LOG_MOVE_KEYS */ | ||
338 | struct tree_mod_move move; | ||
339 | |||
340 | /* this is used for op == MOD_LOG_ROOT_REPLACE */ | ||
341 | struct tree_mod_root old_root; | ||
342 | }; | ||
343 | |||
344 | static inline void | ||
345 | __get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) | ||
346 | { | ||
347 | elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); | ||
348 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
349 | } | ||
350 | |||
351 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
352 | struct seq_list *elem) | ||
353 | { | ||
354 | elem->flags = 1; | ||
355 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
356 | __get_tree_mod_seq(fs_info, elem); | ||
357 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
358 | } | ||
359 | |||
360 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
361 | struct seq_list *elem) | ||
362 | { | ||
363 | struct rb_root *tm_root; | ||
364 | struct rb_node *node; | ||
365 | struct rb_node *next; | ||
366 | struct seq_list *cur_elem; | ||
367 | struct tree_mod_elem *tm; | ||
368 | u64 min_seq = (u64)-1; | ||
369 | u64 seq_putting = elem->seq; | ||
370 | |||
371 | if (!seq_putting) | ||
372 | return; | ||
373 | |||
374 | BUG_ON(!(elem->flags & 1)); | ||
375 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
376 | list_del(&elem->list); | ||
377 | |||
378 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { | ||
379 | if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { | ||
380 | if (seq_putting > cur_elem->seq) { | ||
381 | /* | ||
382 | * blocker with lower sequence number exists, we | ||
383 | * cannot remove anything from the log | ||
384 | */ | ||
385 | goto out; | ||
386 | } | ||
387 | min_seq = cur_elem->seq; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * anything that's lower than the lowest existing (read: blocked) | ||
393 | * sequence number can be removed from the tree. | ||
394 | */ | ||
395 | write_lock(&fs_info->tree_mod_log_lock); | ||
396 | tm_root = &fs_info->tree_mod_log; | ||
397 | for (node = rb_first(tm_root); node; node = next) { | ||
398 | next = rb_next(node); | ||
399 | tm = container_of(node, struct tree_mod_elem, node); | ||
400 | if (tm->elem.seq > min_seq) | ||
401 | continue; | ||
402 | rb_erase(node, tm_root); | ||
403 | list_del(&tm->elem.list); | ||
404 | kfree(tm); | ||
405 | } | ||
406 | write_unlock(&fs_info->tree_mod_log_lock); | ||
407 | out: | ||
408 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * key order of the log: | ||
413 | * index -> sequence | ||
414 | * | ||
415 | * the index is the shifted logical of the *new* root node for root replace | ||
416 | * operations, or the shifted logical of the affected block for all other | ||
417 | * operations. | ||
418 | */ | ||
419 | static noinline int | ||
420 | __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | ||
421 | { | ||
422 | struct rb_root *tm_root; | ||
423 | struct rb_node **new; | ||
424 | struct rb_node *parent = NULL; | ||
425 | struct tree_mod_elem *cur; | ||
426 | int ret = 0; | ||
427 | |||
428 | BUG_ON(!tm || !tm->elem.seq); | ||
429 | |||
430 | write_lock(&fs_info->tree_mod_log_lock); | ||
431 | tm_root = &fs_info->tree_mod_log; | ||
432 | new = &tm_root->rb_node; | ||
433 | while (*new) { | ||
434 | cur = container_of(*new, struct tree_mod_elem, node); | ||
435 | parent = *new; | ||
436 | if (cur->index < tm->index) | ||
437 | new = &((*new)->rb_left); | ||
438 | else if (cur->index > tm->index) | ||
439 | new = &((*new)->rb_right); | ||
440 | else if (cur->elem.seq < tm->elem.seq) | ||
441 | new = &((*new)->rb_left); | ||
442 | else if (cur->elem.seq > tm->elem.seq) | ||
443 | new = &((*new)->rb_right); | ||
444 | else { | ||
445 | kfree(tm); | ||
446 | ret = -EEXIST; | ||
447 | goto unlock; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | rb_link_node(&tm->node, parent, new); | ||
452 | rb_insert_color(&tm->node, tm_root); | ||
453 | unlock: | ||
454 | write_unlock(&fs_info->tree_mod_log_lock); | ||
455 | return ret; | ||
456 | } | ||
457 | |||
458 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, | ||
459 | struct extent_buffer *eb) { | ||
460 | smp_mb(); | ||
461 | if (list_empty(&(fs_info)->tree_mod_seq_list)) | ||
462 | return 1; | ||
463 | if (!eb) | ||
464 | return 0; | ||
465 | if (btrfs_header_level(eb) == 0) | ||
466 | return 1; | ||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | ||
471 | struct tree_mod_elem **tm_ret) | ||
472 | { | ||
473 | struct tree_mod_elem *tm; | ||
474 | int seq; | ||
475 | |||
476 | if (tree_mod_dont_log(fs_info, NULL)) | ||
477 | return 0; | ||
478 | |||
479 | tm = *tm_ret = kzalloc(sizeof(*tm), flags); | ||
480 | if (!tm) | ||
481 | return -ENOMEM; | ||
482 | |||
483 | tm->elem.flags = 0; | ||
484 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
485 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
486 | /* | ||
487 | * someone emptied the list while we were waiting for the lock. | ||
488 | * we must not add to the list, because no blocker exists. items | ||
489 | * are removed from the list only when the existing blocker is | ||
490 | * removed from the list. | ||
491 | */ | ||
492 | kfree(tm); | ||
493 | seq = 0; | ||
494 | } else { | ||
495 | __get_tree_mod_seq(fs_info, &tm->elem); | ||
496 | seq = tm->elem.seq; | ||
497 | } | ||
498 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
499 | |||
500 | return seq; | ||
501 | } | ||
502 | |||
503 | static noinline int | ||
504 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | ||
505 | struct extent_buffer *eb, int slot, | ||
506 | enum mod_log_op op, gfp_t flags) | ||
507 | { | ||
508 | struct tree_mod_elem *tm; | ||
509 | int ret; | ||
510 | |||
511 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
512 | if (ret <= 0) | ||
513 | return ret; | ||
514 | |||
515 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
516 | if (op != MOD_LOG_KEY_ADD) { | ||
517 | btrfs_node_key(eb, &tm->key, slot); | ||
518 | tm->blockptr = btrfs_node_blockptr(eb, slot); | ||
519 | } | ||
520 | tm->op = op; | ||
521 | tm->slot = slot; | ||
522 | tm->generation = btrfs_node_ptr_generation(eb, slot); | ||
523 | |||
524 | return __tree_mod_log_insert(fs_info, tm); | ||
525 | } | ||
526 | |||
527 | static noinline int | ||
528 | tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
529 | int slot, enum mod_log_op op) | ||
530 | { | ||
531 | return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); | ||
532 | } | ||
533 | |||
534 | static noinline int | ||
535 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | ||
536 | struct extent_buffer *eb, int dst_slot, int src_slot, | ||
537 | int nr_items, gfp_t flags) | ||
538 | { | ||
539 | struct tree_mod_elem *tm; | ||
540 | int ret; | ||
541 | int i; | ||
542 | |||
543 | if (tree_mod_dont_log(fs_info, eb)) | ||
544 | return 0; | ||
545 | |||
546 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | ||
547 | ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, | ||
548 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | ||
549 | BUG_ON(ret < 0); | ||
550 | } | ||
551 | |||
552 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
553 | if (ret <= 0) | ||
554 | return ret; | ||
555 | |||
556 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
557 | tm->slot = src_slot; | ||
558 | tm->move.dst_slot = dst_slot; | ||
559 | tm->move.nr_items = nr_items; | ||
560 | tm->op = MOD_LOG_MOVE_KEYS; | ||
561 | |||
562 | return __tree_mod_log_insert(fs_info, tm); | ||
563 | } | ||
564 | |||
565 | static noinline int | ||
566 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | ||
567 | struct extent_buffer *old_root, | ||
568 | struct extent_buffer *new_root, gfp_t flags) | ||
569 | { | ||
570 | struct tree_mod_elem *tm; | ||
571 | int ret; | ||
572 | |||
573 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
574 | if (ret <= 0) | ||
575 | return ret; | ||
576 | |||
577 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | ||
578 | tm->old_root.logical = old_root->start; | ||
579 | tm->old_root.level = btrfs_header_level(old_root); | ||
580 | tm->generation = btrfs_header_generation(old_root); | ||
581 | tm->op = MOD_LOG_ROOT_REPLACE; | ||
582 | |||
583 | return __tree_mod_log_insert(fs_info, tm); | ||
584 | } | ||
585 | |||
586 | static struct tree_mod_elem * | ||
587 | __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | ||
588 | int smallest) | ||
589 | { | ||
590 | struct rb_root *tm_root; | ||
591 | struct rb_node *node; | ||
592 | struct tree_mod_elem *cur = NULL; | ||
593 | struct tree_mod_elem *found = NULL; | ||
594 | u64 index = start >> PAGE_CACHE_SHIFT; | ||
595 | |||
596 | read_lock(&fs_info->tree_mod_log_lock); | ||
597 | tm_root = &fs_info->tree_mod_log; | ||
598 | node = tm_root->rb_node; | ||
599 | while (node) { | ||
600 | cur = container_of(node, struct tree_mod_elem, node); | ||
601 | if (cur->index < index) { | ||
602 | node = node->rb_left; | ||
603 | } else if (cur->index > index) { | ||
604 | node = node->rb_right; | ||
605 | } else if (cur->elem.seq < min_seq) { | ||
606 | node = node->rb_left; | ||
607 | } else if (!smallest) { | ||
608 | /* we want the node with the highest seq */ | ||
609 | if (found) | ||
610 | BUG_ON(found->elem.seq > cur->elem.seq); | ||
611 | found = cur; | ||
612 | node = node->rb_left; | ||
613 | } else if (cur->elem.seq > min_seq) { | ||
614 | /* we want the node with the smallest seq */ | ||
615 | if (found) | ||
616 | BUG_ON(found->elem.seq < cur->elem.seq); | ||
617 | found = cur; | ||
618 | node = node->rb_right; | ||
619 | } else { | ||
620 | found = cur; | ||
621 | break; | ||
622 | } | ||
623 | } | ||
624 | read_unlock(&fs_info->tree_mod_log_lock); | ||
625 | |||
626 | return found; | ||
627 | } | ||
628 | |||
629 | /* | ||
630 | * this returns the element from the log with the smallest time sequence | ||
631 | * value that's in the log (the oldest log item). any element with a time | ||
632 | * sequence lower than min_seq will be ignored. | ||
633 | */ | ||
634 | static struct tree_mod_elem * | ||
635 | tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, | ||
636 | u64 min_seq) | ||
637 | { | ||
638 | return __tree_mod_log_search(fs_info, start, min_seq, 1); | ||
639 | } | ||
640 | |||
641 | /* | ||
642 | * this returns the element from the log with the largest time sequence | ||
643 | * value that's in the log (the most recent log item). any element with | ||
644 | * a time sequence lower than min_seq will be ignored. | ||
645 | */ | ||
646 | static struct tree_mod_elem * | ||
647 | tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | ||
648 | { | ||
649 | return __tree_mod_log_search(fs_info, start, min_seq, 0); | ||
650 | } | ||
651 | |||
652 | static inline void | ||
653 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
654 | struct extent_buffer *src, unsigned long dst_offset, | ||
655 | unsigned long src_offset, int nr_items) | ||
656 | { | ||
657 | int ret; | ||
658 | int i; | ||
659 | |||
660 | if (tree_mod_dont_log(fs_info, NULL)) | ||
661 | return; | ||
662 | |||
663 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) | ||
664 | return; | ||
665 | |||
666 | /* speed this up by single seq for all operations? */ | ||
667 | for (i = 0; i < nr_items; i++) { | ||
668 | ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, | ||
669 | MOD_LOG_KEY_REMOVE); | ||
670 | BUG_ON(ret < 0); | ||
671 | ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, | ||
672 | MOD_LOG_KEY_ADD); | ||
673 | BUG_ON(ret < 0); | ||
674 | } | ||
675 | } | ||
676 | |||
677 | static inline void | ||
678 | tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
679 | int dst_offset, int src_offset, int nr_items) | ||
680 | { | ||
681 | int ret; | ||
682 | ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, | ||
683 | nr_items, GFP_NOFS); | ||
684 | BUG_ON(ret < 0); | ||
685 | } | ||
686 | |||
687 | static inline void | ||
688 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | ||
689 | struct extent_buffer *eb, | ||
690 | struct btrfs_disk_key *disk_key, int slot, int atomic) | ||
691 | { | ||
692 | int ret; | ||
693 | |||
694 | ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, | ||
695 | MOD_LOG_KEY_REPLACE, | ||
696 | atomic ? GFP_ATOMIC : GFP_NOFS); | ||
697 | BUG_ON(ret < 0); | ||
698 | } | ||
699 | |||
700 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
701 | struct extent_buffer *eb) | ||
702 | { | ||
703 | int i; | ||
704 | int ret; | ||
705 | u32 nritems; | ||
706 | |||
707 | if (tree_mod_dont_log(fs_info, eb)) | ||
708 | return; | ||
709 | |||
710 | nritems = btrfs_header_nritems(eb); | ||
711 | for (i = nritems - 1; i >= 0; i--) { | ||
712 | ret = tree_mod_log_insert_key(fs_info, eb, i, | ||
713 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
714 | BUG_ON(ret < 0); | ||
715 | } | ||
716 | } | ||
717 | |||
718 | static inline void | ||
719 | tree_mod_log_set_root_pointer(struct btrfs_root *root, | ||
720 | struct extent_buffer *new_root_node) | ||
721 | { | ||
722 | int ret; | ||
723 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
724 | ret = tree_mod_log_insert_root(root->fs_info, root->node, | ||
725 | new_root_node, GFP_NOFS); | ||
726 | BUG_ON(ret < 0); | ||
727 | } | ||
728 | |||
291 | /* | 729 | /* |
292 | * check if the tree block can be shared by multiple trees | 730 | * check if the tree block can be shared by multiple trees |
293 | */ | 731 | */ |
@@ -409,6 +847,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
409 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); | 847 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); |
410 | BUG_ON(ret); /* -ENOMEM */ | 848 | BUG_ON(ret); /* -ENOMEM */ |
411 | } | 849 | } |
850 | /* | ||
851 | * don't log freeing in case we're freeing the root node, this | ||
852 | * is done by tree_mod_log_set_root_pointer later | ||
853 | */ | ||
854 | if (buf != root->node && btrfs_header_level(buf) != 0) | ||
855 | tree_mod_log_free_eb(root->fs_info, buf); | ||
412 | clean_tree_block(trans, root, buf); | 856 | clean_tree_block(trans, root, buf); |
413 | *last_ref = 1; | 857 | *last_ref = 1; |
414 | } | 858 | } |
@@ -467,7 +911,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
467 | 911 | ||
468 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | 912 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, |
469 | root->root_key.objectid, &disk_key, | 913 | root->root_key.objectid, &disk_key, |
470 | level, search_start, empty_size, 1); | 914 | level, search_start, empty_size); |
471 | if (IS_ERR(cow)) | 915 | if (IS_ERR(cow)) |
472 | return PTR_ERR(cow); | 916 | return PTR_ERR(cow); |
473 | 917 | ||
@@ -506,10 +950,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
506 | parent_start = 0; | 950 | parent_start = 0; |
507 | 951 | ||
508 | extent_buffer_get(cow); | 952 | extent_buffer_get(cow); |
953 | tree_mod_log_set_root_pointer(root, cow); | ||
509 | rcu_assign_pointer(root->node, cow); | 954 | rcu_assign_pointer(root->node, cow); |
510 | 955 | ||
511 | btrfs_free_tree_block(trans, root, buf, parent_start, | 956 | btrfs_free_tree_block(trans, root, buf, parent_start, |
512 | last_ref, 1); | 957 | last_ref); |
513 | free_extent_buffer(buf); | 958 | free_extent_buffer(buf); |
514 | add_root_to_dirty_list(root); | 959 | add_root_to_dirty_list(root); |
515 | } else { | 960 | } else { |
@@ -519,13 +964,15 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
519 | parent_start = 0; | 964 | parent_start = 0; |
520 | 965 | ||
521 | WARN_ON(trans->transid != btrfs_header_generation(parent)); | 966 | WARN_ON(trans->transid != btrfs_header_generation(parent)); |
967 | tree_mod_log_insert_key(root->fs_info, parent, parent_slot, | ||
968 | MOD_LOG_KEY_REPLACE); | ||
522 | btrfs_set_node_blockptr(parent, parent_slot, | 969 | btrfs_set_node_blockptr(parent, parent_slot, |
523 | cow->start); | 970 | cow->start); |
524 | btrfs_set_node_ptr_generation(parent, parent_slot, | 971 | btrfs_set_node_ptr_generation(parent, parent_slot, |
525 | trans->transid); | 972 | trans->transid); |
526 | btrfs_mark_buffer_dirty(parent); | 973 | btrfs_mark_buffer_dirty(parent); |
527 | btrfs_free_tree_block(trans, root, buf, parent_start, | 974 | btrfs_free_tree_block(trans, root, buf, parent_start, |
528 | last_ref, 1); | 975 | last_ref); |
529 | } | 976 | } |
530 | if (unlock_orig) | 977 | if (unlock_orig) |
531 | btrfs_tree_unlock(buf); | 978 | btrfs_tree_unlock(buf); |
@@ -535,6 +982,210 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
535 | return 0; | 982 | return 0; |
536 | } | 983 | } |
537 | 984 | ||
985 | /* | ||
986 | * returns the logical address of the oldest predecessor of the given root. | ||
987 | * entries older than time_seq are ignored. | ||
988 | */ | ||
989 | static struct tree_mod_elem * | ||
990 | __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | ||
991 | struct btrfs_root *root, u64 time_seq) | ||
992 | { | ||
993 | struct tree_mod_elem *tm; | ||
994 | struct tree_mod_elem *found = NULL; | ||
995 | u64 root_logical = root->node->start; | ||
996 | int looped = 0; | ||
997 | |||
998 | if (!time_seq) | ||
999 | return 0; | ||
1000 | |||
1001 | /* | ||
1002 | * the very last operation that's logged for a root is the replacement | ||
1003 | * operation (if it is replaced at all). this has the index of the *new* | ||
1004 | * root, making it the very first operation that's logged for this root. | ||
1005 | */ | ||
1006 | while (1) { | ||
1007 | tm = tree_mod_log_search_oldest(fs_info, root_logical, | ||
1008 | time_seq); | ||
1009 | if (!looped && !tm) | ||
1010 | return 0; | ||
1011 | /* | ||
1012 | * we must have key remove operations in the log before the | ||
1013 | * replace operation. | ||
1014 | */ | ||
1015 | BUG_ON(!tm); | ||
1016 | |||
1017 | if (tm->op != MOD_LOG_ROOT_REPLACE) | ||
1018 | break; | ||
1019 | |||
1020 | found = tm; | ||
1021 | root_logical = tm->old_root.logical; | ||
1022 | BUG_ON(root_logical == root->node->start); | ||
1023 | looped = 1; | ||
1024 | } | ||
1025 | |||
1026 | return found; | ||
1027 | } | ||
1028 | |||
1029 | /* | ||
1030 | * tm is a pointer to the first operation to rewind within eb. then, all | ||
1031 | * previous operations will be rewinded (until we reach something older than | ||
1032 | * time_seq). | ||
1033 | */ | ||
1034 | static void | ||
1035 | __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | ||
1036 | struct tree_mod_elem *first_tm) | ||
1037 | { | ||
1038 | u32 n; | ||
1039 | struct rb_node *next; | ||
1040 | struct tree_mod_elem *tm = first_tm; | ||
1041 | unsigned long o_dst; | ||
1042 | unsigned long o_src; | ||
1043 | unsigned long p_size = sizeof(struct btrfs_key_ptr); | ||
1044 | |||
1045 | n = btrfs_header_nritems(eb); | ||
1046 | while (tm && tm->elem.seq >= time_seq) { | ||
1047 | /* | ||
1048 | * all the operations are recorded with the operator used for | ||
1049 | * the modification. as we're going backwards, we do the | ||
1050 | * opposite of each operation here. | ||
1051 | */ | ||
1052 | switch (tm->op) { | ||
1053 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: | ||
1054 | BUG_ON(tm->slot < n); | ||
1055 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: | ||
1056 | case MOD_LOG_KEY_REMOVE: | ||
1057 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
1058 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
1059 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
1060 | tm->generation); | ||
1061 | n++; | ||
1062 | break; | ||
1063 | case MOD_LOG_KEY_REPLACE: | ||
1064 | BUG_ON(tm->slot >= n); | ||
1065 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
1066 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
1067 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
1068 | tm->generation); | ||
1069 | break; | ||
1070 | case MOD_LOG_KEY_ADD: | ||
1071 | if (tm->slot != n - 1) { | ||
1072 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1073 | o_src = btrfs_node_key_ptr_offset(tm->slot + 1); | ||
1074 | memmove_extent_buffer(eb, o_dst, o_src, p_size); | ||
1075 | } | ||
1076 | n--; | ||
1077 | break; | ||
1078 | case MOD_LOG_MOVE_KEYS: | ||
1079 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1080 | o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); | ||
1081 | memmove_extent_buffer(eb, o_dst, o_src, | ||
1082 | tm->move.nr_items * p_size); | ||
1083 | break; | ||
1084 | case MOD_LOG_ROOT_REPLACE: | ||
1085 | /* | ||
1086 | * this operation is special. for roots, this must be | ||
1087 | * handled explicitly before rewinding. | ||
1088 | * for non-roots, this operation may exist if the node | ||
1089 | * was a root: root A -> child B; then A gets empty and | ||
1090 | * B is promoted to the new root. in the mod log, we'll | ||
1091 | * have a root-replace operation for B, a tree block | ||
1092 | * that is no root. we simply ignore that operation. | ||
1093 | */ | ||
1094 | break; | ||
1095 | } | ||
1096 | next = rb_next(&tm->node); | ||
1097 | if (!next) | ||
1098 | break; | ||
1099 | tm = container_of(next, struct tree_mod_elem, node); | ||
1100 | if (tm->index != first_tm->index) | ||
1101 | break; | ||
1102 | } | ||
1103 | btrfs_set_header_nritems(eb, n); | ||
1104 | } | ||
1105 | |||
1106 | static struct extent_buffer * | ||
1107 | tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
1108 | u64 time_seq) | ||
1109 | { | ||
1110 | struct extent_buffer *eb_rewin; | ||
1111 | struct tree_mod_elem *tm; | ||
1112 | |||
1113 | if (!time_seq) | ||
1114 | return eb; | ||
1115 | |||
1116 | if (btrfs_header_level(eb) == 0) | ||
1117 | return eb; | ||
1118 | |||
1119 | tm = tree_mod_log_search(fs_info, eb->start, time_seq); | ||
1120 | if (!tm) | ||
1121 | return eb; | ||
1122 | |||
1123 | if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { | ||
1124 | BUG_ON(tm->slot != 0); | ||
1125 | eb_rewin = alloc_dummy_extent_buffer(eb->start, | ||
1126 | fs_info->tree_root->nodesize); | ||
1127 | BUG_ON(!eb_rewin); | ||
1128 | btrfs_set_header_bytenr(eb_rewin, eb->start); | ||
1129 | btrfs_set_header_backref_rev(eb_rewin, | ||
1130 | btrfs_header_backref_rev(eb)); | ||
1131 | btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); | ||
1132 | btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); | ||
1133 | } else { | ||
1134 | eb_rewin = btrfs_clone_extent_buffer(eb); | ||
1135 | BUG_ON(!eb_rewin); | ||
1136 | } | ||
1137 | |||
1138 | extent_buffer_get(eb_rewin); | ||
1139 | free_extent_buffer(eb); | ||
1140 | |||
1141 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); | ||
1142 | |||
1143 | return eb_rewin; | ||
1144 | } | ||
1145 | |||
1146 | static inline struct extent_buffer * | ||
1147 | get_old_root(struct btrfs_root *root, u64 time_seq) | ||
1148 | { | ||
1149 | struct tree_mod_elem *tm; | ||
1150 | struct extent_buffer *eb; | ||
1151 | struct tree_mod_root *old_root; | ||
1152 | u64 old_generation; | ||
1153 | |||
1154 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); | ||
1155 | if (!tm) | ||
1156 | return root->node; | ||
1157 | |||
1158 | old_root = &tm->old_root; | ||
1159 | old_generation = tm->generation; | ||
1160 | |||
1161 | tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); | ||
1162 | /* | ||
1163 | * there was an item in the log when __tree_mod_log_oldest_root | ||
1164 | * returned. this one must not go away, because the time_seq passed to | ||
1165 | * us must be blocking its removal. | ||
1166 | */ | ||
1167 | BUG_ON(!tm); | ||
1168 | |||
1169 | if (old_root->logical == root->node->start) { | ||
1170 | /* there are logged operations for the current root */ | ||
1171 | eb = btrfs_clone_extent_buffer(root->node); | ||
1172 | } else { | ||
1173 | /* there's a root replace operation for the current root */ | ||
1174 | eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, | ||
1175 | root->nodesize); | ||
1176 | btrfs_set_header_bytenr(eb, eb->start); | ||
1177 | btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); | ||
1178 | btrfs_set_header_owner(eb, root->root_key.objectid); | ||
1179 | } | ||
1180 | if (!eb) | ||
1181 | return NULL; | ||
1182 | btrfs_set_header_level(eb, old_root->level); | ||
1183 | btrfs_set_header_generation(eb, old_generation); | ||
1184 | __tree_mod_log_rewind(eb, time_seq, tm); | ||
1185 | |||
1186 | return eb; | ||
1187 | } | ||
1188 | |||
538 | static inline int should_cow_block(struct btrfs_trans_handle *trans, | 1189 | static inline int should_cow_block(struct btrfs_trans_handle *trans, |
539 | struct btrfs_root *root, | 1190 | struct btrfs_root *root, |
540 | struct extent_buffer *buf) | 1191 | struct extent_buffer *buf) |
@@ -976,6 +1627,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
976 | goto enospc; | 1627 | goto enospc; |
977 | } | 1628 | } |
978 | 1629 | ||
1630 | tree_mod_log_set_root_pointer(root, child); | ||
979 | rcu_assign_pointer(root->node, child); | 1631 | rcu_assign_pointer(root->node, child); |
980 | 1632 | ||
981 | add_root_to_dirty_list(root); | 1633 | add_root_to_dirty_list(root); |
@@ -989,7 +1641,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
989 | free_extent_buffer(mid); | 1641 | free_extent_buffer(mid); |
990 | 1642 | ||
991 | root_sub_used(root, mid->len); | 1643 | root_sub_used(root, mid->len); |
992 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1644 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
993 | /* once for the root ptr */ | 1645 | /* once for the root ptr */ |
994 | free_extent_buffer_stale(mid); | 1646 | free_extent_buffer_stale(mid); |
995 | return 0; | 1647 | return 0; |
@@ -1042,14 +1694,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1042 | if (btrfs_header_nritems(right) == 0) { | 1694 | if (btrfs_header_nritems(right) == 0) { |
1043 | clean_tree_block(trans, root, right); | 1695 | clean_tree_block(trans, root, right); |
1044 | btrfs_tree_unlock(right); | 1696 | btrfs_tree_unlock(right); |
1045 | del_ptr(trans, root, path, level + 1, pslot + 1); | 1697 | del_ptr(trans, root, path, level + 1, pslot + 1, 1); |
1046 | root_sub_used(root, right->len); | 1698 | root_sub_used(root, right->len); |
1047 | btrfs_free_tree_block(trans, root, right, 0, 1, 0); | 1699 | btrfs_free_tree_block(trans, root, right, 0, 1); |
1048 | free_extent_buffer_stale(right); | 1700 | free_extent_buffer_stale(right); |
1049 | right = NULL; | 1701 | right = NULL; |
1050 | } else { | 1702 | } else { |
1051 | struct btrfs_disk_key right_key; | 1703 | struct btrfs_disk_key right_key; |
1052 | btrfs_node_key(right, &right_key, 0); | 1704 | btrfs_node_key(right, &right_key, 0); |
1705 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1706 | &right_key, pslot + 1, 0); | ||
1053 | btrfs_set_node_key(parent, &right_key, pslot + 1); | 1707 | btrfs_set_node_key(parent, &right_key, pslot + 1); |
1054 | btrfs_mark_buffer_dirty(parent); | 1708 | btrfs_mark_buffer_dirty(parent); |
1055 | } | 1709 | } |
@@ -1084,15 +1738,17 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1084 | if (btrfs_header_nritems(mid) == 0) { | 1738 | if (btrfs_header_nritems(mid) == 0) { |
1085 | clean_tree_block(trans, root, mid); | 1739 | clean_tree_block(trans, root, mid); |
1086 | btrfs_tree_unlock(mid); | 1740 | btrfs_tree_unlock(mid); |
1087 | del_ptr(trans, root, path, level + 1, pslot); | 1741 | del_ptr(trans, root, path, level + 1, pslot, 1); |
1088 | root_sub_used(root, mid->len); | 1742 | root_sub_used(root, mid->len); |
1089 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1743 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
1090 | free_extent_buffer_stale(mid); | 1744 | free_extent_buffer_stale(mid); |
1091 | mid = NULL; | 1745 | mid = NULL; |
1092 | } else { | 1746 | } else { |
1093 | /* update the parent key to reflect our changes */ | 1747 | /* update the parent key to reflect our changes */ |
1094 | struct btrfs_disk_key mid_key; | 1748 | struct btrfs_disk_key mid_key; |
1095 | btrfs_node_key(mid, &mid_key, 0); | 1749 | btrfs_node_key(mid, &mid_key, 0); |
1750 | tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, | ||
1751 | pslot, 0); | ||
1096 | btrfs_set_node_key(parent, &mid_key, pslot); | 1752 | btrfs_set_node_key(parent, &mid_key, pslot); |
1097 | btrfs_mark_buffer_dirty(parent); | 1753 | btrfs_mark_buffer_dirty(parent); |
1098 | } | 1754 | } |
@@ -1190,6 +1846,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1190 | struct btrfs_disk_key disk_key; | 1846 | struct btrfs_disk_key disk_key; |
1191 | orig_slot += left_nr; | 1847 | orig_slot += left_nr; |
1192 | btrfs_node_key(mid, &disk_key, 0); | 1848 | btrfs_node_key(mid, &disk_key, 0); |
1849 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1850 | &disk_key, pslot, 0); | ||
1193 | btrfs_set_node_key(parent, &disk_key, pslot); | 1851 | btrfs_set_node_key(parent, &disk_key, pslot); |
1194 | btrfs_mark_buffer_dirty(parent); | 1852 | btrfs_mark_buffer_dirty(parent); |
1195 | if (btrfs_header_nritems(left) > orig_slot) { | 1853 | if (btrfs_header_nritems(left) > orig_slot) { |
@@ -1241,6 +1899,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1241 | struct btrfs_disk_key disk_key; | 1899 | struct btrfs_disk_key disk_key; |
1242 | 1900 | ||
1243 | btrfs_node_key(right, &disk_key, 0); | 1901 | btrfs_node_key(right, &disk_key, 0); |
1902 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1903 | &disk_key, pslot + 1, 0); | ||
1244 | btrfs_set_node_key(parent, &disk_key, pslot + 1); | 1904 | btrfs_set_node_key(parent, &disk_key, pslot + 1); |
1245 | btrfs_mark_buffer_dirty(parent); | 1905 | btrfs_mark_buffer_dirty(parent); |
1246 | 1906 | ||
@@ -1498,7 +2158,7 @@ static int | |||
1498 | read_block_for_search(struct btrfs_trans_handle *trans, | 2158 | read_block_for_search(struct btrfs_trans_handle *trans, |
1499 | struct btrfs_root *root, struct btrfs_path *p, | 2159 | struct btrfs_root *root, struct btrfs_path *p, |
1500 | struct extent_buffer **eb_ret, int level, int slot, | 2160 | struct extent_buffer **eb_ret, int level, int slot, |
1501 | struct btrfs_key *key) | 2161 | struct btrfs_key *key, u64 time_seq) |
1502 | { | 2162 | { |
1503 | u64 blocknr; | 2163 | u64 blocknr; |
1504 | u64 gen; | 2164 | u64 gen; |
@@ -1852,7 +2512,7 @@ cow_done: | |||
1852 | } | 2512 | } |
1853 | 2513 | ||
1854 | err = read_block_for_search(trans, root, p, | 2514 | err = read_block_for_search(trans, root, p, |
1855 | &b, level, slot, key); | 2515 | &b, level, slot, key, 0); |
1856 | if (err == -EAGAIN) | 2516 | if (err == -EAGAIN) |
1857 | goto again; | 2517 | goto again; |
1858 | if (err) { | 2518 | if (err) { |
@@ -1924,6 +2584,115 @@ done: | |||
1924 | } | 2584 | } |
1925 | 2585 | ||
1926 | /* | 2586 | /* |
2587 | * Like btrfs_search_slot, this looks for a key in the given tree. It uses the | ||
2588 | * current state of the tree together with the operations recorded in the tree | ||
2589 | * modification log to search for the key in a previous version of this tree, as | ||
2590 | * denoted by the time_seq parameter. | ||
2591 | * | ||
2592 | * Naturally, there is no support for insert, delete or cow operations. | ||
2593 | * | ||
2594 | * The resulting path and return value will be set up as if we called | ||
2595 | * btrfs_search_slot at that point in time with ins_len and cow both set to 0. | ||
2596 | */ | ||
2597 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
2598 | struct btrfs_path *p, u64 time_seq) | ||
2599 | { | ||
2600 | struct extent_buffer *b; | ||
2601 | int slot; | ||
2602 | int ret; | ||
2603 | int err; | ||
2604 | int level; | ||
2605 | int lowest_unlock = 1; | ||
2606 | u8 lowest_level = 0; | ||
2607 | |||
2608 | lowest_level = p->lowest_level; | ||
2609 | WARN_ON(p->nodes[0] != NULL); | ||
2610 | |||
2611 | if (p->search_commit_root) { | ||
2612 | BUG_ON(time_seq); | ||
2613 | return btrfs_search_slot(NULL, root, key, p, 0, 0); | ||
2614 | } | ||
2615 | |||
2616 | again: | ||
2617 | b = get_old_root(root, time_seq); | ||
2618 | extent_buffer_get(b); | ||
2619 | level = btrfs_header_level(b); | ||
2620 | btrfs_tree_read_lock(b); | ||
2621 | p->locks[level] = BTRFS_READ_LOCK; | ||
2622 | |||
2623 | while (b) { | ||
2624 | level = btrfs_header_level(b); | ||
2625 | p->nodes[level] = b; | ||
2626 | btrfs_clear_path_blocking(p, NULL, 0); | ||
2627 | |||
2628 | /* | ||
2629 | * we have a lock on b and as long as we aren't changing | ||
2630 | * the tree, there is no way to for the items in b to change. | ||
2631 | * It is safe to drop the lock on our parent before we | ||
2632 | * go through the expensive btree search on b. | ||
2633 | */ | ||
2634 | btrfs_unlock_up_safe(p, level + 1); | ||
2635 | |||
2636 | ret = bin_search(b, key, level, &slot); | ||
2637 | |||
2638 | if (level != 0) { | ||
2639 | int dec = 0; | ||
2640 | if (ret && slot > 0) { | ||
2641 | dec = 1; | ||
2642 | slot -= 1; | ||
2643 | } | ||
2644 | p->slots[level] = slot; | ||
2645 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
2646 | |||
2647 | if (level == lowest_level) { | ||
2648 | if (dec) | ||
2649 | p->slots[level]++; | ||
2650 | goto done; | ||
2651 | } | ||
2652 | |||
2653 | err = read_block_for_search(NULL, root, p, &b, level, | ||
2654 | slot, key, time_seq); | ||
2655 | if (err == -EAGAIN) | ||
2656 | goto again; | ||
2657 | if (err) { | ||
2658 | ret = err; | ||
2659 | goto done; | ||
2660 | } | ||
2661 | |||
2662 | level = btrfs_header_level(b); | ||
2663 | err = btrfs_try_tree_read_lock(b); | ||
2664 | if (!err) { | ||
2665 | btrfs_set_path_blocking(p); | ||
2666 | btrfs_tree_read_lock(b); | ||
2667 | btrfs_clear_path_blocking(p, b, | ||
2668 | BTRFS_READ_LOCK); | ||
2669 | } | ||
2670 | p->locks[level] = BTRFS_READ_LOCK; | ||
2671 | p->nodes[level] = b; | ||
2672 | b = tree_mod_log_rewind(root->fs_info, b, time_seq); | ||
2673 | if (b != p->nodes[level]) { | ||
2674 | btrfs_tree_unlock_rw(p->nodes[level], | ||
2675 | p->locks[level]); | ||
2676 | p->locks[level] = 0; | ||
2677 | p->nodes[level] = b; | ||
2678 | } | ||
2679 | } else { | ||
2680 | p->slots[level] = slot; | ||
2681 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
2682 | goto done; | ||
2683 | } | ||
2684 | } | ||
2685 | ret = 1; | ||
2686 | done: | ||
2687 | if (!p->leave_spinning) | ||
2688 | btrfs_set_path_blocking(p); | ||
2689 | if (ret < 0) | ||
2690 | btrfs_release_path(p); | ||
2691 | |||
2692 | return ret; | ||
2693 | } | ||
2694 | |||
2695 | /* | ||
1927 | * adjust the pointers going up the tree, starting at level | 2696 | * adjust the pointers going up the tree, starting at level |
1928 | * making sure the right key of each node is points to 'key'. | 2697 | * making sure the right key of each node is points to 'key'. |
1929 | * This is used after shifting pointers to the left, so it stops | 2698 | * This is used after shifting pointers to the left, so it stops |
@@ -1943,6 +2712,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, | |||
1943 | if (!path->nodes[i]) | 2712 | if (!path->nodes[i]) |
1944 | break; | 2713 | break; |
1945 | t = path->nodes[i]; | 2714 | t = path->nodes[i]; |
2715 | tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); | ||
1946 | btrfs_set_node_key(t, key, tslot); | 2716 | btrfs_set_node_key(t, key, tslot); |
1947 | btrfs_mark_buffer_dirty(path->nodes[i]); | 2717 | btrfs_mark_buffer_dirty(path->nodes[i]); |
1948 | if (tslot != 0) | 2718 | if (tslot != 0) |
@@ -2025,12 +2795,16 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
2025 | } else | 2795 | } else |
2026 | push_items = min(src_nritems - 8, push_items); | 2796 | push_items = min(src_nritems - 8, push_items); |
2027 | 2797 | ||
2798 | tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, | ||
2799 | push_items); | ||
2028 | copy_extent_buffer(dst, src, | 2800 | copy_extent_buffer(dst, src, |
2029 | btrfs_node_key_ptr_offset(dst_nritems), | 2801 | btrfs_node_key_ptr_offset(dst_nritems), |
2030 | btrfs_node_key_ptr_offset(0), | 2802 | btrfs_node_key_ptr_offset(0), |
2031 | push_items * sizeof(struct btrfs_key_ptr)); | 2803 | push_items * sizeof(struct btrfs_key_ptr)); |
2032 | 2804 | ||
2033 | if (push_items < src_nritems) { | 2805 | if (push_items < src_nritems) { |
2806 | tree_mod_log_eb_move(root->fs_info, src, 0, push_items, | ||
2807 | src_nritems - push_items); | ||
2034 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), | 2808 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), |
2035 | btrfs_node_key_ptr_offset(push_items), | 2809 | btrfs_node_key_ptr_offset(push_items), |
2036 | (src_nritems - push_items) * | 2810 | (src_nritems - push_items) * |
@@ -2084,11 +2858,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
2084 | if (max_push < push_items) | 2858 | if (max_push < push_items) |
2085 | push_items = max_push; | 2859 | push_items = max_push; |
2086 | 2860 | ||
2861 | tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems); | ||
2087 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), | 2862 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), |
2088 | btrfs_node_key_ptr_offset(0), | 2863 | btrfs_node_key_ptr_offset(0), |
2089 | (dst_nritems) * | 2864 | (dst_nritems) * |
2090 | sizeof(struct btrfs_key_ptr)); | 2865 | sizeof(struct btrfs_key_ptr)); |
2091 | 2866 | ||
2867 | tree_mod_log_eb_copy(root->fs_info, dst, src, 0, | ||
2868 | src_nritems - push_items, push_items); | ||
2092 | copy_extent_buffer(dst, src, | 2869 | copy_extent_buffer(dst, src, |
2093 | btrfs_node_key_ptr_offset(0), | 2870 | btrfs_node_key_ptr_offset(0), |
2094 | btrfs_node_key_ptr_offset(src_nritems - push_items), | 2871 | btrfs_node_key_ptr_offset(src_nritems - push_items), |
@@ -2131,7 +2908,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2131 | 2908 | ||
2132 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 2909 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2133 | root->root_key.objectid, &lower_key, | 2910 | root->root_key.objectid, &lower_key, |
2134 | level, root->node->start, 0, 0); | 2911 | level, root->node->start, 0); |
2135 | if (IS_ERR(c)) | 2912 | if (IS_ERR(c)) |
2136 | return PTR_ERR(c); | 2913 | return PTR_ERR(c); |
2137 | 2914 | ||
@@ -2163,6 +2940,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2163 | btrfs_mark_buffer_dirty(c); | 2940 | btrfs_mark_buffer_dirty(c); |
2164 | 2941 | ||
2165 | old = root->node; | 2942 | old = root->node; |
2943 | tree_mod_log_set_root_pointer(root, c); | ||
2166 | rcu_assign_pointer(root->node, c); | 2944 | rcu_assign_pointer(root->node, c); |
2167 | 2945 | ||
2168 | /* the super has an extra ref to root->node */ | 2946 | /* the super has an extra ref to root->node */ |
@@ -2186,10 +2964,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2186 | static void insert_ptr(struct btrfs_trans_handle *trans, | 2964 | static void insert_ptr(struct btrfs_trans_handle *trans, |
2187 | struct btrfs_root *root, struct btrfs_path *path, | 2965 | struct btrfs_root *root, struct btrfs_path *path, |
2188 | struct btrfs_disk_key *key, u64 bytenr, | 2966 | struct btrfs_disk_key *key, u64 bytenr, |
2189 | int slot, int level) | 2967 | int slot, int level, int tree_mod_log) |
2190 | { | 2968 | { |
2191 | struct extent_buffer *lower; | 2969 | struct extent_buffer *lower; |
2192 | int nritems; | 2970 | int nritems; |
2971 | int ret; | ||
2193 | 2972 | ||
2194 | BUG_ON(!path->nodes[level]); | 2973 | BUG_ON(!path->nodes[level]); |
2195 | btrfs_assert_tree_locked(path->nodes[level]); | 2974 | btrfs_assert_tree_locked(path->nodes[level]); |
@@ -2198,11 +2977,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans, | |||
2198 | BUG_ON(slot > nritems); | 2977 | BUG_ON(slot > nritems); |
2199 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); | 2978 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); |
2200 | if (slot != nritems) { | 2979 | if (slot != nritems) { |
2980 | if (tree_mod_log && level) | ||
2981 | tree_mod_log_eb_move(root->fs_info, lower, slot + 1, | ||
2982 | slot, nritems - slot); | ||
2201 | memmove_extent_buffer(lower, | 2983 | memmove_extent_buffer(lower, |
2202 | btrfs_node_key_ptr_offset(slot + 1), | 2984 | btrfs_node_key_ptr_offset(slot + 1), |
2203 | btrfs_node_key_ptr_offset(slot), | 2985 | btrfs_node_key_ptr_offset(slot), |
2204 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); | 2986 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); |
2205 | } | 2987 | } |
2988 | if (tree_mod_log && level) { | ||
2989 | ret = tree_mod_log_insert_key(root->fs_info, lower, slot, | ||
2990 | MOD_LOG_KEY_ADD); | ||
2991 | BUG_ON(ret < 0); | ||
2992 | } | ||
2206 | btrfs_set_node_key(lower, key, slot); | 2993 | btrfs_set_node_key(lower, key, slot); |
2207 | btrfs_set_node_blockptr(lower, slot, bytenr); | 2994 | btrfs_set_node_blockptr(lower, slot, bytenr); |
2208 | WARN_ON(trans->transid == 0); | 2995 | WARN_ON(trans->transid == 0); |
@@ -2254,7 +3041,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2254 | 3041 | ||
2255 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3042 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2256 | root->root_key.objectid, | 3043 | root->root_key.objectid, |
2257 | &disk_key, level, c->start, 0, 0); | 3044 | &disk_key, level, c->start, 0); |
2258 | if (IS_ERR(split)) | 3045 | if (IS_ERR(split)) |
2259 | return PTR_ERR(split); | 3046 | return PTR_ERR(split); |
2260 | 3047 | ||
@@ -2273,7 +3060,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2273 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | 3060 | (unsigned long)btrfs_header_chunk_tree_uuid(split), |
2274 | BTRFS_UUID_SIZE); | 3061 | BTRFS_UUID_SIZE); |
2275 | 3062 | ||
2276 | 3063 | tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); | |
2277 | copy_extent_buffer(split, c, | 3064 | copy_extent_buffer(split, c, |
2278 | btrfs_node_key_ptr_offset(0), | 3065 | btrfs_node_key_ptr_offset(0), |
2279 | btrfs_node_key_ptr_offset(mid), | 3066 | btrfs_node_key_ptr_offset(mid), |
@@ -2286,7 +3073,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2286 | btrfs_mark_buffer_dirty(split); | 3073 | btrfs_mark_buffer_dirty(split); |
2287 | 3074 | ||
2288 | insert_ptr(trans, root, path, &disk_key, split->start, | 3075 | insert_ptr(trans, root, path, &disk_key, split->start, |
2289 | path->slots[level + 1] + 1, level + 1); | 3076 | path->slots[level + 1] + 1, level + 1, 1); |
2290 | 3077 | ||
2291 | if (path->slots[level] >= mid) { | 3078 | if (path->slots[level] >= mid) { |
2292 | path->slots[level] -= mid; | 3079 | path->slots[level] -= mid; |
@@ -2823,7 +3610,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, | |||
2823 | btrfs_set_header_nritems(l, mid); | 3610 | btrfs_set_header_nritems(l, mid); |
2824 | btrfs_item_key(right, &disk_key, 0); | 3611 | btrfs_item_key(right, &disk_key, 0); |
2825 | insert_ptr(trans, root, path, &disk_key, right->start, | 3612 | insert_ptr(trans, root, path, &disk_key, right->start, |
2826 | path->slots[1] + 1, 1); | 3613 | path->slots[1] + 1, 1, 0); |
2827 | 3614 | ||
2828 | btrfs_mark_buffer_dirty(right); | 3615 | btrfs_mark_buffer_dirty(right); |
2829 | btrfs_mark_buffer_dirty(l); | 3616 | btrfs_mark_buffer_dirty(l); |
@@ -3006,7 +3793,7 @@ again: | |||
3006 | 3793 | ||
3007 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 3794 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
3008 | root->root_key.objectid, | 3795 | root->root_key.objectid, |
3009 | &disk_key, 0, l->start, 0, 0); | 3796 | &disk_key, 0, l->start, 0); |
3010 | if (IS_ERR(right)) | 3797 | if (IS_ERR(right)) |
3011 | return PTR_ERR(right); | 3798 | return PTR_ERR(right); |
3012 | 3799 | ||
@@ -3030,7 +3817,7 @@ again: | |||
3030 | if (mid <= slot) { | 3817 | if (mid <= slot) { |
3031 | btrfs_set_header_nritems(right, 0); | 3818 | btrfs_set_header_nritems(right, 0); |
3032 | insert_ptr(trans, root, path, &disk_key, right->start, | 3819 | insert_ptr(trans, root, path, &disk_key, right->start, |
3033 | path->slots[1] + 1, 1); | 3820 | path->slots[1] + 1, 1, 0); |
3034 | btrfs_tree_unlock(path->nodes[0]); | 3821 | btrfs_tree_unlock(path->nodes[0]); |
3035 | free_extent_buffer(path->nodes[0]); | 3822 | free_extent_buffer(path->nodes[0]); |
3036 | path->nodes[0] = right; | 3823 | path->nodes[0] = right; |
@@ -3039,7 +3826,7 @@ again: | |||
3039 | } else { | 3826 | } else { |
3040 | btrfs_set_header_nritems(right, 0); | 3827 | btrfs_set_header_nritems(right, 0); |
3041 | insert_ptr(trans, root, path, &disk_key, right->start, | 3828 | insert_ptr(trans, root, path, &disk_key, right->start, |
3042 | path->slots[1], 1); | 3829 | path->slots[1], 1, 0); |
3043 | btrfs_tree_unlock(path->nodes[0]); | 3830 | btrfs_tree_unlock(path->nodes[0]); |
3044 | free_extent_buffer(path->nodes[0]); | 3831 | free_extent_buffer(path->nodes[0]); |
3045 | path->nodes[0] = right; | 3832 | path->nodes[0] = right; |
@@ -3751,19 +4538,29 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3751 | * empty a node. | 4538 | * empty a node. |
3752 | */ | 4539 | */ |
3753 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 4540 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3754 | struct btrfs_path *path, int level, int slot) | 4541 | struct btrfs_path *path, int level, int slot, |
4542 | int tree_mod_log) | ||
3755 | { | 4543 | { |
3756 | struct extent_buffer *parent = path->nodes[level]; | 4544 | struct extent_buffer *parent = path->nodes[level]; |
3757 | u32 nritems; | 4545 | u32 nritems; |
4546 | int ret; | ||
3758 | 4547 | ||
3759 | nritems = btrfs_header_nritems(parent); | 4548 | nritems = btrfs_header_nritems(parent); |
3760 | if (slot != nritems - 1) { | 4549 | if (slot != nritems - 1) { |
4550 | if (tree_mod_log && level) | ||
4551 | tree_mod_log_eb_move(root->fs_info, parent, slot, | ||
4552 | slot + 1, nritems - slot - 1); | ||
3761 | memmove_extent_buffer(parent, | 4553 | memmove_extent_buffer(parent, |
3762 | btrfs_node_key_ptr_offset(slot), | 4554 | btrfs_node_key_ptr_offset(slot), |
3763 | btrfs_node_key_ptr_offset(slot + 1), | 4555 | btrfs_node_key_ptr_offset(slot + 1), |
3764 | sizeof(struct btrfs_key_ptr) * | 4556 | sizeof(struct btrfs_key_ptr) * |
3765 | (nritems - slot - 1)); | 4557 | (nritems - slot - 1)); |
4558 | } else if (tree_mod_log && level) { | ||
4559 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, | ||
4560 | MOD_LOG_KEY_REMOVE); | ||
4561 | BUG_ON(ret < 0); | ||
3766 | } | 4562 | } |
4563 | |||
3767 | nritems--; | 4564 | nritems--; |
3768 | btrfs_set_header_nritems(parent, nritems); | 4565 | btrfs_set_header_nritems(parent, nritems); |
3769 | if (nritems == 0 && parent == root->node) { | 4566 | if (nritems == 0 && parent == root->node) { |
@@ -3795,7 +4592,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3795 | struct extent_buffer *leaf) | 4592 | struct extent_buffer *leaf) |
3796 | { | 4593 | { |
3797 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); | 4594 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); |
3798 | del_ptr(trans, root, path, 1, path->slots[1]); | 4595 | del_ptr(trans, root, path, 1, path->slots[1], 1); |
3799 | 4596 | ||
3800 | /* | 4597 | /* |
3801 | * btrfs_free_extent is expensive, we want to make sure we | 4598 | * btrfs_free_extent is expensive, we want to make sure we |
@@ -3806,7 +4603,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3806 | root_sub_used(root, leaf->len); | 4603 | root_sub_used(root, leaf->len); |
3807 | 4604 | ||
3808 | extent_buffer_get(leaf); | 4605 | extent_buffer_get(leaf); |
3809 | btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); | 4606 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
3810 | free_extent_buffer_stale(leaf); | 4607 | free_extent_buffer_stale(leaf); |
3811 | } | 4608 | } |
3812 | /* | 4609 | /* |
@@ -4273,7 +5070,7 @@ again: | |||
4273 | next = c; | 5070 | next = c; |
4274 | next_rw_lock = path->locks[level]; | 5071 | next_rw_lock = path->locks[level]; |
4275 | ret = read_block_for_search(NULL, root, path, &next, level, | 5072 | ret = read_block_for_search(NULL, root, path, &next, level, |
4276 | slot, &key); | 5073 | slot, &key, 0); |
4277 | if (ret == -EAGAIN) | 5074 | if (ret == -EAGAIN) |
4278 | goto again; | 5075 | goto again; |
4279 | 5076 | ||
@@ -4310,7 +5107,7 @@ again: | |||
4310 | break; | 5107 | break; |
4311 | 5108 | ||
4312 | ret = read_block_for_search(NULL, root, path, &next, level, | 5109 | ret = read_block_for_search(NULL, root, path, &next, level, |
4313 | 0, &key); | 5110 | 0, &key, 0); |
4314 | if (ret == -EAGAIN) | 5111 | if (ret == -EAGAIN) |
4315 | goto again; | 5112 | goto again; |
4316 | 5113 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1c665ebe47e0..0151ca1ac657 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -1140,6 +1140,15 @@ struct btrfs_fs_info { | |||
1140 | spinlock_t delayed_iput_lock; | 1140 | spinlock_t delayed_iput_lock; |
1141 | struct list_head delayed_iputs; | 1141 | struct list_head delayed_iputs; |
1142 | 1142 | ||
1143 | /* this protects tree_mod_seq_list */ | ||
1144 | spinlock_t tree_mod_seq_lock; | ||
1145 | atomic_t tree_mod_seq; | ||
1146 | struct list_head tree_mod_seq_list; | ||
1147 | |||
1148 | /* this protects tree_mod_log */ | ||
1149 | rwlock_t tree_mod_log_lock; | ||
1150 | struct rb_root tree_mod_log; | ||
1151 | |||
1143 | atomic_t nr_async_submits; | 1152 | atomic_t nr_async_submits; |
1144 | atomic_t async_submit_draining; | 1153 | atomic_t async_submit_draining; |
1145 | atomic_t nr_async_bios; | 1154 | atomic_t nr_async_bios; |
@@ -2537,11 +2546,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
2537 | struct btrfs_root *root, u32 blocksize, | 2546 | struct btrfs_root *root, u32 blocksize, |
2538 | u64 parent, u64 root_objectid, | 2547 | u64 parent, u64 root_objectid, |
2539 | struct btrfs_disk_key *key, int level, | 2548 | struct btrfs_disk_key *key, int level, |
2540 | u64 hint, u64 empty_size, int for_cow); | 2549 | u64 hint, u64 empty_size); |
2541 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2550 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
2542 | struct btrfs_root *root, | 2551 | struct btrfs_root *root, |
2543 | struct extent_buffer *buf, | 2552 | struct extent_buffer *buf, |
2544 | u64 parent, int last_ref, int for_cow); | 2553 | u64 parent, int last_ref); |
2545 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2554 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
2546 | struct btrfs_root *root, | 2555 | struct btrfs_root *root, |
2547 | u64 bytenr, u32 blocksize, | 2556 | u64 bytenr, u32 blocksize, |
@@ -2700,6 +2709,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, | |||
2700 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | 2709 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root |
2701 | *root, struct btrfs_key *key, struct btrfs_path *p, int | 2710 | *root, struct btrfs_key *key, struct btrfs_path *p, int |
2702 | ins_len, int cow); | 2711 | ins_len, int cow); |
2712 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
2713 | struct btrfs_path *p, u64 time_seq); | ||
2703 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 2714 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
2704 | struct btrfs_root *root, struct extent_buffer *parent, | 2715 | struct btrfs_root *root, struct extent_buffer *parent, |
2705 | int start_slot, int cache_only, u64 *last_ret, | 2716 | int start_slot, int cache_only, u64 *last_ret, |
@@ -3139,4 +3150,23 @@ void btrfs_reada_detach(void *handle); | |||
3139 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 3150 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, |
3140 | u64 start, int err); | 3151 | u64 start, int err); |
3141 | 3152 | ||
3153 | /* delayed seq elem */ | ||
3154 | struct seq_list { | ||
3155 | struct list_head list; | ||
3156 | u64 seq; | ||
3157 | u32 flags; | ||
3158 | }; | ||
3159 | |||
3160 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3161 | struct seq_list *elem); | ||
3162 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3163 | struct seq_list *elem); | ||
3164 | |||
3165 | static inline int is_fstree(u64 rootid) | ||
3166 | { | ||
3167 | if (rootid == BTRFS_FS_TREE_OBJECTID || | ||
3168 | (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
3169 | return 1; | ||
3170 | return 0; | ||
3171 | } | ||
3142 | #endif | 3172 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 69f22e3ab3bc..13ae7b04790e 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -525,7 +525,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
525 | ref->is_head = 0; | 525 | ref->is_head = 0; |
526 | ref->in_tree = 1; | 526 | ref->in_tree = 1; |
527 | 527 | ||
528 | if (need_ref_seq(for_cow, ref_root)) | 528 | if (is_fstree(ref_root)) |
529 | seq = inc_delayed_seq(delayed_refs); | 529 | seq = inc_delayed_seq(delayed_refs); |
530 | ref->seq = seq; | 530 | ref->seq = seq; |
531 | 531 | ||
@@ -584,7 +584,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
584 | ref->is_head = 0; | 584 | ref->is_head = 0; |
585 | ref->in_tree = 1; | 585 | ref->in_tree = 1; |
586 | 586 | ||
587 | if (need_ref_seq(for_cow, ref_root)) | 587 | if (is_fstree(ref_root)) |
588 | seq = inc_delayed_seq(delayed_refs); | 588 | seq = inc_delayed_seq(delayed_refs); |
589 | ref->seq = seq; | 589 | ref->seq = seq; |
590 | 590 | ||
@@ -658,10 +658,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, | 658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, |
659 | num_bytes, parent, ref_root, level, action, | 659 | num_bytes, parent, ref_root, level, action, |
660 | for_cow); | 660 | for_cow); |
661 | if (!need_ref_seq(for_cow, ref_root) && | 661 | if (!is_fstree(ref_root) && |
662 | waitqueue_active(&delayed_refs->seq_wait)) | 662 | waitqueue_active(&delayed_refs->seq_wait)) |
663 | wake_up(&delayed_refs->seq_wait); | 663 | wake_up(&delayed_refs->seq_wait); |
664 | spin_unlock(&delayed_refs->lock); | 664 | spin_unlock(&delayed_refs->lock); |
665 | |||
665 | return 0; | 666 | return 0; |
666 | } | 667 | } |
667 | 668 | ||
@@ -706,10 +707,11 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
706 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, | 707 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, |
707 | num_bytes, parent, ref_root, owner, offset, | 708 | num_bytes, parent, ref_root, owner, offset, |
708 | action, for_cow); | 709 | action, for_cow); |
709 | if (!need_ref_seq(for_cow, ref_root) && | 710 | if (!is_fstree(ref_root) && |
710 | waitqueue_active(&delayed_refs->seq_wait)) | 711 | waitqueue_active(&delayed_refs->seq_wait)) |
711 | wake_up(&delayed_refs->seq_wait); | 712 | wake_up(&delayed_refs->seq_wait); |
712 | spin_unlock(&delayed_refs->lock); | 713 | spin_unlock(&delayed_refs->lock); |
714 | |||
713 | return 0; | 715 | return 0; |
714 | } | 716 | } |
715 | 717 | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index d8f244d94925..413927fb9957 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -195,11 +195,6 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
196 | struct list_head *cluster, u64 search_start); | 196 | struct list_head *cluster, u64 search_start); |
197 | 197 | ||
198 | struct seq_list { | ||
199 | struct list_head list; | ||
200 | u64 seq; | ||
201 | }; | ||
202 | |||
203 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) | 198 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) |
204 | { | 199 | { |
205 | assert_spin_locked(&delayed_refs->lock); | 200 | assert_spin_locked(&delayed_refs->lock); |
@@ -230,25 +225,6 @@ int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | |||
230 | u64 seq); | 225 | u64 seq); |
231 | 226 | ||
232 | /* | 227 | /* |
233 | * delayed refs with a ref_seq > 0 must be held back during backref walking. | ||
234 | * this only applies to items in one of the fs-trees. for_cow items never need | ||
235 | * to be held back, so they won't get a ref_seq number. | ||
236 | */ | ||
237 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
238 | { | ||
239 | if (for_cow) | ||
240 | return 0; | ||
241 | |||
242 | if (rootid == BTRFS_FS_TREE_OBJECTID) | ||
243 | return 1; | ||
244 | |||
245 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
246 | return 1; | ||
247 | |||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * a node might live in a head or a regular ref, this lets you | 228 | * a node might live in a head or a regular ref, this lets you |
253 | * test for the proper type to use. | 229 | * test for the proper type to use. |
254 | */ | 230 | */ |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b0d49e21b0b1..b99d5127ba18 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1252,7 +1252,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1252 | 1252 | ||
1253 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1253 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
1254 | BTRFS_TREE_LOG_OBJECTID, NULL, | 1254 | BTRFS_TREE_LOG_OBJECTID, NULL, |
1255 | 0, 0, 0, 0); | 1255 | 0, 0, 0); |
1256 | if (IS_ERR(leaf)) { | 1256 | if (IS_ERR(leaf)) { |
1257 | kfree(root); | 1257 | kfree(root); |
1258 | return ERR_CAST(leaf); | 1258 | return ERR_CAST(leaf); |
@@ -1914,11 +1914,14 @@ int open_ctree(struct super_block *sb, | |||
1914 | spin_lock_init(&fs_info->delayed_iput_lock); | 1914 | spin_lock_init(&fs_info->delayed_iput_lock); |
1915 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1915 | spin_lock_init(&fs_info->defrag_inodes_lock); |
1916 | spin_lock_init(&fs_info->free_chunk_lock); | 1916 | spin_lock_init(&fs_info->free_chunk_lock); |
1917 | spin_lock_init(&fs_info->tree_mod_seq_lock); | ||
1918 | rwlock_init(&fs_info->tree_mod_log_lock); | ||
1917 | mutex_init(&fs_info->reloc_mutex); | 1919 | mutex_init(&fs_info->reloc_mutex); |
1918 | 1920 | ||
1919 | init_completion(&fs_info->kobj_unregister); | 1921 | init_completion(&fs_info->kobj_unregister); |
1920 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1922 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1921 | INIT_LIST_HEAD(&fs_info->space_info); | 1923 | INIT_LIST_HEAD(&fs_info->space_info); |
1924 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | ||
1922 | btrfs_mapping_init(&fs_info->mapping_tree); | 1925 | btrfs_mapping_init(&fs_info->mapping_tree); |
1923 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | 1926 | btrfs_init_block_rsv(&fs_info->global_block_rsv); |
1924 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | 1927 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); |
@@ -1931,12 +1934,14 @@ int open_ctree(struct super_block *sb, | |||
1931 | atomic_set(&fs_info->async_submit_draining, 0); | 1934 | atomic_set(&fs_info->async_submit_draining, 0); |
1932 | atomic_set(&fs_info->nr_async_bios, 0); | 1935 | atomic_set(&fs_info->nr_async_bios, 0); |
1933 | atomic_set(&fs_info->defrag_running, 0); | 1936 | atomic_set(&fs_info->defrag_running, 0); |
1937 | atomic_set(&fs_info->tree_mod_seq, 0); | ||
1934 | fs_info->sb = sb; | 1938 | fs_info->sb = sb; |
1935 | fs_info->max_inline = 8192 * 1024; | 1939 | fs_info->max_inline = 8192 * 1024; |
1936 | fs_info->metadata_ratio = 0; | 1940 | fs_info->metadata_ratio = 0; |
1937 | fs_info->defrag_inodes = RB_ROOT; | 1941 | fs_info->defrag_inodes = RB_ROOT; |
1938 | fs_info->trans_no_join = 0; | 1942 | fs_info->trans_no_join = 0; |
1939 | fs_info->free_chunk_space = 0; | 1943 | fs_info->free_chunk_space = 0; |
1944 | fs_info->tree_mod_log = RB_ROOT; | ||
1940 | 1945 | ||
1941 | /* readahead state */ | 1946 | /* readahead state */ |
1942 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | 1947 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1902726fa70a..4b5a1e1bdefb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -5218,7 +5218,7 @@ out: | |||
5218 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 5218 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
5219 | struct btrfs_root *root, | 5219 | struct btrfs_root *root, |
5220 | struct extent_buffer *buf, | 5220 | struct extent_buffer *buf, |
5221 | u64 parent, int last_ref, int for_cow) | 5221 | u64 parent, int last_ref) |
5222 | { | 5222 | { |
5223 | struct btrfs_block_group_cache *cache = NULL; | 5223 | struct btrfs_block_group_cache *cache = NULL; |
5224 | int ret; | 5224 | int ret; |
@@ -5228,7 +5228,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5228 | buf->start, buf->len, | 5228 | buf->start, buf->len, |
5229 | parent, root->root_key.objectid, | 5229 | parent, root->root_key.objectid, |
5230 | btrfs_header_level(buf), | 5230 | btrfs_header_level(buf), |
5231 | BTRFS_DROP_DELAYED_REF, NULL, for_cow); | 5231 | BTRFS_DROP_DELAYED_REF, NULL, 0); |
5232 | BUG_ON(ret); /* -ENOMEM */ | 5232 | BUG_ON(ret); /* -ENOMEM */ |
5233 | } | 5233 | } |
5234 | 5234 | ||
@@ -6250,7 +6250,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6250 | struct btrfs_root *root, u32 blocksize, | 6250 | struct btrfs_root *root, u32 blocksize, |
6251 | u64 parent, u64 root_objectid, | 6251 | u64 parent, u64 root_objectid, |
6252 | struct btrfs_disk_key *key, int level, | 6252 | struct btrfs_disk_key *key, int level, |
6253 | u64 hint, u64 empty_size, int for_cow) | 6253 | u64 hint, u64 empty_size) |
6254 | { | 6254 | { |
6255 | struct btrfs_key ins; | 6255 | struct btrfs_key ins; |
6256 | struct btrfs_block_rsv *block_rsv; | 6256 | struct btrfs_block_rsv *block_rsv; |
@@ -6298,7 +6298,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6298 | ins.objectid, | 6298 | ins.objectid, |
6299 | ins.offset, parent, root_objectid, | 6299 | ins.offset, parent, root_objectid, |
6300 | level, BTRFS_ADD_DELAYED_EXTENT, | 6300 | level, BTRFS_ADD_DELAYED_EXTENT, |
6301 | extent_op, for_cow); | 6301 | extent_op, 0); |
6302 | BUG_ON(ret); /* -ENOMEM */ | 6302 | BUG_ON(ret); /* -ENOMEM */ |
6303 | } | 6303 | } |
6304 | return buf; | 6304 | return buf; |
@@ -6716,7 +6716,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6716 | btrfs_header_owner(path->nodes[level + 1])); | 6716 | btrfs_header_owner(path->nodes[level + 1])); |
6717 | } | 6717 | } |
6718 | 6718 | ||
6719 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0); | 6719 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
6720 | out: | 6720 | out: |
6721 | wc->refs[level] = 0; | 6721 | wc->refs[level] = 0; |
6722 | wc->flags[level] = 0; | 6722 | wc->flags[level] = 0; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b3692c1373aa..2c8f7b204617 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -3924,6 +3924,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3924 | eb->start = start; | 3924 | eb->start = start; |
3925 | eb->len = len; | 3925 | eb->len = len; |
3926 | eb->tree = tree; | 3926 | eb->tree = tree; |
3927 | eb->bflags = 0; | ||
3927 | rwlock_init(&eb->lock); | 3928 | rwlock_init(&eb->lock); |
3928 | atomic_set(&eb->write_locks, 0); | 3929 | atomic_set(&eb->write_locks, 0); |
3929 | atomic_set(&eb->read_locks, 0); | 3930 | atomic_set(&eb->read_locks, 0); |
@@ -3961,6 +3962,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3961 | return eb; | 3962 | return eb; |
3962 | } | 3963 | } |
3963 | 3964 | ||
3965 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) | ||
3966 | { | ||
3967 | unsigned long i; | ||
3968 | struct page *p; | ||
3969 | struct extent_buffer *new; | ||
3970 | unsigned long num_pages = num_extent_pages(src->start, src->len); | ||
3971 | |||
3972 | new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); | ||
3973 | if (new == NULL) | ||
3974 | return NULL; | ||
3975 | |||
3976 | for (i = 0; i < num_pages; i++) { | ||
3977 | p = alloc_page(GFP_ATOMIC); | ||
3978 | BUG_ON(!p); | ||
3979 | attach_extent_buffer_page(new, p); | ||
3980 | WARN_ON(PageDirty(p)); | ||
3981 | SetPageUptodate(p); | ||
3982 | new->pages[i] = p; | ||
3983 | } | ||
3984 | |||
3985 | copy_extent_buffer(new, src, 0, 0, src->len); | ||
3986 | set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); | ||
3987 | set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); | ||
3988 | |||
3989 | return new; | ||
3990 | } | ||
3991 | |||
3992 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) | ||
3993 | { | ||
3994 | struct extent_buffer *eb; | ||
3995 | unsigned long num_pages = num_extent_pages(0, len); | ||
3996 | unsigned long i; | ||
3997 | |||
3998 | eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); | ||
3999 | if (!eb) | ||
4000 | return NULL; | ||
4001 | |||
4002 | for (i = 0; i < num_pages; i++) { | ||
4003 | eb->pages[i] = alloc_page(GFP_ATOMIC); | ||
4004 | if (!eb->pages[i]) | ||
4005 | goto err; | ||
4006 | } | ||
4007 | set_extent_buffer_uptodate(eb); | ||
4008 | btrfs_set_header_nritems(eb, 0); | ||
4009 | set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | ||
4010 | |||
4011 | return eb; | ||
4012 | err: | ||
4013 | for (i--; i > 0; i--) | ||
4014 | __free_page(eb->pages[i]); | ||
4015 | __free_extent_buffer(eb); | ||
4016 | return NULL; | ||
4017 | } | ||
4018 | |||
3964 | static int extent_buffer_under_io(struct extent_buffer *eb) | 4019 | static int extent_buffer_under_io(struct extent_buffer *eb) |
3965 | { | 4020 | { |
3966 | return (atomic_read(&eb->io_pages) || | 4021 | return (atomic_read(&eb->io_pages) || |
@@ -3977,6 +4032,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
3977 | unsigned long index; | 4032 | unsigned long index; |
3978 | unsigned long num_pages; | 4033 | unsigned long num_pages; |
3979 | struct page *page; | 4034 | struct page *page; |
4035 | int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | ||
3980 | 4036 | ||
3981 | BUG_ON(extent_buffer_under_io(eb)); | 4037 | BUG_ON(extent_buffer_under_io(eb)); |
3982 | 4038 | ||
@@ -3988,7 +4044,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
3988 | do { | 4044 | do { |
3989 | index--; | 4045 | index--; |
3990 | page = extent_buffer_page(eb, index); | 4046 | page = extent_buffer_page(eb, index); |
3991 | if (page) { | 4047 | if (page && mapped) { |
3992 | spin_lock(&page->mapping->private_lock); | 4048 | spin_lock(&page->mapping->private_lock); |
3993 | /* | 4049 | /* |
3994 | * We do this since we'll remove the pages after we've | 4050 | * We do this since we'll remove the pages after we've |
@@ -4013,6 +4069,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
4013 | } | 4069 | } |
4014 | spin_unlock(&page->mapping->private_lock); | 4070 | spin_unlock(&page->mapping->private_lock); |
4015 | 4071 | ||
4072 | } | ||
4073 | if (page) { | ||
4016 | /* One for when we alloced the page */ | 4074 | /* One for when we alloced the page */ |
4017 | page_cache_release(page); | 4075 | page_cache_release(page); |
4018 | } | 4076 | } |
@@ -4231,14 +4289,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | |||
4231 | { | 4289 | { |
4232 | WARN_ON(atomic_read(&eb->refs) == 0); | 4290 | WARN_ON(atomic_read(&eb->refs) == 0); |
4233 | if (atomic_dec_and_test(&eb->refs)) { | 4291 | if (atomic_dec_and_test(&eb->refs)) { |
4234 | struct extent_io_tree *tree = eb->tree; | 4292 | if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) { |
4293 | spin_unlock(&eb->refs_lock); | ||
4294 | } else { | ||
4295 | struct extent_io_tree *tree = eb->tree; | ||
4235 | 4296 | ||
4236 | spin_unlock(&eb->refs_lock); | 4297 | spin_unlock(&eb->refs_lock); |
4237 | 4298 | ||
4238 | spin_lock(&tree->buffer_lock); | 4299 | spin_lock(&tree->buffer_lock); |
4239 | radix_tree_delete(&tree->buffer, | 4300 | radix_tree_delete(&tree->buffer, |
4240 | eb->start >> PAGE_CACHE_SHIFT); | 4301 | eb->start >> PAGE_CACHE_SHIFT); |
4241 | spin_unlock(&tree->buffer_lock); | 4302 | spin_unlock(&tree->buffer_lock); |
4303 | } | ||
4242 | 4304 | ||
4243 | /* Should be safe to release our pages at this point */ | 4305 | /* Should be safe to release our pages at this point */ |
4244 | btrfs_release_extent_buffer_page(eb, 0); | 4306 | btrfs_release_extent_buffer_page(eb, 0); |
@@ -4256,6 +4318,10 @@ void free_extent_buffer(struct extent_buffer *eb) | |||
4256 | 4318 | ||
4257 | spin_lock(&eb->refs_lock); | 4319 | spin_lock(&eb->refs_lock); |
4258 | if (atomic_read(&eb->refs) == 2 && | 4320 | if (atomic_read(&eb->refs) == 2 && |
4321 | test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) | ||
4322 | atomic_dec(&eb->refs); | ||
4323 | |||
4324 | if (atomic_read(&eb->refs) == 2 && | ||
4259 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && | 4325 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && |
4260 | !extent_buffer_under_io(eb) && | 4326 | !extent_buffer_under_io(eb) && |
4261 | test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) | 4327 | test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4d8124b64577..25900af5b15d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -39,6 +39,7 @@ | |||
39 | #define EXTENT_BUFFER_STALE 6 | 39 | #define EXTENT_BUFFER_STALE 6 |
40 | #define EXTENT_BUFFER_WRITEBACK 7 | 40 | #define EXTENT_BUFFER_WRITEBACK 7 |
41 | #define EXTENT_BUFFER_IOERR 8 | 41 | #define EXTENT_BUFFER_IOERR 8 |
42 | #define EXTENT_BUFFER_DUMMY 9 | ||
42 | 43 | ||
43 | /* these are flags for extent_clear_unlock_delalloc */ | 44 | /* these are flags for extent_clear_unlock_delalloc */ |
44 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 45 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
@@ -264,6 +265,8 @@ void set_page_extent_mapped(struct page *page); | |||
264 | 265 | ||
265 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | 266 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, |
266 | u64 start, unsigned long len); | 267 | u64 start, unsigned long len); |
268 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); | ||
269 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); | ||
267 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | 270 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, |
268 | u64 start, unsigned long len); | 271 | u64 start, unsigned long len); |
269 | void free_extent_buffer(struct extent_buffer *eb); | 272 | void free_extent_buffer(struct extent_buffer *eb); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0f8c354c4c76..24b776c08d99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -368,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
368 | return PTR_ERR(trans); | 368 | return PTR_ERR(trans); |
369 | 369 | ||
370 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 370 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
371 | 0, objectid, NULL, 0, 0, 0, 0); | 371 | 0, objectid, NULL, 0, 0, 0); |
372 | if (IS_ERR(leaf)) { | 372 | if (IS_ERR(leaf)) { |
373 | ret = PTR_ERR(leaf); | 373 | ret = PTR_ERR(leaf); |
374 | goto fail; | 374 | goto fail; |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 82b03afcbd92..1791c6e3d834 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -56,48 +56,49 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
56 | static noinline int join_transaction(struct btrfs_root *root, int nofail) | 56 | static noinline int join_transaction(struct btrfs_root *root, int nofail) |
57 | { | 57 | { |
58 | struct btrfs_transaction *cur_trans; | 58 | struct btrfs_transaction *cur_trans; |
59 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
59 | 60 | ||
60 | spin_lock(&root->fs_info->trans_lock); | 61 | spin_lock(&fs_info->trans_lock); |
61 | loop: | 62 | loop: |
62 | /* The file system has been taken offline. No new transactions. */ | 63 | /* The file system has been taken offline. No new transactions. */ |
63 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 64 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
64 | spin_unlock(&root->fs_info->trans_lock); | 65 | spin_unlock(&fs_info->trans_lock); |
65 | return -EROFS; | 66 | return -EROFS; |
66 | } | 67 | } |
67 | 68 | ||
68 | if (root->fs_info->trans_no_join) { | 69 | if (fs_info->trans_no_join) { |
69 | if (!nofail) { | 70 | if (!nofail) { |
70 | spin_unlock(&root->fs_info->trans_lock); | 71 | spin_unlock(&fs_info->trans_lock); |
71 | return -EBUSY; | 72 | return -EBUSY; |
72 | } | 73 | } |
73 | } | 74 | } |
74 | 75 | ||
75 | cur_trans = root->fs_info->running_transaction; | 76 | cur_trans = fs_info->running_transaction; |
76 | if (cur_trans) { | 77 | if (cur_trans) { |
77 | if (cur_trans->aborted) { | 78 | if (cur_trans->aborted) { |
78 | spin_unlock(&root->fs_info->trans_lock); | 79 | spin_unlock(&fs_info->trans_lock); |
79 | return cur_trans->aborted; | 80 | return cur_trans->aborted; |
80 | } | 81 | } |
81 | atomic_inc(&cur_trans->use_count); | 82 | atomic_inc(&cur_trans->use_count); |
82 | atomic_inc(&cur_trans->num_writers); | 83 | atomic_inc(&cur_trans->num_writers); |
83 | cur_trans->num_joined++; | 84 | cur_trans->num_joined++; |
84 | spin_unlock(&root->fs_info->trans_lock); | 85 | spin_unlock(&fs_info->trans_lock); |
85 | return 0; | 86 | return 0; |
86 | } | 87 | } |
87 | spin_unlock(&root->fs_info->trans_lock); | 88 | spin_unlock(&fs_info->trans_lock); |
88 | 89 | ||
89 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 90 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
90 | if (!cur_trans) | 91 | if (!cur_trans) |
91 | return -ENOMEM; | 92 | return -ENOMEM; |
92 | 93 | ||
93 | spin_lock(&root->fs_info->trans_lock); | 94 | spin_lock(&fs_info->trans_lock); |
94 | if (root->fs_info->running_transaction) { | 95 | if (fs_info->running_transaction) { |
95 | /* | 96 | /* |
96 | * someone started a transaction after we unlocked. Make sure | 97 | * someone started a transaction after we unlocked. Make sure |
97 | * to redo the trans_no_join checks above | 98 | * to redo the trans_no_join checks above |
98 | */ | 99 | */ |
99 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 100 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
100 | cur_trans = root->fs_info->running_transaction; | 101 | cur_trans = fs_info->running_transaction; |
101 | goto loop; | 102 | goto loop; |
102 | } | 103 | } |
103 | 104 | ||
@@ -122,20 +123,38 @@ loop: | |||
122 | cur_trans->delayed_refs.flushing = 0; | 123 | cur_trans->delayed_refs.flushing = 0; |
123 | cur_trans->delayed_refs.run_delayed_start = 0; | 124 | cur_trans->delayed_refs.run_delayed_start = 0; |
124 | cur_trans->delayed_refs.seq = 1; | 125 | cur_trans->delayed_refs.seq = 1; |
126 | |||
127 | /* | ||
128 | * although the tree mod log is per file system and not per transaction, | ||
129 | * the log must never go across transaction boundaries. | ||
130 | */ | ||
131 | smp_mb(); | ||
132 | if (!list_empty(&fs_info->tree_mod_seq_list)) { | ||
133 | printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when " | ||
134 | "creating a fresh transaction\n"); | ||
135 | WARN_ON(1); | ||
136 | } | ||
137 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) { | ||
138 | printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when " | ||
139 | "creating a fresh transaction\n"); | ||
140 | WARN_ON(1); | ||
141 | } | ||
142 | atomic_set(&fs_info->tree_mod_seq, 0); | ||
143 | |||
125 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); | 144 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); |
126 | spin_lock_init(&cur_trans->commit_lock); | 145 | spin_lock_init(&cur_trans->commit_lock); |
127 | spin_lock_init(&cur_trans->delayed_refs.lock); | 146 | spin_lock_init(&cur_trans->delayed_refs.lock); |
128 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); | 147 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); |
129 | 148 | ||
130 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 149 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
131 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 150 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
132 | extent_io_tree_init(&cur_trans->dirty_pages, | 151 | extent_io_tree_init(&cur_trans->dirty_pages, |
133 | root->fs_info->btree_inode->i_mapping); | 152 | fs_info->btree_inode->i_mapping); |
134 | root->fs_info->generation++; | 153 | fs_info->generation++; |
135 | cur_trans->transid = root->fs_info->generation; | 154 | cur_trans->transid = fs_info->generation; |
136 | root->fs_info->running_transaction = cur_trans; | 155 | fs_info->running_transaction = cur_trans; |
137 | cur_trans->aborted = 0; | 156 | cur_trans->aborted = 0; |
138 | spin_unlock(&root->fs_info->trans_lock); | 157 | spin_unlock(&fs_info->trans_lock); |
139 | 158 | ||
140 | return 0; | 159 | return 0; |
141 | } | 160 | } |
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index ad993bc2df93..ab942f46b3dd 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
@@ -23,9 +23,9 @@ | |||
23 | * | 23 | * |
24 | * ulist = ulist_alloc(); | 24 | * ulist = ulist_alloc(); |
25 | * ulist_add(ulist, root); | 25 | * ulist_add(ulist, root); |
26 | * elem = NULL; | 26 | * ULIST_ITER_INIT(&uiter); |
27 | * | 27 | * |
28 | * while ((elem = ulist_next(ulist, elem)) { | 28 | * while ((elem = ulist_next(ulist, &uiter)) { |
29 | * for (all child nodes n in elem) | 29 | * for (all child nodes n in elem) |
30 | * ulist_add(ulist, n); | 30 | * ulist_add(ulist, n); |
31 | * do something useful with the node; | 31 | * do something useful with the node; |
@@ -146,11 +146,20 @@ EXPORT_SYMBOL(ulist_free); | |||
146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, |
147 | gfp_t gfp_mask) | 147 | gfp_t gfp_mask) |
148 | { | 148 | { |
149 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); | ||
150 | } | ||
151 | |||
152 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | ||
153 | unsigned long *old_aux, gfp_t gfp_mask) | ||
154 | { | ||
149 | int i; | 155 | int i; |
150 | 156 | ||
151 | for (i = 0; i < ulist->nnodes; ++i) { | 157 | for (i = 0; i < ulist->nnodes; ++i) { |
152 | if (ulist->nodes[i].val == val) | 158 | if (ulist->nodes[i].val == val) { |
159 | if (old_aux) | ||
160 | *old_aux = ulist->nodes[i].aux; | ||
153 | return 0; | 161 | return 0; |
162 | } | ||
154 | } | 163 | } |
155 | 164 | ||
156 | if (ulist->nnodes >= ulist->nodes_alloced) { | 165 | if (ulist->nnodes >= ulist->nodes_alloced) { |
@@ -188,33 +197,26 @@ EXPORT_SYMBOL(ulist_add); | |||
188 | /** | 197 | /** |
189 | * ulist_next - iterate ulist | 198 | * ulist_next - iterate ulist |
190 | * @ulist: ulist to iterate | 199 | * @ulist: ulist to iterate |
191 | * @prev: previously returned element or %NULL to start iteration | 200 | * @uiter: iterator variable, initialized with ULIST_ITER_INIT(&iterator) |
192 | * | 201 | * |
193 | * Note: locking must be provided by the caller. In case of rwlocks only read | 202 | * Note: locking must be provided by the caller. In case of rwlocks only read |
194 | * locking is needed | 203 | * locking is needed |
195 | * | 204 | * |
196 | * This function is used to iterate an ulist. The iteration is started with | 205 | * This function is used to iterate an ulist. |
197 | * @prev = %NULL. It returns the next element from the ulist or %NULL when the | 206 | * It returns the next element from the ulist or %NULL when the |
198 | * end is reached. No guarantee is made with respect to the order in which | 207 | * end is reached. No guarantee is made with respect to the order in which |
199 | * the elements are returned. They might neither be returned in order of | 208 | * the elements are returned. They might neither be returned in order of |
200 | * addition nor in ascending order. | 209 | * addition nor in ascending order. |
201 | * It is allowed to call ulist_add during an enumeration. Newly added items | 210 | * It is allowed to call ulist_add during an enumeration. Newly added items |
202 | * are guaranteed to show up in the running enumeration. | 211 | * are guaranteed to show up in the running enumeration. |
203 | */ | 212 | */ |
204 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev) | 213 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter) |
205 | { | 214 | { |
206 | int next; | ||
207 | |||
208 | if (ulist->nnodes == 0) | 215 | if (ulist->nnodes == 0) |
209 | return NULL; | 216 | return NULL; |
210 | 217 | if (uiter->i < 0 || uiter->i >= ulist->nnodes) | |
211 | if (!prev) | ||
212 | return &ulist->nodes[0]; | ||
213 | |||
214 | next = (prev - ulist->nodes) + 1; | ||
215 | if (next < 0 || next >= ulist->nnodes) | ||
216 | return NULL; | 218 | return NULL; |
217 | 219 | ||
218 | return &ulist->nodes[next]; | 220 | return &ulist->nodes[uiter->i++]; |
219 | } | 221 | } |
220 | EXPORT_SYMBOL(ulist_next); | 222 | EXPORT_SYMBOL(ulist_next); |
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 6568c3527732..21bdc8ec8130 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h | |||
@@ -24,6 +24,10 @@ | |||
24 | */ | 24 | */ |
25 | #define ULIST_SIZE 16 | 25 | #define ULIST_SIZE 16 |
26 | 26 | ||
27 | struct ulist_iterator { | ||
28 | int i; | ||
29 | }; | ||
30 | |||
27 | /* | 31 | /* |
28 | * element of the list | 32 | * element of the list |
29 | */ | 33 | */ |
@@ -61,7 +65,13 @@ void ulist_fini(struct ulist *ulist); | |||
61 | void ulist_reinit(struct ulist *ulist); | 65 | void ulist_reinit(struct ulist *ulist); |
62 | struct ulist *ulist_alloc(gfp_t gfp_mask); | 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); |
63 | void ulist_free(struct ulist *ulist); | 67 | void ulist_free(struct ulist *ulist); |
64 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, gfp_t gfp_mask); | 68 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, |
65 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev); | 69 | gfp_t gfp_mask); |
70 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | ||
71 | unsigned long *old_aux, gfp_t gfp_mask); | ||
72 | struct ulist_node *ulist_next(struct ulist *ulist, | ||
73 | struct ulist_iterator *uiter); | ||
74 | |||
75 | #define ULIST_ITER_INIT(uiter) ((uiter)->i = 0) | ||
66 | 76 | ||
67 | #endif | 77 | #endif |