aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/delayed-inode.c
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2011-04-22 06:12:22 -0400
committerChris Mason <chris.mason@oracle.com>2011-05-21 09:30:56 -0400
commit16cdcec736cd214350cdb591bf1091f8beedefa0 (patch)
tree5598d4561660c4d7a1d4de8b3703d6dd3cc7f9e7 /fs/btrfs/delayed-inode.c
parent61c4f2c81c61f73549928dfd9f3e8f26aa36a8cf (diff)
btrfs: implement delayed inode items operation
Changelog V5 -> V6: - Fix oom when the memory load is high, by storing the delayed nodes into the root's radix tree, and letting btrfs inodes go. Changelog V4 -> V5: - Fix the race on adding the delayed node to the inode, which is spotted by Chris Mason. - Merge Chris Mason's incremental patch into this patch. - Fix deadlock between readdir() and memory fault, which is reported by Itaru Kitayama. Changelog V3 -> V4: - Fix nested lock, which is reported by Itaru Kitayama, by updating space cache inode in time. Changelog V2 -> V3: - Fix the race between the delayed worker and the task which does delayed items balance, which is reported by Tsutomu Itoh. - Modify the patch address David Sterba's comment. - Fix the bug of the cpu recursion spinlock, reported by Chris Mason Changelog V1 -> V2: - break up the global rb-tree, use a list to manage the delayed nodes, which is created for every directory and file, and used to manage the delayed directory name index items and the delayed inode item. - introduce a worker to deal with the delayed nodes. Compare with Ext3/4, the performance of file creation and deletion on btrfs is very poor. the reason is that btrfs must do a lot of b+ tree insertions, such as inode item, directory name item, directory name index and so on. If we can do some delayed b+ tree insertion or deletion, we can improve the performance, so we made this patch which implemented delayed directory name index insertion/deletion and delayed inode update. Implementation: - introduce a delayed root object into the filesystem, that use two lists to manage the delayed nodes which are created for every file/directory. One is used to manage all the delayed nodes that have delayed items. And the other is used to manage the delayed nodes which is waiting to be dealt with by the work thread. - Every delayed node has two rb-tree, one is used to manage the directory name index which is going to be inserted into b+ tree, and the other is used to manage the directory name index which is going to be deleted from b+ tree. - introduce a worker to deal with the delayed operation. This worker is used to deal with the works of the delayed directory name index items insertion and deletion and the delayed inode update. When the delayed items is beyond the lower limit, we create works for some delayed nodes and insert them into the work queue of the worker, and then go back. When the delayed items is beyond the upper bound, we create works for all the delayed nodes that haven't been dealt with, and insert them into the work queue of the worker, and then wait for that the untreated items is below some threshold value. - When we want to insert a directory name index into b+ tree, we just add the information into the delayed inserting rb-tree. And then we check the number of the delayed items and do delayed items balance. (The balance policy is above.) - When we want to delete a directory name index from the b+ tree, we search it in the inserting rb-tree at first. If we look it up, just drop it. If not, add the key of it into the delayed deleting rb-tree. Similar to the delayed inserting rb-tree, we also check the number of the delayed items and do delayed items balance. (The same to inserting manipulation) - When we want to update the metadata of some inode, we cached the data of the inode into the delayed node. the worker will flush it into the b+ tree after dealing with the delayed insertion and deletion. - We will move the delayed node to the tail of the list after we access the delayed node, By this way, we can cache more delayed items and merge more inode updates. - If we want to commit transaction, we will deal with all the delayed node. - the delayed node will be freed when we free the btrfs inode. - Before we log the inode items, we commit all the directory name index items and the delayed inode update. I did a quick test by the benchmark tool[1] and found we can improve the performance of file creation by ~15%, and file deletion by ~20%. Before applying this patch: Create files: Total files: 50000 Total time: 1.096108 Average time: 0.000022 Delete files: Total files: 50000 Total time: 1.510403 Average time: 0.000030 After applying this patch: Create files: Total files: 50000 Total time: 0.932899 Average time: 0.000019 Delete files: Total files: 50000 Total time: 1.215732 Average time: 0.000024 [1] http://marc.info/?l=linux-btrfs&m=128212635122920&q=p3 Many thanks for Kitayama-san's help! Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Reviewed-by: David Sterba <dave@jikos.cz> Tested-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com> Tested-by: Itaru Kitayama <kitayama@cl.bb4u.ne.jp> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/delayed-inode.c')
-rw-r--r--fs/btrfs/delayed-inode.c1694
1 files changed, 1694 insertions, 0 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
new file mode 100644
index 000000000000..95485318f001
--- /dev/null
+++ b/fs/btrfs/delayed-inode.c
@@ -0,0 +1,1694 @@
1/*
2 * Copyright (C) 2011 Fujitsu. All rights reserved.
3 * Written by Miao Xie <miaox@cn.fujitsu.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
18 */
19
20#include <linux/slab.h>
21#include "delayed-inode.h"
22#include "disk-io.h"
23#include "transaction.h"
24
25#define BTRFS_DELAYED_WRITEBACK 400
26#define BTRFS_DELAYED_BACKGROUND 100
27
28static struct kmem_cache *delayed_node_cache;
29
30int __init btrfs_delayed_inode_init(void)
31{
32 delayed_node_cache = kmem_cache_create("delayed_node",
33 sizeof(struct btrfs_delayed_node),
34 0,
35 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
36 NULL);
37 if (!delayed_node_cache)
38 return -ENOMEM;
39 return 0;
40}
41
42void btrfs_delayed_inode_exit(void)
43{
44 if (delayed_node_cache)
45 kmem_cache_destroy(delayed_node_cache);
46}
47
48static inline void btrfs_init_delayed_node(
49 struct btrfs_delayed_node *delayed_node,
50 struct btrfs_root *root, u64 inode_id)
51{
52 delayed_node->root = root;
53 delayed_node->inode_id = inode_id;
54 atomic_set(&delayed_node->refs, 0);
55 delayed_node->count = 0;
56 delayed_node->in_list = 0;
57 delayed_node->inode_dirty = 0;
58 delayed_node->ins_root = RB_ROOT;
59 delayed_node->del_root = RB_ROOT;
60 mutex_init(&delayed_node->mutex);
61 delayed_node->index_cnt = 0;
62 INIT_LIST_HEAD(&delayed_node->n_list);
63 INIT_LIST_HEAD(&delayed_node->p_list);
64 delayed_node->bytes_reserved = 0;
65}
66
67static inline int btrfs_is_continuous_delayed_item(
68 struct btrfs_delayed_item *item1,
69 struct btrfs_delayed_item *item2)
70{
71 if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
72 item1->key.objectid == item2->key.objectid &&
73 item1->key.type == item2->key.type &&
74 item1->key.offset + 1 == item2->key.offset)
75 return 1;
76 return 0;
77}
78
79static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
80 struct btrfs_root *root)
81{
82 return root->fs_info->delayed_root;
83}
84
85static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
86 struct inode *inode)
87{
88 struct btrfs_delayed_node *node;
89 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
90 struct btrfs_root *root = btrfs_inode->root;
91 int ret;
92
93again:
94 node = ACCESS_ONCE(btrfs_inode->delayed_node);
95 if (node) {
96 atomic_inc(&node->refs); /* can be accessed */
97 return node;
98 }
99
100 spin_lock(&root->inode_lock);
101 node = radix_tree_lookup(&root->delayed_nodes_tree, inode->i_ino);
102 if (node) {
103 if (btrfs_inode->delayed_node) {
104 spin_unlock(&root->inode_lock);
105 goto again;
106 }
107 btrfs_inode->delayed_node = node;
108 atomic_inc(&node->refs); /* can be accessed */
109 atomic_inc(&node->refs); /* cached in the inode */
110 spin_unlock(&root->inode_lock);
111 return node;
112 }
113 spin_unlock(&root->inode_lock);
114
115 node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
116 if (!node)
117 return ERR_PTR(-ENOMEM);
118 btrfs_init_delayed_node(node, root, inode->i_ino);
119
120 atomic_inc(&node->refs); /* cached in the btrfs inode */
121 atomic_inc(&node->refs); /* can be accessed */
122
123 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
124 if (ret) {
125 kmem_cache_free(delayed_node_cache, node);
126 return ERR_PTR(ret);
127 }
128
129 spin_lock(&root->inode_lock);
130 ret = radix_tree_insert(&root->delayed_nodes_tree, inode->i_ino, node);
131 if (ret == -EEXIST) {
132 kmem_cache_free(delayed_node_cache, node);
133 spin_unlock(&root->inode_lock);
134 radix_tree_preload_end();
135 goto again;
136 }
137 btrfs_inode->delayed_node = node;
138 spin_unlock(&root->inode_lock);
139 radix_tree_preload_end();
140
141 return node;
142}
143
144/*
145 * Call it when holding delayed_node->mutex
146 *
147 * If mod = 1, add this node into the prepared list.
148 */
149static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
150 struct btrfs_delayed_node *node,
151 int mod)
152{
153 spin_lock(&root->lock);
154 if (node->in_list) {
155 if (!list_empty(&node->p_list))
156 list_move_tail(&node->p_list, &root->prepare_list);
157 else if (mod)
158 list_add_tail(&node->p_list, &root->prepare_list);
159 } else {
160 list_add_tail(&node->n_list, &root->node_list);
161 list_add_tail(&node->p_list, &root->prepare_list);
162 atomic_inc(&node->refs); /* inserted into list */
163 root->nodes++;
164 node->in_list = 1;
165 }
166 spin_unlock(&root->lock);
167}
168
169/* Call it when holding delayed_node->mutex */
170static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
171 struct btrfs_delayed_node *node)
172{
173 spin_lock(&root->lock);
174 if (node->in_list) {
175 root->nodes--;
176 atomic_dec(&node->refs); /* not in the list */
177 list_del_init(&node->n_list);
178 if (!list_empty(&node->p_list))
179 list_del_init(&node->p_list);
180 node->in_list = 0;
181 }
182 spin_unlock(&root->lock);
183}
184
185struct btrfs_delayed_node *btrfs_first_delayed_node(
186 struct btrfs_delayed_root *delayed_root)
187{
188 struct list_head *p;
189 struct btrfs_delayed_node *node = NULL;
190
191 spin_lock(&delayed_root->lock);
192 if (list_empty(&delayed_root->node_list))
193 goto out;
194
195 p = delayed_root->node_list.next;
196 node = list_entry(p, struct btrfs_delayed_node, n_list);
197 atomic_inc(&node->refs);
198out:
199 spin_unlock(&delayed_root->lock);
200
201 return node;
202}
203
204struct btrfs_delayed_node *btrfs_next_delayed_node(
205 struct btrfs_delayed_node *node)
206{
207 struct btrfs_delayed_root *delayed_root;
208 struct list_head *p;
209 struct btrfs_delayed_node *next = NULL;
210
211 delayed_root = node->root->fs_info->delayed_root;
212 spin_lock(&delayed_root->lock);
213 if (!node->in_list) { /* not in the list */
214 if (list_empty(&delayed_root->node_list))
215 goto out;
216 p = delayed_root->node_list.next;
217 } else if (list_is_last(&node->n_list, &delayed_root->node_list))
218 goto out;
219 else
220 p = node->n_list.next;
221
222 next = list_entry(p, struct btrfs_delayed_node, n_list);
223 atomic_inc(&next->refs);
224out:
225 spin_unlock(&delayed_root->lock);
226
227 return next;
228}
229
230static void __btrfs_release_delayed_node(
231 struct btrfs_delayed_node *delayed_node,
232 int mod)
233{
234 struct btrfs_delayed_root *delayed_root;
235
236 if (!delayed_node)
237 return;
238
239 delayed_root = delayed_node->root->fs_info->delayed_root;
240
241 mutex_lock(&delayed_node->mutex);
242 if (delayed_node->count)
243 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
244 else
245 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
246 mutex_unlock(&delayed_node->mutex);
247
248 if (atomic_dec_and_test(&delayed_node->refs)) {
249 struct btrfs_root *root = delayed_node->root;
250 spin_lock(&root->inode_lock);
251 if (atomic_read(&delayed_node->refs) == 0) {
252 radix_tree_delete(&root->delayed_nodes_tree,
253 delayed_node->inode_id);
254 kmem_cache_free(delayed_node_cache, delayed_node);
255 }
256 spin_unlock(&root->inode_lock);
257 }
258}
259
260static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
261{
262 __btrfs_release_delayed_node(node, 0);
263}
264
265struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
266 struct btrfs_delayed_root *delayed_root)
267{
268 struct list_head *p;
269 struct btrfs_delayed_node *node = NULL;
270
271 spin_lock(&delayed_root->lock);
272 if (list_empty(&delayed_root->prepare_list))
273 goto out;
274
275 p = delayed_root->prepare_list.next;
276 list_del_init(p);
277 node = list_entry(p, struct btrfs_delayed_node, p_list);
278 atomic_inc(&node->refs);
279out:
280 spin_unlock(&delayed_root->lock);
281
282 return node;
283}
284
285static inline void btrfs_release_prepared_delayed_node(
286 struct btrfs_delayed_node *node)
287{
288 __btrfs_release_delayed_node(node, 1);
289}
290
291struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
292{
293 struct btrfs_delayed_item *item;
294 item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
295 if (item) {
296 item->data_len = data_len;
297 item->ins_or_del = 0;
298 item->bytes_reserved = 0;
299 item->block_rsv = NULL;
300 item->delayed_node = NULL;
301 atomic_set(&item->refs, 1);
302 }
303 return item;
304}
305
306/*
307 * __btrfs_lookup_delayed_item - look up the delayed item by key
308 * @delayed_node: pointer to the delayed node
309 * @key: the key to look up
310 * @prev: used to store the prev item if the right item isn't found
311 * @next: used to store the next item if the right item isn't found
312 *
313 * Note: if we don't find the right item, we will return the prev item and
314 * the next item.
315 */
316static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
317 struct rb_root *root,
318 struct btrfs_key *key,
319 struct btrfs_delayed_item **prev,
320 struct btrfs_delayed_item **next)
321{
322 struct rb_node *node, *prev_node = NULL;
323 struct btrfs_delayed_item *delayed_item = NULL;
324 int ret = 0;
325
326 node = root->rb_node;
327
328 while (node) {
329 delayed_item = rb_entry(node, struct btrfs_delayed_item,
330 rb_node);
331 prev_node = node;
332 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
333 if (ret < 0)
334 node = node->rb_right;
335 else if (ret > 0)
336 node = node->rb_left;
337 else
338 return delayed_item;
339 }
340
341 if (prev) {
342 if (!prev_node)
343 *prev = NULL;
344 else if (ret < 0)
345 *prev = delayed_item;
346 else if ((node = rb_prev(prev_node)) != NULL) {
347 *prev = rb_entry(node, struct btrfs_delayed_item,
348 rb_node);
349 } else
350 *prev = NULL;
351 }
352
353 if (next) {
354 if (!prev_node)
355 *next = NULL;
356 else if (ret > 0)
357 *next = delayed_item;
358 else if ((node = rb_next(prev_node)) != NULL) {
359 *next = rb_entry(node, struct btrfs_delayed_item,
360 rb_node);
361 } else
362 *next = NULL;
363 }
364 return NULL;
365}
366
367struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
368 struct btrfs_delayed_node *delayed_node,
369 struct btrfs_key *key)
370{
371 struct btrfs_delayed_item *item;
372
373 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
374 NULL, NULL);
375 return item;
376}
377
378struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
379 struct btrfs_delayed_node *delayed_node,
380 struct btrfs_key *key)
381{
382 struct btrfs_delayed_item *item;
383
384 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
385 NULL, NULL);
386 return item;
387}
388
389struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
390 struct btrfs_delayed_node *delayed_node,
391 struct btrfs_key *key)
392{
393 struct btrfs_delayed_item *item, *next;
394
395 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
396 NULL, &next);
397 if (!item)
398 item = next;
399
400 return item;
401}
402
403struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
404 struct btrfs_delayed_node *delayed_node,
405 struct btrfs_key *key)
406{
407 struct btrfs_delayed_item *item, *next;
408
409 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
410 NULL, &next);
411 if (!item)
412 item = next;
413
414 return item;
415}
416
417static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
418 struct btrfs_delayed_item *ins,
419 int action)
420{
421 struct rb_node **p, *node;
422 struct rb_node *parent_node = NULL;
423 struct rb_root *root;
424 struct btrfs_delayed_item *item;
425 int cmp;
426
427 if (action == BTRFS_DELAYED_INSERTION_ITEM)
428 root = &delayed_node->ins_root;
429 else if (action == BTRFS_DELAYED_DELETION_ITEM)
430 root = &delayed_node->del_root;
431 else
432 BUG();
433 p = &root->rb_node;
434 node = &ins->rb_node;
435
436 while (*p) {
437 parent_node = *p;
438 item = rb_entry(parent_node, struct btrfs_delayed_item,
439 rb_node);
440
441 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
442 if (cmp < 0)
443 p = &(*p)->rb_right;
444 else if (cmp > 0)
445 p = &(*p)->rb_left;
446 else
447 return -EEXIST;
448 }
449
450 rb_link_node(node, parent_node, p);
451 rb_insert_color(node, root);
452 ins->delayed_node = delayed_node;
453 ins->ins_or_del = action;
454
455 if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
456 action == BTRFS_DELAYED_INSERTION_ITEM &&
457 ins->key.offset >= delayed_node->index_cnt)
458 delayed_node->index_cnt = ins->key.offset + 1;
459
460 delayed_node->count++;
461 atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
462 return 0;
463}
464
465static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
466 struct btrfs_delayed_item *item)
467{
468 return __btrfs_add_delayed_item(node, item,
469 BTRFS_DELAYED_INSERTION_ITEM);
470}
471
472static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
473 struct btrfs_delayed_item *item)
474{
475 return __btrfs_add_delayed_item(node, item,
476 BTRFS_DELAYED_DELETION_ITEM);
477}
478
479static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
480{
481 struct rb_root *root;
482 struct btrfs_delayed_root *delayed_root;
483
484 delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
485
486 BUG_ON(!delayed_root);
487 BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
488 delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
489
490 if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
491 root = &delayed_item->delayed_node->ins_root;
492 else
493 root = &delayed_item->delayed_node->del_root;
494
495 rb_erase(&delayed_item->rb_node, root);
496 delayed_item->delayed_node->count--;
497 atomic_dec(&delayed_root->items);
498 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
499 waitqueue_active(&delayed_root->wait))
500 wake_up(&delayed_root->wait);
501}
502
503static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
504{
505 if (item) {
506 __btrfs_remove_delayed_item(item);
507 if (atomic_dec_and_test(&item->refs))
508 kfree(item);
509 }
510}
511
512struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
513 struct btrfs_delayed_node *delayed_node)
514{
515 struct rb_node *p;
516 struct btrfs_delayed_item *item = NULL;
517
518 p = rb_first(&delayed_node->ins_root);
519 if (p)
520 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
521
522 return item;
523}
524
525struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
526 struct btrfs_delayed_node *delayed_node)
527{
528 struct rb_node *p;
529 struct btrfs_delayed_item *item = NULL;
530
531 p = rb_first(&delayed_node->del_root);
532 if (p)
533 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
534
535 return item;
536}
537
538struct btrfs_delayed_item *__btrfs_next_delayed_item(
539 struct btrfs_delayed_item *item)
540{
541 struct rb_node *p;
542 struct btrfs_delayed_item *next = NULL;
543
544 p = rb_next(&item->rb_node);
545 if (p)
546 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
547
548 return next;
549}
550
551static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
552 struct inode *inode)
553{
554 struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
555 struct btrfs_delayed_node *delayed_node;
556
557 delayed_node = btrfs_inode->delayed_node;
558 if (delayed_node)
559 atomic_inc(&delayed_node->refs);
560
561 return delayed_node;
562}
563
564static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
565 u64 root_id)
566{
567 struct btrfs_key root_key;
568
569 if (root->objectid == root_id)
570 return root;
571
572 root_key.objectid = root_id;
573 root_key.type = BTRFS_ROOT_ITEM_KEY;
574 root_key.offset = (u64)-1;
575 return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
576}
577
578static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
579 struct btrfs_root *root,
580 struct btrfs_delayed_item *item)
581{
582 struct btrfs_block_rsv *src_rsv;
583 struct btrfs_block_rsv *dst_rsv;
584 u64 num_bytes;
585 int ret;
586
587 if (!trans->bytes_reserved)
588 return 0;
589
590 src_rsv = trans->block_rsv;
591 dst_rsv = &root->fs_info->global_block_rsv;
592
593 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
594 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
595 if (!ret) {
596 item->bytes_reserved = num_bytes;
597 item->block_rsv = dst_rsv;
598 }
599
600 return ret;
601}
602
603static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
604 struct btrfs_delayed_item *item)
605{
606 if (!item->bytes_reserved)
607 return;
608
609 btrfs_block_rsv_release(root, item->block_rsv,
610 item->bytes_reserved);
611}
612
613static int btrfs_delayed_inode_reserve_metadata(
614 struct btrfs_trans_handle *trans,
615 struct btrfs_root *root,
616 struct btrfs_delayed_node *node)
617{
618 struct btrfs_block_rsv *src_rsv;
619 struct btrfs_block_rsv *dst_rsv;
620 u64 num_bytes;
621 int ret;
622
623 if (!trans->bytes_reserved)
624 return 0;
625
626 src_rsv = trans->block_rsv;
627 dst_rsv = &root->fs_info->global_block_rsv;
628
629 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
630 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
631 if (!ret)
632 node->bytes_reserved = num_bytes;
633
634 return ret;
635}
636
637static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
638 struct btrfs_delayed_node *node)
639{
640 struct btrfs_block_rsv *rsv;
641
642 if (!node->bytes_reserved)
643 return;
644
645 rsv = &root->fs_info->global_block_rsv;
646 btrfs_block_rsv_release(root, rsv,
647 node->bytes_reserved);
648 node->bytes_reserved = 0;
649}
650
651/*
652 * This helper will insert some continuous items into the same leaf according
653 * to the free space of the leaf.
654 */
655static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
656 struct btrfs_root *root,
657 struct btrfs_path *path,
658 struct btrfs_delayed_item *item)
659{
660 struct btrfs_delayed_item *curr, *next;
661 int free_space;
662 int total_data_size = 0, total_size = 0;
663 struct extent_buffer *leaf;
664 char *data_ptr;
665 struct btrfs_key *keys;
666 u32 *data_size;
667 struct list_head head;
668 int slot;
669 int nitems;
670 int i;
671 int ret = 0;
672
673 BUG_ON(!path->nodes[0]);
674
675 leaf = path->nodes[0];
676 free_space = btrfs_leaf_free_space(root, leaf);
677 INIT_LIST_HEAD(&head);
678
679 next = item;
680
681 /*
682 * count the number of the continuous items that we can insert in batch
683 */
684 while (total_size + next->data_len + sizeof(struct btrfs_item) <=
685 free_space) {
686 total_data_size += next->data_len;
687 total_size += next->data_len + sizeof(struct btrfs_item);
688 list_add_tail(&next->tree_list, &head);
689 nitems++;
690
691 curr = next;
692 next = __btrfs_next_delayed_item(curr);
693 if (!next)
694 break;
695
696 if (!btrfs_is_continuous_delayed_item(curr, next))
697 break;
698 }
699
700 if (!nitems) {
701 ret = 0;
702 goto out;
703 }
704
705 /*
706 * we need allocate some memory space, but it might cause the task
707 * to sleep, so we set all locked nodes in the path to blocking locks
708 * first.
709 */
710 btrfs_set_path_blocking(path);
711
712 keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
713 if (!keys) {
714 ret = -ENOMEM;
715 goto out;
716 }
717
718 data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
719 if (!data_size) {
720 ret = -ENOMEM;
721 goto error;
722 }
723
724 /* get keys of all the delayed items */
725 i = 0;
726 list_for_each_entry(next, &head, tree_list) {
727 keys[i] = next->key;
728 data_size[i] = next->data_len;
729 i++;
730 }
731
732 /* reset all the locked nodes in the patch to spinning locks. */
733 btrfs_clear_path_blocking(path, NULL);
734
735 /* insert the keys of the items */
736 ret = setup_items_for_insert(trans, root, path, keys, data_size,
737 total_data_size, total_size, nitems);
738 if (ret)
739 goto error;
740
741 /* insert the dir index items */
742 slot = path->slots[0];
743 list_for_each_entry_safe(curr, next, &head, tree_list) {
744 data_ptr = btrfs_item_ptr(leaf, slot, char);
745 write_extent_buffer(leaf, &curr->data,
746 (unsigned long)data_ptr,
747 curr->data_len);
748 slot++;
749
750 btrfs_delayed_item_release_metadata(root, curr);
751
752 list_del(&curr->tree_list);
753 btrfs_release_delayed_item(curr);
754 }
755
756error:
757 kfree(data_size);
758 kfree(keys);
759out:
760 return ret;
761}
762
763/*
764 * This helper can just do simple insertion that needn't extend item for new
765 * data, such as directory name index insertion, inode insertion.
766 */
767static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
768 struct btrfs_root *root,
769 struct btrfs_path *path,
770 struct btrfs_delayed_item *delayed_item)
771{
772 struct extent_buffer *leaf;
773 struct btrfs_item *item;
774 char *ptr;
775 int ret;
776
777 ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
778 delayed_item->data_len);
779 if (ret < 0 && ret != -EEXIST)
780 return ret;
781
782 leaf = path->nodes[0];
783
784 item = btrfs_item_nr(leaf, path->slots[0]);
785 ptr = btrfs_item_ptr(leaf, path->slots[0], char);
786
787 write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
788 delayed_item->data_len);
789 btrfs_mark_buffer_dirty(leaf);
790
791 btrfs_delayed_item_release_metadata(root, delayed_item);
792 return 0;
793}
794
795/*
796 * we insert an item first, then if there are some continuous items, we try
797 * to insert those items into the same leaf.
798 */
799static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
800 struct btrfs_path *path,
801 struct btrfs_root *root,
802 struct btrfs_delayed_node *node)
803{
804 struct btrfs_delayed_item *curr, *prev;
805 int ret = 0;
806
807do_again:
808 mutex_lock(&node->mutex);
809 curr = __btrfs_first_delayed_insertion_item(node);
810 if (!curr)
811 goto insert_end;
812
813 ret = btrfs_insert_delayed_item(trans, root, path, curr);
814 if (ret < 0) {
815 btrfs_release_path(root, path);
816 goto insert_end;
817 }
818
819 prev = curr;
820 curr = __btrfs_next_delayed_item(prev);
821 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
822 /* insert the continuous items into the same leaf */
823 path->slots[0]++;
824 btrfs_batch_insert_items(trans, root, path, curr);
825 }
826 btrfs_release_delayed_item(prev);
827 btrfs_mark_buffer_dirty(path->nodes[0]);
828
829 btrfs_release_path(root, path);
830 mutex_unlock(&node->mutex);
831 goto do_again;
832
833insert_end:
834 mutex_unlock(&node->mutex);
835 return ret;
836}
837
838static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
839 struct btrfs_root *root,
840 struct btrfs_path *path,
841 struct btrfs_delayed_item *item)
842{
843 struct btrfs_delayed_item *curr, *next;
844 struct extent_buffer *leaf;
845 struct btrfs_key key;
846 struct list_head head;
847 int nitems, i, last_item;
848 int ret = 0;
849
850 BUG_ON(!path->nodes[0]);
851
852 leaf = path->nodes[0];
853
854 i = path->slots[0];
855 last_item = btrfs_header_nritems(leaf) - 1;
856 if (i > last_item)
857 return -ENOENT; /* FIXME: Is errno suitable? */
858
859 next = item;
860 INIT_LIST_HEAD(&head);
861 btrfs_item_key_to_cpu(leaf, &key, i);
862 nitems = 0;
863 /*
864 * count the number of the dir index items that we can delete in batch
865 */
866 while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
867 list_add_tail(&next->tree_list, &head);
868 nitems++;
869
870 curr = next;
871 next = __btrfs_next_delayed_item(curr);
872 if (!next)
873 break;
874
875 if (!btrfs_is_continuous_delayed_item(curr, next))
876 break;
877
878 i++;
879 if (i > last_item)
880 break;
881 btrfs_item_key_to_cpu(leaf, &key, i);
882 }
883
884 if (!nitems)
885 return 0;
886
887 ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
888 if (ret)
889 goto out;
890
891 list_for_each_entry_safe(curr, next, &head, tree_list) {
892 btrfs_delayed_item_release_metadata(root, curr);
893 list_del(&curr->tree_list);
894 btrfs_release_delayed_item(curr);
895 }
896
897out:
898 return ret;
899}
900
901static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
902 struct btrfs_path *path,
903 struct btrfs_root *root,
904 struct btrfs_delayed_node *node)
905{
906 struct btrfs_delayed_item *curr, *prev;
907 int ret = 0;
908
909do_again:
910 mutex_lock(&node->mutex);
911 curr = __btrfs_first_delayed_deletion_item(node);
912 if (!curr)
913 goto delete_fail;
914
915 ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
916 if (ret < 0)
917 goto delete_fail;
918 else if (ret > 0) {
919 /*
920 * can't find the item which the node points to, so this node
921 * is invalid, just drop it.
922 */
923 prev = curr;
924 curr = __btrfs_next_delayed_item(prev);
925 btrfs_release_delayed_item(prev);
926 ret = 0;
927 btrfs_release_path(root, path);
928 if (curr)
929 goto do_again;
930 else
931 goto delete_fail;
932 }
933
934 btrfs_batch_delete_items(trans, root, path, curr);
935 btrfs_release_path(root, path);
936 mutex_unlock(&node->mutex);
937 goto do_again;
938
939delete_fail:
940 btrfs_release_path(root, path);
941 mutex_unlock(&node->mutex);
942 return ret;
943}
944
945static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
946{
947 struct btrfs_delayed_root *delayed_root;
948
949 if (delayed_node && delayed_node->inode_dirty) {
950 BUG_ON(!delayed_node->root);
951 delayed_node->inode_dirty = 0;
952 delayed_node->count--;
953
954 delayed_root = delayed_node->root->fs_info->delayed_root;
955 atomic_dec(&delayed_root->items);
956 if (atomic_read(&delayed_root->items) <
957 BTRFS_DELAYED_BACKGROUND &&
958 waitqueue_active(&delayed_root->wait))
959 wake_up(&delayed_root->wait);
960 }
961}
962
963static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
964 struct btrfs_root *root,
965 struct btrfs_path *path,
966 struct btrfs_delayed_node *node)
967{
968 struct btrfs_key key;
969 struct btrfs_inode_item *inode_item;
970 struct extent_buffer *leaf;
971 int ret;
972
973 mutex_lock(&node->mutex);
974 if (!node->inode_dirty) {
975 mutex_unlock(&node->mutex);
976 return 0;
977 }
978
979 key.objectid = node->inode_id;
980 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
981 key.offset = 0;
982 ret = btrfs_lookup_inode(trans, root, path, &key, 1);
983 if (ret > 0) {
984 btrfs_release_path(root, path);
985 mutex_unlock(&node->mutex);
986 return -ENOENT;
987 } else if (ret < 0) {
988 mutex_unlock(&node->mutex);
989 return ret;
990 }
991
992 btrfs_unlock_up_safe(path, 1);
993 leaf = path->nodes[0];
994 inode_item = btrfs_item_ptr(leaf, path->slots[0],
995 struct btrfs_inode_item);
996 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
997 sizeof(struct btrfs_inode_item));
998 btrfs_mark_buffer_dirty(leaf);
999 btrfs_release_path(root, path);
1000
1001 btrfs_delayed_inode_release_metadata(root, node);
1002 btrfs_release_delayed_inode(node);
1003 mutex_unlock(&node->mutex);
1004
1005 return 0;
1006}
1007
1008/* Called when committing the transaction. */
1009int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1010 struct btrfs_root *root)
1011{
1012 struct btrfs_delayed_root *delayed_root;
1013 struct btrfs_delayed_node *curr_node, *prev_node;
1014 struct btrfs_path *path;
1015 int ret = 0;
1016
1017 path = btrfs_alloc_path();
1018 if (!path)
1019 return -ENOMEM;
1020 path->leave_spinning = 1;
1021
1022 delayed_root = btrfs_get_delayed_root(root);
1023
1024 curr_node = btrfs_first_delayed_node(delayed_root);
1025 while (curr_node) {
1026 root = curr_node->root;
1027 ret = btrfs_insert_delayed_items(trans, path, root,
1028 curr_node);
1029 if (!ret)
1030 ret = btrfs_delete_delayed_items(trans, path, root,
1031 curr_node);
1032 if (!ret)
1033 ret = btrfs_update_delayed_inode(trans, root, path,
1034 curr_node);
1035 if (ret) {
1036 btrfs_release_delayed_node(curr_node);
1037 break;
1038 }
1039
1040 prev_node = curr_node;
1041 curr_node = btrfs_next_delayed_node(curr_node);
1042 btrfs_release_delayed_node(prev_node);
1043 }
1044
1045 btrfs_free_path(path);
1046 return ret;
1047}
1048
1049static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1050 struct btrfs_delayed_node *node)
1051{
1052 struct btrfs_path *path;
1053 int ret;
1054
1055 path = btrfs_alloc_path();
1056 if (!path)
1057 return -ENOMEM;
1058 path->leave_spinning = 1;
1059
1060 ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1061 if (!ret)
1062 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1063 if (!ret)
1064 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1065 btrfs_free_path(path);
1066
1067 return ret;
1068}
1069
1070int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1071 struct inode *inode)
1072{
1073 struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1074 int ret;
1075
1076 if (!delayed_node)
1077 return 0;
1078
1079 mutex_lock(&delayed_node->mutex);
1080 if (!delayed_node->count) {
1081 mutex_unlock(&delayed_node->mutex);
1082 btrfs_release_delayed_node(delayed_node);
1083 return 0;
1084 }
1085 mutex_unlock(&delayed_node->mutex);
1086
1087 ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
1088 btrfs_release_delayed_node(delayed_node);
1089 return ret;
1090}
1091
1092void btrfs_remove_delayed_node(struct inode *inode)
1093{
1094 struct btrfs_delayed_node *delayed_node;
1095
1096 delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1097 if (!delayed_node)
1098 return;
1099
1100 BTRFS_I(inode)->delayed_node = NULL;
1101 btrfs_release_delayed_node(delayed_node);
1102}
1103
1104struct btrfs_async_delayed_node {
1105 struct btrfs_root *root;
1106 struct btrfs_delayed_node *delayed_node;
1107 struct btrfs_work work;
1108};
1109
1110static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1111{
1112 struct btrfs_async_delayed_node *async_node;
1113 struct btrfs_trans_handle *trans;
1114 struct btrfs_path *path;
1115 struct btrfs_delayed_node *delayed_node = NULL;
1116 struct btrfs_root *root;
1117 unsigned long nr = 0;
1118 int need_requeue = 0;
1119 int ret;
1120
1121 async_node = container_of(work, struct btrfs_async_delayed_node, work);
1122
1123 path = btrfs_alloc_path();
1124 if (!path)
1125 goto out;
1126 path->leave_spinning = 1;
1127
1128 delayed_node = async_node->delayed_node;
1129 root = delayed_node->root;
1130
1131 trans = btrfs_join_transaction(root, 0);
1132 if (IS_ERR(trans))
1133 goto free_path;
1134
1135 ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1136 if (!ret)
1137 ret = btrfs_delete_delayed_items(trans, path, root,
1138 delayed_node);
1139
1140 if (!ret)
1141 btrfs_update_delayed_inode(trans, root, path, delayed_node);
1142
1143 /*
1144 * Maybe new delayed items have been inserted, so we need requeue
1145 * the work. Besides that, we must dequeue the empty delayed nodes
1146 * to avoid the race between delayed items balance and the worker.
1147 * The race like this:
1148 * Task1 Worker thread
1149 * count == 0, needn't requeue
1150 * also needn't insert the
1151 * delayed node into prepare
1152 * list again.
1153 * add lots of delayed items
1154 * queue the delayed node
1155 * already in the list,
1156 * and not in the prepare
1157 * list, it means the delayed
1158 * node is being dealt with
1159 * by the worker.
1160 * do delayed items balance
1161 * the delayed node is being
1162 * dealt with by the worker
1163 * now, just wait.
1164 * the worker goto idle.
1165 * Task1 will sleep until the transaction is commited.
1166 */
1167 mutex_lock(&delayed_node->mutex);
1168 if (delayed_node->count)
1169 need_requeue = 1;
1170 else
1171 btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
1172 delayed_node);
1173 mutex_unlock(&delayed_node->mutex);
1174
1175 nr = trans->blocks_used;
1176
1177 btrfs_end_transaction_dmeta(trans, root);
1178 __btrfs_btree_balance_dirty(root, nr);
1179free_path:
1180 btrfs_free_path(path);
1181out:
1182 if (need_requeue)
1183 btrfs_requeue_work(&async_node->work);
1184 else {
1185 btrfs_release_prepared_delayed_node(delayed_node);
1186 kfree(async_node);
1187 }
1188}
1189
1190static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1191 struct btrfs_root *root, int all)
1192{
1193 struct btrfs_async_delayed_node *async_node;
1194 struct btrfs_delayed_node *curr;
1195 int count = 0;
1196
1197again:
1198 curr = btrfs_first_prepared_delayed_node(delayed_root);
1199 if (!curr)
1200 return 0;
1201
1202 async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
1203 if (!async_node) {
1204 btrfs_release_prepared_delayed_node(curr);
1205 return -ENOMEM;
1206 }
1207
1208 async_node->root = root;
1209 async_node->delayed_node = curr;
1210
1211 async_node->work.func = btrfs_async_run_delayed_node_done;
1212 async_node->work.flags = 0;
1213
1214 btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
1215 count++;
1216
1217 if (all || count < 4)
1218 goto again;
1219
1220 return 0;
1221}
1222
1223void btrfs_balance_delayed_items(struct btrfs_root *root)
1224{
1225 struct btrfs_delayed_root *delayed_root;
1226
1227 delayed_root = btrfs_get_delayed_root(root);
1228
1229 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1230 return;
1231
1232 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1233 int ret;
1234 ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
1235 if (ret)
1236 return;
1237
1238 wait_event_interruptible_timeout(
1239 delayed_root->wait,
1240 (atomic_read(&delayed_root->items) <
1241 BTRFS_DELAYED_BACKGROUND),
1242 HZ);
1243 return;
1244 }
1245
1246 btrfs_wq_run_delayed_node(delayed_root, root, 0);
1247}
1248
1249int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1250 struct btrfs_root *root, const char *name,
1251 int name_len, struct inode *dir,
1252 struct btrfs_disk_key *disk_key, u8 type,
1253 u64 index)
1254{
1255 struct btrfs_delayed_node *delayed_node;
1256 struct btrfs_delayed_item *delayed_item;
1257 struct btrfs_dir_item *dir_item;
1258 int ret;
1259
1260 delayed_node = btrfs_get_or_create_delayed_node(dir);
1261 if (IS_ERR(delayed_node))
1262 return PTR_ERR(delayed_node);
1263
1264 delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1265 if (!delayed_item) {
1266 ret = -ENOMEM;
1267 goto release_node;
1268 }
1269
1270 ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1271 /*
1272 * we have reserved enough space when we start a new transaction,
1273 * so reserving metadata failure is impossible
1274 */
1275 BUG_ON(ret);
1276
1277 delayed_item->key.objectid = dir->i_ino;
1278 btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
1279 delayed_item->key.offset = index;
1280
1281 dir_item = (struct btrfs_dir_item *)delayed_item->data;
1282 dir_item->location = *disk_key;
1283 dir_item->transid = cpu_to_le64(trans->transid);
1284 dir_item->data_len = 0;
1285 dir_item->name_len = cpu_to_le16(name_len);
1286 dir_item->type = type;
1287 memcpy((char *)(dir_item + 1), name, name_len);
1288
1289 mutex_lock(&delayed_node->mutex);
1290 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1291 if (unlikely(ret)) {
1292 printk(KERN_ERR "err add delayed dir index item(name: %s) into "
1293 "the insertion tree of the delayed node"
1294 "(root id: %llu, inode id: %llu, errno: %d)\n",
1295 name,
1296 (unsigned long long)delayed_node->root->objectid,
1297 (unsigned long long)delayed_node->inode_id,
1298 ret);
1299 BUG();
1300 }
1301 mutex_unlock(&delayed_node->mutex);
1302
1303release_node:
1304 btrfs_release_delayed_node(delayed_node);
1305 return ret;
1306}
1307
1308static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1309 struct btrfs_delayed_node *node,
1310 struct btrfs_key *key)
1311{
1312 struct btrfs_delayed_item *item;
1313
1314 mutex_lock(&node->mutex);
1315 item = __btrfs_lookup_delayed_insertion_item(node, key);
1316 if (!item) {
1317 mutex_unlock(&node->mutex);
1318 return 1;
1319 }
1320
1321 btrfs_delayed_item_release_metadata(root, item);
1322 btrfs_release_delayed_item(item);
1323 mutex_unlock(&node->mutex);
1324 return 0;
1325}
1326
1327int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1328 struct btrfs_root *root, struct inode *dir,
1329 u64 index)
1330{
1331 struct btrfs_delayed_node *node;
1332 struct btrfs_delayed_item *item;
1333 struct btrfs_key item_key;
1334 int ret;
1335
1336 node = btrfs_get_or_create_delayed_node(dir);
1337 if (IS_ERR(node))
1338 return PTR_ERR(node);
1339
1340 item_key.objectid = dir->i_ino;
1341 btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
1342 item_key.offset = index;
1343
1344 ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1345 if (!ret)
1346 goto end;
1347
1348 item = btrfs_alloc_delayed_item(0);
1349 if (!item) {
1350 ret = -ENOMEM;
1351 goto end;
1352 }
1353
1354 item->key = item_key;
1355
1356 ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1357 /*
1358 * we have reserved enough space when we start a new transaction,
1359 * so reserving metadata failure is impossible.
1360 */
1361 BUG_ON(ret);
1362
1363 mutex_lock(&node->mutex);
1364 ret = __btrfs_add_delayed_deletion_item(node, item);
1365 if (unlikely(ret)) {
1366 printk(KERN_ERR "err add delayed dir index item(index: %llu) "
1367 "into the deletion tree of the delayed node"
1368 "(root id: %llu, inode id: %llu, errno: %d)\n",
1369 (unsigned long long)index,
1370 (unsigned long long)node->root->objectid,
1371 (unsigned long long)node->inode_id,
1372 ret);
1373 BUG();
1374 }
1375 mutex_unlock(&node->mutex);
1376end:
1377 btrfs_release_delayed_node(node);
1378 return ret;
1379}
1380
1381int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1382{
1383 struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
1384 int ret = 0;
1385
1386 if (!delayed_node)
1387 return -ENOENT;
1388
1389 /*
1390 * Since we have held i_mutex of this directory, it is impossible that
1391 * a new directory index is added into the delayed node and index_cnt
1392 * is updated now. So we needn't lock the delayed node.
1393 */
1394 if (!delayed_node->index_cnt)
1395 return -EINVAL;
1396
1397 BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1398 return ret;
1399}
1400
1401void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1402 struct list_head *del_list)
1403{
1404 struct btrfs_delayed_node *delayed_node;
1405 struct btrfs_delayed_item *item;
1406
1407 delayed_node = btrfs_get_delayed_node(inode);
1408 if (!delayed_node)
1409 return;
1410
1411 mutex_lock(&delayed_node->mutex);
1412 item = __btrfs_first_delayed_insertion_item(delayed_node);
1413 while (item) {
1414 atomic_inc(&item->refs);
1415 list_add_tail(&item->readdir_list, ins_list);
1416 item = __btrfs_next_delayed_item(item);
1417 }
1418
1419 item = __btrfs_first_delayed_deletion_item(delayed_node);
1420 while (item) {
1421 atomic_inc(&item->refs);
1422 list_add_tail(&item->readdir_list, del_list);
1423 item = __btrfs_next_delayed_item(item);
1424 }
1425 mutex_unlock(&delayed_node->mutex);
1426 /*
1427 * This delayed node is still cached in the btrfs inode, so refs
1428 * must be > 1 now, and we needn't check it is going to be freed
1429 * or not.
1430 *
1431 * Besides that, this function is used to read dir, we do not
1432 * insert/delete delayed items in this period. So we also needn't
1433 * requeue or dequeue this delayed node.
1434 */
1435 atomic_dec(&delayed_node->refs);
1436}
1437
1438void btrfs_put_delayed_items(struct list_head *ins_list,
1439 struct list_head *del_list)
1440{
1441 struct btrfs_delayed_item *curr, *next;
1442
1443 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1444 list_del(&curr->readdir_list);
1445 if (atomic_dec_and_test(&curr->refs))
1446 kfree(curr);
1447 }
1448
1449 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1450 list_del(&curr->readdir_list);
1451 if (atomic_dec_and_test(&curr->refs))
1452 kfree(curr);
1453 }
1454}
1455
1456int btrfs_should_delete_dir_index(struct list_head *del_list,
1457 u64 index)
1458{
1459 struct btrfs_delayed_item *curr, *next;
1460 int ret;
1461
1462 if (list_empty(del_list))
1463 return 0;
1464
1465 list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1466 if (curr->key.offset > index)
1467 break;
1468
1469 list_del(&curr->readdir_list);
1470 ret = (curr->key.offset == index);
1471
1472 if (atomic_dec_and_test(&curr->refs))
1473 kfree(curr);
1474
1475 if (ret)
1476 return 1;
1477 else
1478 continue;
1479 }
1480 return 0;
1481}
1482
1483/*
1484 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1485 *
1486 */
1487int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
1488 filldir_t filldir,
1489 struct list_head *ins_list)
1490{
1491 struct btrfs_dir_item *di;
1492 struct btrfs_delayed_item *curr, *next;
1493 struct btrfs_key location;
1494 char *name;
1495 int name_len;
1496 int over = 0;
1497 unsigned char d_type;
1498
1499 if (list_empty(ins_list))
1500 return 0;
1501
1502 /*
1503 * Changing the data of the delayed item is impossible. So
1504 * we needn't lock them. And we have held i_mutex of the
1505 * directory, nobody can delete any directory indexes now.
1506 */
1507 list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1508 list_del(&curr->readdir_list);
1509
1510 if (curr->key.offset < filp->f_pos) {
1511 if (atomic_dec_and_test(&curr->refs))
1512 kfree(curr);
1513 continue;
1514 }
1515
1516 filp->f_pos = curr->key.offset;
1517
1518 di = (struct btrfs_dir_item *)curr->data;
1519 name = (char *)(di + 1);
1520 name_len = le16_to_cpu(di->name_len);
1521
1522 d_type = btrfs_filetype_table[di->type];
1523 btrfs_disk_key_to_cpu(&location, &di->location);
1524
1525 over = filldir(dirent, name, name_len, curr->key.offset,
1526 location.objectid, d_type);
1527
1528 if (atomic_dec_and_test(&curr->refs))
1529 kfree(curr);
1530
1531 if (over)
1532 return 1;
1533 }
1534 return 0;
1535}
1536
1537BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1538 generation, 64);
1539BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1540 sequence, 64);
1541BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1542 transid, 64);
1543BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1544BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1545 nbytes, 64);
1546BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1547 block_group, 64);
1548BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1549BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1550BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1551BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1552BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1553BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1554
1555BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1556BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1557
1558static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1559 struct btrfs_inode_item *inode_item,
1560 struct inode *inode)
1561{
1562 btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
1563 btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
1564 btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1565 btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1566 btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1567 btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1568 btrfs_set_stack_inode_generation(inode_item,
1569 BTRFS_I(inode)->generation);
1570 btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
1571 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1572 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1573 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1574 btrfs_set_stack_inode_block_group(inode_item,
1575 BTRFS_I(inode)->block_group);
1576
1577 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1578 inode->i_atime.tv_sec);
1579 btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1580 inode->i_atime.tv_nsec);
1581
1582 btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1583 inode->i_mtime.tv_sec);
1584 btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1585 inode->i_mtime.tv_nsec);
1586
1587 btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1588 inode->i_ctime.tv_sec);
1589 btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1590 inode->i_ctime.tv_nsec);
1591}
1592
1593int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1594 struct btrfs_root *root, struct inode *inode)
1595{
1596 struct btrfs_delayed_node *delayed_node;
1597 int ret;
1598
1599 delayed_node = btrfs_get_or_create_delayed_node(inode);
1600 if (IS_ERR(delayed_node))
1601 return PTR_ERR(delayed_node);
1602
1603 mutex_lock(&delayed_node->mutex);
1604 if (delayed_node->inode_dirty) {
1605 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1606 goto release_node;
1607 }
1608
1609 ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
1610 /*
1611 * we must reserve enough space when we start a new transaction,
1612 * so reserving metadata failure is impossible
1613 */
1614 BUG_ON(ret);
1615
1616 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1617 delayed_node->inode_dirty = 1;
1618 delayed_node->count++;
1619 atomic_inc(&root->fs_info->delayed_root->items);
1620release_node:
1621 mutex_unlock(&delayed_node->mutex);
1622 btrfs_release_delayed_node(delayed_node);
1623 return ret;
1624}
1625
1626static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1627{
1628 struct btrfs_root *root = delayed_node->root;
1629 struct btrfs_delayed_item *curr_item, *prev_item;
1630
1631 mutex_lock(&delayed_node->mutex);
1632 curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1633 while (curr_item) {
1634 btrfs_delayed_item_release_metadata(root, curr_item);
1635 prev_item = curr_item;
1636 curr_item = __btrfs_next_delayed_item(prev_item);
1637 btrfs_release_delayed_item(prev_item);
1638 }
1639
1640 curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1641 while (curr_item) {
1642 btrfs_delayed_item_release_metadata(root, curr_item);
1643 prev_item = curr_item;
1644 curr_item = __btrfs_next_delayed_item(prev_item);
1645 btrfs_release_delayed_item(prev_item);
1646 }
1647
1648 if (delayed_node->inode_dirty) {
1649 btrfs_delayed_inode_release_metadata(root, delayed_node);
1650 btrfs_release_delayed_inode(delayed_node);
1651 }
1652 mutex_unlock(&delayed_node->mutex);
1653}
1654
1655void btrfs_kill_delayed_inode_items(struct inode *inode)
1656{
1657 struct btrfs_delayed_node *delayed_node;
1658
1659 delayed_node = btrfs_get_delayed_node(inode);
1660 if (!delayed_node)
1661 return;
1662
1663 __btrfs_kill_delayed_node(delayed_node);
1664 btrfs_release_delayed_node(delayed_node);
1665}
1666
1667void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1668{
1669 u64 inode_id = 0;
1670 struct btrfs_delayed_node *delayed_nodes[8];
1671 int i, n;
1672
1673 while (1) {
1674 spin_lock(&root->inode_lock);
1675 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1676 (void **)delayed_nodes, inode_id,
1677 ARRAY_SIZE(delayed_nodes));
1678 if (!n) {
1679 spin_unlock(&root->inode_lock);
1680 break;
1681 }
1682
1683 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1684
1685 for (i = 0; i < n; i++)
1686 atomic_inc(&delayed_nodes[i]->refs);
1687 spin_unlock(&root->inode_lock);
1688
1689 for (i = 0; i < n; i++) {
1690 __btrfs_kill_delayed_node(delayed_nodes[i]);
1691 btrfs_release_delayed_node(delayed_nodes[i]);
1692 }
1693 }
1694}