aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c127
1 files changed, 117 insertions, 10 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 50e81f43e6d4..ff3261ff2e19 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007 Oracle. All rights reserved. 2 * Copyright (C) 2007,2008 Oracle. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public 5 * modify it under the terms of the GNU General Public
@@ -54,12 +54,19 @@ struct btrfs_path *btrfs_alloc_path(void)
54 return path; 54 return path;
55} 55}
56 56
57/* this also releases the path */
57void btrfs_free_path(struct btrfs_path *p) 58void btrfs_free_path(struct btrfs_path *p)
58{ 59{
59 btrfs_release_path(NULL, p); 60 btrfs_release_path(NULL, p);
60 kmem_cache_free(btrfs_path_cachep, p); 61 kmem_cache_free(btrfs_path_cachep, p);
61} 62}
62 63
64/*
65 * path release drops references on the extent buffers in the path
66 * and it drops any locks held by this path
67 *
68 * It is safe to call this on paths that no locks or extent buffers held.
69 */
63void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 70void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
64{ 71{
65 int i; 72 int i;
@@ -77,6 +84,16 @@ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
77 } 84 }
78} 85}
79 86
87/*
88 * safely gets a reference on the root node of a tree. A lock
89 * is not taken, so a concurrent writer may put a different node
90 * at the root of the tree. See btrfs_lock_root_node for the
91 * looping required.
92 *
93 * The extent buffer returned by this has a reference taken, so
94 * it won't disappear. It may stop being the root of the tree
95 * at any time because there are no locks held.
96 */
80struct extent_buffer *btrfs_root_node(struct btrfs_root *root) 97struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
81{ 98{
82 struct extent_buffer *eb; 99 struct extent_buffer *eb;
@@ -87,6 +104,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
87 return eb; 104 return eb;
88} 105}
89 106
107/* loop around taking references on and locking the root node of the
108 * tree until you end up with a lock on the root. A locked buffer
109 * is returned, with a reference held.
110 */
90struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) 111struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
91{ 112{
92 struct extent_buffer *eb; 113 struct extent_buffer *eb;
@@ -108,6 +129,10 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
108 return eb; 129 return eb;
109} 130}
110 131
132/* cowonly root (everything not a reference counted cow subvolume), just get
133 * put onto a simple dirty list. transaction.c walks this to make sure they
134 * get properly updated on disk.
135 */
111static void add_root_to_dirty_list(struct btrfs_root *root) 136static void add_root_to_dirty_list(struct btrfs_root *root)
112{ 137{
113 if (root->track_dirty && list_empty(&root->dirty_list)) { 138 if (root->track_dirty && list_empty(&root->dirty_list)) {
@@ -116,6 +141,11 @@ static void add_root_to_dirty_list(struct btrfs_root *root)
116 } 141 }
117} 142}
118 143
144/*
145 * used by snapshot creation to make a copy of a root for a tree with
146 * a given objectid. The buffer with the new root node is returned in
147 * cow_ret, and this func returns zero on success or a negative error code.
148 */
119int btrfs_copy_root(struct btrfs_trans_handle *trans, 149int btrfs_copy_root(struct btrfs_trans_handle *trans,
120 struct btrfs_root *root, 150 struct btrfs_root *root,
121 struct extent_buffer *buf, 151 struct extent_buffer *buf,
@@ -167,6 +197,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
167 return 0; 197 return 0;
168} 198}
169 199
200/*
201 * does the dirty work in cow of a single block. The parent block
202 * (if supplied) is updated to point to the new cow copy. The new
203 * buffer is marked dirty and returned locked. If you modify the block
204 * it needs to be marked dirty again.
205 *
206 * search_start -- an allocation hint for the new block
207 *
208 * empty_size -- a hint that you plan on doing more cow. This is the size in bytes
209 * the allocator should try to find free next to the block it returns. This is
210 * just a hint and may be ignored by the allocator.
211 *
212 * prealloc_dest -- if you have already reserved a destination for the cow,
213 * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent
214 * is used to finish the allocation.
215 */
170int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, 216int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
171 struct btrfs_root *root, 217 struct btrfs_root *root,
172 struct extent_buffer *buf, 218 struct extent_buffer *buf,
@@ -311,6 +357,11 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
311 return 0; 357 return 0;
312} 358}
313 359
360/*
361 * cows a single block, see __btrfs_cow_block for the real work.
362 * This version of it has extra checks so that a block isn't cow'd more than
363 * once per transaction, as long as it hasn't been written yet
364 */
314int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, 365int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
315 struct btrfs_root *root, struct extent_buffer *buf, 366 struct btrfs_root *root, struct extent_buffer *buf,
316 struct extent_buffer *parent, int parent_slot, 367 struct extent_buffer *parent, int parent_slot,
@@ -347,6 +398,10 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
347 return ret; 398 return ret;
348} 399}
349 400
401/*
402 * helper function for defrag to decide if two blocks pointed to by a
403 * node are actually close by
404 */
350static int close_blocks(u64 blocknr, u64 other, u32 blocksize) 405static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
351{ 406{
352 if (blocknr < other && other - (blocknr + blocksize) < 32768) 407 if (blocknr < other && other - (blocknr + blocksize) < 32768)
@@ -381,6 +436,11 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
381} 436}
382 437
383 438
439/*
440 * this is used by the defrag code to go through all the
441 * leaves pointed to by a node and reallocate them so that
442 * disk order is close to key order
443 */
384int btrfs_realloc_node(struct btrfs_trans_handle *trans, 444int btrfs_realloc_node(struct btrfs_trans_handle *trans,
385 struct btrfs_root *root, struct extent_buffer *parent, 445 struct btrfs_root *root, struct extent_buffer *parent,
386 int start_slot, int cache_only, u64 *last_ret, 446 int start_slot, int cache_only, u64 *last_ret,
@@ -521,6 +581,10 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
521 return btrfs_item_offset_nr(leaf, nr - 1); 581 return btrfs_item_offset_nr(leaf, nr - 1);
522} 582}
523 583
584/*
585 * extra debugging checks to make sure all the items in a key are
586 * well formed and in the proper order
587 */
524static int check_node(struct btrfs_root *root, struct btrfs_path *path, 588static int check_node(struct btrfs_root *root, struct btrfs_path *path,
525 int level) 589 int level)
526{ 590{
@@ -561,6 +625,10 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path,
561 return 0; 625 return 0;
562} 626}
563 627
628/*
629 * extra checking to make sure all the items in a leaf are
630 * well formed and in the proper order
631 */
564static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, 632static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
565 int level) 633 int level)
566{ 634{
@@ -782,6 +850,10 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
782 return -1; 850 return -1;
783} 851}
784 852
853/* given a node and slot number, this reads the blocks it points to. The
854 * extent buffer is returned with a reference taken (but unlocked).
855 * NULL is returned on error.
856 */
785static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, 857static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
786 struct extent_buffer *parent, int slot) 858 struct extent_buffer *parent, int slot)
787{ 859{
@@ -798,6 +870,11 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
798 btrfs_node_ptr_generation(parent, slot)); 870 btrfs_node_ptr_generation(parent, slot));
799} 871}
800 872
873/*
874 * node level balancing, used to make sure nodes are in proper order for
875 * item deletion. We balance from the top down, so we have to make sure
876 * that a deletion won't leave an node completely empty later on.
877 */
801static noinline int balance_level(struct btrfs_trans_handle *trans, 878static noinline int balance_level(struct btrfs_trans_handle *trans,
802 struct btrfs_root *root, 879 struct btrfs_root *root,
803 struct btrfs_path *path, int level) 880 struct btrfs_path *path, int level)
@@ -1024,7 +1101,10 @@ enospc:
1024 return ret; 1101 return ret;
1025} 1102}
1026 1103
1027/* returns zero if the push worked, non-zero otherwise */ 1104/* Node balancing for insertion. Here we only split or push nodes around
1105 * when they are completely full. This is also done top down, so we
1106 * have to be pessimistic.
1107 */
1028static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, 1108static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
1029 struct btrfs_root *root, 1109 struct btrfs_root *root,
1030 struct btrfs_path *path, int level) 1110 struct btrfs_path *path, int level)
@@ -1150,7 +1230,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
1150} 1230}
1151 1231
1152/* 1232/*
1153 * readahead one full node of leaves 1233 * readahead one full node of leaves, finding things that are close
1234 * to the block in 'slot', and triggering ra on them.
1154 */ 1235 */
1155static noinline void reada_for_search(struct btrfs_root *root, 1236static noinline void reada_for_search(struct btrfs_root *root,
1156 struct btrfs_path *path, 1237 struct btrfs_path *path,
@@ -1226,6 +1307,19 @@ static noinline void reada_for_search(struct btrfs_root *root,
1226 } 1307 }
1227} 1308}
1228 1309
1310/*
1311 * when we walk down the tree, it is usually safe to unlock the higher layers in
1312 * the tree. The exceptions are when our path goes through slot 0, because operations
1313 * on the tree might require changing key pointers higher up in the tree.
1314 *
1315 * callers might also have set path->keep_locks, which tells this code to
1316 * keep the lock if the path points to the last slot in the block. This is
1317 * part of walking through the tree, and selecting the next slot in the higher
1318 * block.
1319 *
1320 * lowest_unlock sets the lowest level in the tree we're allowed to unlock.
1321 * so if lowest_unlock is 1, level 0 won't be unlocked
1322 */
1229static noinline void unlock_up(struct btrfs_path *path, int level, 1323static noinline void unlock_up(struct btrfs_path *path, int level,
1230 int lowest_unlock) 1324 int lowest_unlock)
1231{ 1325{
@@ -2705,6 +2799,12 @@ again:
2705 return ret; 2799 return ret;
2706} 2800}
2707 2801
2802/*
2803 * make the item pointed to by the path smaller. new_size indicates
2804 * how small to make it, and from_end tells us if we just chop bytes
2805 * off the end of the item or if we shift the item to chop bytes off
2806 * the front.
2807 */
2708int btrfs_truncate_item(struct btrfs_trans_handle *trans, 2808int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2709 struct btrfs_root *root, 2809 struct btrfs_root *root,
2710 struct btrfs_path *path, 2810 struct btrfs_path *path,
@@ -2818,6 +2918,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2818 return ret; 2918 return ret;
2819} 2919}
2820 2920
2921/*
2922 * make the item pointed to by the path bigger, data_size is the new size.
2923 */
2821int btrfs_extend_item(struct btrfs_trans_handle *trans, 2924int btrfs_extend_item(struct btrfs_trans_handle *trans,
2822 struct btrfs_root *root, struct btrfs_path *path, 2925 struct btrfs_root *root, struct btrfs_path *path,
2823 u32 data_size) 2926 u32 data_size)
@@ -2897,7 +3000,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
2897} 3000}
2898 3001
2899/* 3002/*
2900 * Given a key and some data, insert an item into the tree. 3003 * Given a key and some data, insert items into the tree.
2901 * This does all the path init required, making room in the tree if needed. 3004 * This does all the path init required, making room in the tree if needed.
2902 */ 3005 */
2903int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 3006int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
@@ -3046,9 +3149,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3046/* 3149/*
3047 * delete the pointer from a given node. 3150 * delete the pointer from a given node.
3048 * 3151 *
3049 * If the delete empties a node, the node is removed from the tree, 3152 * the tree should have been previously balanced so the deletion does not
3050 * continuing all the way the root if required. The root is converted into 3153 * empty a node.
3051 * a leaf if all the nodes are emptied.
3052 */ 3154 */
3053static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3155static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3054 struct btrfs_path *path, int level, int slot) 3156 struct btrfs_path *path, int level, int slot)
@@ -3233,6 +3335,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3233 * search the tree again to find a leaf with lesser keys 3335 * search the tree again to find a leaf with lesser keys
3234 * returns 0 if it found something or 1 if there are no lesser leaves. 3336 * returns 0 if it found something or 1 if there are no lesser leaves.
3235 * returns < 0 on io errors. 3337 * returns < 0 on io errors.
3338 *
3339 * This may release the path, and so you may lose any locks held at the
3340 * time you call it.
3236 */ 3341 */
3237int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 3342int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3238{ 3343{
@@ -3265,9 +3370,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3265/* 3370/*
3266 * A helper function to walk down the tree starting at min_key, and looking 3371 * A helper function to walk down the tree starting at min_key, and looking
3267 * for nodes or leaves that are either in cache or have a minimum 3372 * for nodes or leaves that are either in cache or have a minimum
3268 * transaction id. This is used by the btree defrag code, but could 3373 * transaction id. This is used by the btree defrag code, and tree logging
3269 * also be used to search for blocks that have changed since a given
3270 * transaction id.
3271 * 3374 *
3272 * This does not cow, but it does stuff the starting key it finds back 3375 * This does not cow, but it does stuff the starting key it finds back
3273 * into min_key, so you can call btrfs_search_slot with cow=1 on the 3376 * into min_key, so you can call btrfs_search_slot with cow=1 on the
@@ -3279,6 +3382,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3279 * This honors path->lowest_level to prevent descent past a given level 3382 * This honors path->lowest_level to prevent descent past a given level
3280 * of the tree. 3383 * of the tree.
3281 * 3384 *
3385 * min_trans indicates the oldest transaction that you are interested
3386 * in walking through. Any nodes or leaves older than min_trans are
3387 * skipped over (without reading them).
3388 *
3282 * returns zero if something useful was found, < 0 on error and 1 if there 3389 * returns zero if something useful was found, < 0 on error and 1 if there
3283 * was nothing in the tree that matched the search criteria. 3390 * was nothing in the tree that matched the search criteria.
3284 */ 3391 */