diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 127 |
1 files changed, 117 insertions, 10 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50e81f43e6d4..ff3261ff2e19 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007 Oracle. All rights reserved. | 2 | * Copyright (C) 2007,2008 Oracle. All rights reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public | 5 | * modify it under the terms of the GNU General Public |
@@ -54,12 +54,19 @@ struct btrfs_path *btrfs_alloc_path(void) | |||
54 | return path; | 54 | return path; |
55 | } | 55 | } |
56 | 56 | ||
57 | /* this also releases the path */ | ||
57 | void btrfs_free_path(struct btrfs_path *p) | 58 | void btrfs_free_path(struct btrfs_path *p) |
58 | { | 59 | { |
59 | btrfs_release_path(NULL, p); | 60 | btrfs_release_path(NULL, p); |
60 | kmem_cache_free(btrfs_path_cachep, p); | 61 | kmem_cache_free(btrfs_path_cachep, p); |
61 | } | 62 | } |
62 | 63 | ||
64 | /* | ||
65 | * path release drops references on the extent buffers in the path | ||
66 | * and it drops any locks held by this path | ||
67 | * | ||
68 | * It is safe to call this on paths that no locks or extent buffers held. | ||
69 | */ | ||
63 | void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | 70 | void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) |
64 | { | 71 | { |
65 | int i; | 72 | int i; |
@@ -77,6 +84,16 @@ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | |||
77 | } | 84 | } |
78 | } | 85 | } |
79 | 86 | ||
87 | /* | ||
88 | * safely gets a reference on the root node of a tree. A lock | ||
89 | * is not taken, so a concurrent writer may put a different node | ||
90 | * at the root of the tree. See btrfs_lock_root_node for the | ||
91 | * looping required. | ||
92 | * | ||
93 | * The extent buffer returned by this has a reference taken, so | ||
94 | * it won't disappear. It may stop being the root of the tree | ||
95 | * at any time because there are no locks held. | ||
96 | */ | ||
80 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root) | 97 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root) |
81 | { | 98 | { |
82 | struct extent_buffer *eb; | 99 | struct extent_buffer *eb; |
@@ -87,6 +104,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) | |||
87 | return eb; | 104 | return eb; |
88 | } | 105 | } |
89 | 106 | ||
107 | /* loop around taking references on and locking the root node of the | ||
108 | * tree until you end up with a lock on the root. A locked buffer | ||
109 | * is returned, with a reference held. | ||
110 | */ | ||
90 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | 111 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) |
91 | { | 112 | { |
92 | struct extent_buffer *eb; | 113 | struct extent_buffer *eb; |
@@ -108,6 +129,10 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | |||
108 | return eb; | 129 | return eb; |
109 | } | 130 | } |
110 | 131 | ||
132 | /* cowonly root (everything not a reference counted cow subvolume), just get | ||
133 | * put onto a simple dirty list. transaction.c walks this to make sure they | ||
134 | * get properly updated on disk. | ||
135 | */ | ||
111 | static void add_root_to_dirty_list(struct btrfs_root *root) | 136 | static void add_root_to_dirty_list(struct btrfs_root *root) |
112 | { | 137 | { |
113 | if (root->track_dirty && list_empty(&root->dirty_list)) { | 138 | if (root->track_dirty && list_empty(&root->dirty_list)) { |
@@ -116,6 +141,11 @@ static void add_root_to_dirty_list(struct btrfs_root *root) | |||
116 | } | 141 | } |
117 | } | 142 | } |
118 | 143 | ||
144 | /* | ||
145 | * used by snapshot creation to make a copy of a root for a tree with | ||
146 | * a given objectid. The buffer with the new root node is returned in | ||
147 | * cow_ret, and this func returns zero on success or a negative error code. | ||
148 | */ | ||
119 | int btrfs_copy_root(struct btrfs_trans_handle *trans, | 149 | int btrfs_copy_root(struct btrfs_trans_handle *trans, |
120 | struct btrfs_root *root, | 150 | struct btrfs_root *root, |
121 | struct extent_buffer *buf, | 151 | struct extent_buffer *buf, |
@@ -167,6 +197,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
167 | return 0; | 197 | return 0; |
168 | } | 198 | } |
169 | 199 | ||
200 | /* | ||
201 | * does the dirty work in cow of a single block. The parent block | ||
202 | * (if supplied) is updated to point to the new cow copy. The new | ||
203 | * buffer is marked dirty and returned locked. If you modify the block | ||
204 | * it needs to be marked dirty again. | ||
205 | * | ||
206 | * search_start -- an allocation hint for the new block | ||
207 | * | ||
208 | * empty_size -- a hint that you plan on doing more cow. This is the size in bytes | ||
209 | * the allocator should try to find free next to the block it returns. This is | ||
210 | * just a hint and may be ignored by the allocator. | ||
211 | * | ||
212 | * prealloc_dest -- if you have already reserved a destination for the cow, | ||
213 | * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent | ||
214 | * is used to finish the allocation. | ||
215 | */ | ||
170 | int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | 216 | int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, |
171 | struct btrfs_root *root, | 217 | struct btrfs_root *root, |
172 | struct extent_buffer *buf, | 218 | struct extent_buffer *buf, |
@@ -311,6 +357,11 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
311 | return 0; | 357 | return 0; |
312 | } | 358 | } |
313 | 359 | ||
360 | /* | ||
361 | * cows a single block, see __btrfs_cow_block for the real work. | ||
362 | * This version of it has extra checks so that a block isn't cow'd more than | ||
363 | * once per transaction, as long as it hasn't been written yet | ||
364 | */ | ||
314 | int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, | 365 | int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, |
315 | struct btrfs_root *root, struct extent_buffer *buf, | 366 | struct btrfs_root *root, struct extent_buffer *buf, |
316 | struct extent_buffer *parent, int parent_slot, | 367 | struct extent_buffer *parent, int parent_slot, |
@@ -347,6 +398,10 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
347 | return ret; | 398 | return ret; |
348 | } | 399 | } |
349 | 400 | ||
401 | /* | ||
402 | * helper function for defrag to decide if two blocks pointed to by a | ||
403 | * node are actually close by | ||
404 | */ | ||
350 | static int close_blocks(u64 blocknr, u64 other, u32 blocksize) | 405 | static int close_blocks(u64 blocknr, u64 other, u32 blocksize) |
351 | { | 406 | { |
352 | if (blocknr < other && other - (blocknr + blocksize) < 32768) | 407 | if (blocknr < other && other - (blocknr + blocksize) < 32768) |
@@ -381,6 +436,11 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) | |||
381 | } | 436 | } |
382 | 437 | ||
383 | 438 | ||
439 | /* | ||
440 | * this is used by the defrag code to go through all the | ||
441 | * leaves pointed to by a node and reallocate them so that | ||
442 | * disk order is close to key order | ||
443 | */ | ||
384 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 444 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
385 | struct btrfs_root *root, struct extent_buffer *parent, | 445 | struct btrfs_root *root, struct extent_buffer *parent, |
386 | int start_slot, int cache_only, u64 *last_ret, | 446 | int start_slot, int cache_only, u64 *last_ret, |
@@ -521,6 +581,10 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, | |||
521 | return btrfs_item_offset_nr(leaf, nr - 1); | 581 | return btrfs_item_offset_nr(leaf, nr - 1); |
522 | } | 582 | } |
523 | 583 | ||
584 | /* | ||
585 | * extra debugging checks to make sure all the items in a key are | ||
586 | * well formed and in the proper order | ||
587 | */ | ||
524 | static int check_node(struct btrfs_root *root, struct btrfs_path *path, | 588 | static int check_node(struct btrfs_root *root, struct btrfs_path *path, |
525 | int level) | 589 | int level) |
526 | { | 590 | { |
@@ -561,6 +625,10 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, | |||
561 | return 0; | 625 | return 0; |
562 | } | 626 | } |
563 | 627 | ||
628 | /* | ||
629 | * extra checking to make sure all the items in a leaf are | ||
630 | * well formed and in the proper order | ||
631 | */ | ||
564 | static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, | 632 | static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, |
565 | int level) | 633 | int level) |
566 | { | 634 | { |
@@ -782,6 +850,10 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, | |||
782 | return -1; | 850 | return -1; |
783 | } | 851 | } |
784 | 852 | ||
853 | /* given a node and slot number, this reads the blocks it points to. The | ||
854 | * extent buffer is returned with a reference taken (but unlocked). | ||
855 | * NULL is returned on error. | ||
856 | */ | ||
785 | static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, | 857 | static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, |
786 | struct extent_buffer *parent, int slot) | 858 | struct extent_buffer *parent, int slot) |
787 | { | 859 | { |
@@ -798,6 +870,11 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, | |||
798 | btrfs_node_ptr_generation(parent, slot)); | 870 | btrfs_node_ptr_generation(parent, slot)); |
799 | } | 871 | } |
800 | 872 | ||
873 | /* | ||
874 | * node level balancing, used to make sure nodes are in proper order for | ||
875 | * item deletion. We balance from the top down, so we have to make sure | ||
876 | * that a deletion won't leave an node completely empty later on. | ||
877 | */ | ||
801 | static noinline int balance_level(struct btrfs_trans_handle *trans, | 878 | static noinline int balance_level(struct btrfs_trans_handle *trans, |
802 | struct btrfs_root *root, | 879 | struct btrfs_root *root, |
803 | struct btrfs_path *path, int level) | 880 | struct btrfs_path *path, int level) |
@@ -1024,7 +1101,10 @@ enospc: | |||
1024 | return ret; | 1101 | return ret; |
1025 | } | 1102 | } |
1026 | 1103 | ||
1027 | /* returns zero if the push worked, non-zero otherwise */ | 1104 | /* Node balancing for insertion. Here we only split or push nodes around |
1105 | * when they are completely full. This is also done top down, so we | ||
1106 | * have to be pessimistic. | ||
1107 | */ | ||
1028 | static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, | 1108 | static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, |
1029 | struct btrfs_root *root, | 1109 | struct btrfs_root *root, |
1030 | struct btrfs_path *path, int level) | 1110 | struct btrfs_path *path, int level) |
@@ -1150,7 +1230,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1150 | } | 1230 | } |
1151 | 1231 | ||
1152 | /* | 1232 | /* |
1153 | * readahead one full node of leaves | 1233 | * readahead one full node of leaves, finding things that are close |
1234 | * to the block in 'slot', and triggering ra on them. | ||
1154 | */ | 1235 | */ |
1155 | static noinline void reada_for_search(struct btrfs_root *root, | 1236 | static noinline void reada_for_search(struct btrfs_root *root, |
1156 | struct btrfs_path *path, | 1237 | struct btrfs_path *path, |
@@ -1226,6 +1307,19 @@ static noinline void reada_for_search(struct btrfs_root *root, | |||
1226 | } | 1307 | } |
1227 | } | 1308 | } |
1228 | 1309 | ||
1310 | /* | ||
1311 | * when we walk down the tree, it is usually safe to unlock the higher layers in | ||
1312 | * the tree. The exceptions are when our path goes through slot 0, because operations | ||
1313 | * on the tree might require changing key pointers higher up in the tree. | ||
1314 | * | ||
1315 | * callers might also have set path->keep_locks, which tells this code to | ||
1316 | * keep the lock if the path points to the last slot in the block. This is | ||
1317 | * part of walking through the tree, and selecting the next slot in the higher | ||
1318 | * block. | ||
1319 | * | ||
1320 | * lowest_unlock sets the lowest level in the tree we're allowed to unlock. | ||
1321 | * so if lowest_unlock is 1, level 0 won't be unlocked | ||
1322 | */ | ||
1229 | static noinline void unlock_up(struct btrfs_path *path, int level, | 1323 | static noinline void unlock_up(struct btrfs_path *path, int level, |
1230 | int lowest_unlock) | 1324 | int lowest_unlock) |
1231 | { | 1325 | { |
@@ -2705,6 +2799,12 @@ again: | |||
2705 | return ret; | 2799 | return ret; |
2706 | } | 2800 | } |
2707 | 2801 | ||
2802 | /* | ||
2803 | * make the item pointed to by the path smaller. new_size indicates | ||
2804 | * how small to make it, and from_end tells us if we just chop bytes | ||
2805 | * off the end of the item or if we shift the item to chop bytes off | ||
2806 | * the front. | ||
2807 | */ | ||
2708 | int btrfs_truncate_item(struct btrfs_trans_handle *trans, | 2808 | int btrfs_truncate_item(struct btrfs_trans_handle *trans, |
2709 | struct btrfs_root *root, | 2809 | struct btrfs_root *root, |
2710 | struct btrfs_path *path, | 2810 | struct btrfs_path *path, |
@@ -2818,6 +2918,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, | |||
2818 | return ret; | 2918 | return ret; |
2819 | } | 2919 | } |
2820 | 2920 | ||
2921 | /* | ||
2922 | * make the item pointed to by the path bigger, data_size is the new size. | ||
2923 | */ | ||
2821 | int btrfs_extend_item(struct btrfs_trans_handle *trans, | 2924 | int btrfs_extend_item(struct btrfs_trans_handle *trans, |
2822 | struct btrfs_root *root, struct btrfs_path *path, | 2925 | struct btrfs_root *root, struct btrfs_path *path, |
2823 | u32 data_size) | 2926 | u32 data_size) |
@@ -2897,7 +3000,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, | |||
2897 | } | 3000 | } |
2898 | 3001 | ||
2899 | /* | 3002 | /* |
2900 | * Given a key and some data, insert an item into the tree. | 3003 | * Given a key and some data, insert items into the tree. |
2901 | * This does all the path init required, making room in the tree if needed. | 3004 | * This does all the path init required, making room in the tree if needed. |
2902 | */ | 3005 | */ |
2903 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | 3006 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, |
@@ -3046,9 +3149,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3046 | /* | 3149 | /* |
3047 | * delete the pointer from a given node. | 3150 | * delete the pointer from a given node. |
3048 | * | 3151 | * |
3049 | * If the delete empties a node, the node is removed from the tree, | 3152 | * the tree should have been previously balanced so the deletion does not |
3050 | * continuing all the way the root if required. The root is converted into | 3153 | * empty a node. |
3051 | * a leaf if all the nodes are emptied. | ||
3052 | */ | 3154 | */ |
3053 | static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3155 | static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3054 | struct btrfs_path *path, int level, int slot) | 3156 | struct btrfs_path *path, int level, int slot) |
@@ -3233,6 +3335,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3233 | * search the tree again to find a leaf with lesser keys | 3335 | * search the tree again to find a leaf with lesser keys |
3234 | * returns 0 if it found something or 1 if there are no lesser leaves. | 3336 | * returns 0 if it found something or 1 if there are no lesser leaves. |
3235 | * returns < 0 on io errors. | 3337 | * returns < 0 on io errors. |
3338 | * | ||
3339 | * This may release the path, and so you may lose any locks held at the | ||
3340 | * time you call it. | ||
3236 | */ | 3341 | */ |
3237 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | 3342 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) |
3238 | { | 3343 | { |
@@ -3265,9 +3370,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3265 | /* | 3370 | /* |
3266 | * A helper function to walk down the tree starting at min_key, and looking | 3371 | * A helper function to walk down the tree starting at min_key, and looking |
3267 | * for nodes or leaves that are either in cache or have a minimum | 3372 | * for nodes or leaves that are either in cache or have a minimum |
3268 | * transaction id. This is used by the btree defrag code, but could | 3373 | * transaction id. This is used by the btree defrag code, and tree logging |
3269 | * also be used to search for blocks that have changed since a given | ||
3270 | * transaction id. | ||
3271 | * | 3374 | * |
3272 | * This does not cow, but it does stuff the starting key it finds back | 3375 | * This does not cow, but it does stuff the starting key it finds back |
3273 | * into min_key, so you can call btrfs_search_slot with cow=1 on the | 3376 | * into min_key, so you can call btrfs_search_slot with cow=1 on the |
@@ -3279,6 +3382,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3279 | * This honors path->lowest_level to prevent descent past a given level | 3382 | * This honors path->lowest_level to prevent descent past a given level |
3280 | * of the tree. | 3383 | * of the tree. |
3281 | * | 3384 | * |
3385 | * min_trans indicates the oldest transaction that you are interested | ||
3386 | * in walking through. Any nodes or leaves older than min_trans are | ||
3387 | * skipped over (without reading them). | ||
3388 | * | ||
3282 | * returns zero if something useful was found, < 0 on error and 1 if there | 3389 | * returns zero if something useful was found, < 0 on error and 1 if there |
3283 | * was nothing in the tree that matched the search criteria. | 3390 | * was nothing in the tree that matched the search criteria. |
3284 | */ | 3391 | */ |