aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/Makefile11
-rw-r--r--fs/btrfs/ctree.c432
-rw-r--r--fs/btrfs/ctree.h62
-rw-r--r--fs/btrfs/disk-io.c174
-rw-r--r--fs/btrfs/disk-io.h21
-rw-r--r--fs/btrfs/kerncompat.h1
-rw-r--r--fs/btrfs/radix-tree.c836
-rw-r--r--fs/btrfs/radix-tree.h73
8 files changed, 1447 insertions, 163 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9f84c08baab2..63360212a075 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -1,7 +1,12 @@
1 1
2ctree: ctree.o 2CFLAGS= -g -Wall
3 gcc -g -O2 -Wall -o ctree ctree.c 3
4.c.o:
5 $(CC) $(CFLAGS) -c $<
6
7ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h
8 gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o
4 9
5clean: 10clean:
6 rm ctree ctree.o 11 rm ctree *.o
7 12
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4bf5e92584bd..6f0522f21082 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1,68 +1,25 @@
1#include <stdio.h> 1#include <stdio.h>
2#include <stdlib.h> 2#include <stdlib.h>
3#include "kerncompat.h" 3#include "kerncompat.h"
4 4#include "radix-tree.h"
5#define BLOCKSIZE 4096 5#include "ctree.h"
6 6#include "disk-io.h"
7struct key {
8 u64 objectid;
9 u32 flags;
10 u64 offset;
11} __attribute__ ((__packed__));
12
13struct header {
14 u64 fsid[2]; /* FS specific uuid */
15 u64 blocknum;
16 u64 parentid;
17 u32 csum;
18 u32 ham;
19 u16 nritems;
20 u16 flags;
21} __attribute__ ((__packed__));
22
23#define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \
24 (sizeof(struct key) + sizeof(u64)))
25
26#define LEVEL_BITS 3
27#define MAX_LEVEL (1 << LEVEL_BITS)
28#define node_level(f) ((f) & (MAX_LEVEL-1))
29#define is_leaf(f) (node_level(f) == 0)
30
31struct ctree_root {
32 struct node *node;
33};
34
35struct item {
36 struct key key;
37 u16 offset;
38 u16 size;
39} __attribute__ ((__packed__));
40
41#define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header))
42struct leaf {
43 struct header header;
44 union {
45 struct item items[LEAF_DATA_SIZE/sizeof(struct item)];
46 u8 data[BLOCKSIZE-sizeof(struct header)];
47 };
48} __attribute__ ((__packed__));
49
50struct node {
51 struct header header;
52 struct key keys[NODEPTRS_PER_BLOCK];
53 u64 blockptrs[NODEPTRS_PER_BLOCK];
54} __attribute__ ((__packed__));
55
56struct ctree_path {
57 struct node *nodes[MAX_LEVEL];
58 int slots[MAX_LEVEL];
59};
60 7
61static inline void init_path(struct ctree_path *p) 8static inline void init_path(struct ctree_path *p)
62{ 9{
63 memset(p, 0, sizeof(*p)); 10 memset(p, 0, sizeof(*p));
64} 11}
65 12
13static void release_path(struct ctree_root *root, struct ctree_path *p)
14{
15 int i;
16 for (i = 0; i < MAX_LEVEL; i++) {
17 if (!p->nodes[i])
18 break;
19 tree_block_release(root, p->nodes[i]);
20 }
21}
22
66static inline unsigned int leaf_data_end(struct leaf *leaf) 23static inline unsigned int leaf_data_end(struct leaf *leaf)
67{ 24{
68 unsigned int nr = leaf->header.nritems; 25 unsigned int nr = leaf->header.nritems;
@@ -135,26 +92,25 @@ int bin_search(struct node *c, struct key *key, int *slot)
135 return -1; 92 return -1;
136} 93}
137 94
138void *read_block(u64 blocknum)
139{
140 return (void *)blocknum;
141}
142
143int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) 95int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
144{ 96{
145 struct node *c = root->node; 97 struct tree_buffer *b = root->node;
98 struct node *c;
99
146 int slot; 100 int slot;
147 int ret; 101 int ret;
148 int level; 102 int level;
149 while (c) { 103 b->count++;
104 while (b) {
105 c = &b->node;
150 level = node_level(c->header.flags); 106 level = node_level(c->header.flags);
151 p->nodes[level] = c; 107 p->nodes[level] = b;
152 ret = bin_search(c, key, &slot); 108 ret = bin_search(c, key, &slot);
153 if (!is_leaf(c->header.flags)) { 109 if (!is_leaf(c->header.flags)) {
154 if (ret && slot > 0) 110 if (ret && slot > 0)
155 slot -= 1; 111 slot -= 1;
156 p->slots[level] = slot; 112 p->slots[level] = slot;
157 c = read_block(c->blockptrs[slot]); 113 b = read_tree_block(root, c->blockptrs[slot]);
158 continue; 114 continue;
159 } else { 115 } else {
160 p->slots[level] = slot; 116 p->slots[level] = slot;
@@ -164,17 +120,20 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
164 return -1; 120 return -1;
165} 121}
166 122
167static void fixup_low_keys(struct ctree_path *path, struct key *key, 123static void fixup_low_keys(struct ctree_root *root,
168 int level) 124 struct ctree_path *path, struct key *key,
125 int level)
169{ 126{
170 int i; 127 int i;
171 /* adjust the pointers going up the tree */ 128 /* adjust the pointers going up the tree */
172 for (i = level; i < MAX_LEVEL; i++) { 129 for (i = level; i < MAX_LEVEL; i++) {
173 struct node *t = path->nodes[i]; 130 struct node *t;
174 int tslot = path->slots[i]; 131 int tslot = path->slots[i];
175 if (!t) 132 if (!path->nodes[i])
176 break; 133 break;
134 t = &path->nodes[i]->node;
177 memcpy(t->keys + tslot, key, sizeof(*key)); 135 memcpy(t->keys + tslot, key, sizeof(*key));
136 write_tree_block(root, path->nodes[i]);
178 if (tslot != 0) 137 if (tslot != 0)
179 break; 138 break;
180 } 139 }
@@ -190,27 +149,34 @@ int __insert_ptr(struct ctree_root *root,
190 int nritems; 149 int nritems;
191 /* need a new root */ 150 /* need a new root */
192 if (!path->nodes[level]) { 151 if (!path->nodes[level]) {
193 c = malloc(sizeof(struct node)); 152 struct tree_buffer *t;
153 t = alloc_free_block(root);
154 c = &t->node;
194 memset(c, 0, sizeof(c)); 155 memset(c, 0, sizeof(c));
195 c->header.nritems = 2; 156 c->header.nritems = 2;
196 c->header.flags = node_level(level); 157 c->header.flags = node_level(level);
197 lower = path->nodes[level-1]; 158 c->header.blocknr = t->blocknr;
159 lower = &path->nodes[level-1]->node;
198 if (is_leaf(lower->header.flags)) 160 if (is_leaf(lower->header.flags))
199 lower_key = &((struct leaf *)lower)->items[0].key; 161 lower_key = &((struct leaf *)lower)->items[0].key;
200 else 162 else
201 lower_key = lower->keys; 163 lower_key = lower->keys;
202 memcpy(c->keys, lower_key, sizeof(struct key)); 164 memcpy(c->keys, lower_key, sizeof(struct key));
203 memcpy(c->keys + 1, key, sizeof(struct key)); 165 memcpy(c->keys + 1, key, sizeof(struct key));
204 c->blockptrs[0] = (u64)lower; 166 c->blockptrs[0] = path->nodes[level-1]->blocknr;
205 c->blockptrs[1] = blocknr; 167 c->blockptrs[1] = blocknr;
206 root->node = c; 168 /* the path has an extra ref to root->node */
207 path->nodes[level] = c; 169 tree_block_release(root, root->node);
170 root->node = t;
171 t->count++;
172 write_tree_block(root, t);
173 path->nodes[level] = t;
208 path->slots[level] = 0; 174 path->slots[level] = 0;
209 if (c->keys[1].objectid == 0) 175 if (c->keys[1].objectid == 0)
210 BUG(); 176 BUG();
211 return 0; 177 return 0;
212 } 178 }
213 lower = path->nodes[level]; 179 lower = &path->nodes[level]->node;
214 nritems = lower->header.nritems; 180 nritems = lower->header.nritems;
215 if (slot > nritems) 181 if (slot > nritems)
216 BUG(); 182 BUG();
@@ -227,6 +193,7 @@ int __insert_ptr(struct ctree_root *root,
227 lower->header.nritems++; 193 lower->header.nritems++;
228 if (lower->keys[1].objectid == 0) 194 if (lower->keys[1].objectid == 0)
229 BUG(); 195 BUG();
196 write_tree_block(root, path->nodes[level]);
230 return 0; 197 return 0;
231} 198}
232 199
@@ -238,6 +205,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
238 int push_items = 0; 205 int push_items = 0;
239 int left_nritems; 206 int left_nritems;
240 int right_nritems; 207 int right_nritems;
208 struct tree_buffer *t;
209 struct tree_buffer *right_buf;
241 210
242 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) 211 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
243 return 1; 212 return 1;
@@ -245,13 +214,18 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
245 if (slot == 0) 214 if (slot == 0)
246 return 1; 215 return 1;
247 216
248 left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]); 217 t = read_tree_block(root,
249 right = path->nodes[level]; 218 path->nodes[level + 1]->node.blockptrs[slot - 1]);
219 left = &t->node;
220 right_buf = path->nodes[level];
221 right = &right_buf->node;
250 left_nritems = left->header.nritems; 222 left_nritems = left->header.nritems;
251 right_nritems = right->header.nritems; 223 right_nritems = right->header.nritems;
252 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); 224 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
253 if (push_items <= 0) 225 if (push_items <= 0) {
226 tree_block_release(root, t);
254 return 1; 227 return 1;
228 }
255 229
256 if (right_nritems < push_items) 230 if (right_nritems < push_items)
257 push_items = right_nritems; 231 push_items = right_nritems;
@@ -267,15 +241,20 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
267 left->header.nritems += push_items; 241 left->header.nritems += push_items;
268 242
269 /* adjust the pointers going up the tree */ 243 /* adjust the pointers going up the tree */
270 fixup_low_keys(path, right->keys, level + 1); 244 fixup_low_keys(root, path, right->keys, level + 1);
245
246 write_tree_block(root, t);
247 write_tree_block(root, right_buf);
271 248
272 /* then fixup the leaf pointer in the path */ 249 /* then fixup the leaf pointer in the path */
273 if (path->slots[level] < push_items) { 250 if (path->slots[level] < push_items) {
274 path->slots[level] += left_nritems; 251 path->slots[level] += left_nritems;
275 path->nodes[level] = (struct node*)left; 252 tree_block_release(root, path->nodes[level]);
253 path->nodes[level] = t;
276 path->slots[level + 1] -= 1; 254 path->slots[level + 1] -= 1;
277 } else { 255 } else {
278 path->slots[level] -= push_items; 256 path->slots[level] -= push_items;
257 tree_block_release(root, t);
279 } 258 }
280 return 0; 259 return 0;
281} 260}
@@ -283,6 +262,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
283int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) 262int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
284{ 263{
285 int slot; 264 int slot;
265 struct tree_buffer *t;
266 struct tree_buffer *src_buffer;
286 struct node *dst; 267 struct node *dst;
287 struct node *src; 268 struct node *src;
288 int push_items = 0; 269 int push_items = 0;
@@ -295,16 +276,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
295 if (slot == NODEPTRS_PER_BLOCK - 1) 276 if (slot == NODEPTRS_PER_BLOCK - 1)
296 return 1; 277 return 1;
297 278
298 if (slot >= path->nodes[level + 1]->header.nritems -1) 279 if (slot >= path->nodes[level + 1]->node.header.nritems -1)
299 return 1; 280 return 1;
300 281
301 dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]); 282 t = read_tree_block(root,
302 src = path->nodes[level]; 283 path->nodes[level + 1]->node.blockptrs[slot + 1]);
284 dst = &t->node;
285 src_buffer = path->nodes[level];
286 src = &src_buffer->node;
303 dst_nritems = dst->header.nritems; 287 dst_nritems = dst->header.nritems;
304 src_nritems = src->header.nritems; 288 src_nritems = src->header.nritems;
305 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); 289 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
306 if (push_items <= 0) 290 if (push_items <= 0) {
291 tree_block_release(root, t);
307 return 1; 292 return 1;
293 }
308 294
309 if (src_nritems < push_items) 295 if (src_nritems < push_items)
310 push_items = src_nritems; 296 push_items = src_nritems;
@@ -322,13 +308,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
322 dst->header.nritems += push_items; 308 dst->header.nritems += push_items;
323 309
324 /* adjust the pointers going up the tree */ 310 /* adjust the pointers going up the tree */
325 memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1, 311 memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
326 dst->keys, sizeof(struct key)); 312 dst->keys, sizeof(struct key));
313
314 write_tree_block(root, path->nodes[level + 1]);
315 write_tree_block(root, t);
316 write_tree_block(root, src_buffer);
317
327 /* then fixup the leaf pointer in the path */ 318 /* then fixup the leaf pointer in the path */
328 if (path->slots[level] >= src->header.nritems) { 319 if (path->slots[level] >= src->header.nritems) {
329 path->slots[level] -= src->header.nritems; 320 path->slots[level] -= src->header.nritems;
330 path->nodes[level] = (struct node*)dst; 321 tree_block_release(root, path->nodes[level]);
322 path->nodes[level] = t;
331 path->slots[level + 1] += 1; 323 path->slots[level + 1] += 1;
324 } else {
325 tree_block_release(root, t);
332 } 326 }
333 return 0; 327 return 0;
334} 328}
@@ -337,15 +331,18 @@ int insert_ptr(struct ctree_root *root,
337 struct ctree_path *path, struct key *key, 331 struct ctree_path *path, struct key *key,
338 u64 blocknr, int level) 332 u64 blocknr, int level)
339{ 333{
340 struct node *c = path->nodes[level]; 334 struct tree_buffer *t = path->nodes[level];
335 struct node *c = &path->nodes[level]->node;
341 struct node *b; 336 struct node *b;
342 struct node *bal[MAX_LEVEL]; 337 struct tree_buffer *b_buffer;
338 struct tree_buffer *bal[MAX_LEVEL];
343 int bal_level = level; 339 int bal_level = level;
344 int mid; 340 int mid;
345 int bal_start = -1; 341 int bal_start = -1;
346 342
347 memset(bal, 0, ARRAY_SIZE(bal)); 343 memset(bal, 0, ARRAY_SIZE(bal));
348 while(c && c->header.nritems == NODEPTRS_PER_BLOCK) { 344 while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
345 c = &t->node;
349 if (push_node_left(root, path, 346 if (push_node_left(root, path,
350 node_level(c->header.flags)) == 0) 347 node_level(c->header.flags)) == 0)
351 break; 348 break;
@@ -355,8 +352,10 @@ int insert_ptr(struct ctree_root *root,
355 bal_start = bal_level; 352 bal_start = bal_level;
356 if (bal_level == MAX_LEVEL - 1) 353 if (bal_level == MAX_LEVEL - 1)
357 BUG(); 354 BUG();
358 b = malloc(sizeof(struct node)); 355 b_buffer = alloc_free_block(root);
356 b = &b_buffer->node;
359 b->header.flags = c->header.flags; 357 b->header.flags = c->header.flags;
358 b->header.blocknr = b_buffer->blocknr;
360 mid = (c->header.nritems + 1) / 2; 359 mid = (c->header.nritems + 1) / 2;
361 memcpy(b->keys, c->keys + mid, 360 memcpy(b->keys, c->keys + mid,
362 (c->header.nritems - mid) * sizeof(struct key)); 361 (c->header.nritems - mid) * sizeof(struct key));
@@ -364,21 +363,28 @@ int insert_ptr(struct ctree_root *root,
364 (c->header.nritems - mid) * sizeof(u64)); 363 (c->header.nritems - mid) * sizeof(u64));
365 b->header.nritems = c->header.nritems - mid; 364 b->header.nritems = c->header.nritems - mid;
366 c->header.nritems = mid; 365 c->header.nritems = mid;
367 bal[bal_level] = b; 366
367 write_tree_block(root, t);
368 write_tree_block(root, b_buffer);
369
370 bal[bal_level] = b_buffer;
368 if (bal_level == MAX_LEVEL - 1) 371 if (bal_level == MAX_LEVEL - 1)
369 break; 372 break;
370 bal_level += 1; 373 bal_level += 1;
371 c = path->nodes[bal_level]; 374 t = path->nodes[bal_level];
372 } 375 }
373 while(bal_start > 0) { 376 while(bal_start > 0) {
374 b = bal[bal_start]; 377 b_buffer = bal[bal_start];
375 c = path->nodes[bal_start]; 378 c = &path->nodes[bal_start]->node;
376 __insert_ptr(root, path, b->keys, (u64)b, 379 __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
377 path->slots[bal_start + 1] + 1, bal_start + 1); 380 path->slots[bal_start + 1] + 1, bal_start + 1);
378 if (path->slots[bal_start] >= c->header.nritems) { 381 if (path->slots[bal_start] >= c->header.nritems) {
379 path->slots[bal_start] -= c->header.nritems; 382 path->slots[bal_start] -= c->header.nritems;
380 path->nodes[bal_start] = b; 383 tree_block_release(root, path->nodes[bal_start]);
384 path->nodes[bal_start] = b_buffer;
381 path->slots[bal_start + 1] += 1; 385 path->slots[bal_start + 1] += 1;
386 } else {
387 tree_block_release(root, b_buffer);
382 } 388 }
383 bal_start--; 389 bal_start--;
384 if (!bal[bal_start]) 390 if (!bal[bal_start])
@@ -404,7 +410,9 @@ int leaf_space_used(struct leaf *l, int start, int nr)
404int push_leaf_left(struct ctree_root *root, struct ctree_path *path, 410int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
405 int data_size) 411 int data_size)
406{ 412{
407 struct leaf *right = (struct leaf *)path->nodes[0]; 413 struct tree_buffer *right_buf = path->nodes[0];
414 struct leaf *right = &right_buf->leaf;
415 struct tree_buffer *t;
408 struct leaf *left; 416 struct leaf *left;
409 int slot; 417 int slot;
410 int i; 418 int i;
@@ -421,9 +429,11 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
421 if (!path->nodes[1]) { 429 if (!path->nodes[1]) {
422 return 1; 430 return 1;
423 } 431 }
424 left = read_block(path->nodes[1]->blockptrs[slot - 1]); 432 t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
433 left = &t->leaf;
425 free_space = leaf_free_space(left); 434 free_space = leaf_free_space(left);
426 if (free_space < data_size + sizeof(struct item)) { 435 if (free_space < data_size + sizeof(struct item)) {
436 tree_block_release(root, t);
427 return 1; 437 return 1;
428 } 438 }
429 for (i = 0; i < right->header.nritems; i++) { 439 for (i = 0; i < right->header.nritems; i++) {
@@ -436,6 +446,7 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
436 push_space += item->size + sizeof(*item); 446 push_space += item->size + sizeof(*item);
437 } 447 }
438 if (push_items == 0) { 448 if (push_items == 0) {
449 tree_block_release(root, t);
439 return 1; 450 return 1;
440 } 451 }
441 /* push data from right to left */ 452 /* push data from right to left */
@@ -446,6 +457,8 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
446 right->data + right->items[push_items - 1].offset, 457 right->data + right->items[push_items - 1].offset,
447 push_space); 458 push_space);
448 old_left_nritems = left->header.nritems; 459 old_left_nritems = left->header.nritems;
460 BUG_ON(old_left_nritems < 0);
461
449 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { 462 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
450 left->items[i].offset -= LEAF_DATA_SIZE - 463 left->items[i].offset -= LEAF_DATA_SIZE -
451 left->items[old_left_nritems -1].offset; 464 left->items[old_left_nritems -1].offset;
@@ -460,30 +473,40 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
460 (right->header.nritems - push_items) * sizeof(struct item)); 473 (right->header.nritems - push_items) * sizeof(struct item));
461 right->header.nritems -= push_items; 474 right->header.nritems -= push_items;
462 push_space = LEAF_DATA_SIZE; 475 push_space = LEAF_DATA_SIZE;
476
463 for (i = 0; i < right->header.nritems; i++) { 477 for (i = 0; i < right->header.nritems; i++) {
464 right->items[i].offset = push_space - right->items[i].size; 478 right->items[i].offset = push_space - right->items[i].size;
465 push_space = right->items[i].offset; 479 push_space = right->items[i].offset;
466 } 480 }
467 fixup_low_keys(path, &right->items[0].key, 1); 481
482 write_tree_block(root, t);
483 write_tree_block(root, right_buf);
484
485 fixup_low_keys(root, path, &right->items[0].key, 1);
468 486
469 /* then fixup the leaf pointer in the path */ 487 /* then fixup the leaf pointer in the path */
470 if (path->slots[0] < push_items) { 488 if (path->slots[0] < push_items) {
471 path->slots[0] += old_left_nritems; 489 path->slots[0] += old_left_nritems;
472 path->nodes[0] = (struct node*)left; 490 tree_block_release(root, path->nodes[0]);
491 path->nodes[0] = t;
473 path->slots[1] -= 1; 492 path->slots[1] -= 1;
474 } else { 493 } else {
494 tree_block_release(root, t);
475 path->slots[0] -= push_items; 495 path->slots[0] -= push_items;
476 } 496 }
497 BUG_ON(path->slots[0] < 0);
477 return 0; 498 return 0;
478} 499}
479 500
480int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) 501int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
481{ 502{
482 struct leaf *l = (struct leaf *)path->nodes[0]; 503 struct tree_buffer *l_buf = path->nodes[0];
483 int nritems = l->header.nritems; 504 struct leaf *l = &l_buf->leaf;
484 int mid = (nritems + 1)/ 2; 505 int nritems;
485 int slot = path->slots[0]; 506 int mid;
507 int slot;
486 struct leaf *right; 508 struct leaf *right;
509 struct tree_buffer *right_buffer;
487 int space_needed = data_size + sizeof(struct item); 510 int space_needed = data_size + sizeof(struct item);
488 int data_copy_size; 511 int data_copy_size;
489 int rt_data_off; 512 int rt_data_off;
@@ -491,9 +514,19 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
491 int ret; 514 int ret;
492 515
493 if (push_leaf_left(root, path, data_size) == 0) { 516 if (push_leaf_left(root, path, data_size) == 0) {
494 return 0; 517 l_buf = path->nodes[0];
518 l = &l_buf->leaf;
519 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
520 return 0;
495 } 521 }
496 right = malloc(sizeof(struct leaf)); 522 slot = path->slots[0];
523 nritems = l->header.nritems;
524 mid = (nritems + 1)/ 2;
525
526 right_buffer = alloc_free_block(root);
527 BUG_ON(!right_buffer);
528 BUG_ON(mid == nritems);
529 right = &right_buffer->leaf;
497 memset(right, 0, sizeof(*right)); 530 memset(right, 0, sizeof(*right));
498 if (mid <= slot) { 531 if (mid <= slot) {
499 if (leaf_space_used(l, mid, nritems - mid) + space_needed > 532 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
@@ -505,6 +538,8 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
505 BUG(); 538 BUG();
506 } 539 }
507 right->header.nritems = nritems - mid; 540 right->header.nritems = nritems - mid;
541 right->header.blocknr = right_buffer->blocknr;
542 right->header.flags = node_level(0);
508 data_copy_size = l->items[mid].offset + l->items[mid].size - 543 data_copy_size = l->items[mid].offset + l->items[mid].size -
509 leaf_data_end(l); 544 leaf_data_end(l);
510 memcpy(right->items, l->items + mid, 545 memcpy(right->items, l->items + mid,
@@ -518,12 +553,20 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
518 } 553 }
519 l->header.nritems = mid; 554 l->header.nritems = mid;
520 ret = insert_ptr(root, path, &right->items[0].key, 555 ret = insert_ptr(root, path, &right->items[0].key,
521 (u64)right, 1); 556 right_buffer->blocknr, 1);
557
558 write_tree_block(root, right_buffer);
559 write_tree_block(root, l_buf);
560
561 BUG_ON(path->slots[0] != slot);
522 if (mid <= slot) { 562 if (mid <= slot) {
523 path->nodes[0] = (struct node *)right; 563 tree_block_release(root, path->nodes[0]);
564 path->nodes[0] = right_buffer;
524 path->slots[0] -= mid; 565 path->slots[0] -= mid;
525 path->slots[1] += 1; 566 path->slots[1] += 1;
526 } 567 } else
568 tree_block_release(root, right_buffer);
569 BUG_ON(path->slots[0] < 0);
527 return ret; 570 return ret;
528} 571}
529 572
@@ -532,28 +575,48 @@ int insert_item(struct ctree_root *root, struct key *key,
532{ 575{
533 int ret; 576 int ret;
534 int slot; 577 int slot;
578 int slot_orig;
535 struct leaf *leaf; 579 struct leaf *leaf;
580 struct tree_buffer *leaf_buf;
536 unsigned int nritems; 581 unsigned int nritems;
537 unsigned int data_end; 582 unsigned int data_end;
538 struct ctree_path path; 583 struct ctree_path path;
539 584
585 if (!root->node) {
586 struct tree_buffer *t;
587 t = alloc_free_block(root);
588 BUG_ON(!t);
589 t->node.header.nritems = 0;
590 t->node.header.flags = node_level(0);
591 t->node.header.blocknr = t->blocknr;
592 root->node = t;
593 write_tree_block(root, t);
594 }
540 init_path(&path); 595 init_path(&path);
541 ret = search_slot(root, key, &path); 596 ret = search_slot(root, key, &path);
542 if (ret == 0) 597 if (ret == 0) {
598 release_path(root, &path);
543 return -EEXIST; 599 return -EEXIST;
600 }
544 601
545 leaf = (struct leaf *)path.nodes[0]; 602 slot_orig = path.slots[0];
546 if (leaf_free_space(leaf) < sizeof(struct item) + data_size) 603 leaf_buf = path.nodes[0];
604 leaf = &leaf_buf->leaf;
605 if (leaf_free_space(leaf) < sizeof(struct item) + data_size) {
547 split_leaf(root, &path, data_size); 606 split_leaf(root, &path, data_size);
548 leaf = (struct leaf *)path.nodes[0]; 607 leaf_buf = path.nodes[0];
608 leaf = &path.nodes[0]->leaf;
609 }
549 nritems = leaf->header.nritems; 610 nritems = leaf->header.nritems;
550 data_end = leaf_data_end(leaf); 611 data_end = leaf_data_end(leaf);
612
551 if (leaf_free_space(leaf) < sizeof(struct item) + data_size) 613 if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
552 BUG(); 614 BUG();
553 615
554 slot = path.slots[0]; 616 slot = path.slots[0];
617 BUG_ON(slot < 0);
555 if (slot == 0) 618 if (slot == 0)
556 fixup_low_keys(&path, key, 1); 619 fixup_low_keys(root, &path, key, 1);
557 if (slot != nritems) { 620 if (slot != nritems) {
558 int i; 621 int i;
559 unsigned int old_data = leaf->items[slot].offset + 622 unsigned int old_data = leaf->items[slot].offset +
@@ -580,21 +643,25 @@ int insert_item(struct ctree_root *root, struct key *key,
580 leaf->items[slot].size = data_size; 643 leaf->items[slot].size = data_size;
581 memcpy(leaf->data + data_end - data_size, data, data_size); 644 memcpy(leaf->data + data_end - data_size, data, data_size);
582 leaf->header.nritems += 1; 645 leaf->header.nritems += 1;
646 write_tree_block(root, leaf_buf);
583 if (leaf_free_space(leaf) < 0) 647 if (leaf_free_space(leaf) < 0)
584 BUG(); 648 BUG();
649 release_path(root, &path);
585 return 0; 650 return 0;
586} 651}
587 652
588int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) 653int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
589{ 654{
590 int slot; 655 int slot;
656 struct tree_buffer *t;
591 struct node *node; 657 struct node *node;
592 int nritems; 658 int nritems;
593 659
594 while(1) { 660 while(1) {
595 node = path->nodes[level]; 661 t = path->nodes[level];
596 if (!node) 662 if (!t)
597 break; 663 break;
664 node = &t->node;
598 slot = path->slots[level]; 665 slot = path->slots[level];
599 nritems = node->header.nritems; 666 nritems = node->header.nritems;
600 667
@@ -606,28 +673,34 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
606 sizeof(u64) * (nritems - slot - 1)); 673 sizeof(u64) * (nritems - slot - 1));
607 } 674 }
608 node->header.nritems--; 675 node->header.nritems--;
676 write_tree_block(root, t);
609 if (node->header.nritems != 0) { 677 if (node->header.nritems != 0) {
610 int tslot; 678 int tslot;
611 if (slot == 0) 679 if (slot == 0)
612 fixup_low_keys(path, node->keys, level + 1); 680 fixup_low_keys(root, path, node->keys,
681 level + 1);
613 tslot = path->slots[level+1]; 682 tslot = path->slots[level+1];
683 t->count++;
614 push_node_left(root, path, level); 684 push_node_left(root, path, level);
615 if (node->header.nritems) { 685 if (node->header.nritems) {
616 push_node_right(root, path, level); 686 push_node_right(root, path, level);
617 } 687 }
618 if (node->header.nritems) 688 if (node->header.nritems) {
689 tree_block_release(root, t);
619 break; 690 break;
691 }
692 tree_block_release(root, t);
620 path->slots[level+1] = tslot; 693 path->slots[level+1] = tslot;
621 } 694 }
622 if (node == root->node) { 695 if (t == root->node) {
623 printf("root is now null!\n"); 696 /* just turn the root into a leaf and break */
624 root->node = NULL; 697 root->node->node.header.flags = node_level(0);
698 write_tree_block(root, t);
625 break; 699 break;
626 } 700 }
627 level++; 701 level++;
628 if (!path->nodes[level]) 702 if (!path->nodes[level])
629 BUG(); 703 BUG();
630 free(node);
631 } 704 }
632 return 0; 705 return 0;
633} 706}
@@ -636,10 +709,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
636{ 709{
637 int slot; 710 int slot;
638 struct leaf *leaf; 711 struct leaf *leaf;
712 struct tree_buffer *leaf_buf;
639 int doff; 713 int doff;
640 int dsize; 714 int dsize;
641 715
642 leaf = (struct leaf *)path->nodes[0]; 716 leaf_buf = path->nodes[0];
717 leaf = &leaf_buf->leaf;
643 slot = path->slots[0]; 718 slot = path->slots[0];
644 doff = leaf->items[slot].offset; 719 doff = leaf->items[slot].offset;
645 dsize = leaf->items[slot].size; 720 dsize = leaf->items[slot].size;
@@ -658,14 +733,15 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
658 } 733 }
659 leaf->header.nritems -= 1; 734 leaf->header.nritems -= 1;
660 if (leaf->header.nritems == 0) { 735 if (leaf->header.nritems == 0) {
661 if (leaf == (struct leaf *)root->node) 736 if (leaf_buf == root->node) {
662 root->node = NULL; 737 leaf->header.flags = node_level(0);
663 else 738 write_tree_block(root, leaf_buf);
739 } else
664 del_ptr(root, path, 1); 740 del_ptr(root, path, 1);
665 free(leaf);
666 } else { 741 } else {
667 if (slot == 0) 742 if (slot == 0)
668 fixup_low_keys(path, &leaf->items[0].key, 1); 743 fixup_low_keys(root, path, &leaf->items[0].key, 1);
744 write_tree_block(root, leaf_buf);
669 if (leaf_space_used(leaf, 0, leaf->header.nritems) < 745 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
670 LEAF_DATA_SIZE / 4) { 746 LEAF_DATA_SIZE / 4) {
671 /* push_leaf_left fixes the path. 747 /* push_leaf_left fixes the path.
@@ -673,12 +749,13 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
673 * for possible call to del_ptr below 749 * for possible call to del_ptr below
674 */ 750 */
675 slot = path->slots[1]; 751 slot = path->slots[1];
752 leaf_buf->count++;
676 push_leaf_left(root, path, 1); 753 push_leaf_left(root, path, 1);
677 if (leaf->header.nritems == 0) { 754 if (leaf->header.nritems == 0) {
678 free(leaf);
679 path->slots[1] = slot; 755 path->slots[1] = slot;
680 del_ptr(root, path, 1); 756 del_ptr(root, path, 1);
681 } 757 }
758 tree_block_release(root, leaf_buf);
682 } 759 }
683 } 760 }
684 return 0; 761 return 0;
@@ -689,7 +766,7 @@ void print_leaf(struct leaf *l)
689 int i; 766 int i;
690 int nr = l->header.nritems; 767 int nr = l->header.nritems;
691 struct item *item; 768 struct item *item;
692 printf("leaf %p total ptrs %d free space %d\n", l, nr, 769 printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
693 leaf_free_space(l)); 770 leaf_free_space(l));
694 fflush(stdout); 771 fflush(stdout);
695 for (i = 0 ; i < nr ; i++) { 772 for (i = 0 ; i < nr ; i++) {
@@ -703,38 +780,45 @@ void print_leaf(struct leaf *l)
703 fflush(stdout); 780 fflush(stdout);
704 } 781 }
705} 782}
706void print_tree(struct node *c) 783void print_tree(struct ctree_root *root, struct tree_buffer *t)
707{ 784{
708 int i; 785 int i;
709 int nr; 786 int nr;
787 struct node *c;
710 788
711 if (!c) 789 if (!t)
712 return; 790 return;
791 c = &t->node;
713 nr = c->header.nritems; 792 nr = c->header.nritems;
793 if (c->header.blocknr != t->blocknr)
794 BUG();
714 if (is_leaf(c->header.flags)) { 795 if (is_leaf(c->header.flags)) {
715 print_leaf((struct leaf *)c); 796 print_leaf((struct leaf *)c);
716 return; 797 return;
717 } 798 }
718 printf("node %p level %d total ptrs %d free spc %lu\n", c, 799 printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
719 node_level(c->header.flags), c->header.nritems, 800 node_level(c->header.flags), c->header.nritems,
720 NODEPTRS_PER_BLOCK - c->header.nritems); 801 NODEPTRS_PER_BLOCK - c->header.nritems);
721 fflush(stdout); 802 fflush(stdout);
722 for (i = 0; i < nr; i++) { 803 for (i = 0; i < nr; i++) {
723 printf("\tkey %d (%lu %u %lu) block %lx\n", 804 printf("\tkey %d (%lu %u %lu) block %lu\n",
724 i, 805 i,
725 c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, 806 c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
726 c->blockptrs[i]); 807 c->blockptrs[i]);
727 fflush(stdout); 808 fflush(stdout);
728 } 809 }
729 for (i = 0; i < nr; i++) { 810 for (i = 0; i < nr; i++) {
730 struct node *next = read_block(c->blockptrs[i]); 811 struct tree_buffer *next_buf = read_tree_block(root,
812 c->blockptrs[i]);
813 struct node *next = &next_buf->node;
731 if (is_leaf(next->header.flags) && 814 if (is_leaf(next->header.flags) &&
732 node_level(c->header.flags) != 1) 815 node_level(c->header.flags) != 1)
733 BUG(); 816 BUG();
734 if (node_level(next->header.flags) != 817 if (node_level(next->header.flags) !=
735 node_level(c->header.flags) - 1) 818 node_level(c->header.flags) - 1)
736 BUG(); 819 BUG();
737 print_tree(next); 820 print_tree(root, next_buf);
821 tree_block_release(root, next_buf);
738 } 822 }
739 823
740} 824}
@@ -746,23 +830,24 @@ int next_key(int i, int max_key) {
746} 830}
747 831
748int main() { 832int main() {
749 struct leaf *first_node = malloc(sizeof(struct leaf)); 833 struct ctree_root *root;
750 struct ctree_root root;
751 struct key ins; 834 struct key ins;
752 struct key last = { (u64)-1, 0, 0}; 835 struct key last = { (u64)-1, 0, 0};
753 char *buf; 836 char *buf;
754 int i; 837 int i;
755 int num; 838 int num;
756 int ret; 839 int ret;
757 int run_size = 100000; 840 int run_size = 1000000;
758 int max_key = 100000000; 841 int max_key = 100000000;
759 int tree_size = 0; 842 int tree_size = 0;
760 struct ctree_path path; 843 struct ctree_path path;
761 844
845 radix_tree_init();
846
847
848 root = open_ctree("dbfile");
762 849
763 srand(55); 850 srand(55);
764 root.node = (struct node *)first_node;
765 memset(first_node, 0, sizeof(*first_node));
766 for (i = 0; i < run_size; i++) { 851 for (i = 0; i < run_size; i++) {
767 buf = malloc(64); 852 buf = malloc(64);
768 num = next_key(i, max_key); 853 num = next_key(i, max_key);
@@ -772,39 +857,46 @@ int main() {
772 ins.objectid = num; 857 ins.objectid = num;
773 ins.offset = 0; 858 ins.offset = 0;
774 ins.flags = 0; 859 ins.flags = 0;
775 ret = insert_item(&root, &ins, buf, strlen(buf)); 860 ret = insert_item(root, &ins, buf, strlen(buf));
776 if (!ret) 861 if (!ret)
777 tree_size++; 862 tree_size++;
778 } 863 }
864 close_ctree(root);
865 root = open_ctree("dbfile");
866 printf("starting search\n");
779 srand(55); 867 srand(55);
780 for (i = 0; i < run_size; i++) { 868 for (i = 0; i < run_size; i++) {
781 num = next_key(i, max_key); 869 num = next_key(i, max_key);
782 ins.objectid = num; 870 ins.objectid = num;
783 init_path(&path); 871 init_path(&path);
784 ret = search_slot(&root, &ins, &path); 872 ret = search_slot(root, &ins, &path);
785 if (ret) { 873 if (ret) {
786 print_tree(root.node); 874 print_tree(root, root->node);
787 printf("unable to find %d\n", num); 875 printf("unable to find %d\n", num);
788 exit(1); 876 exit(1);
789 } 877 }
790 } 878 release_path(root, &path);
791 printf("node %p level %d total ptrs %d free spc %lu\n", root.node, 879 }
792 node_level(root.node->header.flags), root.node->header.nritems, 880 close_ctree(root);
793 NODEPTRS_PER_BLOCK - root.node->header.nritems); 881 root = open_ctree("dbfile");
794 // print_tree(root.node); 882 printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
795 printf("all searches good\n"); 883 node_level(root->node->node.header.flags),
884 root->node->node.header.nritems,
885 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
886 printf("all searches good, deleting some items\n");
796 i = 0; 887 i = 0;
797 srand(55); 888 srand(55);
798 for (i = 0 ; i < run_size/4; i++) { 889 for (i = 0 ; i < run_size/4; i++) {
799 num = next_key(i, max_key); 890 num = next_key(i, max_key);
800 ins.objectid = num; 891 ins.objectid = num;
801 init_path(&path); 892 init_path(&path);
802 ret = search_slot(&root, &ins, &path); 893 ret = search_slot(root, &ins, &path);
803 if (ret) 894 if (ret)
804 continue; 895 continue;
805 ret = del_item(&root, &path); 896 ret = del_item(root, &path);
806 if (ret != 0) 897 if (ret != 0)
807 BUG(); 898 BUG();
899 release_path(root, &path);
808 tree_size--; 900 tree_size--;
809 } 901 }
810 srand(128); 902 srand(128);
@@ -813,38 +905,58 @@ int main() {
813 num = next_key(i, max_key); 905 num = next_key(i, max_key);
814 sprintf(buf, "string-%d", num); 906 sprintf(buf, "string-%d", num);
815 ins.objectid = num; 907 ins.objectid = num;
816 ret = insert_item(&root, &ins, buf, strlen(buf)); 908 ret = insert_item(root, &ins, buf, strlen(buf));
817 if (!ret) 909 if (!ret)
818 tree_size++; 910 tree_size++;
819 } 911 }
820 while(root.node) { 912 close_ctree(root);
913 root = open_ctree("dbfile");
914 printf("starting search2\n");
915 srand(128);
916 for (i = 0; i < run_size; i++) {
917 num = next_key(i, max_key);
918 ins.objectid = num;
919 init_path(&path);
920 ret = search_slot(root, &ins, &path);
921 if (ret) {
922 print_tree(root, root->node);
923 printf("unable to find %d\n", num);
924 exit(1);
925 }
926 release_path(root, &path);
927 }
928 printf("starting big long delete run\n");
929 while(root->node && root->node->node.header.nritems > 0) {
821 struct leaf *leaf; 930 struct leaf *leaf;
822 int slot; 931 int slot;
823 ins.objectid = (u64)-1; 932 ins.objectid = (u64)-1;
824 init_path(&path); 933 init_path(&path);
825 ret = search_slot(&root, &ins, &path); 934 ret = search_slot(root, &ins, &path);
826 if (ret == 0) 935 if (ret == 0)
827 BUG(); 936 BUG();
828 937
829 leaf = (struct leaf *)(path.nodes[0]); 938 leaf = &path.nodes[0]->leaf;
830 slot = path.slots[0]; 939 slot = path.slots[0];
831 if (slot != leaf->header.nritems) 940 if (slot != leaf->header.nritems)
832 BUG(); 941 BUG();
833 while(path.slots[0] > 0) { 942 while(path.slots[0] > 0) {
834 path.slots[0] -= 1; 943 path.slots[0] -= 1;
835 slot = path.slots[0]; 944 slot = path.slots[0];
836 leaf = (struct leaf *)(path.nodes[0]); 945 leaf = &path.nodes[0]->leaf;
837 946
838 if (comp_keys(&last, &leaf->items[slot].key) <= 0) 947 if (comp_keys(&last, &leaf->items[slot].key) <= 0)
839 BUG(); 948 BUG();
840 memcpy(&last, &leaf->items[slot].key, sizeof(last)); 949 memcpy(&last, &leaf->items[slot].key, sizeof(last));
841 ret = del_item(&root, &path); 950 ret = del_item(root, &path);
842 if (ret != 0) 951 if (ret != 0) {
952 printf("del_item returned %d\n", ret);
843 BUG(); 953 BUG();
954 }
844 tree_size--; 955 tree_size--;
845 } 956 }
957 release_path(root, &path);
846 } 958 }
847 print_tree(root.node); 959 close_ctree(root);
848 printf("tree size is now %d\n", tree_size); 960 printf("tree size is now %d\n", tree_size);
849 return 0; 961 return 0;
850} 962}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
new file mode 100644
index 000000000000..586bf1866042
--- /dev/null
+++ b/fs/btrfs/ctree.h
@@ -0,0 +1,62 @@
1#ifndef __CTREE__
2#define __CTREE__
3
4#define CTREE_BLOCKSIZE 4096
5
6struct key {
7 u64 objectid;
8 u32 flags;
9 u64 offset;
10} __attribute__ ((__packed__));
11
12struct header {
13 u64 fsid[2]; /* FS specific uuid */
14 u64 blocknr;
15 u64 parentid;
16 u32 csum;
17 u32 ham;
18 u16 nritems;
19 u16 flags;
20} __attribute__ ((__packed__));
21
22#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \
23 (sizeof(struct key) + sizeof(u64)))
24
25#define LEVEL_BITS 3
26#define MAX_LEVEL (1 << LEVEL_BITS)
27#define node_level(f) ((f) & (MAX_LEVEL-1))
28#define is_leaf(f) (node_level(f) == 0)
29
30struct tree_buffer;
31struct ctree_root {
32 struct tree_buffer *node;
33 int fp;
34 struct radix_tree_root cache_radix;
35};
36
37struct item {
38 struct key key;
39 u16 offset;
40 u16 size;
41} __attribute__ ((__packed__));
42
43#define LEAF_DATA_SIZE (CTREE_BLOCKSIZE - sizeof(struct header))
44struct leaf {
45 struct header header;
46 union {
47 struct item items[LEAF_DATA_SIZE/sizeof(struct item)];
48 u8 data[CTREE_BLOCKSIZE-sizeof(struct header)];
49 };
50} __attribute__ ((__packed__));
51
52struct node {
53 struct header header;
54 struct key keys[NODEPTRS_PER_BLOCK];
55 u64 blockptrs[NODEPTRS_PER_BLOCK];
56} __attribute__ ((__packed__));
57
58struct ctree_path {
59 struct tree_buffer *nodes[MAX_LEVEL];
60 int slots[MAX_LEVEL];
61};
62#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
new file mode 100644
index 000000000000..8d51a07051d9
--- /dev/null
+++ b/fs/btrfs/disk-io.c
@@ -0,0 +1,174 @@
1#define _XOPEN_SOURCE 500
2#include <stdio.h>
3#include <stdlib.h>
4#include <sys/types.h>
5#include <sys/stat.h>
6#include <fcntl.h>
7#include <unistd.h>
8#include "kerncompat.h"
9#include "radix-tree.h"
10#include "ctree.h"
11#include "disk-io.h"
12
13static int allocated_blocks = 0;
14
15struct ctree_header {
16 u64 root_block;
17} __attribute__ ((__packed__));
18
19static int get_free_block(struct ctree_root *root, u64 *block)
20{
21 struct stat st;
22 int ret;
23
24 st.st_size = 0;
25 ret = fstat(root->fp, &st);
26 if (st.st_size > sizeof(struct ctree_header)) {
27 *block = (st.st_size -
28 sizeof(struct ctree_header)) / CTREE_BLOCKSIZE;
29 } else {
30 *block = 0;
31 }
32 ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) *
33 CTREE_BLOCKSIZE);
34 return ret;
35}
36
37struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr)
38{
39 struct tree_buffer *buf;
40 int ret;
41 buf = malloc(sizeof(struct tree_buffer));
42 if (!buf)
43 return buf;
44 allocated_blocks++;
45 buf->blocknr = blocknr;
46 buf->count = 1;
47 radix_tree_preload(GFP_KERNEL);
48 ret = radix_tree_insert(&root->cache_radix, blocknr, buf);
49 radix_tree_preload_end();
50 if (ret) {
51 free(buf);
52 return NULL;
53 }
54 return buf;
55}
56
57struct tree_buffer *alloc_free_block(struct ctree_root *root)
58{
59 u64 free_block;
60 int ret;
61 struct tree_buffer * buf;
62 ret = get_free_block(root, &free_block);
63 if (ret) {
64 BUG();
65 return NULL;
66 }
67 buf = alloc_tree_block(root, free_block);
68 if (!buf)
69 BUG();
70 return buf;
71}
72
73struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr)
74{
75 loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header);
76 struct tree_buffer *buf;
77 int ret;
78
79 buf = radix_tree_lookup(&root->cache_radix, blocknr);
80 if (buf) {
81 buf->count++;
82 if (buf->blocknr != blocknr)
83 BUG();
84 if (buf->blocknr != buf->node.header.blocknr)
85 BUG();
86 return buf;
87 }
88 buf = alloc_tree_block(root, blocknr);
89 if (!buf)
90 return NULL;
91 ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset);
92 if (ret != CTREE_BLOCKSIZE) {
93 free(buf);
94 return NULL;
95 }
96 if (buf->blocknr != buf->node.header.blocknr)
97 BUG();
98 return buf;
99}
100
101int write_tree_block(struct ctree_root *root, struct tree_buffer *buf)
102{
103 u64 blocknr = buf->blocknr;
104 loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header);
105 int ret;
106
107 if (buf->blocknr != buf->node.header.blocknr)
108 BUG();
109 ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset);
110 if (ret != CTREE_BLOCKSIZE)
111 return ret;
112 if (buf == root->node)
113 return update_root_block(root);
114 return 0;
115}
116
117struct ctree_root *open_ctree(char *filename)
118{
119 struct ctree_root *root = malloc(sizeof(struct ctree_root));
120 int fp;
121 u64 root_block;
122 int ret;
123
124 fp = open(filename, O_CREAT | O_RDWR);
125 if (fp < 0) {
126 free(root);
127 return NULL;
128 }
129 root->fp = fp;
130 INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL);
131 ret = pread(fp, &root_block, sizeof(u64), 0);
132 if (ret == sizeof(u64)) {
133 printf("reading root node at block %lu\n", root_block);
134 root->node = read_tree_block(root, root_block);
135 } else
136 root->node = NULL;
137 return root;
138}
139
140int close_ctree(struct ctree_root *root)
141{
142 close(root->fp);
143 if (root->node)
144 tree_block_release(root, root->node);
145 free(root);
146 printf("on close %d blocks are allocated\n", allocated_blocks);
147 return 0;
148}
149
150int update_root_block(struct ctree_root *root)
151{
152 int ret;
153 u64 root_block = root->node->blocknr;
154
155 ret = pwrite(root->fp, &root_block, sizeof(u64), 0);
156 if (ret != sizeof(u64))
157 return ret;
158 return 0;
159}
160
161void tree_block_release(struct ctree_root *root, struct tree_buffer *buf)
162{
163 buf->count--;
164 if (buf->count == 0) {
165 if (!radix_tree_lookup(&root->cache_radix, buf->blocknr))
166 BUG();
167 radix_tree_delete(&root->cache_radix, buf->blocknr);
168 memset(buf, 0, sizeof(*buf));
169 free(buf);
170 BUG_ON(allocated_blocks == 0);
171 allocated_blocks--;
172 }
173}
174
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
new file mode 100644
index 000000000000..ee95fa05c4a3
--- /dev/null
+++ b/fs/btrfs/disk-io.h
@@ -0,0 +1,21 @@
1#ifndef __DISKIO__
2#define __DISKIO__
3
4struct tree_buffer {
5 u64 blocknr;
6 int count;
7 union {
8 struct node node;
9 struct leaf leaf;
10 };
11};
12
13struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr);
14int write_tree_block(struct ctree_root *root, struct tree_buffer *buf);
15struct ctree_root *open_ctree(char *filename);
16int close_ctree(struct ctree_root *root);
17void tree_block_release(struct ctree_root *root, struct tree_buffer *buf);
18struct tree_buffer *alloc_free_block(struct ctree_root *root);
19int update_root_block(struct ctree_root *root);
20
21#endif
diff --git a/fs/btrfs/kerncompat.h b/fs/btrfs/kerncompat.h
index 3a4bb4d661f9..347ca06205c2 100644
--- a/fs/btrfs/kerncompat.h
+++ b/fs/btrfs/kerncompat.h
@@ -6,6 +6,7 @@
6#define BITS_PER_LONG 64 6#define BITS_PER_LONG 64
7#define __GFP_BITS_SHIFT 20 7#define __GFP_BITS_SHIFT 20
8#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1)) 8#define __GFP_BITS_MASK ((int)((1 << __GFP_BITS_SHIFT) - 1))
9#define GFP_KERNEL 0
9#define __read_mostly 10#define __read_mostly
10#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 11#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
11#define __force 12#define __force
diff --git a/fs/btrfs/radix-tree.c b/fs/btrfs/radix-tree.c
new file mode 100644
index 000000000000..baa25ca1c2ac
--- /dev/null
+++ b/fs/btrfs/radix-tree.c
@@ -0,0 +1,836 @@
1/*
2 * Copyright (C) 2001 Momchil Velikov
3 * Portions Copyright (C) 2001 Christoph Hellwig
4 * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2, or (at
9 * your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#include "kerncompat.h"
22#include "radix-tree.h"
23#ifdef __KERNEL__
24#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
25#else
26#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
27#endif
28
29#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
30#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
31
32#define RADIX_TREE_TAG_LONGS \
33 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
34
35struct radix_tree_node {
36 unsigned int count;
37 void *slots[RADIX_TREE_MAP_SIZE];
38 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
39};
40
41struct radix_tree_path {
42 struct radix_tree_node *node;
43 int offset;
44};
45
46#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
47#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
48
49static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
50
51/*
52 * Per-cpu pool of preloaded nodes
53 */
54struct radix_tree_preload {
55 int nr;
56 struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
57};
58struct radix_tree_preload radix_tree_preloads = { 0, };
59
60static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
61{
62 return root->gfp_mask & __GFP_BITS_MASK;
63}
64
65static int internal_nodes = 0;
66/*
67 * This assumes that the caller has performed appropriate preallocation, and
68 * that the caller has pinned this thread of control to the current CPU.
69 */
70static struct radix_tree_node *
71radix_tree_node_alloc(struct radix_tree_root *root)
72{
73 struct radix_tree_node *ret;
74 ret = malloc(sizeof(struct radix_tree_node));
75 if (ret) {
76 memset(ret, 0, sizeof(struct radix_tree_node));
77 internal_nodes++;
78 }
79 return ret;
80}
81
82static inline void
83radix_tree_node_free(struct radix_tree_node *node)
84{
85 internal_nodes--;
86 free(node);
87}
88
89/*
90 * Load up this CPU's radix_tree_node buffer with sufficient objects to
91 * ensure that the addition of a single element in the tree cannot fail. On
92 * success, return zero, with preemption disabled. On error, return -ENOMEM
93 * with preemption not disabled.
94 */
95int radix_tree_preload(gfp_t gfp_mask)
96{
97 struct radix_tree_preload *rtp;
98 struct radix_tree_node *node;
99 int ret = -ENOMEM;
100
101 preempt_disable();
102 rtp = &__get_cpu_var(radix_tree_preloads);
103 while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
104 preempt_enable();
105 node = radix_tree_node_alloc(NULL);
106 if (node == NULL)
107 goto out;
108 preempt_disable();
109 rtp = &__get_cpu_var(radix_tree_preloads);
110 if (rtp->nr < ARRAY_SIZE(rtp->nodes))
111 rtp->nodes[rtp->nr++] = node;
112 else
113 radix_tree_node_free(node);
114 }
115 ret = 0;
116out:
117 return ret;
118}
119
120static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
121 int offset)
122{
123 __set_bit(offset, node->tags[tag]);
124}
125
126static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
127 int offset)
128{
129 __clear_bit(offset, node->tags[tag]);
130}
131
132static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
133 int offset)
134{
135 return test_bit(offset, node->tags[tag]);
136}
137
138static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
139{
140 root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
141}
142
143
144static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
145{
146 root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
147}
148
149static inline void root_tag_clear_all(struct radix_tree_root *root)
150{
151 root->gfp_mask &= __GFP_BITS_MASK;
152}
153
154static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
155{
156 return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
157}
158
159/*
160 * Returns 1 if any slot in the node has this tag set.
161 * Otherwise returns 0.
162 */
163static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
164{
165 int idx;
166 for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
167 if (node->tags[tag][idx])
168 return 1;
169 }
170 return 0;
171}
172
173/*
174 * Return the maximum key which can be store into a
175 * radix tree with height HEIGHT.
176 */
177static inline unsigned long radix_tree_maxindex(unsigned int height)
178{
179 return height_to_maxindex[height];
180}
181
182/*
183 * Extend a radix tree so it can store key @index.
184 */
185static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
186{
187 struct radix_tree_node *node;
188 unsigned int height;
189 int tag;
190
191 /* Figure out what the height should be. */
192 height = root->height + 1;
193 while (index > radix_tree_maxindex(height))
194 height++;
195
196 if (root->rnode == NULL) {
197 root->height = height;
198 goto out;
199 }
200
201 do {
202 if (!(node = radix_tree_node_alloc(root)))
203 return -ENOMEM;
204
205 /* Increase the height. */
206 node->slots[0] = root->rnode;
207
208 /* Propagate the aggregated tag info into the new root */
209 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
210 if (root_tag_get(root, tag))
211 tag_set(node, tag, 0);
212 }
213
214 node->count = 1;
215 root->rnode = node;
216 root->height++;
217 } while (height > root->height);
218out:
219 return 0;
220}
221
222/**
223 * radix_tree_insert - insert into a radix tree
224 * @root: radix tree root
225 * @index: index key
226 * @item: item to insert
227 *
228 * Insert an item into the radix tree at position @index.
229 */
230int radix_tree_insert(struct radix_tree_root *root,
231 unsigned long index, void *item)
232{
233 struct radix_tree_node *node = NULL, *slot;
234 unsigned int height, shift;
235 int offset;
236 int error;
237
238 /* Make sure the tree is high enough. */
239 if (index > radix_tree_maxindex(root->height)) {
240 error = radix_tree_extend(root, index);
241 if (error)
242 return error;
243 }
244
245 slot = root->rnode;
246 height = root->height;
247 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
248
249 offset = 0; /* uninitialised var warning */
250 while (height > 0) {
251 if (slot == NULL) {
252 /* Have to add a child node. */
253 if (!(slot = radix_tree_node_alloc(root)))
254 return -ENOMEM;
255 if (node) {
256 node->slots[offset] = slot;
257 node->count++;
258 } else
259 root->rnode = slot;
260 }
261
262 /* Go a level down */
263 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
264 node = slot;
265 slot = node->slots[offset];
266 shift -= RADIX_TREE_MAP_SHIFT;
267 height--;
268 }
269
270 if (slot != NULL)
271 return -EEXIST;
272
273 if (node) {
274 node->count++;
275 node->slots[offset] = item;
276 BUG_ON(tag_get(node, 0, offset));
277 BUG_ON(tag_get(node, 1, offset));
278 } else {
279 root->rnode = item;
280 BUG_ON(root_tag_get(root, 0));
281 BUG_ON(root_tag_get(root, 1));
282 }
283
284 return 0;
285}
286
287static inline void **__lookup_slot(struct radix_tree_root *root,
288 unsigned long index)
289{
290 unsigned int height, shift;
291 struct radix_tree_node **slot;
292
293 height = root->height;
294
295 if (index > radix_tree_maxindex(height))
296 return NULL;
297
298 if (height == 0 && root->rnode)
299 return (void **)&root->rnode;
300
301 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
302 slot = &root->rnode;
303
304 while (height > 0) {
305 if (*slot == NULL)
306 return NULL;
307
308 slot = (struct radix_tree_node **)
309 ((*slot)->slots +
310 ((index >> shift) & RADIX_TREE_MAP_MASK));
311 shift -= RADIX_TREE_MAP_SHIFT;
312 height--;
313 }
314
315 return (void **)slot;
316}
317
318/**
319 * radix_tree_lookup_slot - lookup a slot in a radix tree
320 * @root: radix tree root
321 * @index: index key
322 *
323 * Lookup the slot corresponding to the position @index in the radix tree
324 * @root. This is useful for update-if-exists operations.
325 */
326void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
327{
328 return __lookup_slot(root, index);
329}
330
331/**
332 * radix_tree_lookup - perform lookup operation on a radix tree
333 * @root: radix tree root
334 * @index: index key
335 *
336 * Lookup the item at the position @index in the radix tree @root.
337 */
338void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
339{
340 void **slot;
341
342 slot = __lookup_slot(root, index);
343 return slot != NULL ? *slot : NULL;
344}
345
346/**
347 * radix_tree_tag_set - set a tag on a radix tree node
348 * @root: radix tree root
349 * @index: index key
350 * @tag: tag index
351 *
352 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
353 * corresponding to @index in the radix tree. From
354 * the root all the way down to the leaf node.
355 *
356 * Returns the address of the tagged item. Setting a tag on a not-present
357 * item is a bug.
358 */
359void *radix_tree_tag_set(struct radix_tree_root *root,
360 unsigned long index, unsigned int tag)
361{
362 unsigned int height, shift;
363 struct radix_tree_node *slot;
364
365 height = root->height;
366 BUG_ON(index > radix_tree_maxindex(height));
367
368 slot = root->rnode;
369 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
370
371 while (height > 0) {
372 int offset;
373
374 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
375 if (!tag_get(slot, tag, offset))
376 tag_set(slot, tag, offset);
377 slot = slot->slots[offset];
378 BUG_ON(slot == NULL);
379 shift -= RADIX_TREE_MAP_SHIFT;
380 height--;
381 }
382
383 /* set the root's tag bit */
384 if (slot && !root_tag_get(root, tag))
385 root_tag_set(root, tag);
386
387 return slot;
388}
389
390/**
391 * radix_tree_tag_clear - clear a tag on a radix tree node
392 * @root: radix tree root
393 * @index: index key
394 * @tag: tag index
395 *
396 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
397 * corresponding to @index in the radix tree. If
398 * this causes the leaf node to have no tags set then clear the tag in the
399 * next-to-leaf node, etc.
400 *
401 * Returns the address of the tagged item on success, else NULL. ie:
402 * has the same return value and semantics as radix_tree_lookup().
403 */
404void *radix_tree_tag_clear(struct radix_tree_root *root,
405 unsigned long index, unsigned int tag)
406{
407 struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
408 struct radix_tree_node *slot = NULL;
409 unsigned int height, shift;
410
411 height = root->height;
412 if (index > radix_tree_maxindex(height))
413 goto out;
414
415 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
416 pathp->node = NULL;
417 slot = root->rnode;
418
419 while (height > 0) {
420 int offset;
421
422 if (slot == NULL)
423 goto out;
424
425 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
426 pathp[1].offset = offset;
427 pathp[1].node = slot;
428 slot = slot->slots[offset];
429 pathp++;
430 shift -= RADIX_TREE_MAP_SHIFT;
431 height--;
432 }
433
434 if (slot == NULL)
435 goto out;
436
437 while (pathp->node) {
438 if (!tag_get(pathp->node, tag, pathp->offset))
439 goto out;
440 tag_clear(pathp->node, tag, pathp->offset);
441 if (any_tag_set(pathp->node, tag))
442 goto out;
443 pathp--;
444 }
445
446 /* clear the root's tag bit */
447 if (root_tag_get(root, tag))
448 root_tag_clear(root, tag);
449
450out:
451 return slot;
452}
453
454#ifndef __KERNEL__ /* Only the test harness uses this at present */
455/**
456 * radix_tree_tag_get - get a tag on a radix tree node
457 * @root: radix tree root
458 * @index: index key
459 * @tag: tag index (< RADIX_TREE_MAX_TAGS)
460 *
461 * Return values:
462 *
463 * 0: tag not present or not set
464 * 1: tag set
465 */
466int radix_tree_tag_get(struct radix_tree_root *root,
467 unsigned long index, unsigned int tag)
468{
469 unsigned int height, shift;
470 struct radix_tree_node *slot;
471 int saw_unset_tag = 0;
472
473 height = root->height;
474 if (index > radix_tree_maxindex(height))
475 return 0;
476
477 /* check the root's tag bit */
478 if (!root_tag_get(root, tag))
479 return 0;
480
481 if (height == 0)
482 return 1;
483
484 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
485 slot = root->rnode;
486
487 for ( ; ; ) {
488 int offset;
489
490 if (slot == NULL)
491 return 0;
492
493 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
494
495 /*
496 * This is just a debug check. Later, we can bale as soon as
497 * we see an unset tag.
498 */
499 if (!tag_get(slot, tag, offset))
500 saw_unset_tag = 1;
501 if (height == 1) {
502 int ret = tag_get(slot, tag, offset);
503
504 BUG_ON(ret && saw_unset_tag);
505 return !!ret;
506 }
507 slot = slot->slots[offset];
508 shift -= RADIX_TREE_MAP_SHIFT;
509 height--;
510 }
511}
512#endif
513
514static unsigned int
515__lookup(struct radix_tree_root *root, void **results, unsigned long index,
516 unsigned int max_items, unsigned long *next_index)
517{
518 unsigned int nr_found = 0;
519 unsigned int shift, height;
520 struct radix_tree_node *slot;
521 unsigned long i;
522
523 height = root->height;
524 if (height == 0) {
525 if (root->rnode && index == 0)
526 results[nr_found++] = root->rnode;
527 goto out;
528 }
529
530 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
531 slot = root->rnode;
532
533 for ( ; height > 1; height--) {
534
535 for (i = (index >> shift) & RADIX_TREE_MAP_MASK ;
536 i < RADIX_TREE_MAP_SIZE; i++) {
537 if (slot->slots[i] != NULL)
538 break;
539 index &= ~((1UL << shift) - 1);
540 index += 1UL << shift;
541 if (index == 0)
542 goto out; /* 32-bit wraparound */
543 }
544 if (i == RADIX_TREE_MAP_SIZE)
545 goto out;
546
547 shift -= RADIX_TREE_MAP_SHIFT;
548 slot = slot->slots[i];
549 }
550
551 /* Bottom level: grab some items */
552 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
553 index++;
554 if (slot->slots[i]) {
555 results[nr_found++] = slot->slots[i];
556 if (nr_found == max_items)
557 goto out;
558 }
559 }
560out:
561 *next_index = index;
562 return nr_found;
563}
564
565/**
566 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
567 * @root: radix tree root
568 * @results: where the results of the lookup are placed
569 * @first_index: start the lookup from this key
570 * @max_items: place up to this many items at *results
571 *
572 * Performs an index-ascending scan of the tree for present items. Places
573 * them at *@results and returns the number of items which were placed at
574 * *@results.
575 *
576 * The implementation is naive.
577 */
578unsigned int
579radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
580 unsigned long first_index, unsigned int max_items)
581{
582 const unsigned long max_index = radix_tree_maxindex(root->height);
583 unsigned long cur_index = first_index;
584 unsigned int ret = 0;
585
586 while (ret < max_items) {
587 unsigned int nr_found;
588 unsigned long next_index; /* Index of next search */
589
590 if (cur_index > max_index)
591 break;
592 nr_found = __lookup(root, results + ret, cur_index,
593 max_items - ret, &next_index);
594 ret += nr_found;
595 if (next_index == 0)
596 break;
597 cur_index = next_index;
598 }
599 return ret;
600}
601
602/*
603 * FIXME: the two tag_get()s here should use find_next_bit() instead of
604 * open-coding the search.
605 */
606static unsigned int
607__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
608 unsigned int max_items, unsigned long *next_index, unsigned int tag)
609{
610 unsigned int nr_found = 0;
611 unsigned int shift;
612 unsigned int height = root->height;
613 struct radix_tree_node *slot;
614
615 if (height == 0) {
616 if (root->rnode && index == 0)
617 results[nr_found++] = root->rnode;
618 goto out;
619 }
620
621 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
622 slot = root->rnode;
623
624 do {
625 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
626
627 for ( ; i < RADIX_TREE_MAP_SIZE; i++) {
628 if (tag_get(slot, tag, i)) {
629 BUG_ON(slot->slots[i] == NULL);
630 break;
631 }
632 index &= ~((1UL << shift) - 1);
633 index += 1UL << shift;
634 if (index == 0)
635 goto out; /* 32-bit wraparound */
636 }
637 if (i == RADIX_TREE_MAP_SIZE)
638 goto out;
639 height--;
640 if (height == 0) { /* Bottom level: grab some items */
641 unsigned long j = index & RADIX_TREE_MAP_MASK;
642
643 for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
644 index++;
645 if (tag_get(slot, tag, j)) {
646 BUG_ON(slot->slots[j] == NULL);
647 results[nr_found++] = slot->slots[j];
648 if (nr_found == max_items)
649 goto out;
650 }
651 }
652 }
653 shift -= RADIX_TREE_MAP_SHIFT;
654 slot = slot->slots[i];
655 } while (height > 0);
656out:
657 *next_index = index;
658 return nr_found;
659}
660
661/**
662 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
663 * based on a tag
664 * @root: radix tree root
665 * @results: where the results of the lookup are placed
666 * @first_index: start the lookup from this key
667 * @max_items: place up to this many items at *results
668 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
669 *
670 * Performs an index-ascending scan of the tree for present items which
671 * have the tag indexed by @tag set. Places the items at *@results and
672 * returns the number of items which were placed at *@results.
673 */
674unsigned int
675radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
676 unsigned long first_index, unsigned int max_items,
677 unsigned int tag)
678{
679 const unsigned long max_index = radix_tree_maxindex(root->height);
680 unsigned long cur_index = first_index;
681 unsigned int ret = 0;
682
683 /* check the root's tag bit */
684 if (!root_tag_get(root, tag))
685 return 0;
686
687 while (ret < max_items) {
688 unsigned int nr_found;
689 unsigned long next_index; /* Index of next search */
690
691 if (cur_index > max_index)
692 break;
693 nr_found = __lookup_tag(root, results + ret, cur_index,
694 max_items - ret, &next_index, tag);
695 ret += nr_found;
696 if (next_index == 0)
697 break;
698 cur_index = next_index;
699 }
700 return ret;
701}
702
703/**
704 * radix_tree_shrink - shrink height of a radix tree to minimal
705 * @root radix tree root
706 */
707static inline void radix_tree_shrink(struct radix_tree_root *root)
708{
709 /* try to shrink tree height */
710 while (root->height > 0 &&
711 root->rnode->count == 1 &&
712 root->rnode->slots[0]) {
713 struct radix_tree_node *to_free = root->rnode;
714
715 root->rnode = to_free->slots[0];
716 root->height--;
717 /* must only free zeroed nodes into the slab */
718 tag_clear(to_free, 0, 0);
719 tag_clear(to_free, 1, 0);
720 to_free->slots[0] = NULL;
721 to_free->count = 0;
722 radix_tree_node_free(to_free);
723 }
724}
725
726/**
727 * radix_tree_delete - delete an item from a radix tree
728 * @root: radix tree root
729 * @index: index key
730 *
731 * Remove the item at @index from the radix tree rooted at @root.
732 *
733 * Returns the address of the deleted item, or NULL if it was not present.
734 */
735void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
736{
737 struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
738 struct radix_tree_node *slot = NULL;
739 unsigned int height, shift;
740 int tag;
741 int offset;
742
743 height = root->height;
744 if (index > radix_tree_maxindex(height))
745 goto out;
746
747 slot = root->rnode;
748 if (height == 0 && root->rnode) {
749 root_tag_clear_all(root);
750 root->rnode = NULL;
751 goto out;
752 }
753
754 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
755 pathp->node = NULL;
756
757 do {
758 if (slot == NULL)
759 goto out;
760
761 pathp++;
762 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
763 pathp->offset = offset;
764 pathp->node = slot;
765 slot = slot->slots[offset];
766 shift -= RADIX_TREE_MAP_SHIFT;
767 height--;
768 } while (height > 0);
769
770 if (slot == NULL)
771 goto out;
772
773 /*
774 * Clear all tags associated with the just-deleted item
775 */
776 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
777 if (tag_get(pathp->node, tag, pathp->offset))
778 radix_tree_tag_clear(root, index, tag);
779 }
780
781 /* Now free the nodes we do not need anymore */
782 while (pathp->node) {
783 pathp->node->slots[pathp->offset] = NULL;
784 pathp->node->count--;
785
786 if (pathp->node->count) {
787 if (pathp->node == root->rnode)
788 radix_tree_shrink(root);
789 goto out;
790 }
791
792 /* Node with zero slots in use so free it */
793 radix_tree_node_free(pathp->node);
794
795 pathp--;
796 }
797 root_tag_clear_all(root);
798 root->height = 0;
799 root->rnode = NULL;
800
801out:
802 return slot;
803}
804
805/**
806 * radix_tree_tagged - test whether any items in the tree are tagged
807 * @root: radix tree root
808 * @tag: tag to test
809 */
810int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
811{
812 return root_tag_get(root, tag);
813}
814
815static unsigned long __maxindex(unsigned int height)
816{
817 unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
818 unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;
819
820 if (tmp >= RADIX_TREE_INDEX_BITS)
821 index = ~0UL;
822 return index;
823}
824
825static void radix_tree_init_maxindex(void)
826{
827 unsigned int i;
828
829 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
830 height_to_maxindex[i] = __maxindex(i);
831}
832
833void radix_tree_init(void)
834{
835 radix_tree_init_maxindex();
836}
diff --git a/fs/btrfs/radix-tree.h b/fs/btrfs/radix-tree.h
new file mode 100644
index 000000000000..c3ce88137f77
--- /dev/null
+++ b/fs/btrfs/radix-tree.h
@@ -0,0 +1,73 @@
1/*
2 * Copyright (C) 2001 Momchil Velikov
3 * Portions Copyright (C) 2001 Christoph Hellwig
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2, or (at
8 * your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19#ifndef _LINUX_RADIX_TREE_H
20#define _LINUX_RADIX_TREE_H
21
22#define RADIX_TREE_MAX_TAGS 2
23
24/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
25struct radix_tree_root {
26 unsigned int height;
27 gfp_t gfp_mask;
28 struct radix_tree_node *rnode;
29};
30
31#define RADIX_TREE_INIT(mask) { \
32 .height = 0, \
33 .gfp_mask = (mask), \
34 .rnode = NULL, \
35}
36
37#define RADIX_TREE(name, mask) \
38 struct radix_tree_root name = RADIX_TREE_INIT(mask)
39
40#define INIT_RADIX_TREE(root, mask) \
41do { \
42 (root)->height = 0; \
43 (root)->gfp_mask = (mask); \
44 (root)->rnode = NULL; \
45} while (0)
46
47int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
48void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
49void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
50void *radix_tree_delete(struct radix_tree_root *, unsigned long);
51unsigned int
52radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
53 unsigned long first_index, unsigned int max_items);
54int radix_tree_preload(gfp_t gfp_mask);
55void radix_tree_init(void);
56void *radix_tree_tag_set(struct radix_tree_root *root,
57 unsigned long index, unsigned int tag);
58void *radix_tree_tag_clear(struct radix_tree_root *root,
59 unsigned long index, unsigned int tag);
60int radix_tree_tag_get(struct radix_tree_root *root,
61 unsigned long index, unsigned int tag);
62unsigned int
63radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
64 unsigned long first_index, unsigned int max_items,
65 unsigned int tag);
66int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
67
68static inline void radix_tree_preload_end(void)
69{
70 preempt_enable();
71}
72
73#endif /* _LINUX_RADIX_TREE_H */