aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c432
1 files changed, 272 insertions, 160 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 4bf5e92584bd..6f0522f21082 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1,68 +1,25 @@
1#include <stdio.h> 1#include <stdio.h>
2#include <stdlib.h> 2#include <stdlib.h>
3#include "kerncompat.h" 3#include "kerncompat.h"
4 4#include "radix-tree.h"
5#define BLOCKSIZE 4096 5#include "ctree.h"
6 6#include "disk-io.h"
7struct key {
8 u64 objectid;
9 u32 flags;
10 u64 offset;
11} __attribute__ ((__packed__));
12
13struct header {
14 u64 fsid[2]; /* FS specific uuid */
15 u64 blocknum;
16 u64 parentid;
17 u32 csum;
18 u32 ham;
19 u16 nritems;
20 u16 flags;
21} __attribute__ ((__packed__));
22
23#define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \
24 (sizeof(struct key) + sizeof(u64)))
25
26#define LEVEL_BITS 3
27#define MAX_LEVEL (1 << LEVEL_BITS)
28#define node_level(f) ((f) & (MAX_LEVEL-1))
29#define is_leaf(f) (node_level(f) == 0)
30
31struct ctree_root {
32 struct node *node;
33};
34
35struct item {
36 struct key key;
37 u16 offset;
38 u16 size;
39} __attribute__ ((__packed__));
40
41#define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header))
42struct leaf {
43 struct header header;
44 union {
45 struct item items[LEAF_DATA_SIZE/sizeof(struct item)];
46 u8 data[BLOCKSIZE-sizeof(struct header)];
47 };
48} __attribute__ ((__packed__));
49
50struct node {
51 struct header header;
52 struct key keys[NODEPTRS_PER_BLOCK];
53 u64 blockptrs[NODEPTRS_PER_BLOCK];
54} __attribute__ ((__packed__));
55
56struct ctree_path {
57 struct node *nodes[MAX_LEVEL];
58 int slots[MAX_LEVEL];
59};
60 7
61static inline void init_path(struct ctree_path *p) 8static inline void init_path(struct ctree_path *p)
62{ 9{
63 memset(p, 0, sizeof(*p)); 10 memset(p, 0, sizeof(*p));
64} 11}
65 12
13static void release_path(struct ctree_root *root, struct ctree_path *p)
14{
15 int i;
16 for (i = 0; i < MAX_LEVEL; i++) {
17 if (!p->nodes[i])
18 break;
19 tree_block_release(root, p->nodes[i]);
20 }
21}
22
66static inline unsigned int leaf_data_end(struct leaf *leaf) 23static inline unsigned int leaf_data_end(struct leaf *leaf)
67{ 24{
68 unsigned int nr = leaf->header.nritems; 25 unsigned int nr = leaf->header.nritems;
@@ -135,26 +92,25 @@ int bin_search(struct node *c, struct key *key, int *slot)
135 return -1; 92 return -1;
136} 93}
137 94
138void *read_block(u64 blocknum)
139{
140 return (void *)blocknum;
141}
142
143int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) 95int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
144{ 96{
145 struct node *c = root->node; 97 struct tree_buffer *b = root->node;
98 struct node *c;
99
146 int slot; 100 int slot;
147 int ret; 101 int ret;
148 int level; 102 int level;
149 while (c) { 103 b->count++;
104 while (b) {
105 c = &b->node;
150 level = node_level(c->header.flags); 106 level = node_level(c->header.flags);
151 p->nodes[level] = c; 107 p->nodes[level] = b;
152 ret = bin_search(c, key, &slot); 108 ret = bin_search(c, key, &slot);
153 if (!is_leaf(c->header.flags)) { 109 if (!is_leaf(c->header.flags)) {
154 if (ret && slot > 0) 110 if (ret && slot > 0)
155 slot -= 1; 111 slot -= 1;
156 p->slots[level] = slot; 112 p->slots[level] = slot;
157 c = read_block(c->blockptrs[slot]); 113 b = read_tree_block(root, c->blockptrs[slot]);
158 continue; 114 continue;
159 } else { 115 } else {
160 p->slots[level] = slot; 116 p->slots[level] = slot;
@@ -164,17 +120,20 @@ int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
164 return -1; 120 return -1;
165} 121}
166 122
167static void fixup_low_keys(struct ctree_path *path, struct key *key, 123static void fixup_low_keys(struct ctree_root *root,
168 int level) 124 struct ctree_path *path, struct key *key,
125 int level)
169{ 126{
170 int i; 127 int i;
171 /* adjust the pointers going up the tree */ 128 /* adjust the pointers going up the tree */
172 for (i = level; i < MAX_LEVEL; i++) { 129 for (i = level; i < MAX_LEVEL; i++) {
173 struct node *t = path->nodes[i]; 130 struct node *t;
174 int tslot = path->slots[i]; 131 int tslot = path->slots[i];
175 if (!t) 132 if (!path->nodes[i])
176 break; 133 break;
134 t = &path->nodes[i]->node;
177 memcpy(t->keys + tslot, key, sizeof(*key)); 135 memcpy(t->keys + tslot, key, sizeof(*key));
136 write_tree_block(root, path->nodes[i]);
178 if (tslot != 0) 137 if (tslot != 0)
179 break; 138 break;
180 } 139 }
@@ -190,27 +149,34 @@ int __insert_ptr(struct ctree_root *root,
190 int nritems; 149 int nritems;
191 /* need a new root */ 150 /* need a new root */
192 if (!path->nodes[level]) { 151 if (!path->nodes[level]) {
193 c = malloc(sizeof(struct node)); 152 struct tree_buffer *t;
153 t = alloc_free_block(root);
154 c = &t->node;
194 memset(c, 0, sizeof(c)); 155 memset(c, 0, sizeof(c));
195 c->header.nritems = 2; 156 c->header.nritems = 2;
196 c->header.flags = node_level(level); 157 c->header.flags = node_level(level);
197 lower = path->nodes[level-1]; 158 c->header.blocknr = t->blocknr;
159 lower = &path->nodes[level-1]->node;
198 if (is_leaf(lower->header.flags)) 160 if (is_leaf(lower->header.flags))
199 lower_key = &((struct leaf *)lower)->items[0].key; 161 lower_key = &((struct leaf *)lower)->items[0].key;
200 else 162 else
201 lower_key = lower->keys; 163 lower_key = lower->keys;
202 memcpy(c->keys, lower_key, sizeof(struct key)); 164 memcpy(c->keys, lower_key, sizeof(struct key));
203 memcpy(c->keys + 1, key, sizeof(struct key)); 165 memcpy(c->keys + 1, key, sizeof(struct key));
204 c->blockptrs[0] = (u64)lower; 166 c->blockptrs[0] = path->nodes[level-1]->blocknr;
205 c->blockptrs[1] = blocknr; 167 c->blockptrs[1] = blocknr;
206 root->node = c; 168 /* the path has an extra ref to root->node */
207 path->nodes[level] = c; 169 tree_block_release(root, root->node);
170 root->node = t;
171 t->count++;
172 write_tree_block(root, t);
173 path->nodes[level] = t;
208 path->slots[level] = 0; 174 path->slots[level] = 0;
209 if (c->keys[1].objectid == 0) 175 if (c->keys[1].objectid == 0)
210 BUG(); 176 BUG();
211 return 0; 177 return 0;
212 } 178 }
213 lower = path->nodes[level]; 179 lower = &path->nodes[level]->node;
214 nritems = lower->header.nritems; 180 nritems = lower->header.nritems;
215 if (slot > nritems) 181 if (slot > nritems)
216 BUG(); 182 BUG();
@@ -227,6 +193,7 @@ int __insert_ptr(struct ctree_root *root,
227 lower->header.nritems++; 193 lower->header.nritems++;
228 if (lower->keys[1].objectid == 0) 194 if (lower->keys[1].objectid == 0)
229 BUG(); 195 BUG();
196 write_tree_block(root, path->nodes[level]);
230 return 0; 197 return 0;
231} 198}
232 199
@@ -238,6 +205,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
238 int push_items = 0; 205 int push_items = 0;
239 int left_nritems; 206 int left_nritems;
240 int right_nritems; 207 int right_nritems;
208 struct tree_buffer *t;
209 struct tree_buffer *right_buf;
241 210
242 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) 211 if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
243 return 1; 212 return 1;
@@ -245,13 +214,18 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
245 if (slot == 0) 214 if (slot == 0)
246 return 1; 215 return 1;
247 216
248 left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]); 217 t = read_tree_block(root,
249 right = path->nodes[level]; 218 path->nodes[level + 1]->node.blockptrs[slot - 1]);
219 left = &t->node;
220 right_buf = path->nodes[level];
221 right = &right_buf->node;
250 left_nritems = left->header.nritems; 222 left_nritems = left->header.nritems;
251 right_nritems = right->header.nritems; 223 right_nritems = right->header.nritems;
252 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); 224 push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
253 if (push_items <= 0) 225 if (push_items <= 0) {
226 tree_block_release(root, t);
254 return 1; 227 return 1;
228 }
255 229
256 if (right_nritems < push_items) 230 if (right_nritems < push_items)
257 push_items = right_nritems; 231 push_items = right_nritems;
@@ -267,15 +241,20 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
267 left->header.nritems += push_items; 241 left->header.nritems += push_items;
268 242
269 /* adjust the pointers going up the tree */ 243 /* adjust the pointers going up the tree */
270 fixup_low_keys(path, right->keys, level + 1); 244 fixup_low_keys(root, path, right->keys, level + 1);
245
246 write_tree_block(root, t);
247 write_tree_block(root, right_buf);
271 248
272 /* then fixup the leaf pointer in the path */ 249 /* then fixup the leaf pointer in the path */
273 if (path->slots[level] < push_items) { 250 if (path->slots[level] < push_items) {
274 path->slots[level] += left_nritems; 251 path->slots[level] += left_nritems;
275 path->nodes[level] = (struct node*)left; 252 tree_block_release(root, path->nodes[level]);
253 path->nodes[level] = t;
276 path->slots[level + 1] -= 1; 254 path->slots[level + 1] -= 1;
277 } else { 255 } else {
278 path->slots[level] -= push_items; 256 path->slots[level] -= push_items;
257 tree_block_release(root, t);
279 } 258 }
280 return 0; 259 return 0;
281} 260}
@@ -283,6 +262,8 @@ int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
283int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) 262int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
284{ 263{
285 int slot; 264 int slot;
265 struct tree_buffer *t;
266 struct tree_buffer *src_buffer;
286 struct node *dst; 267 struct node *dst;
287 struct node *src; 268 struct node *src;
288 int push_items = 0; 269 int push_items = 0;
@@ -295,16 +276,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
295 if (slot == NODEPTRS_PER_BLOCK - 1) 276 if (slot == NODEPTRS_PER_BLOCK - 1)
296 return 1; 277 return 1;
297 278
298 if (slot >= path->nodes[level + 1]->header.nritems -1) 279 if (slot >= path->nodes[level + 1]->node.header.nritems -1)
299 return 1; 280 return 1;
300 281
301 dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]); 282 t = read_tree_block(root,
302 src = path->nodes[level]; 283 path->nodes[level + 1]->node.blockptrs[slot + 1]);
284 dst = &t->node;
285 src_buffer = path->nodes[level];
286 src = &src_buffer->node;
303 dst_nritems = dst->header.nritems; 287 dst_nritems = dst->header.nritems;
304 src_nritems = src->header.nritems; 288 src_nritems = src->header.nritems;
305 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); 289 push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
306 if (push_items <= 0) 290 if (push_items <= 0) {
291 tree_block_release(root, t);
307 return 1; 292 return 1;
293 }
308 294
309 if (src_nritems < push_items) 295 if (src_nritems < push_items)
310 push_items = src_nritems; 296 push_items = src_nritems;
@@ -322,13 +308,21 @@ int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
322 dst->header.nritems += push_items; 308 dst->header.nritems += push_items;
323 309
324 /* adjust the pointers going up the tree */ 310 /* adjust the pointers going up the tree */
325 memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1, 311 memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
326 dst->keys, sizeof(struct key)); 312 dst->keys, sizeof(struct key));
313
314 write_tree_block(root, path->nodes[level + 1]);
315 write_tree_block(root, t);
316 write_tree_block(root, src_buffer);
317
327 /* then fixup the leaf pointer in the path */ 318 /* then fixup the leaf pointer in the path */
328 if (path->slots[level] >= src->header.nritems) { 319 if (path->slots[level] >= src->header.nritems) {
329 path->slots[level] -= src->header.nritems; 320 path->slots[level] -= src->header.nritems;
330 path->nodes[level] = (struct node*)dst; 321 tree_block_release(root, path->nodes[level]);
322 path->nodes[level] = t;
331 path->slots[level + 1] += 1; 323 path->slots[level + 1] += 1;
324 } else {
325 tree_block_release(root, t);
332 } 326 }
333 return 0; 327 return 0;
334} 328}
@@ -337,15 +331,18 @@ int insert_ptr(struct ctree_root *root,
337 struct ctree_path *path, struct key *key, 331 struct ctree_path *path, struct key *key,
338 u64 blocknr, int level) 332 u64 blocknr, int level)
339{ 333{
340 struct node *c = path->nodes[level]; 334 struct tree_buffer *t = path->nodes[level];
335 struct node *c = &path->nodes[level]->node;
341 struct node *b; 336 struct node *b;
342 struct node *bal[MAX_LEVEL]; 337 struct tree_buffer *b_buffer;
338 struct tree_buffer *bal[MAX_LEVEL];
343 int bal_level = level; 339 int bal_level = level;
344 int mid; 340 int mid;
345 int bal_start = -1; 341 int bal_start = -1;
346 342
347 memset(bal, 0, ARRAY_SIZE(bal)); 343 memset(bal, 0, ARRAY_SIZE(bal));
348 while(c && c->header.nritems == NODEPTRS_PER_BLOCK) { 344 while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
345 c = &t->node;
349 if (push_node_left(root, path, 346 if (push_node_left(root, path,
350 node_level(c->header.flags)) == 0) 347 node_level(c->header.flags)) == 0)
351 break; 348 break;
@@ -355,8 +352,10 @@ int insert_ptr(struct ctree_root *root,
355 bal_start = bal_level; 352 bal_start = bal_level;
356 if (bal_level == MAX_LEVEL - 1) 353 if (bal_level == MAX_LEVEL - 1)
357 BUG(); 354 BUG();
358 b = malloc(sizeof(struct node)); 355 b_buffer = alloc_free_block(root);
356 b = &b_buffer->node;
359 b->header.flags = c->header.flags; 357 b->header.flags = c->header.flags;
358 b->header.blocknr = b_buffer->blocknr;
360 mid = (c->header.nritems + 1) / 2; 359 mid = (c->header.nritems + 1) / 2;
361 memcpy(b->keys, c->keys + mid, 360 memcpy(b->keys, c->keys + mid,
362 (c->header.nritems - mid) * sizeof(struct key)); 361 (c->header.nritems - mid) * sizeof(struct key));
@@ -364,21 +363,28 @@ int insert_ptr(struct ctree_root *root,
364 (c->header.nritems - mid) * sizeof(u64)); 363 (c->header.nritems - mid) * sizeof(u64));
365 b->header.nritems = c->header.nritems - mid; 364 b->header.nritems = c->header.nritems - mid;
366 c->header.nritems = mid; 365 c->header.nritems = mid;
367 bal[bal_level] = b; 366
367 write_tree_block(root, t);
368 write_tree_block(root, b_buffer);
369
370 bal[bal_level] = b_buffer;
368 if (bal_level == MAX_LEVEL - 1) 371 if (bal_level == MAX_LEVEL - 1)
369 break; 372 break;
370 bal_level += 1; 373 bal_level += 1;
371 c = path->nodes[bal_level]; 374 t = path->nodes[bal_level];
372 } 375 }
373 while(bal_start > 0) { 376 while(bal_start > 0) {
374 b = bal[bal_start]; 377 b_buffer = bal[bal_start];
375 c = path->nodes[bal_start]; 378 c = &path->nodes[bal_start]->node;
376 __insert_ptr(root, path, b->keys, (u64)b, 379 __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
377 path->slots[bal_start + 1] + 1, bal_start + 1); 380 path->slots[bal_start + 1] + 1, bal_start + 1);
378 if (path->slots[bal_start] >= c->header.nritems) { 381 if (path->slots[bal_start] >= c->header.nritems) {
379 path->slots[bal_start] -= c->header.nritems; 382 path->slots[bal_start] -= c->header.nritems;
380 path->nodes[bal_start] = b; 383 tree_block_release(root, path->nodes[bal_start]);
384 path->nodes[bal_start] = b_buffer;
381 path->slots[bal_start + 1] += 1; 385 path->slots[bal_start + 1] += 1;
386 } else {
387 tree_block_release(root, b_buffer);
382 } 388 }
383 bal_start--; 389 bal_start--;
384 if (!bal[bal_start]) 390 if (!bal[bal_start])
@@ -404,7 +410,9 @@ int leaf_space_used(struct leaf *l, int start, int nr)
404int push_leaf_left(struct ctree_root *root, struct ctree_path *path, 410int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
405 int data_size) 411 int data_size)
406{ 412{
407 struct leaf *right = (struct leaf *)path->nodes[0]; 413 struct tree_buffer *right_buf = path->nodes[0];
414 struct leaf *right = &right_buf->leaf;
415 struct tree_buffer *t;
408 struct leaf *left; 416 struct leaf *left;
409 int slot; 417 int slot;
410 int i; 418 int i;
@@ -421,9 +429,11 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
421 if (!path->nodes[1]) { 429 if (!path->nodes[1]) {
422 return 1; 430 return 1;
423 } 431 }
424 left = read_block(path->nodes[1]->blockptrs[slot - 1]); 432 t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
433 left = &t->leaf;
425 free_space = leaf_free_space(left); 434 free_space = leaf_free_space(left);
426 if (free_space < data_size + sizeof(struct item)) { 435 if (free_space < data_size + sizeof(struct item)) {
436 tree_block_release(root, t);
427 return 1; 437 return 1;
428 } 438 }
429 for (i = 0; i < right->header.nritems; i++) { 439 for (i = 0; i < right->header.nritems; i++) {
@@ -436,6 +446,7 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
436 push_space += item->size + sizeof(*item); 446 push_space += item->size + sizeof(*item);
437 } 447 }
438 if (push_items == 0) { 448 if (push_items == 0) {
449 tree_block_release(root, t);
439 return 1; 450 return 1;
440 } 451 }
441 /* push data from right to left */ 452 /* push data from right to left */
@@ -446,6 +457,8 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
446 right->data + right->items[push_items - 1].offset, 457 right->data + right->items[push_items - 1].offset,
447 push_space); 458 push_space);
448 old_left_nritems = left->header.nritems; 459 old_left_nritems = left->header.nritems;
460 BUG_ON(old_left_nritems < 0);
461
449 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { 462 for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
450 left->items[i].offset -= LEAF_DATA_SIZE - 463 left->items[i].offset -= LEAF_DATA_SIZE -
451 left->items[old_left_nritems -1].offset; 464 left->items[old_left_nritems -1].offset;
@@ -460,30 +473,40 @@ int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
460 (right->header.nritems - push_items) * sizeof(struct item)); 473 (right->header.nritems - push_items) * sizeof(struct item));
461 right->header.nritems -= push_items; 474 right->header.nritems -= push_items;
462 push_space = LEAF_DATA_SIZE; 475 push_space = LEAF_DATA_SIZE;
476
463 for (i = 0; i < right->header.nritems; i++) { 477 for (i = 0; i < right->header.nritems; i++) {
464 right->items[i].offset = push_space - right->items[i].size; 478 right->items[i].offset = push_space - right->items[i].size;
465 push_space = right->items[i].offset; 479 push_space = right->items[i].offset;
466 } 480 }
467 fixup_low_keys(path, &right->items[0].key, 1); 481
482 write_tree_block(root, t);
483 write_tree_block(root, right_buf);
484
485 fixup_low_keys(root, path, &right->items[0].key, 1);
468 486
469 /* then fixup the leaf pointer in the path */ 487 /* then fixup the leaf pointer in the path */
470 if (path->slots[0] < push_items) { 488 if (path->slots[0] < push_items) {
471 path->slots[0] += old_left_nritems; 489 path->slots[0] += old_left_nritems;
472 path->nodes[0] = (struct node*)left; 490 tree_block_release(root, path->nodes[0]);
491 path->nodes[0] = t;
473 path->slots[1] -= 1; 492 path->slots[1] -= 1;
474 } else { 493 } else {
494 tree_block_release(root, t);
475 path->slots[0] -= push_items; 495 path->slots[0] -= push_items;
476 } 496 }
497 BUG_ON(path->slots[0] < 0);
477 return 0; 498 return 0;
478} 499}
479 500
480int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) 501int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
481{ 502{
482 struct leaf *l = (struct leaf *)path->nodes[0]; 503 struct tree_buffer *l_buf = path->nodes[0];
483 int nritems = l->header.nritems; 504 struct leaf *l = &l_buf->leaf;
484 int mid = (nritems + 1)/ 2; 505 int nritems;
485 int slot = path->slots[0]; 506 int mid;
507 int slot;
486 struct leaf *right; 508 struct leaf *right;
509 struct tree_buffer *right_buffer;
487 int space_needed = data_size + sizeof(struct item); 510 int space_needed = data_size + sizeof(struct item);
488 int data_copy_size; 511 int data_copy_size;
489 int rt_data_off; 512 int rt_data_off;
@@ -491,9 +514,19 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
491 int ret; 514 int ret;
492 515
493 if (push_leaf_left(root, path, data_size) == 0) { 516 if (push_leaf_left(root, path, data_size) == 0) {
494 return 0; 517 l_buf = path->nodes[0];
518 l = &l_buf->leaf;
519 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
520 return 0;
495 } 521 }
496 right = malloc(sizeof(struct leaf)); 522 slot = path->slots[0];
523 nritems = l->header.nritems;
524 mid = (nritems + 1)/ 2;
525
526 right_buffer = alloc_free_block(root);
527 BUG_ON(!right_buffer);
528 BUG_ON(mid == nritems);
529 right = &right_buffer->leaf;
497 memset(right, 0, sizeof(*right)); 530 memset(right, 0, sizeof(*right));
498 if (mid <= slot) { 531 if (mid <= slot) {
499 if (leaf_space_used(l, mid, nritems - mid) + space_needed > 532 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
@@ -505,6 +538,8 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
505 BUG(); 538 BUG();
506 } 539 }
507 right->header.nritems = nritems - mid; 540 right->header.nritems = nritems - mid;
541 right->header.blocknr = right_buffer->blocknr;
542 right->header.flags = node_level(0);
508 data_copy_size = l->items[mid].offset + l->items[mid].size - 543 data_copy_size = l->items[mid].offset + l->items[mid].size -
509 leaf_data_end(l); 544 leaf_data_end(l);
510 memcpy(right->items, l->items + mid, 545 memcpy(right->items, l->items + mid,
@@ -518,12 +553,20 @@ int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
518 } 553 }
519 l->header.nritems = mid; 554 l->header.nritems = mid;
520 ret = insert_ptr(root, path, &right->items[0].key, 555 ret = insert_ptr(root, path, &right->items[0].key,
521 (u64)right, 1); 556 right_buffer->blocknr, 1);
557
558 write_tree_block(root, right_buffer);
559 write_tree_block(root, l_buf);
560
561 BUG_ON(path->slots[0] != slot);
522 if (mid <= slot) { 562 if (mid <= slot) {
523 path->nodes[0] = (struct node *)right; 563 tree_block_release(root, path->nodes[0]);
564 path->nodes[0] = right_buffer;
524 path->slots[0] -= mid; 565 path->slots[0] -= mid;
525 path->slots[1] += 1; 566 path->slots[1] += 1;
526 } 567 } else
568 tree_block_release(root, right_buffer);
569 BUG_ON(path->slots[0] < 0);
527 return ret; 570 return ret;
528} 571}
529 572
@@ -532,28 +575,48 @@ int insert_item(struct ctree_root *root, struct key *key,
532{ 575{
533 int ret; 576 int ret;
534 int slot; 577 int slot;
578 int slot_orig;
535 struct leaf *leaf; 579 struct leaf *leaf;
580 struct tree_buffer *leaf_buf;
536 unsigned int nritems; 581 unsigned int nritems;
537 unsigned int data_end; 582 unsigned int data_end;
538 struct ctree_path path; 583 struct ctree_path path;
539 584
585 if (!root->node) {
586 struct tree_buffer *t;
587 t = alloc_free_block(root);
588 BUG_ON(!t);
589 t->node.header.nritems = 0;
590 t->node.header.flags = node_level(0);
591 t->node.header.blocknr = t->blocknr;
592 root->node = t;
593 write_tree_block(root, t);
594 }
540 init_path(&path); 595 init_path(&path);
541 ret = search_slot(root, key, &path); 596 ret = search_slot(root, key, &path);
542 if (ret == 0) 597 if (ret == 0) {
598 release_path(root, &path);
543 return -EEXIST; 599 return -EEXIST;
600 }
544 601
545 leaf = (struct leaf *)path.nodes[0]; 602 slot_orig = path.slots[0];
546 if (leaf_free_space(leaf) < sizeof(struct item) + data_size) 603 leaf_buf = path.nodes[0];
604 leaf = &leaf_buf->leaf;
605 if (leaf_free_space(leaf) < sizeof(struct item) + data_size) {
547 split_leaf(root, &path, data_size); 606 split_leaf(root, &path, data_size);
548 leaf = (struct leaf *)path.nodes[0]; 607 leaf_buf = path.nodes[0];
608 leaf = &path.nodes[0]->leaf;
609 }
549 nritems = leaf->header.nritems; 610 nritems = leaf->header.nritems;
550 data_end = leaf_data_end(leaf); 611 data_end = leaf_data_end(leaf);
612
551 if (leaf_free_space(leaf) < sizeof(struct item) + data_size) 613 if (leaf_free_space(leaf) < sizeof(struct item) + data_size)
552 BUG(); 614 BUG();
553 615
554 slot = path.slots[0]; 616 slot = path.slots[0];
617 BUG_ON(slot < 0);
555 if (slot == 0) 618 if (slot == 0)
556 fixup_low_keys(&path, key, 1); 619 fixup_low_keys(root, &path, key, 1);
557 if (slot != nritems) { 620 if (slot != nritems) {
558 int i; 621 int i;
559 unsigned int old_data = leaf->items[slot].offset + 622 unsigned int old_data = leaf->items[slot].offset +
@@ -580,21 +643,25 @@ int insert_item(struct ctree_root *root, struct key *key,
580 leaf->items[slot].size = data_size; 643 leaf->items[slot].size = data_size;
581 memcpy(leaf->data + data_end - data_size, data, data_size); 644 memcpy(leaf->data + data_end - data_size, data, data_size);
582 leaf->header.nritems += 1; 645 leaf->header.nritems += 1;
646 write_tree_block(root, leaf_buf);
583 if (leaf_free_space(leaf) < 0) 647 if (leaf_free_space(leaf) < 0)
584 BUG(); 648 BUG();
649 release_path(root, &path);
585 return 0; 650 return 0;
586} 651}
587 652
588int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) 653int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
589{ 654{
590 int slot; 655 int slot;
656 struct tree_buffer *t;
591 struct node *node; 657 struct node *node;
592 int nritems; 658 int nritems;
593 659
594 while(1) { 660 while(1) {
595 node = path->nodes[level]; 661 t = path->nodes[level];
596 if (!node) 662 if (!t)
597 break; 663 break;
664 node = &t->node;
598 slot = path->slots[level]; 665 slot = path->slots[level];
599 nritems = node->header.nritems; 666 nritems = node->header.nritems;
600 667
@@ -606,28 +673,34 @@ int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
606 sizeof(u64) * (nritems - slot - 1)); 673 sizeof(u64) * (nritems - slot - 1));
607 } 674 }
608 node->header.nritems--; 675 node->header.nritems--;
676 write_tree_block(root, t);
609 if (node->header.nritems != 0) { 677 if (node->header.nritems != 0) {
610 int tslot; 678 int tslot;
611 if (slot == 0) 679 if (slot == 0)
612 fixup_low_keys(path, node->keys, level + 1); 680 fixup_low_keys(root, path, node->keys,
681 level + 1);
613 tslot = path->slots[level+1]; 682 tslot = path->slots[level+1];
683 t->count++;
614 push_node_left(root, path, level); 684 push_node_left(root, path, level);
615 if (node->header.nritems) { 685 if (node->header.nritems) {
616 push_node_right(root, path, level); 686 push_node_right(root, path, level);
617 } 687 }
618 if (node->header.nritems) 688 if (node->header.nritems) {
689 tree_block_release(root, t);
619 break; 690 break;
691 }
692 tree_block_release(root, t);
620 path->slots[level+1] = tslot; 693 path->slots[level+1] = tslot;
621 } 694 }
622 if (node == root->node) { 695 if (t == root->node) {
623 printf("root is now null!\n"); 696 /* just turn the root into a leaf and break */
624 root->node = NULL; 697 root->node->node.header.flags = node_level(0);
698 write_tree_block(root, t);
625 break; 699 break;
626 } 700 }
627 level++; 701 level++;
628 if (!path->nodes[level]) 702 if (!path->nodes[level])
629 BUG(); 703 BUG();
630 free(node);
631 } 704 }
632 return 0; 705 return 0;
633} 706}
@@ -636,10 +709,12 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
636{ 709{
637 int slot; 710 int slot;
638 struct leaf *leaf; 711 struct leaf *leaf;
712 struct tree_buffer *leaf_buf;
639 int doff; 713 int doff;
640 int dsize; 714 int dsize;
641 715
642 leaf = (struct leaf *)path->nodes[0]; 716 leaf_buf = path->nodes[0];
717 leaf = &leaf_buf->leaf;
643 slot = path->slots[0]; 718 slot = path->slots[0];
644 doff = leaf->items[slot].offset; 719 doff = leaf->items[slot].offset;
645 dsize = leaf->items[slot].size; 720 dsize = leaf->items[slot].size;
@@ -658,14 +733,15 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
658 } 733 }
659 leaf->header.nritems -= 1; 734 leaf->header.nritems -= 1;
660 if (leaf->header.nritems == 0) { 735 if (leaf->header.nritems == 0) {
661 if (leaf == (struct leaf *)root->node) 736 if (leaf_buf == root->node) {
662 root->node = NULL; 737 leaf->header.flags = node_level(0);
663 else 738 write_tree_block(root, leaf_buf);
739 } else
664 del_ptr(root, path, 1); 740 del_ptr(root, path, 1);
665 free(leaf);
666 } else { 741 } else {
667 if (slot == 0) 742 if (slot == 0)
668 fixup_low_keys(path, &leaf->items[0].key, 1); 743 fixup_low_keys(root, path, &leaf->items[0].key, 1);
744 write_tree_block(root, leaf_buf);
669 if (leaf_space_used(leaf, 0, leaf->header.nritems) < 745 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
670 LEAF_DATA_SIZE / 4) { 746 LEAF_DATA_SIZE / 4) {
671 /* push_leaf_left fixes the path. 747 /* push_leaf_left fixes the path.
@@ -673,12 +749,13 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
673 * for possible call to del_ptr below 749 * for possible call to del_ptr below
674 */ 750 */
675 slot = path->slots[1]; 751 slot = path->slots[1];
752 leaf_buf->count++;
676 push_leaf_left(root, path, 1); 753 push_leaf_left(root, path, 1);
677 if (leaf->header.nritems == 0) { 754 if (leaf->header.nritems == 0) {
678 free(leaf);
679 path->slots[1] = slot; 755 path->slots[1] = slot;
680 del_ptr(root, path, 1); 756 del_ptr(root, path, 1);
681 } 757 }
758 tree_block_release(root, leaf_buf);
682 } 759 }
683 } 760 }
684 return 0; 761 return 0;
@@ -689,7 +766,7 @@ void print_leaf(struct leaf *l)
689 int i; 766 int i;
690 int nr = l->header.nritems; 767 int nr = l->header.nritems;
691 struct item *item; 768 struct item *item;
692 printf("leaf %p total ptrs %d free space %d\n", l, nr, 769 printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
693 leaf_free_space(l)); 770 leaf_free_space(l));
694 fflush(stdout); 771 fflush(stdout);
695 for (i = 0 ; i < nr ; i++) { 772 for (i = 0 ; i < nr ; i++) {
@@ -703,38 +780,45 @@ void print_leaf(struct leaf *l)
703 fflush(stdout); 780 fflush(stdout);
704 } 781 }
705} 782}
706void print_tree(struct node *c) 783void print_tree(struct ctree_root *root, struct tree_buffer *t)
707{ 784{
708 int i; 785 int i;
709 int nr; 786 int nr;
787 struct node *c;
710 788
711 if (!c) 789 if (!t)
712 return; 790 return;
791 c = &t->node;
713 nr = c->header.nritems; 792 nr = c->header.nritems;
793 if (c->header.blocknr != t->blocknr)
794 BUG();
714 if (is_leaf(c->header.flags)) { 795 if (is_leaf(c->header.flags)) {
715 print_leaf((struct leaf *)c); 796 print_leaf((struct leaf *)c);
716 return; 797 return;
717 } 798 }
718 printf("node %p level %d total ptrs %d free spc %lu\n", c, 799 printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
719 node_level(c->header.flags), c->header.nritems, 800 node_level(c->header.flags), c->header.nritems,
720 NODEPTRS_PER_BLOCK - c->header.nritems); 801 NODEPTRS_PER_BLOCK - c->header.nritems);
721 fflush(stdout); 802 fflush(stdout);
722 for (i = 0; i < nr; i++) { 803 for (i = 0; i < nr; i++) {
723 printf("\tkey %d (%lu %u %lu) block %lx\n", 804 printf("\tkey %d (%lu %u %lu) block %lu\n",
724 i, 805 i,
725 c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, 806 c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
726 c->blockptrs[i]); 807 c->blockptrs[i]);
727 fflush(stdout); 808 fflush(stdout);
728 } 809 }
729 for (i = 0; i < nr; i++) { 810 for (i = 0; i < nr; i++) {
730 struct node *next = read_block(c->blockptrs[i]); 811 struct tree_buffer *next_buf = read_tree_block(root,
812 c->blockptrs[i]);
813 struct node *next = &next_buf->node;
731 if (is_leaf(next->header.flags) && 814 if (is_leaf(next->header.flags) &&
732 node_level(c->header.flags) != 1) 815 node_level(c->header.flags) != 1)
733 BUG(); 816 BUG();
734 if (node_level(next->header.flags) != 817 if (node_level(next->header.flags) !=
735 node_level(c->header.flags) - 1) 818 node_level(c->header.flags) - 1)
736 BUG(); 819 BUG();
737 print_tree(next); 820 print_tree(root, next_buf);
821 tree_block_release(root, next_buf);
738 } 822 }
739 823
740} 824}
@@ -746,23 +830,24 @@ int next_key(int i, int max_key) {
746} 830}
747 831
748int main() { 832int main() {
749 struct leaf *first_node = malloc(sizeof(struct leaf)); 833 struct ctree_root *root;
750 struct ctree_root root;
751 struct key ins; 834 struct key ins;
752 struct key last = { (u64)-1, 0, 0}; 835 struct key last = { (u64)-1, 0, 0};
753 char *buf; 836 char *buf;
754 int i; 837 int i;
755 int num; 838 int num;
756 int ret; 839 int ret;
757 int run_size = 100000; 840 int run_size = 1000000;
758 int max_key = 100000000; 841 int max_key = 100000000;
759 int tree_size = 0; 842 int tree_size = 0;
760 struct ctree_path path; 843 struct ctree_path path;
761 844
845 radix_tree_init();
846
847
848 root = open_ctree("dbfile");
762 849
763 srand(55); 850 srand(55);
764 root.node = (struct node *)first_node;
765 memset(first_node, 0, sizeof(*first_node));
766 for (i = 0; i < run_size; i++) { 851 for (i = 0; i < run_size; i++) {
767 buf = malloc(64); 852 buf = malloc(64);
768 num = next_key(i, max_key); 853 num = next_key(i, max_key);
@@ -772,39 +857,46 @@ int main() {
772 ins.objectid = num; 857 ins.objectid = num;
773 ins.offset = 0; 858 ins.offset = 0;
774 ins.flags = 0; 859 ins.flags = 0;
775 ret = insert_item(&root, &ins, buf, strlen(buf)); 860 ret = insert_item(root, &ins, buf, strlen(buf));
776 if (!ret) 861 if (!ret)
777 tree_size++; 862 tree_size++;
778 } 863 }
864 close_ctree(root);
865 root = open_ctree("dbfile");
866 printf("starting search\n");
779 srand(55); 867 srand(55);
780 for (i = 0; i < run_size; i++) { 868 for (i = 0; i < run_size; i++) {
781 num = next_key(i, max_key); 869 num = next_key(i, max_key);
782 ins.objectid = num; 870 ins.objectid = num;
783 init_path(&path); 871 init_path(&path);
784 ret = search_slot(&root, &ins, &path); 872 ret = search_slot(root, &ins, &path);
785 if (ret) { 873 if (ret) {
786 print_tree(root.node); 874 print_tree(root, root->node);
787 printf("unable to find %d\n", num); 875 printf("unable to find %d\n", num);
788 exit(1); 876 exit(1);
789 } 877 }
790 } 878 release_path(root, &path);
791 printf("node %p level %d total ptrs %d free spc %lu\n", root.node, 879 }
792 node_level(root.node->header.flags), root.node->header.nritems, 880 close_ctree(root);
793 NODEPTRS_PER_BLOCK - root.node->header.nritems); 881 root = open_ctree("dbfile");
794 // print_tree(root.node); 882 printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
795 printf("all searches good\n"); 883 node_level(root->node->node.header.flags),
884 root->node->node.header.nritems,
885 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
886 printf("all searches good, deleting some items\n");
796 i = 0; 887 i = 0;
797 srand(55); 888 srand(55);
798 for (i = 0 ; i < run_size/4; i++) { 889 for (i = 0 ; i < run_size/4; i++) {
799 num = next_key(i, max_key); 890 num = next_key(i, max_key);
800 ins.objectid = num; 891 ins.objectid = num;
801 init_path(&path); 892 init_path(&path);
802 ret = search_slot(&root, &ins, &path); 893 ret = search_slot(root, &ins, &path);
803 if (ret) 894 if (ret)
804 continue; 895 continue;
805 ret = del_item(&root, &path); 896 ret = del_item(root, &path);
806 if (ret != 0) 897 if (ret != 0)
807 BUG(); 898 BUG();
899 release_path(root, &path);
808 tree_size--; 900 tree_size--;
809 } 901 }
810 srand(128); 902 srand(128);
@@ -813,38 +905,58 @@ int main() {
813 num = next_key(i, max_key); 905 num = next_key(i, max_key);
814 sprintf(buf, "string-%d", num); 906 sprintf(buf, "string-%d", num);
815 ins.objectid = num; 907 ins.objectid = num;
816 ret = insert_item(&root, &ins, buf, strlen(buf)); 908 ret = insert_item(root, &ins, buf, strlen(buf));
817 if (!ret) 909 if (!ret)
818 tree_size++; 910 tree_size++;
819 } 911 }
820 while(root.node) { 912 close_ctree(root);
913 root = open_ctree("dbfile");
914 printf("starting search2\n");
915 srand(128);
916 for (i = 0; i < run_size; i++) {
917 num = next_key(i, max_key);
918 ins.objectid = num;
919 init_path(&path);
920 ret = search_slot(root, &ins, &path);
921 if (ret) {
922 print_tree(root, root->node);
923 printf("unable to find %d\n", num);
924 exit(1);
925 }
926 release_path(root, &path);
927 }
928 printf("starting big long delete run\n");
929 while(root->node && root->node->node.header.nritems > 0) {
821 struct leaf *leaf; 930 struct leaf *leaf;
822 int slot; 931 int slot;
823 ins.objectid = (u64)-1; 932 ins.objectid = (u64)-1;
824 init_path(&path); 933 init_path(&path);
825 ret = search_slot(&root, &ins, &path); 934 ret = search_slot(root, &ins, &path);
826 if (ret == 0) 935 if (ret == 0)
827 BUG(); 936 BUG();
828 937
829 leaf = (struct leaf *)(path.nodes[0]); 938 leaf = &path.nodes[0]->leaf;
830 slot = path.slots[0]; 939 slot = path.slots[0];
831 if (slot != leaf->header.nritems) 940 if (slot != leaf->header.nritems)
832 BUG(); 941 BUG();
833 while(path.slots[0] > 0) { 942 while(path.slots[0] > 0) {
834 path.slots[0] -= 1; 943 path.slots[0] -= 1;
835 slot = path.slots[0]; 944 slot = path.slots[0];
836 leaf = (struct leaf *)(path.nodes[0]); 945 leaf = &path.nodes[0]->leaf;
837 946
838 if (comp_keys(&last, &leaf->items[slot].key) <= 0) 947 if (comp_keys(&last, &leaf->items[slot].key) <= 0)
839 BUG(); 948 BUG();
840 memcpy(&last, &leaf->items[slot].key, sizeof(last)); 949 memcpy(&last, &leaf->items[slot].key, sizeof(last));
841 ret = del_item(&root, &path); 950 ret = del_item(root, &path);
842 if (ret != 0) 951 if (ret != 0) {
952 printf("del_item returned %d\n", ret);
843 BUG(); 953 BUG();
954 }
844 tree_size--; 955 tree_size--;
845 } 956 }
957 release_path(root, &path);
846 } 958 }
847 print_tree(root.node); 959 close_ctree(root);
848 printf("tree size is now %d\n", tree_size); 960 printf("tree size is now %d\n", tree_size);
849 return 0; 961 return 0;
850} 962}