Diffstat (limited to 'drivers/md/bcache/bset.c')
-rw-r--r--  drivers/md/bcache/bset.c  172
1 file changed, 149 insertions(+), 23 deletions(-)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 9d9c2edda760..e04e5908e29f 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -302,6 +302,115 @@ bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
 	return true;
 }
 
+/* Auxiliary search trees */
+
+/* 32 bits total: */
+#define BKEY_MID_BITS		3
+#define BKEY_EXPONENT_BITS	7
+#define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
+#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
+
+struct bkey_float {
+	unsigned	exponent:BKEY_EXPONENT_BITS;
+	unsigned	m:BKEY_MID_BITS;
+	unsigned	mantissa:BKEY_MANTISSA_BITS;
+} __packed;
+
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but I realized the lookup code would touch slightly less
+ * memory if it was 128.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bset_float tree we
+ * have this many bytes left that we do a linear search over.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
+ */
+
+#define BSET_CACHELINE		128
+
+/* Space required for the btree node keys */
+static inline size_t btree_keys_bytes(struct btree *b)
+{
+	return PAGE_SIZE << b->page_order;
+}
+
+static inline size_t btree_keys_cachelines(struct btree *b)
+{
+	return btree_keys_bytes(b) / BSET_CACHELINE;
+}
+
+/* Space required for the auxiliary search trees */
+static inline size_t bset_tree_bytes(struct btree *b)
+{
+	return btree_keys_cachelines(b) * sizeof(struct bkey_float);
+}
+
+/* Space required for the prev pointers */
+static inline size_t bset_prev_bytes(struct btree *b)
+{
+	return btree_keys_cachelines(b) * sizeof(uint8_t);
+}
+
+/* Memory allocation */
+
+void bch_btree_keys_free(struct btree *b)
+{
+	struct bset_tree *t = b->sets;
+
+	if (bset_prev_bytes(b) < PAGE_SIZE)
+		kfree(t->prev);
+	else
+		free_pages((unsigned long) t->prev,
+			   get_order(bset_prev_bytes(b)));
+
+	if (bset_tree_bytes(b) < PAGE_SIZE)
+		kfree(t->tree);
+	else
+		free_pages((unsigned long) t->tree,
+			   get_order(bset_tree_bytes(b)));
+
+	free_pages((unsigned long) t->data, b->page_order);
+
+	t->prev = NULL;
+	t->tree = NULL;
+	t->data = NULL;
+}
+
+int bch_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp)
+{
+	struct bset_tree *t = b->sets;
+
+	BUG_ON(t->data);
+
+	b->page_order = page_order;
+
+	t->data = (void *) __get_free_pages(gfp, b->page_order);
+	if (!t->data)
+		goto err;
+
+	t->tree = bset_tree_bytes(b) < PAGE_SIZE
+		? kmalloc(bset_tree_bytes(b), gfp)
+		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
+	if (!t->tree)
+		goto err;
+
+	t->prev = bset_prev_bytes(b) < PAGE_SIZE
+		? kmalloc(bset_prev_bytes(b), gfp)
+		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
+	if (!t->prev)
+		goto err;
+
+	return 0;
+err:
+	bch_btree_keys_free(b);
+	return -ENOMEM;
+}
+
 /* Binary tree stuff for auxiliary search trees */
 
 static unsigned inorder_next(unsigned j, unsigned size)
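
Note: two quick sanity checks on the constants introduced above: the three
bit-fields of struct bkey_float pack into exactly 32 bits (3 + 7 + 22), and
with BSET_CACHELINE at 128 the per-node auxiliary structures stay small - a
16 KiB node has 16384 / 128 = 128 cachelines, so 512 bytes of bkey_float
entries and 128 bytes of prev pointers, both under PAGE_SIZE and thus on the
kmalloc path. A standalone userspace sketch (an illustration, not part of the
patch; the 4 KiB page size, page_order of 2, and *_sk names are assumptions):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE_SK		4096	/* assumed page size */
#define BSET_CACHELINE_SK	128

struct bkey_float_sk {
	unsigned exponent:7;
	unsigned m:3;
	unsigned mantissa:22;
} __attribute__((packed));

int main(void)
{
	unsigned page_order = 2;	/* hypothetical 16 KiB node */
	size_t keys_bytes = (size_t) PAGE_SIZE_SK << page_order;
	size_t cachelines = keys_bytes / BSET_CACHELINE_SK;

	assert(sizeof(struct bkey_float_sk) == 4);	/* 3 + 7 + 22 bits */
	assert(cachelines == 128);			/* 16384 / 128 */
	assert(cachelines * sizeof(struct bkey_float_sk) == 512);
	assert(cachelines * sizeof(uint8_t) == 128);
	return 0;
}
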
@@ -538,21 +647,36 @@ static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
 	t++->size = 0;
 }
 
-static void bset_build_unwritten_tree(struct btree *b)
+static void bch_bset_build_unwritten_tree(struct btree *b)
 {
-	struct bset_tree *t = b->sets + b->nsets;
+	struct bset_tree *t = bset_tree_last(b);
 
 	bset_alloc_tree(b, t);
 
-	if (t->tree != b->sets->tree + bset_tree_space(b)) {
+	if (t->tree != b->sets->tree + btree_keys_cachelines(b)) {
 		t->prev[0] = bkey_to_cacheline_offset(t->data->start);
 		t->size = 1;
 	}
 }
 
+void bch_bset_init_next(struct btree *b, struct bset *i, uint64_t magic)
+{
+	if (i != b->sets->data) {
+		b->sets[++b->nsets].data = i;
+		i->seq = b->sets->data->seq;
+	} else
+		get_random_bytes(&i->seq, sizeof(uint64_t));
+
+	i->magic = magic;
+	i->version = 0;
+	i->keys = 0;
+
+	bch_bset_build_unwritten_tree(b);
+}
+
 static void bset_build_written_tree(struct btree *b)
 {
-	struct bset_tree *t = b->sets + b->nsets;
+	struct bset_tree *t = bset_tree_last(b);
 	struct bkey *k = t->data->start;
 	unsigned j, cacheline = 1;
 
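
Note: the seq chaining in bch_bset_init_next() is what ties the bsets of one
node together: only the first bset of a node gets a fresh random seq, and
every later bset inherits it, so sets sharing a seq are known to belong to the
same node when it is read back. A minimal userspace model of just that logic
(an illustration, not bcache code; the model_* names are invented):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct model_bset { uint64_t magic, seq, keys; };

static void model_init_next(struct model_bset *sets, unsigned nsets,
			    uint64_t magic)
{
	struct model_bset *i = &sets[nsets];

	if (nsets)			/* later bset: inherit the seq */
		i->seq = sets[0].seq;
	else				/* first bset: fresh random seq */
		i->seq = ((uint64_t) rand() << 32) | (unsigned) rand();

	i->magic = magic;
	i->keys = 0;
}

int main(void)
{
	struct model_bset sets[3];
	unsigned n;

	srand(42);
	for (n = 0; n < 3; n++)
		model_init_next(sets, n, 0x1234);

	for (n = 0; n < 3; n++)		/* all three seqs match */
		printf("bset %u: seq %016llx\n", n,
		       (unsigned long long) sets[n].seq);
	return 0;
}
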
@@ -560,7 +684,7 @@ static void bset_build_written_tree(struct btree *b)
 
 	t->size = min_t(unsigned,
 			bkey_to_cacheline(t, bset_bkey_last(t->data)),
-			b->sets->tree + bset_tree_space(b) - t->tree);
+			b->sets->tree + btree_keys_cachelines(b) - t->tree);
 
 	if (t->size < 2) {
 		t->size = 0;
@@ -599,7 +723,7 @@ void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k)
 	struct bset_tree *t;
 	unsigned inorder, j = 1;
 
-	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
+	for (t = b->sets; t <= bset_tree_last(b); t++)
 		if (k < bset_bkey_last(t->data))
 			goto found_set;
 
@@ -639,9 +763,10 @@ fix_right: do {
 	} while (j < t->size);
 }
 
-void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
+static void bch_bset_fix_lookup_table(struct btree *b,
+				      struct bset_tree *t,
+				      struct bkey *k)
 {
-	struct bset_tree *t = &b->sets[b->nsets];
 	unsigned shift = bkey_u64s(k);
 	unsigned j = bkey_to_cacheline(t, k);
 
@@ -673,7 +798,7 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
 		}
 	}
 
-	if (t->size == b->sets->tree + bset_tree_space(b) - t->tree)
+	if (t->size == b->sets->tree + btree_keys_cachelines(b) - t->tree)
 		return;
 
 	/* Possibly add a new entry to the end of the lookup table */
@@ -687,21 +812,23 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
 	}
 }
 
-void bch_bset_init_next(struct btree *b)
+void bch_bset_insert(struct btree *b, struct bkey *where,
+		     struct bkey *insert)
 {
-	struct bset *i = write_block(b);
+	struct bset_tree *t = bset_tree_last(b);
 
-	if (i != b->sets[0].data) {
-		b->sets[++b->nsets].data = i;
-		i->seq = b->sets[0].data->seq;
-	} else
-		get_random_bytes(&i->seq, sizeof(uint64_t));
+	BUG_ON(t->data != write_block(b));
+	BUG_ON(bset_byte_offset(b, t->data) +
+	       __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
+	       PAGE_SIZE << b->page_order);
 
-	i->magic = bset_magic(&b->c->sb);
-	i->version = 0;
-	i->keys = 0;
+	memmove((uint64_t *) where + bkey_u64s(insert),
+		where,
+		(void *) bset_bkey_last(t->data) - (void *) where);
 
-	bset_build_unwritten_tree(b);
+	t->data->keys += bkey_u64s(insert);
+	bkey_copy(where, insert);
+	bch_bset_fix_lookup_table(b, t, where);
 }
 
 struct bset_search_iter {
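
Note: bch_bset_insert() works because the keys in a bset are one contiguous
array of u64s - making room is a single memmove of the tail by bkey_u64s(insert)
words, followed by copying the key into the hole. A standalone userspace sketch
of that splice (an illustration, not bcache code; the toy "keys" here are bare
u64 pairs rather than real bkeys):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint64_t keys[8] = { 10, 11, 30, 31 };	/* two 2-word keys */
	unsigned nr = 4;			/* u64s in use */
	uint64_t insert[2] = { 20, 21 };	/* new 2-word key */
	uint64_t *where = &keys[2];		/* insert before { 30, 31 } */
	unsigned i;

	/* slide the tail right by the size of the new key... */
	memmove(where + 2, where, (nr - 2) * sizeof(uint64_t));
	/* ...then copy the key into the hole */
	memcpy(where, insert, sizeof(insert));
	nr += 2;

	for (i = 0; i < nr; i++)	/* prints: 10 11 20 21 30 31 */
		printf("%llu ", (unsigned long long) keys[i]);
	printf("\n");
	return 0;
}
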
@@ -1154,9 +1281,8 @@ void bch_btree_sort_partial(struct btree *b, unsigned start,
 
 	__bch_btree_iter_init(b, &iter, NULL, &b->sets[start]);
 
-	BUG_ON(b->sets[b->nsets].data == write_block(b) &&
-	       (b->sets[b->nsets].size || b->nsets));
-
+	BUG_ON(!bset_written(b, bset_tree_last(b)) &&
+	       (bset_tree_last(b)->size || b->nsets));
 
 	if (start) {
 		unsigned i;
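
Note: the allocation helpers added by this patch are meant to be used as a
strict pair - for each of the three buffers, bch_btree_keys_alloc() picks
kmalloc() vs. __get_free_pages() with the same "< PAGE_SIZE" test that
bch_btree_keys_free() uses to pick kfree() vs. free_pages(), and the error
path frees whatever was already allocated (kfree(NULL) and free_pages(0, ...)
are no-ops, assuming the bset_tree starts out zeroed). A hypothetical caller,
just to show the intended shape (the function name, page_order, and magic
value here are invented for illustration):

static int example_node_setup(struct btree *b, gfp_t gfp)
{
	int ret = bch_btree_keys_alloc(b, 2, gfp);	/* assumed page_order */

	if (ret)	/* -ENOMEM; partial allocations already freed */
		return ret;

	/* first bset of the node: takes the fresh-random-seq branch */
	bch_bset_init_next(b, b->sets->data, 0 /* assumed magic */);
	return 0;
}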