Diffstat (limited to 'drivers/md/bcache/bset.c')
-rw-r--r--	drivers/md/bcache/bset.c	172
1 file changed, 149 insertions(+), 23 deletions(-)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 9d9c2edda760..e04e5908e29f 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -302,6 +302,115 @@ bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
 	return true;
 }
 
+/* Auxiliary search trees */
+
+/* 32 bits total: */
+#define BKEY_MID_BITS		3
+#define BKEY_EXPONENT_BITS	7
+#define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
+#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
+
+struct bkey_float {
+	unsigned	exponent:BKEY_EXPONENT_BITS;
+	unsigned	m:BKEY_MID_BITS;
+	unsigned	mantissa:BKEY_MANTISSA_BITS;
+} __packed;
+
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but I realized the lookup code would touch slightly less
+ * memory if it was 128.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bset_float tree we
+ * have this many bytes left that we do a linear search over.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
+ */
+
+#define BSET_CACHELINE		128
+
+/* Space required for the btree node keys */
+static inline size_t btree_keys_bytes(struct btree *b)
+{
+	return PAGE_SIZE << b->page_order;
+}
+
+static inline size_t btree_keys_cachelines(struct btree *b)
+{
+	return btree_keys_bytes(b) / BSET_CACHELINE;
+}
+
+/* Space required for the auxiliary search trees */
+static inline size_t bset_tree_bytes(struct btree *b)
+{
+	return btree_keys_cachelines(b) * sizeof(struct bkey_float);
+}
+
+/* Space required for the prev pointers */
+static inline size_t bset_prev_bytes(struct btree *b)
+{
+	return btree_keys_cachelines(b) * sizeof(uint8_t);
+}
+
+/* Memory allocation */
+
+void bch_btree_keys_free(struct btree *b)
+{
+	struct bset_tree *t = b->sets;
+
+	if (bset_prev_bytes(b) < PAGE_SIZE)
+		kfree(t->prev);
+	else
+		free_pages((unsigned long) t->prev,
+			   get_order(bset_prev_bytes(b)));
+
+	if (bset_tree_bytes(b) < PAGE_SIZE)
+		kfree(t->tree);
+	else
+		free_pages((unsigned long) t->tree,
+			   get_order(bset_tree_bytes(b)));
+
+	free_pages((unsigned long) t->data, b->page_order);
+
+	t->prev = NULL;
+	t->tree = NULL;
+	t->data = NULL;
+}
+
+int bch_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp)
+{
+	struct bset_tree *t = b->sets;
+
+	BUG_ON(t->data);
+
+	b->page_order = page_order;
+
+	t->data = (void *) __get_free_pages(gfp, b->page_order);
+	if (!t->data)
+		goto err;
+
+	t->tree = bset_tree_bytes(b) < PAGE_SIZE
+		? kmalloc(bset_tree_bytes(b), gfp)
+		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
+	if (!t->tree)
+		goto err;
+
+	t->prev = bset_prev_bytes(b) < PAGE_SIZE
+		? kmalloc(bset_prev_bytes(b), gfp)
+		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
+	if (!t->prev)
+		goto err;
+
+	return 0;
+err:
+	bch_btree_keys_free(b);
+	return -ENOMEM;
+}
+
 /* Binary tree stuff for auxiliary search trees */
 
 static unsigned inorder_next(unsigned j, unsigned size)
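The new constants and helpers above fix the per-node bookkeeping sizes: one struct bkey_float (3 + 7 + 22 bits, so 4 bytes packed) and one uint8_t prev entry per 128-byte cacheline of keys. As a worked example under assumed numbers (4 KiB PAGE_SIZE, a hypothetical page_order of 2), a 16 KiB node has 128 cachelines, so bset_tree_bytes() comes to 512 and bset_prev_bytes() to 128; both are below PAGE_SIZE, so bch_btree_keys_alloc() would take the kmalloc() path rather than __get_free_pages(). The standalone user-space sketch below just replays that arithmetic; it is not part of the patch:

/* Standalone sketch of the size arithmetic above; assumes 4 KiB pages and
 * the 4-byte packed bkey_float from the patch. Not kernel code. */
#include <stdio.h>
#include <stddef.h>

#define SKETCH_PAGE_SIZE	4096u
#define BSET_CACHELINE		128u
#define BKEY_FLOAT_BYTES	4u	/* 3 + 7 + 22 bits, packed */

int main(void)
{
	unsigned page_order = 2;	/* hypothetical 16 KiB btree node */
	size_t keys_bytes = (size_t) SKETCH_PAGE_SIZE << page_order;	/* btree_keys_bytes() */
	size_t cachelines = keys_bytes / BSET_CACHELINE;		/* btree_keys_cachelines() */
	size_t tree_bytes = cachelines * BKEY_FLOAT_BYTES;		/* bset_tree_bytes() */
	size_t prev_bytes = cachelines * sizeof(unsigned char);		/* bset_prev_bytes() */

	printf("keys %zu, cachelines %zu, tree %zu, prev %zu\n",
	       keys_bytes, cachelines, tree_bytes, prev_bytes);
	/* tree_bytes and prev_bytes are both < PAGE_SIZE here, so the
	 * allocation helpers in the patch would use kmalloc(). */
	return 0;
}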
@@ -538,21 +647,36 @@ static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
 		t++->size = 0;
 }
 
-static void bset_build_unwritten_tree(struct btree *b)
+static void bch_bset_build_unwritten_tree(struct btree *b)
 {
-	struct bset_tree *t = b->sets + b->nsets;
+	struct bset_tree *t = bset_tree_last(b);
 
 	bset_alloc_tree(b, t);
 
-	if (t->tree != b->sets->tree + bset_tree_space(b)) {
+	if (t->tree != b->sets->tree + btree_keys_cachelines(b)) {
 		t->prev[0] = bkey_to_cacheline_offset(t->data->start);
 		t->size = 1;
 	}
 }
 
+void bch_bset_init_next(struct btree *b, struct bset *i, uint64_t magic)
+{
+	if (i != b->sets->data) {
+		b->sets[++b->nsets].data = i;
+		i->seq = b->sets->data->seq;
+	} else
+		get_random_bytes(&i->seq, sizeof(uint64_t));
+
+	i->magic = magic;
+	i->version = 0;
+	i->keys = 0;
+
+	bch_bset_build_unwritten_tree(b);
+}
+
 static void bset_build_written_tree(struct btree *b)
 {
-	struct bset_tree *t = b->sets + b->nsets;
+	struct bset_tree *t = bset_tree_last(b);
 	struct bkey *k = t->data->start;
 	unsigned j, cacheline = 1;
 
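The reworked bch_bset_init_next() also makes the seq handling explicit: the first bset in a node gets a random seq from get_random_bytes(), and every later bset started in the same node copies that value, presumably so the sets belonging to one node can be recognised as a group when read back. A hypothetical user-space sketch of just that pattern (demo_set and demo_set_init_next are made-up names, not kernel structures):

/* Hypothetical sketch of the seq-sharing pattern in bch_bset_init_next():
 * the first set in a node draws a random seq, later sets inherit it. */
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

struct demo_set {
	uint64_t seq;
	uint64_t magic;
	unsigned keys;
};

static void demo_set_init_next(struct demo_set *first, struct demo_set *i,
			       uint64_t magic)
{
	if (i != first)
		i->seq = first->seq;	/* later sets share the node's seq */
	else
		i->seq = ((uint64_t) rand() << 32) | (uint64_t) rand();
					/* stand-in for get_random_bytes() */
	i->magic = magic;
	i->keys = 0;
}

int main(void)
{
	struct demo_set sets[2];

	demo_set_init_next(&sets[0], &sets[0], 0x1234);	/* fresh node */
	demo_set_init_next(&sets[0], &sets[1], 0x1234);	/* next set in the node */
	printf("seqs match: %d\n", sets[0].seq == sets[1].seq);
	return 0;
}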
@@ -560,7 +684,7 @@ static void bset_build_written_tree(struct btree *b)
 
 	t->size = min_t(unsigned,
 			bkey_to_cacheline(t, bset_bkey_last(t->data)),
-			b->sets->tree + bset_tree_space(b) - t->tree);
+			b->sets->tree + btree_keys_cachelines(b) - t->tree);
 
 	if (t->size < 2) {
 		t->size = 0;
@@ -599,7 +723,7 @@ void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k)
 	struct bset_tree *t;
 	unsigned inorder, j = 1;
 
-	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
+	for (t = b->sets; t <= bset_tree_last(b); t++)
 		if (k < bset_bkey_last(t->data))
 			goto found_set;
 
@@ -639,9 +763,10 @@ fix_right: do {
 	} while (j < t->size);
 }
 
-void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
+static void bch_bset_fix_lookup_table(struct btree *b,
+				      struct bset_tree *t,
+				      struct bkey *k)
 {
-	struct bset_tree *t = &b->sets[b->nsets];
 	unsigned shift = bkey_u64s(k);
 	unsigned j = bkey_to_cacheline(t, k);
 
@@ -673,7 +798,7 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
 		}
 	}
 
-	if (t->size == b->sets->tree + bset_tree_space(b) - t->tree)
+	if (t->size == b->sets->tree + btree_keys_cachelines(b) - t->tree)
 		return;
 
 	/* Possibly add a new entry to the end of the lookup table */
@@ -687,21 +812,23 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
 	}
 }
 
-void bch_bset_init_next(struct btree *b)
+void bch_bset_insert(struct btree *b, struct bkey *where,
+		     struct bkey *insert)
 {
-	struct bset *i = write_block(b);
+	struct bset_tree *t = bset_tree_last(b);
 
-	if (i != b->sets[0].data) {
-		b->sets[++b->nsets].data = i;
-		i->seq = b->sets[0].data->seq;
-	} else
-		get_random_bytes(&i->seq, sizeof(uint64_t));
+	BUG_ON(t->data != write_block(b));
+	BUG_ON(bset_byte_offset(b, t->data) +
+	       __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
+	       PAGE_SIZE << b->page_order);
 
-	i->magic = bset_magic(&b->c->sb);
-	i->version = 0;
-	i->keys = 0;
+	memmove((uint64_t *) where + bkey_u64s(insert),
+		where,
+		(void *) bset_bkey_last(t->data) - (void *) where);
 
-	bset_build_unwritten_tree(b);
+	t->data->keys += bkey_u64s(insert);
+	bkey_copy(where, insert);
+	bch_bset_fix_lookup_table(b, t, where);
 }
 
 struct bset_search_iter {
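bch_bset_insert() is an open-coded array insert over the flat run of 64-bit key words: the BUG_ON()s check that this set is the one currently being written and still has room, the memmove() shifts everything from where up to bset_bkey_last() by bkey_u64s(insert) words, and the key is then copied into the gap and the lookup table repaired. A minimal user-space sketch of the same gap-opening move on a bare uint64_t array (insert_words and its parameters are hypothetical names, not the kernel API):

/* Sketch of the gap-opening insert that bch_bset_insert() performs on the
 * flat array of 64-bit key words. Not the kernel data structure. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>

static void insert_words(uint64_t *set, size_t *nr, size_t cap,
			 size_t pos, const uint64_t *key, size_t key_words)
{
	assert(*nr + key_words <= cap);	/* mirrors the BUG_ON() size check */
	assert(pos <= *nr);

	/* Shift the tail up by key_words to open a gap (the memmove above). */
	memmove(set + pos + key_words, set + pos,
		(*nr - pos) * sizeof(uint64_t));

	/* Copy the new key into the gap and account for it (bkey_copy() plus
	 * t->data->keys += bkey_u64s(insert) in the patch). */
	memcpy(set + pos, key, key_words * sizeof(uint64_t));
	*nr += key_words;
}

int main(void)
{
	uint64_t set[8] = { 10, 20, 40 };
	size_t nr = 3;
	uint64_t key[2] = { 30, 31 };		/* a two-word "key" */

	insert_words(set, &nr, 8, 2, key, 2);	/* insert before the 40 */
	for (size_t i = 0; i < nr; i++)
		printf("%llu ", (unsigned long long) set[i]);
	printf("\n");				/* prints: 10 20 30 31 40 */
	return 0;
}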
@@ -1154,9 +1281,8 @@ void bch_btree_sort_partial(struct btree *b, unsigned start,
 
 	__bch_btree_iter_init(b, &iter, NULL, &b->sets[start]);
 
-	BUG_ON(b->sets[b->nsets].data == write_block(b) &&
-	       (b->sets[b->nsets].size || b->nsets));
-
+	BUG_ON(!bset_written(b, bset_tree_last(b)) &&
+	       (bset_tree_last(b)->size || b->nsets));
 
 	if (start) {
 		unsigned i;