diff options
-rw-r--r-- | drivers/md/bcache/bcache.h | 8 | ||||
-rw-r--r-- | drivers/md/bcache/bset.c | 172 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 247 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 167 | ||||
-rw-r--r-- | drivers/md/bcache/btree.h | 9 | ||||
-rw-r--r-- | drivers/md/bcache/debug.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/journal.c | 9 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 2 |
8 files changed, 341 insertions, 276 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 7bd4c93475e7..5c74d55cea7f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h | |||
@@ -717,14 +717,6 @@ struct bbio { | |||
717 | #define bucket_bytes(c) ((c)->sb.bucket_size << 9) | 717 | #define bucket_bytes(c) ((c)->sb.bucket_size << 9) |
718 | #define block_bytes(c) ((c)->sb.block_size << 9) | 718 | #define block_bytes(c) ((c)->sb.block_size << 9) |
719 | 719 | ||
720 | #define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t)) | ||
721 | #define set_bytes(i) __set_bytes(i, i->keys) | ||
722 | |||
723 | #define __set_blocks(i, k, c) DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c)) | ||
724 | #define set_blocks(i, c) __set_blocks(i, (i)->keys, c) | ||
725 | |||
726 | #define btree_data_space(b) (PAGE_SIZE << (b)->page_order) | ||
727 | |||
728 | #define prios_per_bucket(c) \ | 720 | #define prios_per_bucket(c) \ |
729 | ((bucket_bytes(c) - sizeof(struct prio_set)) / \ | 721 | ((bucket_bytes(c) - sizeof(struct prio_set)) / \ |
730 | sizeof(struct bucket_disk)) | 722 | sizeof(struct bucket_disk)) |
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 9d9c2edda760..e04e5908e29f 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c | |||
@@ -302,6 +302,115 @@ bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r) | |||
302 | return true; | 302 | return true; |
303 | } | 303 | } |
304 | 304 | ||
305 | /* Auxiliary search trees */ | ||
306 | |||
307 | /* 32 bits total: */ | ||
308 | #define BKEY_MID_BITS 3 | ||
309 | #define BKEY_EXPONENT_BITS 7 | ||
310 | #define BKEY_MANTISSA_BITS (32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS) | ||
311 | #define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1) | ||
312 | |||
313 | struct bkey_float { | ||
314 | unsigned exponent:BKEY_EXPONENT_BITS; | ||
315 | unsigned m:BKEY_MID_BITS; | ||
316 | unsigned mantissa:BKEY_MANTISSA_BITS; | ||
317 | } __packed; | ||
318 | |||
319 | /* | ||
320 | * BSET_CACHELINE was originally intended to match the hardware cacheline size - | ||
321 | * it used to be 64, but I realized the lookup code would touch slightly less | ||
322 | * memory if it was 128. | ||
323 | * | ||
324 | * It definites the number of bytes (in struct bset) per struct bkey_float in | ||
325 | * the auxiliar search tree - when we're done searching the bset_float tree we | ||
326 | * have this many bytes left that we do a linear search over. | ||
327 | * | ||
328 | * Since (after level 5) every level of the bset_tree is on a new cacheline, | ||
329 | * we're touching one fewer cacheline in the bset tree in exchange for one more | ||
330 | * cacheline in the linear search - but the linear search might stop before it | ||
331 | * gets to the second cacheline. | ||
332 | */ | ||
333 | |||
334 | #define BSET_CACHELINE 128 | ||
335 | |||
336 | /* Space required for the btree node keys */ | ||
337 | static inline size_t btree_keys_bytes(struct btree *b) | ||
338 | { | ||
339 | return PAGE_SIZE << b->page_order; | ||
340 | } | ||
341 | |||
342 | static inline size_t btree_keys_cachelines(struct btree *b) | ||
343 | { | ||
344 | return btree_keys_bytes(b) / BSET_CACHELINE; | ||
345 | } | ||
346 | |||
347 | /* Space required for the auxiliary search trees */ | ||
348 | static inline size_t bset_tree_bytes(struct btree *b) | ||
349 | { | ||
350 | return btree_keys_cachelines(b) * sizeof(struct bkey_float); | ||
351 | } | ||
352 | |||
353 | /* Space required for the prev pointers */ | ||
354 | static inline size_t bset_prev_bytes(struct btree *b) | ||
355 | { | ||
356 | return btree_keys_cachelines(b) * sizeof(uint8_t); | ||
357 | } | ||
358 | |||
359 | /* Memory allocation */ | ||
360 | |||
361 | void bch_btree_keys_free(struct btree *b) | ||
362 | { | ||
363 | struct bset_tree *t = b->sets; | ||
364 | |||
365 | if (bset_prev_bytes(b) < PAGE_SIZE) | ||
366 | kfree(t->prev); | ||
367 | else | ||
368 | free_pages((unsigned long) t->prev, | ||
369 | get_order(bset_prev_bytes(b))); | ||
370 | |||
371 | if (bset_tree_bytes(b) < PAGE_SIZE) | ||
372 | kfree(t->tree); | ||
373 | else | ||
374 | free_pages((unsigned long) t->tree, | ||
375 | get_order(bset_tree_bytes(b))); | ||
376 | |||
377 | free_pages((unsigned long) t->data, b->page_order); | ||
378 | |||
379 | t->prev = NULL; | ||
380 | t->tree = NULL; | ||
381 | t->data = NULL; | ||
382 | } | ||
383 | |||
384 | int bch_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp) | ||
385 | { | ||
386 | struct bset_tree *t = b->sets; | ||
387 | |||
388 | BUG_ON(t->data); | ||
389 | |||
390 | b->page_order = page_order; | ||
391 | |||
392 | t->data = (void *) __get_free_pages(gfp, b->page_order); | ||
393 | if (!t->data) | ||
394 | goto err; | ||
395 | |||
396 | t->tree = bset_tree_bytes(b) < PAGE_SIZE | ||
397 | ? kmalloc(bset_tree_bytes(b), gfp) | ||
398 | : (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b))); | ||
399 | if (!t->tree) | ||
400 | goto err; | ||
401 | |||
402 | t->prev = bset_prev_bytes(b) < PAGE_SIZE | ||
403 | ? kmalloc(bset_prev_bytes(b), gfp) | ||
404 | : (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b))); | ||
405 | if (!t->prev) | ||
406 | goto err; | ||
407 | |||
408 | return 0; | ||
409 | err: | ||
410 | bch_btree_keys_free(b); | ||
411 | return -ENOMEM; | ||
412 | } | ||
413 | |||
305 | /* Binary tree stuff for auxiliary search trees */ | 414 | /* Binary tree stuff for auxiliary search trees */ |
306 | 415 | ||
307 | static unsigned inorder_next(unsigned j, unsigned size) | 416 | static unsigned inorder_next(unsigned j, unsigned size) |
@@ -538,21 +647,36 @@ static void bset_alloc_tree(struct btree *b, struct bset_tree *t) | |||
538 | t++->size = 0; | 647 | t++->size = 0; |
539 | } | 648 | } |
540 | 649 | ||
541 | static void bset_build_unwritten_tree(struct btree *b) | 650 | static void bch_bset_build_unwritten_tree(struct btree *b) |
542 | { | 651 | { |
543 | struct bset_tree *t = b->sets + b->nsets; | 652 | struct bset_tree *t = bset_tree_last(b); |
544 | 653 | ||
545 | bset_alloc_tree(b, t); | 654 | bset_alloc_tree(b, t); |
546 | 655 | ||
547 | if (t->tree != b->sets->tree + bset_tree_space(b)) { | 656 | if (t->tree != b->sets->tree + btree_keys_cachelines(b)) { |
548 | t->prev[0] = bkey_to_cacheline_offset(t->data->start); | 657 | t->prev[0] = bkey_to_cacheline_offset(t->data->start); |
549 | t->size = 1; | 658 | t->size = 1; |
550 | } | 659 | } |
551 | } | 660 | } |
552 | 661 | ||
662 | void bch_bset_init_next(struct btree *b, struct bset *i, uint64_t magic) | ||
663 | { | ||
664 | if (i != b->sets->data) { | ||
665 | b->sets[++b->nsets].data = i; | ||
666 | i->seq = b->sets->data->seq; | ||
667 | } else | ||
668 | get_random_bytes(&i->seq, sizeof(uint64_t)); | ||
669 | |||
670 | i->magic = magic; | ||
671 | i->version = 0; | ||
672 | i->keys = 0; | ||
673 | |||
674 | bch_bset_build_unwritten_tree(b); | ||
675 | } | ||
676 | |||
553 | static void bset_build_written_tree(struct btree *b) | 677 | static void bset_build_written_tree(struct btree *b) |
554 | { | 678 | { |
555 | struct bset_tree *t = b->sets + b->nsets; | 679 | struct bset_tree *t = bset_tree_last(b); |
556 | struct bkey *k = t->data->start; | 680 | struct bkey *k = t->data->start; |
557 | unsigned j, cacheline = 1; | 681 | unsigned j, cacheline = 1; |
558 | 682 | ||
@@ -560,7 +684,7 @@ static void bset_build_written_tree(struct btree *b) | |||
560 | 684 | ||
561 | t->size = min_t(unsigned, | 685 | t->size = min_t(unsigned, |
562 | bkey_to_cacheline(t, bset_bkey_last(t->data)), | 686 | bkey_to_cacheline(t, bset_bkey_last(t->data)), |
563 | b->sets->tree + bset_tree_space(b) - t->tree); | 687 | b->sets->tree + btree_keys_cachelines(b) - t->tree); |
564 | 688 | ||
565 | if (t->size < 2) { | 689 | if (t->size < 2) { |
566 | t->size = 0; | 690 | t->size = 0; |
@@ -599,7 +723,7 @@ void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k) | |||
599 | struct bset_tree *t; | 723 | struct bset_tree *t; |
600 | unsigned inorder, j = 1; | 724 | unsigned inorder, j = 1; |
601 | 725 | ||
602 | for (t = b->sets; t <= &b->sets[b->nsets]; t++) | 726 | for (t = b->sets; t <= bset_tree_last(b); t++) |
603 | if (k < bset_bkey_last(t->data)) | 727 | if (k < bset_bkey_last(t->data)) |
604 | goto found_set; | 728 | goto found_set; |
605 | 729 | ||
@@ -639,9 +763,10 @@ fix_right: do { | |||
639 | } while (j < t->size); | 763 | } while (j < t->size); |
640 | } | 764 | } |
641 | 765 | ||
642 | void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k) | 766 | static void bch_bset_fix_lookup_table(struct btree *b, |
767 | struct bset_tree *t, | ||
768 | struct bkey *k) | ||
643 | { | 769 | { |
644 | struct bset_tree *t = &b->sets[b->nsets]; | ||
645 | unsigned shift = bkey_u64s(k); | 770 | unsigned shift = bkey_u64s(k); |
646 | unsigned j = bkey_to_cacheline(t, k); | 771 | unsigned j = bkey_to_cacheline(t, k); |
647 | 772 | ||
@@ -673,7 +798,7 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k) | |||
673 | } | 798 | } |
674 | } | 799 | } |
675 | 800 | ||
676 | if (t->size == b->sets->tree + bset_tree_space(b) - t->tree) | 801 | if (t->size == b->sets->tree + btree_keys_cachelines(b) - t->tree) |
677 | return; | 802 | return; |
678 | 803 | ||
679 | /* Possibly add a new entry to the end of the lookup table */ | 804 | /* Possibly add a new entry to the end of the lookup table */ |
@@ -687,21 +812,23 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k) | |||
687 | } | 812 | } |
688 | } | 813 | } |
689 | 814 | ||
690 | void bch_bset_init_next(struct btree *b) | 815 | void bch_bset_insert(struct btree *b, struct bkey *where, |
816 | struct bkey *insert) | ||
691 | { | 817 | { |
692 | struct bset *i = write_block(b); | 818 | struct bset_tree *t = bset_tree_last(b); |
693 | 819 | ||
694 | if (i != b->sets[0].data) { | 820 | BUG_ON(t->data != write_block(b)); |
695 | b->sets[++b->nsets].data = i; | 821 | BUG_ON(bset_byte_offset(b, t->data) + |
696 | i->seq = b->sets[0].data->seq; | 822 | __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) > |
697 | } else | 823 | PAGE_SIZE << b->page_order); |
698 | get_random_bytes(&i->seq, sizeof(uint64_t)); | ||
699 | 824 | ||
700 | i->magic = bset_magic(&b->c->sb); | 825 | memmove((uint64_t *) where + bkey_u64s(insert), |
701 | i->version = 0; | 826 | where, |
702 | i->keys = 0; | 827 | (void *) bset_bkey_last(t->data) - (void *) where); |
703 | 828 | ||
704 | bset_build_unwritten_tree(b); | 829 | t->data->keys += bkey_u64s(insert); |
830 | bkey_copy(where, insert); | ||
831 | bch_bset_fix_lookup_table(b, t, where); | ||
705 | } | 832 | } |
706 | 833 | ||
707 | struct bset_search_iter { | 834 | struct bset_search_iter { |
@@ -1154,9 +1281,8 @@ void bch_btree_sort_partial(struct btree *b, unsigned start, | |||
1154 | 1281 | ||
1155 | __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); | 1282 | __bch_btree_iter_init(b, &iter, NULL, &b->sets[start]); |
1156 | 1283 | ||
1157 | BUG_ON(b->sets[b->nsets].data == write_block(b) && | 1284 | BUG_ON(!bset_written(b, bset_tree_last(b)) && |
1158 | (b->sets[b->nsets].size || b->nsets)); | 1285 | (bset_tree_last(b)->size || b->nsets)); |
1159 | |||
1160 | 1286 | ||
1161 | if (start) { | 1287 | if (start) { |
1162 | unsigned i; | 1288 | unsigned i; |
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 4f60c21c7a38..ab31f3fc8d43 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h | |||
@@ -144,22 +144,11 @@ | |||
144 | * first key in that range of bytes again. | 144 | * first key in that range of bytes again. |
145 | */ | 145 | */ |
146 | 146 | ||
147 | struct cache_set; | 147 | struct btree; |
148 | 148 | struct bkey_float; | |
149 | /* Btree key comparison/iteration */ | ||
150 | 149 | ||
151 | #define MAX_BSETS 4U | 150 | #define MAX_BSETS 4U |
152 | 151 | ||
153 | struct btree_iter { | ||
154 | size_t size, used; | ||
155 | #ifdef CONFIG_BCACHE_DEBUG | ||
156 | struct btree *b; | ||
157 | #endif | ||
158 | struct btree_iter_set { | ||
159 | struct bkey *k, *end; | ||
160 | } data[MAX_BSETS]; | ||
161 | }; | ||
162 | |||
163 | struct bset_tree { | 152 | struct bset_tree { |
164 | /* | 153 | /* |
165 | * We construct a binary tree in an array as if the array | 154 | * We construct a binary tree in an array as if the array |
@@ -169,14 +158,14 @@ struct bset_tree { | |||
169 | */ | 158 | */ |
170 | 159 | ||
171 | /* size of the binary tree and prev array */ | 160 | /* size of the binary tree and prev array */ |
172 | unsigned size; | 161 | unsigned size; |
173 | 162 | ||
174 | /* function of size - precalculated for to_inorder() */ | 163 | /* function of size - precalculated for to_inorder() */ |
175 | unsigned extra; | 164 | unsigned extra; |
176 | 165 | ||
177 | /* copy of the last key in the set */ | 166 | /* copy of the last key in the set */ |
178 | struct bkey end; | 167 | struct bkey end; |
179 | struct bkey_float *tree; | 168 | struct bkey_float *tree; |
180 | 169 | ||
181 | /* | 170 | /* |
182 | * The nodes in the bset tree point to specific keys - this | 171 | * The nodes in the bset tree point to specific keys - this |
@@ -186,12 +175,61 @@ struct bset_tree { | |||
186 | * to keep bkey_float to 4 bytes and prev isn't used in the fast | 175 | * to keep bkey_float to 4 bytes and prev isn't used in the fast |
187 | * path. | 176 | * path. |
188 | */ | 177 | */ |
189 | uint8_t *prev; | 178 | uint8_t *prev; |
190 | 179 | ||
191 | /* The actual btree node, with pointers to each sorted set */ | 180 | /* The actual btree node, with pointers to each sorted set */ |
192 | struct bset *data; | 181 | struct bset *data; |
193 | }; | 182 | }; |
194 | 183 | ||
184 | #define __set_bytes(i, k) (sizeof(*(i)) + (k) * sizeof(uint64_t)) | ||
185 | #define set_bytes(i) __set_bytes(i, i->keys) | ||
186 | |||
187 | #define __set_blocks(i, k, block_bytes) \ | ||
188 | DIV_ROUND_UP(__set_bytes(i, k), block_bytes) | ||
189 | #define set_blocks(i, block_bytes) \ | ||
190 | __set_blocks(i, (i)->keys, block_bytes) | ||
191 | |||
192 | void bch_btree_keys_free(struct btree *); | ||
193 | int bch_btree_keys_alloc(struct btree *, unsigned, gfp_t); | ||
194 | |||
195 | void bch_bset_fix_invalidated_key(struct btree *, struct bkey *); | ||
196 | void bch_bset_init_next(struct btree *, struct bset *, uint64_t); | ||
197 | void bch_bset_insert(struct btree *, struct bkey *, struct bkey *); | ||
198 | |||
199 | /* Btree key iteration */ | ||
200 | |||
201 | struct btree_iter { | ||
202 | size_t size, used; | ||
203 | #ifdef CONFIG_BCACHE_DEBUG | ||
204 | struct btree *b; | ||
205 | #endif | ||
206 | struct btree_iter_set { | ||
207 | struct bkey *k, *end; | ||
208 | } data[MAX_BSETS]; | ||
209 | }; | ||
210 | |||
211 | typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *); | ||
212 | |||
213 | struct bkey *bch_btree_iter_next(struct btree_iter *); | ||
214 | struct bkey *bch_btree_iter_next_filter(struct btree_iter *, | ||
215 | struct btree *, ptr_filter_fn); | ||
216 | |||
217 | void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *); | ||
218 | struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *, | ||
219 | struct bkey *); | ||
220 | |||
221 | struct bkey *__bch_bset_search(struct btree *, struct bset_tree *, | ||
222 | const struct bkey *); | ||
223 | |||
224 | /* | ||
225 | * Returns the first key that is strictly greater than search | ||
226 | */ | ||
227 | static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t, | ||
228 | const struct bkey *search) | ||
229 | { | ||
230 | return search ? __bch_bset_search(b, t, search) : t->data->start; | ||
231 | } | ||
232 | |||
195 | /* Sorting */ | 233 | /* Sorting */ |
196 | 234 | ||
197 | struct bset_sort_state { | 235 | struct bset_sort_state { |
@@ -219,6 +257,60 @@ static inline void bch_btree_sort(struct btree *b, | |||
219 | bch_btree_sort_partial(b, 0, state); | 257 | bch_btree_sort_partial(b, 0, state); |
220 | } | 258 | } |
221 | 259 | ||
260 | /* Bkey utility code */ | ||
261 | |||
262 | #define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) | ||
263 | |||
264 | static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx) | ||
265 | { | ||
266 | return bkey_idx(i->start, idx); | ||
267 | } | ||
268 | |||
269 | static inline void bkey_init(struct bkey *k) | ||
270 | { | ||
271 | *k = ZERO_KEY; | ||
272 | } | ||
273 | |||
274 | static __always_inline int64_t bkey_cmp(const struct bkey *l, | ||
275 | const struct bkey *r) | ||
276 | { | ||
277 | return unlikely(KEY_INODE(l) != KEY_INODE(r)) | ||
278 | ? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r) | ||
279 | : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); | ||
280 | } | ||
281 | |||
282 | void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, | ||
283 | unsigned); | ||
284 | bool __bch_cut_front(const struct bkey *, struct bkey *); | ||
285 | bool __bch_cut_back(const struct bkey *, struct bkey *); | ||
286 | |||
287 | static inline bool bch_cut_front(const struct bkey *where, struct bkey *k) | ||
288 | { | ||
289 | BUG_ON(bkey_cmp(where, k) > 0); | ||
290 | return __bch_cut_front(where, k); | ||
291 | } | ||
292 | |||
293 | static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) | ||
294 | { | ||
295 | BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0); | ||
296 | return __bch_cut_back(where, k); | ||
297 | } | ||
298 | |||
299 | #define PRECEDING_KEY(_k) \ | ||
300 | ({ \ | ||
301 | struct bkey *_ret = NULL; \ | ||
302 | \ | ||
303 | if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \ | ||
304 | _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \ | ||
305 | \ | ||
306 | if (!_ret->low) \ | ||
307 | _ret->high--; \ | ||
308 | _ret->low--; \ | ||
309 | } \ | ||
310 | \ | ||
311 | _ret; \ | ||
312 | }) | ||
313 | |||
222 | /* Keylists */ | 314 | /* Keylists */ |
223 | 315 | ||
224 | struct keylist { | 316 | struct keylist { |
@@ -282,126 +374,15 @@ struct bkey *bch_keylist_pop(struct keylist *); | |||
282 | void bch_keylist_pop_front(struct keylist *); | 374 | void bch_keylist_pop_front(struct keylist *); |
283 | int __bch_keylist_realloc(struct keylist *, unsigned); | 375 | int __bch_keylist_realloc(struct keylist *, unsigned); |
284 | 376 | ||
285 | /* Bkey utility code */ | 377 | struct cache_set; |
286 | |||
287 | #define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) | ||
288 | |||
289 | static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx) | ||
290 | { | ||
291 | return bkey_idx(i->start, idx); | ||
292 | } | ||
293 | |||
294 | static inline void bkey_init(struct bkey *k) | ||
295 | { | ||
296 | *k = ZERO_KEY; | ||
297 | } | ||
298 | |||
299 | static __always_inline int64_t bkey_cmp(const struct bkey *l, | ||
300 | const struct bkey *r) | ||
301 | { | ||
302 | return unlikely(KEY_INODE(l) != KEY_INODE(r)) | ||
303 | ? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r) | ||
304 | : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); | ||
305 | } | ||
306 | |||
307 | void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *, | ||
308 | unsigned); | ||
309 | bool __bch_cut_front(const struct bkey *, struct bkey *); | ||
310 | bool __bch_cut_back(const struct bkey *, struct bkey *); | ||
311 | |||
312 | static inline bool bch_cut_front(const struct bkey *where, struct bkey *k) | ||
313 | { | ||
314 | BUG_ON(bkey_cmp(where, k) > 0); | ||
315 | return __bch_cut_front(where, k); | ||
316 | } | ||
317 | |||
318 | static inline bool bch_cut_back(const struct bkey *where, struct bkey *k) | ||
319 | { | ||
320 | BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0); | ||
321 | return __bch_cut_back(where, k); | ||
322 | } | ||
323 | |||
324 | const char *bch_ptr_status(struct cache_set *, const struct bkey *); | 378 | const char *bch_ptr_status(struct cache_set *, const struct bkey *); |
325 | bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *); | 379 | bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *); |
326 | bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *); | 380 | bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *); |
381 | bool bch_btree_ptr_bad(struct btree *, const struct bkey *); | ||
382 | bool bch_extent_ptr_bad(struct btree *, const struct bkey *); | ||
327 | 383 | ||
328 | bool bch_ptr_bad(struct btree *, const struct bkey *); | 384 | bool bch_ptr_bad(struct btree *, const struct bkey *); |
329 | 385 | ||
330 | typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *); | ||
331 | |||
332 | struct bkey *bch_btree_iter_next(struct btree_iter *); | ||
333 | struct bkey *bch_btree_iter_next_filter(struct btree_iter *, | ||
334 | struct btree *, ptr_filter_fn); | ||
335 | |||
336 | void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *); | ||
337 | struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *, | ||
338 | struct bkey *); | ||
339 | |||
340 | /* 32 bits total: */ | ||
341 | #define BKEY_MID_BITS 3 | ||
342 | #define BKEY_EXPONENT_BITS 7 | ||
343 | #define BKEY_MANTISSA_BITS 22 | ||
344 | #define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1) | ||
345 | |||
346 | struct bkey_float { | ||
347 | unsigned exponent:BKEY_EXPONENT_BITS; | ||
348 | unsigned m:BKEY_MID_BITS; | ||
349 | unsigned mantissa:BKEY_MANTISSA_BITS; | ||
350 | } __packed; | ||
351 | |||
352 | /* | ||
353 | * BSET_CACHELINE was originally intended to match the hardware cacheline size - | ||
354 | * it used to be 64, but I realized the lookup code would touch slightly less | ||
355 | * memory if it was 128. | ||
356 | * | ||
357 | * It definites the number of bytes (in struct bset) per struct bkey_float in | ||
358 | * the auxiliar search tree - when we're done searching the bset_float tree we | ||
359 | * have this many bytes left that we do a linear search over. | ||
360 | * | ||
361 | * Since (after level 5) every level of the bset_tree is on a new cacheline, | ||
362 | * we're touching one fewer cacheline in the bset tree in exchange for one more | ||
363 | * cacheline in the linear search - but the linear search might stop before it | ||
364 | * gets to the second cacheline. | ||
365 | */ | ||
366 | |||
367 | #define BSET_CACHELINE 128 | ||
368 | #define bset_tree_space(b) (btree_data_space(b) / BSET_CACHELINE) | ||
369 | |||
370 | #define bset_tree_bytes(b) (bset_tree_space(b) * sizeof(struct bkey_float)) | ||
371 | #define bset_prev_bytes(b) (bset_tree_space(b) * sizeof(uint8_t)) | ||
372 | |||
373 | void bch_bset_init_next(struct btree *); | ||
374 | |||
375 | void bch_bset_fix_invalidated_key(struct btree *, struct bkey *); | ||
376 | void bch_bset_fix_lookup_table(struct btree *, struct bkey *); | ||
377 | |||
378 | struct bkey *__bch_bset_search(struct btree *, struct bset_tree *, | ||
379 | const struct bkey *); | ||
380 | |||
381 | /* | ||
382 | * Returns the first key that is strictly greater than search | ||
383 | */ | ||
384 | static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t, | ||
385 | const struct bkey *search) | ||
386 | { | ||
387 | return search ? __bch_bset_search(b, t, search) : t->data->start; | ||
388 | } | ||
389 | |||
390 | #define PRECEDING_KEY(_k) \ | ||
391 | ({ \ | ||
392 | struct bkey *_ret = NULL; \ | ||
393 | \ | ||
394 | if (KEY_INODE(_k) || KEY_OFFSET(_k)) { \ | ||
395 | _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0); \ | ||
396 | \ | ||
397 | if (!_ret->low) \ | ||
398 | _ret->high--; \ | ||
399 | _ret->low--; \ | ||
400 | } \ | ||
401 | \ | ||
402 | _ret; \ | ||
403 | }) | ||
404 | |||
405 | bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); | 386 | bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *); |
406 | 387 | ||
407 | int bch_bset_print_stats(struct cache_set *, char *); | 388 | int bch_bset_print_stats(struct cache_set *, char *); |
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 78ba0b67ac16..89252e7f2879 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
@@ -110,7 +110,7 @@ static inline bool should_split(struct btree *b) | |||
110 | { | 110 | { |
111 | struct bset *i = write_block(b); | 111 | struct bset *i = write_block(b); |
112 | return b->written >= btree_blocks(b) || | 112 | return b->written >= btree_blocks(b) || |
113 | (b->written + __set_blocks(i, i->keys + 15, b->c) | 113 | (b->written + __set_blocks(i, i->keys + 15, block_bytes(b->c)) |
114 | > btree_blocks(b)); | 114 | > btree_blocks(b)); |
115 | } | 115 | } |
116 | 116 | ||
@@ -206,7 +206,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) | |||
206 | void bch_btree_node_read_done(struct btree *b) | 206 | void bch_btree_node_read_done(struct btree *b) |
207 | { | 207 | { |
208 | const char *err = "bad btree header"; | 208 | const char *err = "bad btree header"; |
209 | struct bset *i = b->sets[0].data; | 209 | struct bset *i = btree_bset_first(b); |
210 | struct btree_iter *iter; | 210 | struct btree_iter *iter; |
211 | 211 | ||
212 | iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); | 212 | iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); |
@@ -228,7 +228,8 @@ void bch_btree_node_read_done(struct btree *b) | |||
228 | goto err; | 228 | goto err; |
229 | 229 | ||
230 | err = "bad btree header"; | 230 | err = "bad btree header"; |
231 | if (b->written + set_blocks(i, b->c) > btree_blocks(b)) | 231 | if (b->written + set_blocks(i, block_bytes(b->c)) > |
232 | btree_blocks(b)) | ||
232 | goto err; | 233 | goto err; |
233 | 234 | ||
234 | err = "bad magic"; | 235 | err = "bad magic"; |
@@ -253,7 +254,7 @@ void bch_btree_node_read_done(struct btree *b) | |||
253 | 254 | ||
254 | bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); | 255 | bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); |
255 | 256 | ||
256 | b->written += set_blocks(i, b->c); | 257 | b->written += set_blocks(i, block_bytes(b->c)); |
257 | } | 258 | } |
258 | 259 | ||
259 | err = "corrupted btree"; | 260 | err = "corrupted btree"; |
@@ -272,7 +273,7 @@ void bch_btree_node_read_done(struct btree *b) | |||
272 | goto err; | 273 | goto err; |
273 | 274 | ||
274 | if (b->written < btree_blocks(b)) | 275 | if (b->written < btree_blocks(b)) |
275 | bch_bset_init_next(b); | 276 | bch_bset_init_next(b, write_block(b), bset_magic(&b->c->sb)); |
276 | out: | 277 | out: |
277 | mempool_free(iter, b->c->fill_iter); | 278 | mempool_free(iter, b->c->fill_iter); |
278 | return; | 279 | return; |
@@ -393,7 +394,7 @@ static void btree_node_write_endio(struct bio *bio, int error) | |||
393 | static void do_btree_node_write(struct btree *b) | 394 | static void do_btree_node_write(struct btree *b) |
394 | { | 395 | { |
395 | struct closure *cl = &b->io; | 396 | struct closure *cl = &b->io; |
396 | struct bset *i = b->sets[b->nsets].data; | 397 | struct bset *i = btree_bset_last(b); |
397 | BKEY_PADDED(key) k; | 398 | BKEY_PADDED(key) k; |
398 | 399 | ||
399 | i->version = BCACHE_BSET_VERSION; | 400 | i->version = BCACHE_BSET_VERSION; |
@@ -405,7 +406,7 @@ static void do_btree_node_write(struct btree *b) | |||
405 | b->bio->bi_end_io = btree_node_write_endio; | 406 | b->bio->bi_end_io = btree_node_write_endio; |
406 | b->bio->bi_private = cl; | 407 | b->bio->bi_private = cl; |
407 | b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; | 408 | b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; |
408 | b->bio->bi_iter.bi_size = set_blocks(i, b->c) * block_bytes(b->c); | 409 | b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c)); |
409 | bch_bio_map(b->bio, i); | 410 | bch_bio_map(b->bio, i); |
410 | 411 | ||
411 | /* | 412 | /* |
@@ -424,7 +425,8 @@ static void do_btree_node_write(struct btree *b) | |||
424 | */ | 425 | */ |
425 | 426 | ||
426 | bkey_copy(&k.key, &b->key); | 427 | bkey_copy(&k.key, &b->key); |
427 | SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); | 428 | SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + |
429 | bset_sector_offset(b, i)); | ||
428 | 430 | ||
429 | if (!bio_alloc_pages(b->bio, GFP_NOIO)) { | 431 | if (!bio_alloc_pages(b->bio, GFP_NOIO)) { |
430 | int j; | 432 | int j; |
@@ -451,14 +453,14 @@ static void do_btree_node_write(struct btree *b) | |||
451 | 453 | ||
452 | void bch_btree_node_write(struct btree *b, struct closure *parent) | 454 | void bch_btree_node_write(struct btree *b, struct closure *parent) |
453 | { | 455 | { |
454 | struct bset *i = b->sets[b->nsets].data; | 456 | struct bset *i = btree_bset_last(b); |
455 | 457 | ||
456 | trace_bcache_btree_write(b); | 458 | trace_bcache_btree_write(b); |
457 | 459 | ||
458 | BUG_ON(current->bio_list); | 460 | BUG_ON(current->bio_list); |
459 | BUG_ON(b->written >= btree_blocks(b)); | 461 | BUG_ON(b->written >= btree_blocks(b)); |
460 | BUG_ON(b->written && !i->keys); | 462 | BUG_ON(b->written && !i->keys); |
461 | BUG_ON(b->sets->data->seq != i->seq); | 463 | BUG_ON(btree_bset_first(b)->seq != i->seq); |
462 | bch_check_keys(b, "writing"); | 464 | bch_check_keys(b, "writing"); |
463 | 465 | ||
464 | cancel_delayed_work(&b->work); | 466 | cancel_delayed_work(&b->work); |
@@ -472,8 +474,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) | |||
472 | 474 | ||
473 | do_btree_node_write(b); | 475 | do_btree_node_write(b); |
474 | 476 | ||
475 | b->written += set_blocks(i, b->c); | 477 | b->written += set_blocks(i, block_bytes(b->c)); |
476 | atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, | 478 | atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size, |
477 | &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); | 479 | &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); |
478 | 480 | ||
479 | /* If not a leaf node, always sort */ | 481 | /* If not a leaf node, always sort */ |
@@ -490,7 +492,7 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) | |||
490 | bch_btree_verify(b); | 492 | bch_btree_verify(b); |
491 | 493 | ||
492 | if (b->written < btree_blocks(b)) | 494 | if (b->written < btree_blocks(b)) |
493 | bch_bset_init_next(b); | 495 | bch_bset_init_next(b, write_block(b), bset_magic(&b->c->sb)); |
494 | } | 496 | } |
495 | 497 | ||
496 | static void bch_btree_node_write_sync(struct btree *b) | 498 | static void bch_btree_node_write_sync(struct btree *b) |
@@ -515,7 +517,7 @@ static void btree_node_write_work(struct work_struct *w) | |||
515 | 517 | ||
516 | static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) | 518 | static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) |
517 | { | 519 | { |
518 | struct bset *i = b->sets[b->nsets].data; | 520 | struct bset *i = btree_bset_last(b); |
519 | struct btree_write *w = btree_current_write(b); | 521 | struct btree_write *w = btree_current_write(b); |
520 | 522 | ||
521 | BUG_ON(!b->written); | 523 | BUG_ON(!b->written); |
@@ -575,29 +577,12 @@ static void mca_reinit(struct btree *b) | |||
575 | 577 | ||
576 | static void mca_data_free(struct btree *b) | 578 | static void mca_data_free(struct btree *b) |
577 | { | 579 | { |
578 | struct bset_tree *t = b->sets; | ||
579 | |||
580 | BUG_ON(b->io_mutex.count != 1); | 580 | BUG_ON(b->io_mutex.count != 1); |
581 | 581 | ||
582 | if (bset_prev_bytes(b) < PAGE_SIZE) | 582 | bch_btree_keys_free(b); |
583 | kfree(t->prev); | ||
584 | else | ||
585 | free_pages((unsigned long) t->prev, | ||
586 | get_order(bset_prev_bytes(b))); | ||
587 | |||
588 | if (bset_tree_bytes(b) < PAGE_SIZE) | ||
589 | kfree(t->tree); | ||
590 | else | ||
591 | free_pages((unsigned long) t->tree, | ||
592 | get_order(bset_tree_bytes(b))); | ||
593 | |||
594 | free_pages((unsigned long) t->data, b->page_order); | ||
595 | 583 | ||
596 | t->prev = NULL; | ||
597 | t->tree = NULL; | ||
598 | t->data = NULL; | ||
599 | list_move(&b->list, &b->c->btree_cache_freed); | ||
600 | b->c->bucket_cache_used--; | 584 | b->c->bucket_cache_used--; |
585 | list_move(&b->list, &b->c->btree_cache_freed); | ||
601 | } | 586 | } |
602 | 587 | ||
603 | static void mca_bucket_free(struct btree *b) | 588 | static void mca_bucket_free(struct btree *b) |
@@ -616,34 +601,16 @@ static unsigned btree_order(struct bkey *k) | |||
616 | 601 | ||
617 | static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) | 602 | static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) |
618 | { | 603 | { |
619 | struct bset_tree *t = b->sets; | 604 | if (!bch_btree_keys_alloc(b, |
620 | BUG_ON(t->data); | 605 | max_t(unsigned, |
621 | 606 | ilog2(b->c->btree_pages), | |
622 | b->page_order = max_t(unsigned, | 607 | btree_order(k)), |
623 | ilog2(b->c->btree_pages), | 608 | gfp)) { |
624 | btree_order(k)); | 609 | b->c->bucket_cache_used++; |
625 | 610 | list_move(&b->list, &b->c->btree_cache); | |
626 | t->data = (void *) __get_free_pages(gfp, b->page_order); | 611 | } else { |
627 | if (!t->data) | 612 | list_move(&b->list, &b->c->btree_cache_freed); |
628 | goto err; | 613 | } |
629 | |||
630 | t->tree = bset_tree_bytes(b) < PAGE_SIZE | ||
631 | ? kmalloc(bset_tree_bytes(b), gfp) | ||
632 | : (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b))); | ||
633 | if (!t->tree) | ||
634 | goto err; | ||
635 | |||
636 | t->prev = bset_prev_bytes(b) < PAGE_SIZE | ||
637 | ? kmalloc(bset_prev_bytes(b), gfp) | ||
638 | : (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b))); | ||
639 | if (!t->prev) | ||
640 | goto err; | ||
641 | |||
642 | list_move(&b->list, &b->c->btree_cache); | ||
643 | b->c->bucket_cache_used++; | ||
644 | return; | ||
645 | err: | ||
646 | mca_data_free(b); | ||
647 | } | 614 | } |
648 | 615 | ||
649 | static struct btree *mca_bucket_alloc(struct cache_set *c, | 616 | static struct btree *mca_bucket_alloc(struct cache_set *c, |
@@ -1111,7 +1078,7 @@ retry: | |||
1111 | } | 1078 | } |
1112 | 1079 | ||
1113 | b->accessed = 1; | 1080 | b->accessed = 1; |
1114 | bch_bset_init_next(b); | 1081 | bch_bset_init_next(b, b->sets->data, bset_magic(&b->c->sb)); |
1115 | 1082 | ||
1116 | mutex_unlock(&c->bucket_lock); | 1083 | mutex_unlock(&c->bucket_lock); |
1117 | 1084 | ||
@@ -1298,7 +1265,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1298 | blocks = btree_default_blocks(b->c) * 2 / 3; | 1265 | blocks = btree_default_blocks(b->c) * 2 / 3; |
1299 | 1266 | ||
1300 | if (nodes < 2 || | 1267 | if (nodes < 2 || |
1301 | __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1)) | 1268 | __set_blocks(b->sets[0].data, keys, |
1269 | block_bytes(b->c)) > blocks * (nodes - 1)) | ||
1302 | return 0; | 1270 | return 0; |
1303 | 1271 | ||
1304 | for (i = 0; i < nodes; i++) { | 1272 | for (i = 0; i < nodes; i++) { |
@@ -1308,8 +1276,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1308 | } | 1276 | } |
1309 | 1277 | ||
1310 | for (i = nodes - 1; i > 0; --i) { | 1278 | for (i = nodes - 1; i > 0; --i) { |
1311 | struct bset *n1 = new_nodes[i]->sets->data; | 1279 | struct bset *n1 = btree_bset_first(new_nodes[i]); |
1312 | struct bset *n2 = new_nodes[i - 1]->sets->data; | 1280 | struct bset *n2 = btree_bset_first(new_nodes[i - 1]); |
1313 | struct bkey *k, *last = NULL; | 1281 | struct bkey *k, *last = NULL; |
1314 | 1282 | ||
1315 | keys = 0; | 1283 | keys = 0; |
@@ -1319,7 +1287,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1319 | k < bset_bkey_last(n2); | 1287 | k < bset_bkey_last(n2); |
1320 | k = bkey_next(k)) { | 1288 | k = bkey_next(k)) { |
1321 | if (__set_blocks(n1, n1->keys + keys + | 1289 | if (__set_blocks(n1, n1->keys + keys + |
1322 | bkey_u64s(k), b->c) > blocks) | 1290 | bkey_u64s(k), |
1291 | block_bytes(b->c)) > blocks) | ||
1323 | break; | 1292 | break; |
1324 | 1293 | ||
1325 | last = k; | 1294 | last = k; |
@@ -1335,7 +1304,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1335 | * though) | 1304 | * though) |
1336 | */ | 1305 | */ |
1337 | if (__set_blocks(n1, n1->keys + n2->keys, | 1306 | if (__set_blocks(n1, n1->keys + n2->keys, |
1338 | b->c) > btree_blocks(new_nodes[i])) | 1307 | block_bytes(b->c)) > |
1308 | btree_blocks(new_nodes[i])) | ||
1339 | goto out_nocoalesce; | 1309 | goto out_nocoalesce; |
1340 | 1310 | ||
1341 | keys = n2->keys; | 1311 | keys = n2->keys; |
@@ -1343,8 +1313,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1343 | last = &r->b->key; | 1313 | last = &r->b->key; |
1344 | } | 1314 | } |
1345 | 1315 | ||
1346 | BUG_ON(__set_blocks(n1, n1->keys + keys, | 1316 | BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) > |
1347 | b->c) > btree_blocks(new_nodes[i])); | 1317 | btree_blocks(new_nodes[i])); |
1348 | 1318 | ||
1349 | if (last) | 1319 | if (last) |
1350 | bkey_copy_key(&new_nodes[i]->key, last); | 1320 | bkey_copy_key(&new_nodes[i]->key, last); |
@@ -1380,7 +1350,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1380 | } | 1350 | } |
1381 | 1351 | ||
1382 | /* We emptied out this node */ | 1352 | /* We emptied out this node */ |
1383 | BUG_ON(new_nodes[0]->sets->data->keys); | 1353 | BUG_ON(btree_bset_first(new_nodes[0])->keys); |
1384 | btree_node_free(new_nodes[0]); | 1354 | btree_node_free(new_nodes[0]); |
1385 | rw_unlock(true, new_nodes[0]); | 1355 | rw_unlock(true, new_nodes[0]); |
1386 | 1356 | ||
@@ -1831,19 +1801,6 @@ err: | |||
1831 | 1801 | ||
1832 | /* Btree insertion */ | 1802 | /* Btree insertion */ |
1833 | 1803 | ||
1834 | static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert) | ||
1835 | { | ||
1836 | struct bset *i = b->sets[b->nsets].data; | ||
1837 | |||
1838 | memmove((uint64_t *) where + bkey_u64s(insert), | ||
1839 | where, | ||
1840 | (void *) bset_bkey_last(i) - (void *) where); | ||
1841 | |||
1842 | i->keys += bkey_u64s(insert); | ||
1843 | bkey_copy(where, insert); | ||
1844 | bch_bset_fix_lookup_table(b, where); | ||
1845 | } | ||
1846 | |||
1847 | static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, | 1804 | static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, |
1848 | struct btree_iter *iter, | 1805 | struct btree_iter *iter, |
1849 | struct bkey *replace_key) | 1806 | struct bkey *replace_key) |
@@ -1944,13 +1901,13 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, | |||
1944 | * depends on us inserting a new key for the top | 1901 | * depends on us inserting a new key for the top |
1945 | * here. | 1902 | * here. |
1946 | */ | 1903 | */ |
1947 | top = bch_bset_search(b, &b->sets[b->nsets], | 1904 | top = bch_bset_search(b, bset_tree_last(b), |
1948 | insert); | 1905 | insert); |
1949 | shift_keys(b, top, k); | 1906 | bch_bset_insert(b, top, k); |
1950 | } else { | 1907 | } else { |
1951 | BKEY_PADDED(key) temp; | 1908 | BKEY_PADDED(key) temp; |
1952 | bkey_copy(&temp.key, k); | 1909 | bkey_copy(&temp.key, k); |
1953 | shift_keys(b, k, &temp.key); | 1910 | bch_bset_insert(b, k, &temp.key); |
1954 | top = bkey_next(k); | 1911 | top = bkey_next(k); |
1955 | } | 1912 | } |
1956 | 1913 | ||
@@ -1999,7 +1956,7 @@ check_failed: | |||
1999 | static bool btree_insert_key(struct btree *b, struct btree_op *op, | 1956 | static bool btree_insert_key(struct btree *b, struct btree_op *op, |
2000 | struct bkey *k, struct bkey *replace_key) | 1957 | struct bkey *k, struct bkey *replace_key) |
2001 | { | 1958 | { |
2002 | struct bset *i = b->sets[b->nsets].data; | 1959 | struct bset *i = btree_bset_last(b); |
2003 | struct bkey *m, *prev; | 1960 | struct bkey *m, *prev; |
2004 | unsigned status = BTREE_INSERT_STATUS_INSERT; | 1961 | unsigned status = BTREE_INSERT_STATUS_INSERT; |
2005 | 1962 | ||
@@ -2051,10 +2008,10 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, | |||
2051 | goto copy; | 2008 | goto copy; |
2052 | } else { | 2009 | } else { |
2053 | BUG_ON(replace_key); | 2010 | BUG_ON(replace_key); |
2054 | m = bch_bset_search(b, &b->sets[b->nsets], k); | 2011 | m = bch_bset_search(b, bset_tree_last(b), k); |
2055 | } | 2012 | } |
2056 | 2013 | ||
2057 | insert: shift_keys(b, m, k); | 2014 | insert: bch_bset_insert(b, m, k); |
2058 | copy: bkey_copy(m, k); | 2015 | copy: bkey_copy(m, k); |
2059 | merged: | 2016 | merged: |
2060 | bch_check_keys(b, "%u for %s", status, | 2017 | bch_check_keys(b, "%u for %s", status, |
@@ -2079,8 +2036,9 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, | |||
2079 | struct bset *i = write_block(b); | 2036 | struct bset *i = write_block(b); |
2080 | struct bkey *k = insert_keys->keys; | 2037 | struct bkey *k = insert_keys->keys; |
2081 | 2038 | ||
2082 | if (b->written + __set_blocks(i, i->keys + bkey_u64s(k), b->c) | 2039 | if (b->written + |
2083 | > btree_blocks(b)) | 2040 | __set_blocks(i, i->keys + bkey_u64s(k), |
2041 | block_bytes(b->c)) > btree_blocks(b)) | ||
2084 | break; | 2042 | break; |
2085 | 2043 | ||
2086 | if (bkey_cmp(k, &b->key) <= 0) { | 2044 | if (bkey_cmp(k, &b->key) <= 0) { |
@@ -2130,12 +2088,13 @@ static int btree_split(struct btree *b, struct btree_op *op, | |||
2130 | if (IS_ERR(n1)) | 2088 | if (IS_ERR(n1)) |
2131 | goto err; | 2089 | goto err; |
2132 | 2090 | ||
2133 | split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5; | 2091 | split = set_blocks(btree_bset_first(n1), |
2092 | block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5; | ||
2134 | 2093 | ||
2135 | if (split) { | 2094 | if (split) { |
2136 | unsigned keys = 0; | 2095 | unsigned keys = 0; |
2137 | 2096 | ||
2138 | trace_bcache_btree_node_split(b, n1->sets[0].data->keys); | 2097 | trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys); |
2139 | 2098 | ||
2140 | n2 = bch_btree_node_alloc(b->c, b->level, true); | 2099 | n2 = bch_btree_node_alloc(b->c, b->level, true); |
2141 | if (IS_ERR(n2)) | 2100 | if (IS_ERR(n2)) |
@@ -2154,20 +2113,20 @@ static int btree_split(struct btree *b, struct btree_op *op, | |||
2154 | * search tree yet | 2113 | * search tree yet |
2155 | */ | 2114 | */ |
2156 | 2115 | ||
2157 | while (keys < (n1->sets[0].data->keys * 3) / 5) | 2116 | while (keys < (btree_bset_first(n1)->keys * 3) / 5) |
2158 | keys += bkey_u64s(bset_bkey_idx(n1->sets[0].data, | 2117 | keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), |
2159 | keys)); | 2118 | keys)); |
2160 | 2119 | ||
2161 | bkey_copy_key(&n1->key, | 2120 | bkey_copy_key(&n1->key, |
2162 | bset_bkey_idx(n1->sets[0].data, keys)); | 2121 | bset_bkey_idx(btree_bset_first(n1), keys)); |
2163 | keys += bkey_u64s(bset_bkey_idx(n1->sets[0].data, keys)); | 2122 | keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), keys)); |
2164 | 2123 | ||
2165 | n2->sets[0].data->keys = n1->sets[0].data->keys - keys; | 2124 | btree_bset_first(n2)->keys = btree_bset_first(n1)->keys - keys; |
2166 | n1->sets[0].data->keys = keys; | 2125 | btree_bset_first(n1)->keys = keys; |
2167 | 2126 | ||
2168 | memcpy(n2->sets[0].data->start, | 2127 | memcpy(btree_bset_first(n2)->start, |
2169 | bset_bkey_last(n1->sets[0].data), | 2128 | bset_bkey_last(btree_bset_first(n1)), |
2170 | n2->sets[0].data->keys * sizeof(uint64_t)); | 2129 | btree_bset_first(n2)->keys * sizeof(uint64_t)); |
2171 | 2130 | ||
2172 | bkey_copy_key(&n2->key, &b->key); | 2131 | bkey_copy_key(&n2->key, &b->key); |
2173 | 2132 | ||
@@ -2175,7 +2134,7 @@ static int btree_split(struct btree *b, struct btree_op *op, | |||
2175 | bch_btree_node_write(n2, &cl); | 2134 | bch_btree_node_write(n2, &cl); |
2176 | rw_unlock(true, n2); | 2135 | rw_unlock(true, n2); |
2177 | } else { | 2136 | } else { |
2178 | trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); | 2137 | trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys); |
2179 | 2138 | ||
2180 | bch_btree_insert_keys(n1, op, insert_keys, replace_key); | 2139 | bch_btree_insert_keys(n1, op, insert_keys, replace_key); |
2181 | } | 2140 | } |
@@ -2256,7 +2215,7 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op, | |||
2256 | -EINTR; | 2215 | -EINTR; |
2257 | } | 2216 | } |
2258 | } else { | 2217 | } else { |
2259 | BUG_ON(write_block(b) != b->sets[b->nsets].data); | 2218 | BUG_ON(write_block(b) != btree_bset_last(b)); |
2260 | 2219 | ||
2261 | if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) { | 2220 | if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) { |
2262 | if (!b->level) | 2221 | if (!b->level) |
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 2a5a8481f25e..b3541173ccf2 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h | |||
@@ -180,9 +180,9 @@ static inline struct btree_write *btree_prev_write(struct btree *b) | |||
180 | return b->writes + (btree_node_write_idx(b) ^ 1); | 180 | return b->writes + (btree_node_write_idx(b) ^ 1); |
181 | } | 181 | } |
182 | 182 | ||
183 | static inline unsigned bset_offset(struct btree *b, struct bset *i) | 183 | static inline struct bset_tree *bset_tree_last(struct btree *b) |
184 | { | 184 | { |
185 | return (((size_t) i) - ((size_t) b->sets->data)) >> 9; | 185 | return b->sets + b->nsets; |
186 | } | 186 | } |
187 | 187 | ||
188 | static inline struct bset *btree_bset_first(struct btree *b) | 188 | static inline struct bset *btree_bset_first(struct btree *b) |
@@ -190,6 +190,11 @@ static inline struct bset *btree_bset_first(struct btree *b) | |||
190 | return b->sets->data; | 190 | return b->sets->data; |
191 | } | 191 | } |
192 | 192 | ||
193 | static inline struct bset *btree_bset_last(struct btree *b) | ||
194 | { | ||
195 | return bset_tree_last(b)->data; | ||
196 | } | ||
197 | |||
193 | static inline unsigned bset_byte_offset(struct btree *b, struct bset *i) | 198 | static inline unsigned bset_byte_offset(struct btree *b, struct bset *i) |
194 | { | 199 | { |
195 | return ((size_t) i) - ((size_t) b->sets->data); | 200 | return ((size_t) i) - ((size_t) b->sets->data); |
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 955fa1d31774..5a78137c420d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c | |||
@@ -123,7 +123,8 @@ static void bch_dump_bucket(struct btree *b) | |||
123 | for (i = (start); \ | 123 | for (i = (start); \ |
124 | (void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&\ | 124 | (void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&\ |
125 | i->seq == (start)->seq; \ | 125 | i->seq == (start)->seq; \ |
126 | i = (void *) i + set_blocks(i, b->c) * block_bytes(b->c)) | 126 | i = (void *) i + set_blocks(i, block_bytes(b->c)) * \ |
127 | block_bytes(b->c)) | ||
127 | 128 | ||
128 | void bch_btree_verify(struct btree *b) | 129 | void bch_btree_verify(struct btree *b) |
129 | { | 130 | { |
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 5e14e3325ec1..18039affc306 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c | |||
@@ -95,7 +95,7 @@ reread: left = ca->sb.bucket_size - offset; | |||
95 | return ret; | 95 | return ret; |
96 | } | 96 | } |
97 | 97 | ||
98 | blocks = set_blocks(j, ca->set); | 98 | blocks = set_blocks(j, block_bytes(ca->set)); |
99 | 99 | ||
100 | while (!list_empty(list)) { | 100 | while (!list_empty(list)) { |
101 | i = list_first_entry(list, | 101 | i = list_first_entry(list, |
@@ -579,7 +579,8 @@ static void journal_write_unlocked(struct closure *cl) | |||
579 | struct cache *ca; | 579 | struct cache *ca; |
580 | struct journal_write *w = c->journal.cur; | 580 | struct journal_write *w = c->journal.cur; |
581 | struct bkey *k = &c->journal.key; | 581 | struct bkey *k = &c->journal.key; |
582 | unsigned i, sectors = set_blocks(w->data, c) * c->sb.block_size; | 582 | unsigned i, sectors = set_blocks(w->data, block_bytes(c)) * |
583 | c->sb.block_size; | ||
583 | 584 | ||
584 | struct bio *bio; | 585 | struct bio *bio; |
585 | struct bio_list list; | 586 | struct bio_list list; |
@@ -595,7 +596,7 @@ static void journal_write_unlocked(struct closure *cl) | |||
595 | continue_at(cl, journal_write, system_wq); | 596 | continue_at(cl, journal_write, system_wq); |
596 | } | 597 | } |
597 | 598 | ||
598 | c->journal.blocks_free -= set_blocks(w->data, c); | 599 | c->journal.blocks_free -= set_blocks(w->data, block_bytes(c)); |
599 | 600 | ||
600 | w->data->btree_level = c->root->level; | 601 | w->data->btree_level = c->root->level; |
601 | 602 | ||
@@ -685,7 +686,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, | |||
685 | struct journal_write *w = c->journal.cur; | 686 | struct journal_write *w = c->journal.cur; |
686 | 687 | ||
687 | sectors = __set_blocks(w->data, w->data->keys + nkeys, | 688 | sectors = __set_blocks(w->data, w->data->keys + nkeys, |
688 | c) * c->sb.block_size; | 689 | block_bytes(c)) * c->sb.block_size; |
689 | 690 | ||
690 | if (sectors <= min_t(size_t, | 691 | if (sectors <= min_t(size_t, |
691 | c->journal.blocks_free * c->sb.block_size, | 692 | c->journal.blocks_free * c->sb.block_size, |
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1fc8165a5c01..12057a43e01f 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c | |||
@@ -1477,7 +1477,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) | |||
1477 | c->block_bits = ilog2(sb->block_size); | 1477 | c->block_bits = ilog2(sb->block_size); |
1478 | c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); | 1478 | c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); |
1479 | 1479 | ||
1480 | c->btree_pages = c->sb.bucket_size / PAGE_SECTORS; | 1480 | c->btree_pages = bucket_pages(c); |
1481 | if (c->btree_pages > BTREE_MAX_PAGES) | 1481 | if (c->btree_pages > BTREE_MAX_PAGES) |
1482 | c->btree_pages = max_t(int, c->btree_pages / 4, | 1482 | c->btree_pages = max_t(int, c->btree_pages / 4, |
1483 | BTREE_MAX_PAGES); | 1483 | BTREE_MAX_PAGES); |