Diffstat (limited to 'drivers/md/bcache/btree.c')
-rw-r--r-- | drivers/md/bcache/btree.c | 676 |
1 file changed, 250 insertions(+), 426 deletions(-)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 946ecd3b048b..98cc0a810a36 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -23,7 +23,7 @@ | |||
23 | #include "bcache.h" | 23 | #include "bcache.h" |
24 | #include "btree.h" | 24 | #include "btree.h" |
25 | #include "debug.h" | 25 | #include "debug.h" |
26 | #include "writeback.h" | 26 | #include "extents.h" |
27 | 27 | ||
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/bitops.h> | 29 | #include <linux/bitops.h> |
@@ -89,13 +89,6 @@ | |||
89 | * Test module load/unload | 89 | * Test module load/unload |
90 | */ | 90 | */ |
91 | 91 | ||
92 | enum { | ||
93 | BTREE_INSERT_STATUS_INSERT, | ||
94 | BTREE_INSERT_STATUS_BACK_MERGE, | ||
95 | BTREE_INSERT_STATUS_OVERWROTE, | ||
96 | BTREE_INSERT_STATUS_FRONT_MERGE, | ||
97 | }; | ||
98 | |||
99 | #define MAX_NEED_GC 64 | 92 | #define MAX_NEED_GC 64 |
100 | #define MAX_SAVE_PRIO 72 | 93 | #define MAX_SAVE_PRIO 72 |
101 | 94 | ||
@@ -106,14 +99,6 @@ enum { | |||
106 | 99 | ||
107 | static struct workqueue_struct *btree_io_wq; | 100 | static struct workqueue_struct *btree_io_wq; |
108 | 101 | ||
109 | static inline bool should_split(struct btree *b) | ||
110 | { | ||
111 | struct bset *i = write_block(b); | ||
112 | return b->written >= btree_blocks(b) || | ||
113 | (b->written + __set_blocks(i, i->keys + 15, b->c) | ||
114 | > btree_blocks(b)); | ||
115 | } | ||
116 | |||
117 | #define insert_lock(s, b) ((b)->level <= (s)->lock) | 102 | #define insert_lock(s, b) ((b)->level <= (s)->lock) |
118 | 103 | ||
119 | /* | 104 | /* |
@@ -167,6 +152,8 @@ static inline bool should_split(struct btree *b) | |||
167 | _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ | 152 | _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ |
168 | } \ | 153 | } \ |
169 | rw_unlock(_w, _b); \ | 154 | rw_unlock(_w, _b); \ |
155 | if (_r == -EINTR) \ | ||
156 | schedule(); \ | ||
170 | bch_cannibalize_unlock(c); \ | 157 | bch_cannibalize_unlock(c); \ |
171 | if (_r == -ENOSPC) { \ | 158 | if (_r == -ENOSPC) { \ |
172 | wait_event((c)->try_wait, \ | 159 | wait_event((c)->try_wait, \ |
@@ -175,9 +162,15 @@ static inline bool should_split(struct btree *b) | |||
175 | } \ | 162 | } \ |
176 | } while (_r == -EINTR); \ | 163 | } while (_r == -EINTR); \ |
177 | \ | 164 | \ |
165 | finish_wait(&(c)->bucket_wait, &(op)->wait); \ | ||
178 | _r; \ | 166 | _r; \ |
179 | }) | 167 | }) |
180 | 168 | ||
169 | static inline struct bset *write_block(struct btree *b) | ||
170 | { | ||
171 | return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c); | ||
172 | } | ||
173 | |||
181 | /* Btree key manipulation */ | 174 | /* Btree key manipulation */ |
182 | 175 | ||
183 | void bkey_put(struct cache_set *c, struct bkey *k) | 176 | void bkey_put(struct cache_set *c, struct bkey *k) |
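The hunk above does two things: the btree() retry macro now yields with schedule() when a child call returns -EINTR, and it pairs a finish_wait() on the way out with the prepare_to_wait() done in btree_check_reserve() further down; write_block() also moves here as a plain inline. As a rough sketch (not the kernel code), write_block() just offsets into the node's in-memory buffer by the number of blocks already written; next_unwritten_bset() and its parameters below are illustrative names, assuming block_bytes is the cache set's block size in bytes.

  /* Sketch only: locate the first unwritten bset in a node's buffer. */
  static inline void *next_unwritten_bset(void *first_bset,
                                          unsigned written,
                                          unsigned block_bytes)
  {
          /* `written` counts blocks already on disk for this node */
          return (char *) first_bset + (size_t) written * block_bytes;
  }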
@@ -194,16 +187,16 @@ void bkey_put(struct cache_set *c, struct bkey *k) | |||
194 | static uint64_t btree_csum_set(struct btree *b, struct bset *i) | 187 | static uint64_t btree_csum_set(struct btree *b, struct bset *i) |
195 | { | 188 | { |
196 | uint64_t crc = b->key.ptr[0]; | 189 | uint64_t crc = b->key.ptr[0]; |
197 | void *data = (void *) i + 8, *end = end(i); | 190 | void *data = (void *) i + 8, *end = bset_bkey_last(i); |
198 | 191 | ||
199 | crc = bch_crc64_update(crc, data, end - data); | 192 | crc = bch_crc64_update(crc, data, end - data); |
200 | return crc ^ 0xffffffffffffffffULL; | 193 | return crc ^ 0xffffffffffffffffULL; |
201 | } | 194 | } |
202 | 195 | ||
203 | static void bch_btree_node_read_done(struct btree *b) | 196 | void bch_btree_node_read_done(struct btree *b) |
204 | { | 197 | { |
205 | const char *err = "bad btree header"; | 198 | const char *err = "bad btree header"; |
206 | struct bset *i = b->sets[0].data; | 199 | struct bset *i = btree_bset_first(b); |
207 | struct btree_iter *iter; | 200 | struct btree_iter *iter; |
208 | 201 | ||
209 | iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); | 202 | iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); |
@@ -211,21 +204,22 @@ static void bch_btree_node_read_done(struct btree *b) | |||
211 | iter->used = 0; | 204 | iter->used = 0; |
212 | 205 | ||
213 | #ifdef CONFIG_BCACHE_DEBUG | 206 | #ifdef CONFIG_BCACHE_DEBUG |
214 | iter->b = b; | 207 | iter->b = &b->keys; |
215 | #endif | 208 | #endif |
216 | 209 | ||
217 | if (!i->seq) | 210 | if (!i->seq) |
218 | goto err; | 211 | goto err; |
219 | 212 | ||
220 | for (; | 213 | for (; |
221 | b->written < btree_blocks(b) && i->seq == b->sets[0].data->seq; | 214 | b->written < btree_blocks(b) && i->seq == b->keys.set[0].data->seq; |
222 | i = write_block(b)) { | 215 | i = write_block(b)) { |
223 | err = "unsupported bset version"; | 216 | err = "unsupported bset version"; |
224 | if (i->version > BCACHE_BSET_VERSION) | 217 | if (i->version > BCACHE_BSET_VERSION) |
225 | goto err; | 218 | goto err; |
226 | 219 | ||
227 | err = "bad btree header"; | 220 | err = "bad btree header"; |
228 | if (b->written + set_blocks(i, b->c) > btree_blocks(b)) | 221 | if (b->written + set_blocks(i, block_bytes(b->c)) > |
222 | btree_blocks(b)) | ||
229 | goto err; | 223 | goto err; |
230 | 224 | ||
231 | err = "bad magic"; | 225 | err = "bad magic"; |
@@ -245,39 +239,40 @@ static void bch_btree_node_read_done(struct btree *b) | |||
245 | } | 239 | } |
246 | 240 | ||
247 | err = "empty set"; | 241 | err = "empty set"; |
248 | if (i != b->sets[0].data && !i->keys) | 242 | if (i != b->keys.set[0].data && !i->keys) |
249 | goto err; | 243 | goto err; |
250 | 244 | ||
251 | bch_btree_iter_push(iter, i->start, end(i)); | 245 | bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); |
252 | 246 | ||
253 | b->written += set_blocks(i, b->c); | 247 | b->written += set_blocks(i, block_bytes(b->c)); |
254 | } | 248 | } |
255 | 249 | ||
256 | err = "corrupted btree"; | 250 | err = "corrupted btree"; |
257 | for (i = write_block(b); | 251 | for (i = write_block(b); |
258 | index(i, b) < btree_blocks(b); | 252 | bset_sector_offset(&b->keys, i) < KEY_SIZE(&b->key); |
259 | i = ((void *) i) + block_bytes(b->c)) | 253 | i = ((void *) i) + block_bytes(b->c)) |
260 | if (i->seq == b->sets[0].data->seq) | 254 | if (i->seq == b->keys.set[0].data->seq) |
261 | goto err; | 255 | goto err; |
262 | 256 | ||
263 | bch_btree_sort_and_fix_extents(b, iter); | 257 | bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); |
264 | 258 | ||
265 | i = b->sets[0].data; | 259 | i = b->keys.set[0].data; |
266 | err = "short btree key"; | 260 | err = "short btree key"; |
267 | if (b->sets[0].size && | 261 | if (b->keys.set[0].size && |
268 | bkey_cmp(&b->key, &b->sets[0].end) < 0) | 262 | bkey_cmp(&b->key, &b->keys.set[0].end) < 0) |
269 | goto err; | 263 | goto err; |
270 | 264 | ||
271 | if (b->written < btree_blocks(b)) | 265 | if (b->written < btree_blocks(b)) |
272 | bch_bset_init_next(b); | 266 | bch_bset_init_next(&b->keys, write_block(b), |
267 | bset_magic(&b->c->sb)); | ||
273 | out: | 268 | out: |
274 | mempool_free(iter, b->c->fill_iter); | 269 | mempool_free(iter, b->c->fill_iter); |
275 | return; | 270 | return; |
276 | err: | 271 | err: |
277 | set_btree_node_io_error(b); | 272 | set_btree_node_io_error(b); |
278 | bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys", | 273 | bch_cache_set_error(b->c, "%s at bucket %zu, block %u, %u keys", |
279 | err, PTR_BUCKET_NR(b->c, &b->key, 0), | 274 | err, PTR_BUCKET_NR(b->c, &b->key, 0), |
280 | index(i, b), i->keys); | 275 | bset_block_offset(b, i), i->keys); |
281 | goto out; | 276 | goto out; |
282 | } | 277 | } |
283 | 278 | ||
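For readers following the read path: bch_btree_node_read_done() walks the bsets written contiguously into the node's bucket, stops at the first set whose seq does not match the first one, validates version/size/magic/checksum as it goes, pushes each set onto a btree_iter, and finally sorts everything down to a single in-memory set. A minimal sketch of that walk follows; struct disk_set, valid() and push() are hypothetical stand-ins for the real bset header, checks and iterator, with kernel context assumed.

  /* Sketch of the on-disk bset walk (hypothetical types and helpers). */
  #include <linux/kernel.h>

  struct disk_set { u64 seq; u32 keys; /* followed by the bkeys */ };

  static unsigned walk_sets(void *buf, unsigned nblocks, unsigned block_bytes,
                            bool (*valid)(struct disk_set *),
                            void (*push)(struct disk_set *))
  {
          struct disk_set *first = buf, *i = buf;
          unsigned written = 0;

          while (written < nblocks && i->seq == first->seq) {
                  if (!valid(i))
                          break;          /* bad version/magic/checksum */
                  push(i);                /* queue this set's keys for the sort */
                  written += DIV_ROUND_UP(sizeof(*i) + i->keys * sizeof(u64),
                                          block_bytes);
                  i = (void *) ((char *) buf + (size_t) written * block_bytes);
          }
          return written;                 /* blocks of valid sets found */
  }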
@@ -287,7 +282,7 @@ static void btree_node_read_endio(struct bio *bio, int error) | |||
287 | closure_put(cl); | 282 | closure_put(cl); |
288 | } | 283 | } |
289 | 284 | ||
290 | void bch_btree_node_read(struct btree *b) | 285 | static void bch_btree_node_read(struct btree *b) |
291 | { | 286 | { |
292 | uint64_t start_time = local_clock(); | 287 | uint64_t start_time = local_clock(); |
293 | struct closure cl; | 288 | struct closure cl; |
@@ -303,7 +298,7 @@ void bch_btree_node_read(struct btree *b) | |||
303 | bio->bi_end_io = btree_node_read_endio; | 298 | bio->bi_end_io = btree_node_read_endio; |
304 | bio->bi_private = &cl; | 299 | bio->bi_private = &cl; |
305 | 300 | ||
306 | bch_bio_map(bio, b->sets[0].data); | 301 | bch_bio_map(bio, b->keys.set[0].data); |
307 | 302 | ||
308 | bch_submit_bbio(bio, b->c, &b->key, 0); | 303 | bch_submit_bbio(bio, b->c, &b->key, 0); |
309 | closure_sync(&cl); | 304 | closure_sync(&cl); |
@@ -340,9 +335,16 @@ static void btree_complete_write(struct btree *b, struct btree_write *w) | |||
340 | w->journal = NULL; | 335 | w->journal = NULL; |
341 | } | 336 | } |
342 | 337 | ||
338 | static void btree_node_write_unlock(struct closure *cl) | ||
339 | { | ||
340 | struct btree *b = container_of(cl, struct btree, io); | ||
341 | |||
342 | up(&b->io_mutex); | ||
343 | } | ||
344 | |||
343 | static void __btree_node_write_done(struct closure *cl) | 345 | static void __btree_node_write_done(struct closure *cl) |
344 | { | 346 | { |
345 | struct btree *b = container_of(cl, struct btree, io.cl); | 347 | struct btree *b = container_of(cl, struct btree, io); |
346 | struct btree_write *w = btree_prev_write(b); | 348 | struct btree_write *w = btree_prev_write(b); |
347 | 349 | ||
348 | bch_bbio_free(b->bio, b->c); | 350 | bch_bbio_free(b->bio, b->c); |
@@ -353,12 +355,12 @@ static void __btree_node_write_done(struct closure *cl) | |||
353 | queue_delayed_work(btree_io_wq, &b->work, | 355 | queue_delayed_work(btree_io_wq, &b->work, |
354 | msecs_to_jiffies(30000)); | 356 | msecs_to_jiffies(30000)); |
355 | 357 | ||
356 | closure_return(cl); | 358 | closure_return_with_destructor(cl, btree_node_write_unlock); |
357 | } | 359 | } |
358 | 360 | ||
359 | static void btree_node_write_done(struct closure *cl) | 361 | static void btree_node_write_done(struct closure *cl) |
360 | { | 362 | { |
361 | struct btree *b = container_of(cl, struct btree, io.cl); | 363 | struct btree *b = container_of(cl, struct btree, io); |
362 | struct bio_vec *bv; | 364 | struct bio_vec *bv; |
363 | int n; | 365 | int n; |
364 | 366 | ||
@@ -371,7 +373,7 @@ static void btree_node_write_done(struct closure *cl) | |||
371 | static void btree_node_write_endio(struct bio *bio, int error) | 373 | static void btree_node_write_endio(struct bio *bio, int error) |
372 | { | 374 | { |
373 | struct closure *cl = bio->bi_private; | 375 | struct closure *cl = bio->bi_private; |
374 | struct btree *b = container_of(cl, struct btree, io.cl); | 376 | struct btree *b = container_of(cl, struct btree, io); |
375 | 377 | ||
376 | if (error) | 378 | if (error) |
377 | set_btree_node_io_error(b); | 379 | set_btree_node_io_error(b); |
@@ -382,8 +384,8 @@ static void btree_node_write_endio(struct bio *bio, int error) | |||
382 | 384 | ||
383 | static void do_btree_node_write(struct btree *b) | 385 | static void do_btree_node_write(struct btree *b) |
384 | { | 386 | { |
385 | struct closure *cl = &b->io.cl; | 387 | struct closure *cl = &b->io; |
386 | struct bset *i = b->sets[b->nsets].data; | 388 | struct bset *i = btree_bset_last(b); |
387 | BKEY_PADDED(key) k; | 389 | BKEY_PADDED(key) k; |
388 | 390 | ||
389 | i->version = BCACHE_BSET_VERSION; | 391 | i->version = BCACHE_BSET_VERSION; |
@@ -395,7 +397,7 @@ static void do_btree_node_write(struct btree *b) | |||
395 | b->bio->bi_end_io = btree_node_write_endio; | 397 | b->bio->bi_end_io = btree_node_write_endio; |
396 | b->bio->bi_private = cl; | 398 | b->bio->bi_private = cl; |
397 | b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; | 399 | b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; |
398 | b->bio->bi_iter.bi_size = set_blocks(i, b->c) * block_bytes(b->c); | 400 | b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c)); |
399 | bch_bio_map(b->bio, i); | 401 | bch_bio_map(b->bio, i); |
400 | 402 | ||
401 | /* | 403 | /* |
@@ -414,7 +416,8 @@ static void do_btree_node_write(struct btree *b) | |||
414 | */ | 416 | */ |
415 | 417 | ||
416 | bkey_copy(&k.key, &b->key); | 418 | bkey_copy(&k.key, &b->key); |
417 | SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); | 419 | SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + |
420 | bset_sector_offset(&b->keys, i)); | ||
418 | 421 | ||
419 | if (!bio_alloc_pages(b->bio, GFP_NOIO)) { | 422 | if (!bio_alloc_pages(b->bio, GFP_NOIO)) { |
420 | int j; | 423 | int j; |
@@ -435,40 +438,54 @@ static void do_btree_node_write(struct btree *b) | |||
435 | bch_submit_bbio(b->bio, b->c, &k.key, 0); | 438 | bch_submit_bbio(b->bio, b->c, &k.key, 0); |
436 | 439 | ||
437 | closure_sync(cl); | 440 | closure_sync(cl); |
438 | __btree_node_write_done(cl); | 441 | continue_at_nobarrier(cl, __btree_node_write_done, NULL); |
439 | } | 442 | } |
440 | } | 443 | } |
441 | 444 | ||
442 | void bch_btree_node_write(struct btree *b, struct closure *parent) | 445 | void bch_btree_node_write(struct btree *b, struct closure *parent) |
443 | { | 446 | { |
444 | struct bset *i = b->sets[b->nsets].data; | 447 | struct bset *i = btree_bset_last(b); |
445 | 448 | ||
446 | trace_bcache_btree_write(b); | 449 | trace_bcache_btree_write(b); |
447 | 450 | ||
448 | BUG_ON(current->bio_list); | 451 | BUG_ON(current->bio_list); |
449 | BUG_ON(b->written >= btree_blocks(b)); | 452 | BUG_ON(b->written >= btree_blocks(b)); |
450 | BUG_ON(b->written && !i->keys); | 453 | BUG_ON(b->written && !i->keys); |
451 | BUG_ON(b->sets->data->seq != i->seq); | 454 | BUG_ON(btree_bset_first(b)->seq != i->seq); |
452 | bch_check_keys(b, "writing"); | 455 | bch_check_keys(&b->keys, "writing"); |
453 | 456 | ||
454 | cancel_delayed_work(&b->work); | 457 | cancel_delayed_work(&b->work); |
455 | 458 | ||
456 | /* If caller isn't waiting for write, parent refcount is cache set */ | 459 | /* If caller isn't waiting for write, parent refcount is cache set */ |
457 | closure_lock(&b->io, parent ?: &b->c->cl); | 460 | down(&b->io_mutex); |
461 | closure_init(&b->io, parent ?: &b->c->cl); | ||
458 | 462 | ||
459 | clear_bit(BTREE_NODE_dirty, &b->flags); | 463 | clear_bit(BTREE_NODE_dirty, &b->flags); |
460 | change_bit(BTREE_NODE_write_idx, &b->flags); | 464 | change_bit(BTREE_NODE_write_idx, &b->flags); |
461 | 465 | ||
462 | do_btree_node_write(b); | 466 | do_btree_node_write(b); |
463 | 467 | ||
464 | b->written += set_blocks(i, b->c); | 468 | atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size, |
465 | atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, | ||
466 | &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); | 469 | &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); |
467 | 470 | ||
468 | bch_btree_sort_lazy(b); | 471 | b->written += set_blocks(i, block_bytes(b->c)); |
472 | |||
473 | /* If not a leaf node, always sort */ | ||
474 | if (b->level && b->keys.nsets) | ||
475 | bch_btree_sort(&b->keys, &b->c->sort); | ||
476 | else | ||
477 | bch_btree_sort_lazy(&b->keys, &b->c->sort); | ||
478 | |||
479 | /* | ||
480 | * do verify if there was more than one set initially (i.e. we did a | ||
481 | * sort) and we sorted down to a single set: | ||
482 | */ | ||
483 | if (i != b->keys.set->data && !b->keys.nsets) | ||
484 | bch_btree_verify(b); | ||
469 | 485 | ||
470 | if (b->written < btree_blocks(b)) | 486 | if (b->written < btree_blocks(b)) |
471 | bch_bset_init_next(b); | 487 | bch_bset_init_next(&b->keys, write_block(b), |
488 | bset_magic(&b->c->sb)); | ||
472 | } | 489 | } |
473 | 490 | ||
474 | static void bch_btree_node_write_sync(struct btree *b) | 491 | static void bch_btree_node_write_sync(struct btree *b) |
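Two changes in the write path are worth calling out. First, the old closure_lock(&b->io, ...) is replaced by an ordinary semaphore: the writer does down(&b->io_mutex), initializes the closure, and the closure destructor (btree_node_write_unlock()) does the matching up() once the I/O finishes, which is what lets mca_reap() below probe for an in-flight write with down_trylock(). Second, after the write, interior nodes are always fully sorted while leaves keep the cheaper lazy sort. A rough sketch of the ownership pattern, with an illustrative struct node and write_done() standing in for the closure machinery:

  /* Sketch: one writer at a time, release happens from the completion path. */
  #include <linux/semaphore.h>

  struct node {
          struct semaphore io_mutex;      /* sema_init(&io_mutex, 1) */
          /* ... */
  };

  static void write_done(struct node *b)  /* runs when the I/O completes */
  {
          up(&b->io_mutex);               /* allow the next writer in */
  }

  static void write_node(struct node *b)
  {
          down(&b->io_mutex);             /* wait for any prior write */
          /* ... build and submit the bio; write_done() fires on completion */
  }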
@@ -493,7 +510,7 @@ static void btree_node_write_work(struct work_struct *w) | |||
493 | 510 | ||
494 | static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) | 511 | static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) |
495 | { | 512 | { |
496 | struct bset *i = b->sets[b->nsets].data; | 513 | struct bset *i = btree_bset_last(b); |
497 | struct btree_write *w = btree_current_write(b); | 514 | struct btree_write *w = btree_current_write(b); |
498 | 515 | ||
499 | BUG_ON(!b->written); | 516 | BUG_ON(!b->written); |
@@ -528,24 +545,6 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) | |||
528 | * mca -> memory cache | 545 | * mca -> memory cache |
529 | */ | 546 | */ |
530 | 547 | ||
531 | static void mca_reinit(struct btree *b) | ||
532 | { | ||
533 | unsigned i; | ||
534 | |||
535 | b->flags = 0; | ||
536 | b->written = 0; | ||
537 | b->nsets = 0; | ||
538 | |||
539 | for (i = 0; i < MAX_BSETS; i++) | ||
540 | b->sets[i].size = 0; | ||
541 | /* | ||
542 | * Second loop starts at 1 because b->sets[0]->data is the memory we | ||
543 | * allocated | ||
544 | */ | ||
545 | for (i = 1; i < MAX_BSETS; i++) | ||
546 | b->sets[i].data = NULL; | ||
547 | } | ||
548 | |||
549 | #define mca_reserve(c) (((c->root && c->root->level) \ | 548 | #define mca_reserve(c) (((c->root && c->root->level) \ |
550 | ? c->root->level : 1) * 8 + 16) | 549 | ? c->root->level : 1) * 8 + 16) |
551 | #define mca_can_free(c) \ | 550 | #define mca_can_free(c) \ |
@@ -553,28 +552,12 @@ static void mca_reinit(struct btree *b) | |||
553 | 552 | ||
554 | static void mca_data_free(struct btree *b) | 553 | static void mca_data_free(struct btree *b) |
555 | { | 554 | { |
556 | struct bset_tree *t = b->sets; | 555 | BUG_ON(b->io_mutex.count != 1); |
557 | BUG_ON(!closure_is_unlocked(&b->io.cl)); | ||
558 | 556 | ||
559 | if (bset_prev_bytes(b) < PAGE_SIZE) | 557 | bch_btree_keys_free(&b->keys); |
560 | kfree(t->prev); | ||
561 | else | ||
562 | free_pages((unsigned long) t->prev, | ||
563 | get_order(bset_prev_bytes(b))); | ||
564 | 558 | ||
565 | if (bset_tree_bytes(b) < PAGE_SIZE) | ||
566 | kfree(t->tree); | ||
567 | else | ||
568 | free_pages((unsigned long) t->tree, | ||
569 | get_order(bset_tree_bytes(b))); | ||
570 | |||
571 | free_pages((unsigned long) t->data, b->page_order); | ||
572 | |||
573 | t->prev = NULL; | ||
574 | t->tree = NULL; | ||
575 | t->data = NULL; | ||
576 | list_move(&b->list, &b->c->btree_cache_freed); | ||
577 | b->c->bucket_cache_used--; | 559 | b->c->bucket_cache_used--; |
560 | list_move(&b->list, &b->c->btree_cache_freed); | ||
578 | } | 561 | } |
579 | 562 | ||
580 | static void mca_bucket_free(struct btree *b) | 563 | static void mca_bucket_free(struct btree *b) |
@@ -593,34 +576,16 @@ static unsigned btree_order(struct bkey *k) | |||
593 | 576 | ||
594 | static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) | 577 | static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) |
595 | { | 578 | { |
596 | struct bset_tree *t = b->sets; | 579 | if (!bch_btree_keys_alloc(&b->keys, |
597 | BUG_ON(t->data); | 580 | max_t(unsigned, |
598 | 581 | ilog2(b->c->btree_pages), | |
599 | b->page_order = max_t(unsigned, | 582 | btree_order(k)), |
600 | ilog2(b->c->btree_pages), | 583 | gfp)) { |
601 | btree_order(k)); | 584 | b->c->bucket_cache_used++; |
602 | 585 | list_move(&b->list, &b->c->btree_cache); | |
603 | t->data = (void *) __get_free_pages(gfp, b->page_order); | 586 | } else { |
604 | if (!t->data) | 587 | list_move(&b->list, &b->c->btree_cache_freed); |
605 | goto err; | 588 | } |
606 | |||
607 | t->tree = bset_tree_bytes(b) < PAGE_SIZE | ||
608 | ? kmalloc(bset_tree_bytes(b), gfp) | ||
609 | : (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b))); | ||
610 | if (!t->tree) | ||
611 | goto err; | ||
612 | |||
613 | t->prev = bset_prev_bytes(b) < PAGE_SIZE | ||
614 | ? kmalloc(bset_prev_bytes(b), gfp) | ||
615 | : (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b))); | ||
616 | if (!t->prev) | ||
617 | goto err; | ||
618 | |||
619 | list_move(&b->list, &b->c->btree_cache); | ||
620 | b->c->bucket_cache_used++; | ||
621 | return; | ||
622 | err: | ||
623 | mca_data_free(b); | ||
624 | } | 589 | } |
625 | 590 | ||
626 | static struct btree *mca_bucket_alloc(struct cache_set *c, | 591 | static struct btree *mca_bucket_alloc(struct cache_set *c, |
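The per-node allocations that used to live here (the bset data pages plus the tree and prev lookup arrays, each falling back from kmalloc() to page allocation once the size crosses PAGE_SIZE) are folded into bch_btree_keys_alloc(); this function now only decides which cache list the node lands on afterwards. The removed code repeated the following size-dependent pattern three times, shown as a sketch with a hypothetical alloc_sized() helper:

  /* Sketch of the size-dependent allocation the old code used three times. */
  #include <linux/slab.h>
  #include <linux/gfp.h>

  static void *alloc_sized(size_t bytes, gfp_t gfp)
  {
          return bytes < PAGE_SIZE
                  ? kmalloc(bytes, gfp)
                  : (void *) __get_free_pages(gfp, get_order(bytes));
  }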
@@ -635,7 +600,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, | |||
635 | INIT_LIST_HEAD(&b->list); | 600 | INIT_LIST_HEAD(&b->list); |
636 | INIT_DELAYED_WORK(&b->work, btree_node_write_work); | 601 | INIT_DELAYED_WORK(&b->work, btree_node_write_work); |
637 | b->c = c; | 602 | b->c = c; |
638 | closure_init_unlocked(&b->io); | 603 | sema_init(&b->io_mutex, 1); |
639 | 604 | ||
640 | mca_data_alloc(b, k, gfp); | 605 | mca_data_alloc(b, k, gfp); |
641 | return b; | 606 | return b; |
@@ -651,24 +616,31 @@ static int mca_reap(struct btree *b, unsigned min_order, bool flush) | |||
651 | if (!down_write_trylock(&b->lock)) | 616 | if (!down_write_trylock(&b->lock)) |
652 | return -ENOMEM; | 617 | return -ENOMEM; |
653 | 618 | ||
654 | BUG_ON(btree_node_dirty(b) && !b->sets[0].data); | 619 | BUG_ON(btree_node_dirty(b) && !b->keys.set[0].data); |
655 | 620 | ||
656 | if (b->page_order < min_order || | 621 | if (b->keys.page_order < min_order) |
657 | (!flush && | 622 | goto out_unlock; |
658 | (btree_node_dirty(b) || | 623 | |
659 | atomic_read(&b->io.cl.remaining) != -1))) { | 624 | if (!flush) { |
660 | rw_unlock(true, b); | 625 | if (btree_node_dirty(b)) |
661 | return -ENOMEM; | 626 | goto out_unlock; |
627 | |||
628 | if (down_trylock(&b->io_mutex)) | ||
629 | goto out_unlock; | ||
630 | up(&b->io_mutex); | ||
662 | } | 631 | } |
663 | 632 | ||
664 | if (btree_node_dirty(b)) | 633 | if (btree_node_dirty(b)) |
665 | bch_btree_node_write_sync(b); | 634 | bch_btree_node_write_sync(b); |
666 | 635 | ||
667 | /* wait for any in flight btree write */ | 636 | /* wait for any in flight btree write */ |
668 | closure_wait_event(&b->io.wait, &cl, | 637 | down(&b->io_mutex); |
669 | atomic_read(&b->io.cl.remaining) == -1); | 638 | up(&b->io_mutex); |
670 | 639 | ||
671 | return 0; | 640 | return 0; |
641 | out_unlock: | ||
642 | rw_unlock(true, b); | ||
643 | return -ENOMEM; | ||
672 | } | 644 | } |
673 | 645 | ||
674 | static unsigned long bch_mca_scan(struct shrinker *shrink, | 646 | static unsigned long bch_mca_scan(struct shrinker *shrink, |
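With the io_mutex in place, mca_reap() can test for an in-flight write without sleeping: a failed down_trylock() means a write is still outstanding and the node is skipped in the non-flush case, while the flush case simply does down()/up() to wait it out. The idiom, sketched outside of bcache:

  /* Sketch: probe vs. wait on a single-writer semaphore. */
  #include <linux/semaphore.h>

  static bool io_in_flight(struct semaphore *io_mutex)
  {
          if (down_trylock(io_mutex))
                  return true;            /* held by a writer right now */
          up(io_mutex);                   /* we only wanted to peek */
          return false;
  }

  static void wait_for_io(struct semaphore *io_mutex)
  {
          down(io_mutex);                 /* blocks until the write finishes */
          up(io_mutex);
  }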
@@ -714,14 +686,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, | |||
714 | } | 686 | } |
715 | } | 687 | } |
716 | 688 | ||
717 | /* | ||
718 | * Can happen right when we first start up, before we've read in any | ||
719 | * btree nodes | ||
720 | */ | ||
721 | if (list_empty(&c->btree_cache)) | ||
722 | goto out; | ||
723 | |||
724 | for (i = 0; (nr--) && i < c->bucket_cache_used; i++) { | 689 | for (i = 0; (nr--) && i < c->bucket_cache_used; i++) { |
690 | if (list_empty(&c->btree_cache)) | ||
691 | goto out; | ||
692 | |||
725 | b = list_first_entry(&c->btree_cache, struct btree, list); | 693 | b = list_first_entry(&c->btree_cache, struct btree, list); |
726 | list_rotate_left(&c->btree_cache); | 694 | list_rotate_left(&c->btree_cache); |
727 | 695 | ||
@@ -767,6 +735,8 @@ void bch_btree_cache_free(struct cache_set *c) | |||
767 | #ifdef CONFIG_BCACHE_DEBUG | 735 | #ifdef CONFIG_BCACHE_DEBUG |
768 | if (c->verify_data) | 736 | if (c->verify_data) |
769 | list_move(&c->verify_data->list, &c->btree_cache); | 737 | list_move(&c->verify_data->list, &c->btree_cache); |
738 | |||
739 | free_pages((unsigned long) c->verify_ondisk, ilog2(bucket_pages(c))); | ||
770 | #endif | 740 | #endif |
771 | 741 | ||
772 | list_splice(&c->btree_cache_freeable, | 742 | list_splice(&c->btree_cache_freeable, |
@@ -807,10 +777,13 @@ int bch_btree_cache_alloc(struct cache_set *c) | |||
807 | #ifdef CONFIG_BCACHE_DEBUG | 777 | #ifdef CONFIG_BCACHE_DEBUG |
808 | mutex_init(&c->verify_lock); | 778 | mutex_init(&c->verify_lock); |
809 | 779 | ||
780 | c->verify_ondisk = (void *) | ||
781 | __get_free_pages(GFP_KERNEL, ilog2(bucket_pages(c))); | ||
782 | |||
810 | c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); | 783 | c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL); |
811 | 784 | ||
812 | if (c->verify_data && | 785 | if (c->verify_data && |
813 | c->verify_data->sets[0].data) | 786 | c->verify_data->keys.set->data) |
814 | list_del_init(&c->verify_data->list); | 787 | list_del_init(&c->verify_data->list); |
815 | else | 788 | else |
816 | c->verify_data = NULL; | 789 | c->verify_data = NULL; |
@@ -908,7 +881,7 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level) | |||
908 | list_for_each_entry(b, &c->btree_cache_freed, list) | 881 | list_for_each_entry(b, &c->btree_cache_freed, list) |
909 | if (!mca_reap(b, 0, false)) { | 882 | if (!mca_reap(b, 0, false)) { |
910 | mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO); | 883 | mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO); |
911 | if (!b->sets[0].data) | 884 | if (!b->keys.set[0].data) |
912 | goto err; | 885 | goto err; |
913 | else | 886 | else |
914 | goto out; | 887 | goto out; |
@@ -919,10 +892,10 @@ static struct btree *mca_alloc(struct cache_set *c, struct bkey *k, int level) | |||
919 | goto err; | 892 | goto err; |
920 | 893 | ||
921 | BUG_ON(!down_write_trylock(&b->lock)); | 894 | BUG_ON(!down_write_trylock(&b->lock)); |
922 | if (!b->sets->data) | 895 | if (!b->keys.set->data) |
923 | goto err; | 896 | goto err; |
924 | out: | 897 | out: |
925 | BUG_ON(!closure_is_unlocked(&b->io.cl)); | 898 | BUG_ON(b->io_mutex.count != 1); |
926 | 899 | ||
927 | bkey_copy(&b->key, k); | 900 | bkey_copy(&b->key, k); |
928 | list_move(&b->list, &c->btree_cache); | 901 | list_move(&b->list, &c->btree_cache); |
@@ -930,10 +903,17 @@ out: | |||
930 | hlist_add_head_rcu(&b->hash, mca_hash(c, k)); | 903 | hlist_add_head_rcu(&b->hash, mca_hash(c, k)); |
931 | 904 | ||
932 | lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); | 905 | lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); |
933 | b->level = level; | ||
934 | b->parent = (void *) ~0UL; | 906 | b->parent = (void *) ~0UL; |
907 | b->flags = 0; | ||
908 | b->written = 0; | ||
909 | b->level = level; | ||
935 | 910 | ||
936 | mca_reinit(b); | 911 | if (!b->level) |
912 | bch_btree_keys_init(&b->keys, &bch_extent_keys_ops, | ||
913 | &b->c->expensive_debug_checks); | ||
914 | else | ||
915 | bch_btree_keys_init(&b->keys, &bch_btree_keys_ops, | ||
916 | &b->c->expensive_debug_checks); | ||
937 | 917 | ||
938 | return b; | 918 | return b; |
939 | err: | 919 | err: |
@@ -994,13 +974,13 @@ retry: | |||
994 | 974 | ||
995 | b->accessed = 1; | 975 | b->accessed = 1; |
996 | 976 | ||
997 | for (; i <= b->nsets && b->sets[i].size; i++) { | 977 | for (; i <= b->keys.nsets && b->keys.set[i].size; i++) { |
998 | prefetch(b->sets[i].tree); | 978 | prefetch(b->keys.set[i].tree); |
999 | prefetch(b->sets[i].data); | 979 | prefetch(b->keys.set[i].data); |
1000 | } | 980 | } |
1001 | 981 | ||
1002 | for (; i <= b->nsets; i++) | 982 | for (; i <= b->keys.nsets; i++) |
1003 | prefetch(b->sets[i].data); | 983 | prefetch(b->keys.set[i].data); |
1004 | 984 | ||
1005 | if (btree_node_io_error(b)) { | 985 | if (btree_node_io_error(b)) { |
1006 | rw_unlock(write, b); | 986 | rw_unlock(write, b); |
@@ -1063,7 +1043,7 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait) | |||
1063 | 1043 | ||
1064 | mutex_lock(&c->bucket_lock); | 1044 | mutex_lock(&c->bucket_lock); |
1065 | retry: | 1045 | retry: |
1066 | if (__bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, wait)) | 1046 | if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait)) |
1067 | goto err; | 1047 | goto err; |
1068 | 1048 | ||
1069 | bkey_put(c, &k.key); | 1049 | bkey_put(c, &k.key); |
@@ -1080,7 +1060,7 @@ retry: | |||
1080 | } | 1060 | } |
1081 | 1061 | ||
1082 | b->accessed = 1; | 1062 | b->accessed = 1; |
1083 | bch_bset_init_next(b); | 1063 | bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb)); |
1084 | 1064 | ||
1085 | mutex_unlock(&c->bucket_lock); | 1065 | mutex_unlock(&c->bucket_lock); |
1086 | 1066 | ||
@@ -1098,8 +1078,10 @@ err: | |||
1098 | static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) | 1078 | static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait) |
1099 | { | 1079 | { |
1100 | struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); | 1080 | struct btree *n = bch_btree_node_alloc(b->c, b->level, wait); |
1101 | if (!IS_ERR_OR_NULL(n)) | 1081 | if (!IS_ERR_OR_NULL(n)) { |
1102 | bch_btree_sort_into(b, n); | 1082 | bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); |
1083 | bkey_copy_key(&n->key, &b->key); | ||
1084 | } | ||
1103 | 1085 | ||
1104 | return n; | 1086 | return n; |
1105 | } | 1087 | } |
@@ -1120,6 +1102,28 @@ static void make_btree_freeing_key(struct btree *b, struct bkey *k) | |||
1120 | atomic_inc(&b->c->prio_blocked); | 1102 | atomic_inc(&b->c->prio_blocked); |
1121 | } | 1103 | } |
1122 | 1104 | ||
1105 | static int btree_check_reserve(struct btree *b, struct btree_op *op) | ||
1106 | { | ||
1107 | struct cache_set *c = b->c; | ||
1108 | struct cache *ca; | ||
1109 | unsigned i, reserve = c->root->level * 2 + 1; | ||
1110 | int ret = 0; | ||
1111 | |||
1112 | mutex_lock(&c->bucket_lock); | ||
1113 | |||
1114 | for_each_cache(ca, c, i) | ||
1115 | if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) { | ||
1116 | if (op) | ||
1117 | prepare_to_wait(&c->bucket_wait, &op->wait, | ||
1118 | TASK_UNINTERRUPTIBLE); | ||
1119 | ret = -EINTR; | ||
1120 | break; | ||
1121 | } | ||
1122 | |||
1123 | mutex_unlock(&c->bucket_lock); | ||
1124 | return ret; | ||
1125 | } | ||
1126 | |||
1123 | /* Garbage collection */ | 1127 | /* Garbage collection */ |
1124 | 1128 | ||
1125 | uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) | 1129 | uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) |
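btree_check_reserve() is the other half of the -EINTR handling added to the btree() macro at the top of this patch: if any cache's RESERVE_BTREE freelist holds fewer than 2 * root->level + 1 buckets, the caller is parked on bucket_wait with prepare_to_wait() and gets -EINTR back; the macro then calls schedule(), retries, and unconditionally runs finish_wait() on the way out. The underlying kernel wait pattern, sketched with illustrative have_reserve()/work() callbacks rather than the bcache calls:

  /* Sketch of the prepare_to_wait()/schedule()/finish_wait() retry pattern. */
  #include <linux/wait.h>
  #include <linux/sched.h>
  #include <linux/errno.h>

  static int do_with_reserve(wait_queue_head_t *wq, wait_queue_t *wait,
                             bool (*have_reserve)(void), int (*work)(void))
  {
          int ret;

          do {
                  if (!have_reserve()) {
                          prepare_to_wait(wq, wait, TASK_UNINTERRUPTIBLE);
                          ret = -EINTR;   /* ask the caller to retry */
                  } else {
                          ret = work();
                  }

                  if (ret == -EINTR)
                          schedule();     /* sleep until the queue is woken */
          } while (ret == -EINTR);

          finish_wait(wq, wait);          /* always undo prepare_to_wait() */
          return ret;
  }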
@@ -1183,11 +1187,11 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) | |||
1183 | 1187 | ||
1184 | gc->nodes++; | 1188 | gc->nodes++; |
1185 | 1189 | ||
1186 | for_each_key_filter(b, k, &iter, bch_ptr_invalid) { | 1190 | for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { |
1187 | stale = max(stale, btree_mark_key(b, k)); | 1191 | stale = max(stale, btree_mark_key(b, k)); |
1188 | keys++; | 1192 | keys++; |
1189 | 1193 | ||
1190 | if (bch_ptr_bad(b, k)) | 1194 | if (bch_ptr_bad(&b->keys, k)) |
1191 | continue; | 1195 | continue; |
1192 | 1196 | ||
1193 | gc->key_bytes += bkey_u64s(k); | 1197 | gc->key_bytes += bkey_u64s(k); |
@@ -1197,9 +1201,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) | |||
1197 | gc->data += KEY_SIZE(k); | 1201 | gc->data += KEY_SIZE(k); |
1198 | } | 1202 | } |
1199 | 1203 | ||
1200 | for (t = b->sets; t <= &b->sets[b->nsets]; t++) | 1204 | for (t = b->keys.set; t <= &b->keys.set[b->keys.nsets]; t++) |
1201 | btree_bug_on(t->size && | 1205 | btree_bug_on(t->size && |
1202 | bset_written(b, t) && | 1206 | bset_written(&b->keys, t) && |
1203 | bkey_cmp(&b->key, &t->end) < 0, | 1207 | bkey_cmp(&b->key, &t->end) < 0, |
1204 | b, "found short btree key in gc"); | 1208 | b, "found short btree key in gc"); |
1205 | 1209 | ||
@@ -1243,7 +1247,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1243 | blocks = btree_default_blocks(b->c) * 2 / 3; | 1247 | blocks = btree_default_blocks(b->c) * 2 / 3; |
1244 | 1248 | ||
1245 | if (nodes < 2 || | 1249 | if (nodes < 2 || |
1246 | __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1)) | 1250 | __set_blocks(b->keys.set[0].data, keys, |
1251 | block_bytes(b->c)) > blocks * (nodes - 1)) | ||
1247 | return 0; | 1252 | return 0; |
1248 | 1253 | ||
1249 | for (i = 0; i < nodes; i++) { | 1254 | for (i = 0; i < nodes; i++) { |
@@ -1253,18 +1258,19 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1253 | } | 1258 | } |
1254 | 1259 | ||
1255 | for (i = nodes - 1; i > 0; --i) { | 1260 | for (i = nodes - 1; i > 0; --i) { |
1256 | struct bset *n1 = new_nodes[i]->sets->data; | 1261 | struct bset *n1 = btree_bset_first(new_nodes[i]); |
1257 | struct bset *n2 = new_nodes[i - 1]->sets->data; | 1262 | struct bset *n2 = btree_bset_first(new_nodes[i - 1]); |
1258 | struct bkey *k, *last = NULL; | 1263 | struct bkey *k, *last = NULL; |
1259 | 1264 | ||
1260 | keys = 0; | 1265 | keys = 0; |
1261 | 1266 | ||
1262 | if (i > 1) { | 1267 | if (i > 1) { |
1263 | for (k = n2->start; | 1268 | for (k = n2->start; |
1264 | k < end(n2); | 1269 | k < bset_bkey_last(n2); |
1265 | k = bkey_next(k)) { | 1270 | k = bkey_next(k)) { |
1266 | if (__set_blocks(n1, n1->keys + keys + | 1271 | if (__set_blocks(n1, n1->keys + keys + |
1267 | bkey_u64s(k), b->c) > blocks) | 1272 | bkey_u64s(k), |
1273 | block_bytes(b->c)) > blocks) | ||
1268 | break; | 1274 | break; |
1269 | 1275 | ||
1270 | last = k; | 1276 | last = k; |
@@ -1280,7 +1286,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1280 | * though) | 1286 | * though) |
1281 | */ | 1287 | */ |
1282 | if (__set_blocks(n1, n1->keys + n2->keys, | 1288 | if (__set_blocks(n1, n1->keys + n2->keys, |
1283 | b->c) > btree_blocks(new_nodes[i])) | 1289 | block_bytes(b->c)) > |
1290 | btree_blocks(new_nodes[i])) | ||
1284 | goto out_nocoalesce; | 1291 | goto out_nocoalesce; |
1285 | 1292 | ||
1286 | keys = n2->keys; | 1293 | keys = n2->keys; |
@@ -1288,27 +1295,28 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1288 | last = &r->b->key; | 1295 | last = &r->b->key; |
1289 | } | 1296 | } |
1290 | 1297 | ||
1291 | BUG_ON(__set_blocks(n1, n1->keys + keys, | 1298 | BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) > |
1292 | b->c) > btree_blocks(new_nodes[i])); | 1299 | btree_blocks(new_nodes[i])); |
1293 | 1300 | ||
1294 | if (last) | 1301 | if (last) |
1295 | bkey_copy_key(&new_nodes[i]->key, last); | 1302 | bkey_copy_key(&new_nodes[i]->key, last); |
1296 | 1303 | ||
1297 | memcpy(end(n1), | 1304 | memcpy(bset_bkey_last(n1), |
1298 | n2->start, | 1305 | n2->start, |
1299 | (void *) node(n2, keys) - (void *) n2->start); | 1306 | (void *) bset_bkey_idx(n2, keys) - (void *) n2->start); |
1300 | 1307 | ||
1301 | n1->keys += keys; | 1308 | n1->keys += keys; |
1302 | r[i].keys = n1->keys; | 1309 | r[i].keys = n1->keys; |
1303 | 1310 | ||
1304 | memmove(n2->start, | 1311 | memmove(n2->start, |
1305 | node(n2, keys), | 1312 | bset_bkey_idx(n2, keys), |
1306 | (void *) end(n2) - (void *) node(n2, keys)); | 1313 | (void *) bset_bkey_last(n2) - |
1314 | (void *) bset_bkey_idx(n2, keys)); | ||
1307 | 1315 | ||
1308 | n2->keys -= keys; | 1316 | n2->keys -= keys; |
1309 | 1317 | ||
1310 | if (bch_keylist_realloc(keylist, | 1318 | if (__bch_keylist_realloc(keylist, |
1311 | KEY_PTRS(&new_nodes[i]->key), b->c)) | 1319 | bkey_u64s(&new_nodes[i]->key))) |
1312 | goto out_nocoalesce; | 1320 | goto out_nocoalesce; |
1313 | 1321 | ||
1314 | bch_btree_node_write(new_nodes[i], &cl); | 1322 | bch_btree_node_write(new_nodes[i], &cl); |
@@ -1316,7 +1324,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1316 | } | 1324 | } |
1317 | 1325 | ||
1318 | for (i = 0; i < nodes; i++) { | 1326 | for (i = 0; i < nodes; i++) { |
1319 | if (bch_keylist_realloc(keylist, KEY_PTRS(&r[i].b->key), b->c)) | 1327 | if (__bch_keylist_realloc(keylist, bkey_u64s(&r[i].b->key))) |
1320 | goto out_nocoalesce; | 1328 | goto out_nocoalesce; |
1321 | 1329 | ||
1322 | make_btree_freeing_key(r[i].b, keylist->top); | 1330 | make_btree_freeing_key(r[i].b, keylist->top); |
@@ -1324,7 +1332,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, | |||
1324 | } | 1332 | } |
1325 | 1333 | ||
1326 | /* We emptied out this node */ | 1334 | /* We emptied out this node */ |
1327 | BUG_ON(new_nodes[0]->sets->data->keys); | 1335 | BUG_ON(btree_bset_first(new_nodes[0])->keys); |
1328 | btree_node_free(new_nodes[0]); | 1336 | btree_node_free(new_nodes[0]); |
1329 | rw_unlock(true, new_nodes[0]); | 1337 | rw_unlock(true, new_nodes[0]); |
1330 | 1338 | ||
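The coalesce path above shifts keys between neighbouring new nodes: it appends the first `keys` u64s worth of bkeys from n2 onto the end of n1 with memcpy(), then closes the hole in n2 with memmove(), as long as the result still fits in the target node. Stripped of the block accounting, the move is just the following; struct bset, bset_bkey_last(), bset_bkey_idx() and the u64-counted ->keys field are the bcache bset helpers this diff already uses, assumed available from bset.h.

  /* Sketch: move the first `keys` u64s of bkeys from src to the end of dst. */
  static void move_keys(struct bset *dst, struct bset *src, unsigned keys)
  {
          memcpy(bset_bkey_last(dst), src->start,
                 (void *) bset_bkey_idx(src, keys) - (void *) src->start);
          dst->keys += keys;

          memmove(src->start, bset_bkey_idx(src, keys),
                  (void *) bset_bkey_last(src) -
                  (void *) bset_bkey_idx(src, keys));
          src->keys -= keys;
  }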
@@ -1370,7 +1378,7 @@ static unsigned btree_gc_count_keys(struct btree *b) | |||
1370 | struct btree_iter iter; | 1378 | struct btree_iter iter; |
1371 | unsigned ret = 0; | 1379 | unsigned ret = 0; |
1372 | 1380 | ||
1373 | for_each_key_filter(b, k, &iter, bch_ptr_bad) | 1381 | for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) |
1374 | ret += bkey_u64s(k); | 1382 | ret += bkey_u64s(k); |
1375 | 1383 | ||
1376 | return ret; | 1384 | return ret; |
@@ -1390,13 +1398,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, | |||
1390 | struct gc_merge_info *last = r + GC_MERGE_NODES - 1; | 1398 | struct gc_merge_info *last = r + GC_MERGE_NODES - 1; |
1391 | 1399 | ||
1392 | bch_keylist_init(&keys); | 1400 | bch_keylist_init(&keys); |
1393 | bch_btree_iter_init(b, &iter, &b->c->gc_done); | 1401 | bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done); |
1394 | 1402 | ||
1395 | for (i = 0; i < GC_MERGE_NODES; i++) | 1403 | for (i = 0; i < GC_MERGE_NODES; i++) |
1396 | r[i].b = ERR_PTR(-EINTR); | 1404 | r[i].b = ERR_PTR(-EINTR); |
1397 | 1405 | ||
1398 | while (1) { | 1406 | while (1) { |
1399 | k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); | 1407 | k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); |
1400 | if (k) { | 1408 | if (k) { |
1401 | r->b = bch_btree_node_get(b->c, k, b->level - 1, true); | 1409 | r->b = bch_btree_node_get(b->c, k, b->level - 1, true); |
1402 | if (IS_ERR(r->b)) { | 1410 | if (IS_ERR(r->b)) { |
@@ -1416,7 +1424,8 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, | |||
1416 | 1424 | ||
1417 | if (!IS_ERR(last->b)) { | 1425 | if (!IS_ERR(last->b)) { |
1418 | should_rewrite = btree_gc_mark_node(last->b, gc); | 1426 | should_rewrite = btree_gc_mark_node(last->b, gc); |
1419 | if (should_rewrite) { | 1427 | if (should_rewrite && |
1428 | !btree_check_reserve(b, NULL)) { | ||
1420 | n = btree_node_alloc_replacement(last->b, | 1429 | n = btree_node_alloc_replacement(last->b, |
1421 | false); | 1430 | false); |
1422 | 1431 | ||
@@ -1705,7 +1714,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op, | |||
1705 | struct bucket *g; | 1714 | struct bucket *g; |
1706 | struct btree_iter iter; | 1715 | struct btree_iter iter; |
1707 | 1716 | ||
1708 | for_each_key_filter(b, k, &iter, bch_ptr_invalid) { | 1717 | for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { |
1709 | for (i = 0; i < KEY_PTRS(k); i++) { | 1718 | for (i = 0; i < KEY_PTRS(k); i++) { |
1710 | if (!ptr_available(b->c, k, i)) | 1719 | if (!ptr_available(b->c, k, i)) |
1711 | continue; | 1720 | continue; |
@@ -1728,10 +1737,11 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op, | |||
1728 | } | 1737 | } |
1729 | 1738 | ||
1730 | if (b->level) { | 1739 | if (b->level) { |
1731 | bch_btree_iter_init(b, &iter, NULL); | 1740 | bch_btree_iter_init(&b->keys, &iter, NULL); |
1732 | 1741 | ||
1733 | do { | 1742 | do { |
1734 | k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad); | 1743 | k = bch_btree_iter_next_filter(&iter, &b->keys, |
1744 | bch_ptr_bad); | ||
1735 | if (k) | 1745 | if (k) |
1736 | btree_node_prefetch(b->c, k, b->level - 1); | 1746 | btree_node_prefetch(b->c, k, b->level - 1); |
1737 | 1747 | ||
@@ -1774,235 +1784,36 @@ err: | |||
1774 | 1784 | ||
1775 | /* Btree insertion */ | 1785 | /* Btree insertion */ |
1776 | 1786 | ||
1777 | static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert) | 1787 | static bool btree_insert_key(struct btree *b, struct bkey *k, |
1778 | { | 1788 | struct bkey *replace_key) |
1779 | struct bset *i = b->sets[b->nsets].data; | ||
1780 | |||
1781 | memmove((uint64_t *) where + bkey_u64s(insert), | ||
1782 | where, | ||
1783 | (void *) end(i) - (void *) where); | ||
1784 | |||
1785 | i->keys += bkey_u64s(insert); | ||
1786 | bkey_copy(where, insert); | ||
1787 | bch_bset_fix_lookup_table(b, where); | ||
1788 | } | ||
1789 | |||
1790 | static bool fix_overlapping_extents(struct btree *b, struct bkey *insert, | ||
1791 | struct btree_iter *iter, | ||
1792 | struct bkey *replace_key) | ||
1793 | { | 1789 | { |
1794 | void subtract_dirty(struct bkey *k, uint64_t offset, int sectors) | 1790 | unsigned status; |
1795 | { | ||
1796 | if (KEY_DIRTY(k)) | ||
1797 | bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), | ||
1798 | offset, -sectors); | ||
1799 | } | ||
1800 | |||
1801 | uint64_t old_offset; | ||
1802 | unsigned old_size, sectors_found = 0; | ||
1803 | |||
1804 | while (1) { | ||
1805 | struct bkey *k = bch_btree_iter_next(iter); | ||
1806 | if (!k || | ||
1807 | bkey_cmp(&START_KEY(k), insert) >= 0) | ||
1808 | break; | ||
1809 | |||
1810 | if (bkey_cmp(k, &START_KEY(insert)) <= 0) | ||
1811 | continue; | ||
1812 | |||
1813 | old_offset = KEY_START(k); | ||
1814 | old_size = KEY_SIZE(k); | ||
1815 | |||
1816 | /* | ||
1817 | * We might overlap with 0 size extents; we can't skip these | ||
1818 | * because if they're in the set we're inserting to we have to | ||
1819 | * adjust them so they don't overlap with the key we're | ||
1820 | * inserting. But we don't want to check them for replace | ||
1821 | * operations. | ||
1822 | */ | ||
1823 | |||
1824 | if (replace_key && KEY_SIZE(k)) { | ||
1825 | /* | ||
1826 | * k might have been split since we inserted/found the | ||
1827 | * key we're replacing | ||
1828 | */ | ||
1829 | unsigned i; | ||
1830 | uint64_t offset = KEY_START(k) - | ||
1831 | KEY_START(replace_key); | ||
1832 | |||
1833 | /* But it must be a subset of the replace key */ | ||
1834 | if (KEY_START(k) < KEY_START(replace_key) || | ||
1835 | KEY_OFFSET(k) > KEY_OFFSET(replace_key)) | ||
1836 | goto check_failed; | ||
1837 | |||
1838 | /* We didn't find a key that we were supposed to */ | ||
1839 | if (KEY_START(k) > KEY_START(insert) + sectors_found) | ||
1840 | goto check_failed; | ||
1841 | |||
1842 | if (KEY_PTRS(k) != KEY_PTRS(replace_key) || | ||
1843 | KEY_DIRTY(k) != KEY_DIRTY(replace_key)) | ||
1844 | goto check_failed; | ||
1845 | |||
1846 | /* skip past gen */ | ||
1847 | offset <<= 8; | ||
1848 | |||
1849 | BUG_ON(!KEY_PTRS(replace_key)); | ||
1850 | 1791 | ||
1851 | for (i = 0; i < KEY_PTRS(replace_key); i++) | 1792 | BUG_ON(bkey_cmp(k, &b->key) > 0); |
1852 | if (k->ptr[i] != replace_key->ptr[i] + offset) | ||
1853 | goto check_failed; | ||
1854 | |||
1855 | sectors_found = KEY_OFFSET(k) - KEY_START(insert); | ||
1856 | } | ||
1857 | |||
1858 | if (bkey_cmp(insert, k) < 0 && | ||
1859 | bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) { | ||
1860 | /* | ||
1861 | * We overlapped in the middle of an existing key: that | ||
1862 | * means we have to split the old key. But we have to do | ||
1863 | * slightly different things depending on whether the | ||
1864 | * old key has been written out yet. | ||
1865 | */ | ||
1866 | |||
1867 | struct bkey *top; | ||
1868 | |||
1869 | subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert)); | ||
1870 | |||
1871 | if (bkey_written(b, k)) { | ||
1872 | /* | ||
1873 | * We insert a new key to cover the top of the | ||
1874 | * old key, and the old key is modified in place | ||
1875 | * to represent the bottom split. | ||
1876 | * | ||
1877 | * It's completely arbitrary whether the new key | ||
1878 | * is the top or the bottom, but it has to match | ||
1879 | * up with what btree_sort_fixup() does - it | ||
1880 | * doesn't check for this kind of overlap, it | ||
1881 | * depends on us inserting a new key for the top | ||
1882 | * here. | ||
1883 | */ | ||
1884 | top = bch_bset_search(b, &b->sets[b->nsets], | ||
1885 | insert); | ||
1886 | shift_keys(b, top, k); | ||
1887 | } else { | ||
1888 | BKEY_PADDED(key) temp; | ||
1889 | bkey_copy(&temp.key, k); | ||
1890 | shift_keys(b, k, &temp.key); | ||
1891 | top = bkey_next(k); | ||
1892 | } | ||
1893 | |||
1894 | bch_cut_front(insert, top); | ||
1895 | bch_cut_back(&START_KEY(insert), k); | ||
1896 | bch_bset_fix_invalidated_key(b, k); | ||
1897 | return false; | ||
1898 | } | ||
1899 | |||
1900 | if (bkey_cmp(insert, k) < 0) { | ||
1901 | bch_cut_front(insert, k); | ||
1902 | } else { | ||
1903 | if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) | ||
1904 | old_offset = KEY_START(insert); | ||
1905 | |||
1906 | if (bkey_written(b, k) && | ||
1907 | bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) { | ||
1908 | /* | ||
1909 | * Completely overwrote, so we don't have to | ||
1910 | * invalidate the binary search tree | ||
1911 | */ | ||
1912 | bch_cut_front(k, k); | ||
1913 | } else { | ||
1914 | __bch_cut_back(&START_KEY(insert), k); | ||
1915 | bch_bset_fix_invalidated_key(b, k); | ||
1916 | } | ||
1917 | } | ||
1918 | |||
1919 | subtract_dirty(k, old_offset, old_size - KEY_SIZE(k)); | ||
1920 | } | ||
1921 | 1793 | ||
1922 | check_failed: | 1794 | status = bch_btree_insert_key(&b->keys, k, replace_key); |
1923 | if (replace_key) { | 1795 | if (status != BTREE_INSERT_STATUS_NO_INSERT) { |
1924 | if (!sectors_found) { | 1796 | bch_check_keys(&b->keys, "%u for %s", status, |
1925 | return true; | 1797 | replace_key ? "replace" : "insert"); |
1926 | } else if (sectors_found < KEY_SIZE(insert)) { | ||
1927 | SET_KEY_OFFSET(insert, KEY_OFFSET(insert) - | ||
1928 | (KEY_SIZE(insert) - sectors_found)); | ||
1929 | SET_KEY_SIZE(insert, sectors_found); | ||
1930 | } | ||
1931 | } | ||
1932 | 1798 | ||
1933 | return false; | 1799 | trace_bcache_btree_insert_key(b, k, replace_key != NULL, |
1800 | status); | ||
1801 | return true; | ||
1802 | } else | ||
1803 | return false; | ||
1934 | } | 1804 | } |
1935 | 1805 | ||
1936 | static bool btree_insert_key(struct btree *b, struct btree_op *op, | 1806 | static size_t insert_u64s_remaining(struct btree *b) |
1937 | struct bkey *k, struct bkey *replace_key) | ||
1938 | { | 1807 | { |
1939 | struct bset *i = b->sets[b->nsets].data; | 1808 | ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys); |
1940 | struct bkey *m, *prev; | ||
1941 | unsigned status = BTREE_INSERT_STATUS_INSERT; | ||
1942 | |||
1943 | BUG_ON(bkey_cmp(k, &b->key) > 0); | ||
1944 | BUG_ON(b->level && !KEY_PTRS(k)); | ||
1945 | BUG_ON(!b->level && !KEY_OFFSET(k)); | ||
1946 | |||
1947 | if (!b->level) { | ||
1948 | struct btree_iter iter; | ||
1949 | |||
1950 | /* | ||
1951 | * bset_search() returns the first key that is strictly greater | ||
1952 | * than the search key - but for back merging, we want to find | ||
1953 | * the previous key. | ||
1954 | */ | ||
1955 | prev = NULL; | ||
1956 | m = bch_btree_iter_init(b, &iter, PRECEDING_KEY(&START_KEY(k))); | ||
1957 | 1809 | ||
1958 | if (fix_overlapping_extents(b, k, &iter, replace_key)) { | 1810 | /* |
1959 | op->insert_collision = true; | 1811 | * Might land in the middle of an existing extent and have to split it |
1960 | return false; | 1812 | */ |
1961 | } | 1813 | if (b->keys.ops->is_extents) |
1962 | 1814 | ret -= KEY_MAX_U64S; | |
1963 | if (KEY_DIRTY(k)) | ||
1964 | bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), | ||
1965 | KEY_START(k), KEY_SIZE(k)); | ||
1966 | |||
1967 | while (m != end(i) && | ||
1968 | bkey_cmp(k, &START_KEY(m)) > 0) | ||
1969 | prev = m, m = bkey_next(m); | ||
1970 | |||
1971 | if (key_merging_disabled(b->c)) | ||
1972 | goto insert; | ||
1973 | |||
1974 | /* prev is in the tree, if we merge we're done */ | ||
1975 | status = BTREE_INSERT_STATUS_BACK_MERGE; | ||
1976 | if (prev && | ||
1977 | bch_bkey_try_merge(b, prev, k)) | ||
1978 | goto merged; | ||
1979 | |||
1980 | status = BTREE_INSERT_STATUS_OVERWROTE; | ||
1981 | if (m != end(i) && | ||
1982 | KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m)) | ||
1983 | goto copy; | ||
1984 | |||
1985 | status = BTREE_INSERT_STATUS_FRONT_MERGE; | ||
1986 | if (m != end(i) && | ||
1987 | bch_bkey_try_merge(b, k, m)) | ||
1988 | goto copy; | ||
1989 | } else { | ||
1990 | BUG_ON(replace_key); | ||
1991 | m = bch_bset_search(b, &b->sets[b->nsets], k); | ||
1992 | } | ||
1993 | |||
1994 | insert: shift_keys(b, m, k); | ||
1995 | copy: bkey_copy(m, k); | ||
1996 | merged: | ||
1997 | bch_check_keys(b, "%u for %s", status, | ||
1998 | replace_key ? "replace" : "insert"); | ||
1999 | |||
2000 | if (b->level && !KEY_OFFSET(k)) | ||
2001 | btree_current_write(b)->prio_blocked++; | ||
2002 | |||
2003 | trace_bcache_btree_insert_key(b, k, replace_key != NULL, status); | ||
2004 | 1815 | ||
2005 | return true; | 1816 | return max(ret, 0L); |
2006 | } | 1817 | } |
2007 | 1818 | ||
2008 | static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, | 1819 | static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, |
@@ -2010,21 +1821,19 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, | |||
2010 | struct bkey *replace_key) | 1821 | struct bkey *replace_key) |
2011 | { | 1822 | { |
2012 | bool ret = false; | 1823 | bool ret = false; |
2013 | int oldsize = bch_count_data(b); | 1824 | int oldsize = bch_count_data(&b->keys); |
2014 | 1825 | ||
2015 | while (!bch_keylist_empty(insert_keys)) { | 1826 | while (!bch_keylist_empty(insert_keys)) { |
2016 | struct bset *i = write_block(b); | ||
2017 | struct bkey *k = insert_keys->keys; | 1827 | struct bkey *k = insert_keys->keys; |
2018 | 1828 | ||
2019 | if (b->written + __set_blocks(i, i->keys + bkey_u64s(k), b->c) | 1829 | if (bkey_u64s(k) > insert_u64s_remaining(b)) |
2020 | > btree_blocks(b)) | ||
2021 | break; | 1830 | break; |
2022 | 1831 | ||
2023 | if (bkey_cmp(k, &b->key) <= 0) { | 1832 | if (bkey_cmp(k, &b->key) <= 0) { |
2024 | if (!b->level) | 1833 | if (!b->level) |
2025 | bkey_put(b->c, k); | 1834 | bkey_put(b->c, k); |
2026 | 1835 | ||
2027 | ret |= btree_insert_key(b, op, k, replace_key); | 1836 | ret |= btree_insert_key(b, k, replace_key); |
2028 | bch_keylist_pop_front(insert_keys); | 1837 | bch_keylist_pop_front(insert_keys); |
2029 | } else if (bkey_cmp(&START_KEY(k), &b->key) < 0) { | 1838 | } else if (bkey_cmp(&START_KEY(k), &b->key) < 0) { |
2030 | BKEY_PADDED(key) temp; | 1839 | BKEY_PADDED(key) temp; |
@@ -2033,16 +1842,19 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op, | |||
2033 | bch_cut_back(&b->key, &temp.key); | 1842 | bch_cut_back(&b->key, &temp.key); |
2034 | bch_cut_front(&b->key, insert_keys->keys); | 1843 | bch_cut_front(&b->key, insert_keys->keys); |
2035 | 1844 | ||
2036 | ret |= btree_insert_key(b, op, &temp.key, replace_key); | 1845 | ret |= btree_insert_key(b, &temp.key, replace_key); |
2037 | break; | 1846 | break; |
2038 | } else { | 1847 | } else { |
2039 | break; | 1848 | break; |
2040 | } | 1849 | } |
2041 | } | 1850 | } |
2042 | 1851 | ||
1852 | if (!ret) | ||
1853 | op->insert_collision = true; | ||
1854 | |||
2043 | BUG_ON(!bch_keylist_empty(insert_keys) && b->level); | 1855 | BUG_ON(!bch_keylist_empty(insert_keys) && b->level); |
2044 | 1856 | ||
2045 | BUG_ON(bch_count_data(b) < oldsize); | 1857 | BUG_ON(bch_count_data(&b->keys) < oldsize); |
2046 | return ret; | 1858 | return ret; |
2047 | } | 1859 | } |
2048 | 1860 | ||
@@ -2059,16 +1871,21 @@ static int btree_split(struct btree *b, struct btree_op *op, | |||
2059 | closure_init_stack(&cl); | 1871 | closure_init_stack(&cl); |
2060 | bch_keylist_init(&parent_keys); | 1872 | bch_keylist_init(&parent_keys); |
2061 | 1873 | ||
1874 | if (!b->level && | ||
1875 | btree_check_reserve(b, op)) | ||
1876 | return -EINTR; | ||
1877 | |||
2062 | n1 = btree_node_alloc_replacement(b, true); | 1878 | n1 = btree_node_alloc_replacement(b, true); |
2063 | if (IS_ERR(n1)) | 1879 | if (IS_ERR(n1)) |
2064 | goto err; | 1880 | goto err; |
2065 | 1881 | ||
2066 | split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5; | 1882 | split = set_blocks(btree_bset_first(n1), |
1883 | block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5; | ||
2067 | 1884 | ||
2068 | if (split) { | 1885 | if (split) { |
2069 | unsigned keys = 0; | 1886 | unsigned keys = 0; |
2070 | 1887 | ||
2071 | trace_bcache_btree_node_split(b, n1->sets[0].data->keys); | 1888 | trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys); |
2072 | 1889 | ||
2073 | n2 = bch_btree_node_alloc(b->c, b->level, true); | 1890 | n2 = bch_btree_node_alloc(b->c, b->level, true); |
2074 | if (IS_ERR(n2)) | 1891 | if (IS_ERR(n2)) |
@@ -2087,18 +1904,20 @@ static int btree_split(struct btree *b, struct btree_op *op, | |||
2087 | * search tree yet | 1904 | * search tree yet |
2088 | */ | 1905 | */ |
2089 | 1906 | ||
2090 | while (keys < (n1->sets[0].data->keys * 3) / 5) | 1907 | while (keys < (btree_bset_first(n1)->keys * 3) / 5) |
2091 | keys += bkey_u64s(node(n1->sets[0].data, keys)); | 1908 | keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), |
1909 | keys)); | ||
2092 | 1910 | ||
2093 | bkey_copy_key(&n1->key, node(n1->sets[0].data, keys)); | 1911 | bkey_copy_key(&n1->key, |
2094 | keys += bkey_u64s(node(n1->sets[0].data, keys)); | 1912 | bset_bkey_idx(btree_bset_first(n1), keys)); |
1913 | keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), keys)); | ||
2095 | 1914 | ||
2096 | n2->sets[0].data->keys = n1->sets[0].data->keys - keys; | 1915 | btree_bset_first(n2)->keys = btree_bset_first(n1)->keys - keys; |
2097 | n1->sets[0].data->keys = keys; | 1916 | btree_bset_first(n1)->keys = keys; |
2098 | 1917 | ||
2099 | memcpy(n2->sets[0].data->start, | 1918 | memcpy(btree_bset_first(n2)->start, |
2100 | end(n1->sets[0].data), | 1919 | bset_bkey_last(btree_bset_first(n1)), |
2101 | n2->sets[0].data->keys * sizeof(uint64_t)); | 1920 | btree_bset_first(n2)->keys * sizeof(uint64_t)); |
2102 | 1921 | ||
2103 | bkey_copy_key(&n2->key, &b->key); | 1922 | bkey_copy_key(&n2->key, &b->key); |
2104 | 1923 | ||
@@ -2106,7 +1925,7 @@ static int btree_split(struct btree *b, struct btree_op *op, | |||
2106 | bch_btree_node_write(n2, &cl); | 1925 | bch_btree_node_write(n2, &cl); |
2107 | rw_unlock(true, n2); | 1926 | rw_unlock(true, n2); |
2108 | } else { | 1927 | } else { |
2109 | trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); | 1928 | trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys); |
2110 | 1929 | ||
2111 | bch_btree_insert_keys(n1, op, insert_keys, replace_key); | 1930 | bch_btree_insert_keys(n1, op, insert_keys, replace_key); |
2112 | } | 1931 | } |
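When a split is needed, n1 keeps roughly the first 3/5 of the keys: the loop walks bkeys, accumulating their size in u64s, until it passes 3/5 of the set, takes the key it stopped on as n1's new right edge, and memcpy()s the remainder into n2. Finding that split point, sketched with the same bset_bkey_idx() and bkey_u64s() helpers the diff uses (assumed from bset.h/bcache.h):

  /* Sketch: walk bkeys until ~3/5 of the set's u64s are covered. */
  static unsigned split_point(struct bset *i)
  {
          unsigned keys = 0;      /* offset into the set, in u64s */

          while (keys < i->keys * 3 / 5)
                  keys += bkey_u64s(bset_bkey_idx(i, keys));

          return keys;
  }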
@@ -2149,18 +1968,21 @@ static int btree_split(struct btree *b, struct btree_op *op, | |||
2149 | 1968 | ||
2150 | return 0; | 1969 | return 0; |
2151 | err_free2: | 1970 | err_free2: |
1971 | bkey_put(b->c, &n2->key); | ||
2152 | btree_node_free(n2); | 1972 | btree_node_free(n2); |
2153 | rw_unlock(true, n2); | 1973 | rw_unlock(true, n2); |
2154 | err_free1: | 1974 | err_free1: |
1975 | bkey_put(b->c, &n1->key); | ||
2155 | btree_node_free(n1); | 1976 | btree_node_free(n1); |
2156 | rw_unlock(true, n1); | 1977 | rw_unlock(true, n1); |
2157 | err: | 1978 | err: |
1979 | WARN(1, "bcache: btree split failed"); | ||
1980 | |||
2158 | if (n3 == ERR_PTR(-EAGAIN) || | 1981 | if (n3 == ERR_PTR(-EAGAIN) || |
2159 | n2 == ERR_PTR(-EAGAIN) || | 1982 | n2 == ERR_PTR(-EAGAIN) || |
2160 | n1 == ERR_PTR(-EAGAIN)) | 1983 | n1 == ERR_PTR(-EAGAIN)) |
2161 | return -EAGAIN; | 1984 | return -EAGAIN; |
2162 | 1985 | ||
2163 | pr_warn("couldn't split"); | ||
2164 | return -ENOMEM; | 1986 | return -ENOMEM; |
2165 | } | 1987 | } |
2166 | 1988 | ||
@@ -2171,7 +1993,7 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op, | |||
2171 | { | 1993 | { |
2172 | BUG_ON(b->level && replace_key); | 1994 | BUG_ON(b->level && replace_key); |
2173 | 1995 | ||
2174 | if (should_split(b)) { | 1996 | if (bch_keylist_nkeys(insert_keys) > insert_u64s_remaining(b)) { |
2175 | if (current->bio_list) { | 1997 | if (current->bio_list) { |
2176 | op->lock = b->c->root->level + 1; | 1998 | op->lock = b->c->root->level + 1; |
2177 | return -EAGAIN; | 1999 | return -EAGAIN; |
@@ -2180,11 +2002,13 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op, | |||
2180 | return -EINTR; | 2002 | return -EINTR; |
2181 | } else { | 2003 | } else { |
2182 | /* Invalidated all iterators */ | 2004 | /* Invalidated all iterators */ |
2183 | return btree_split(b, op, insert_keys, replace_key) ?: | 2005 | int ret = btree_split(b, op, insert_keys, replace_key); |
2184 | -EINTR; | 2006 | |
2007 | return bch_keylist_empty(insert_keys) ? | ||
2008 | 0 : ret ?: -EINTR; | ||
2185 | } | 2009 | } |
2186 | } else { | 2010 | } else { |
2187 | BUG_ON(write_block(b) != b->sets[b->nsets].data); | 2011 | BUG_ON(write_block(b) != btree_bset_last(b)); |
2188 | 2012 | ||
2189 | if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) { | 2013 | if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) { |
2190 | if (!b->level) | 2014 | if (!b->level) |
@@ -2323,9 +2147,9 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, | |||
2323 | struct bkey *k; | 2147 | struct bkey *k; |
2324 | struct btree_iter iter; | 2148 | struct btree_iter iter; |
2325 | 2149 | ||
2326 | bch_btree_iter_init(b, &iter, from); | 2150 | bch_btree_iter_init(&b->keys, &iter, from); |
2327 | 2151 | ||
2328 | while ((k = bch_btree_iter_next_filter(&iter, b, | 2152 | while ((k = bch_btree_iter_next_filter(&iter, &b->keys, |
2329 | bch_ptr_bad))) { | 2153 | bch_ptr_bad))) { |
2330 | ret = btree(map_nodes_recurse, k, b, | 2154 | ret = btree(map_nodes_recurse, k, b, |
2331 | op, from, fn, flags); | 2155 | op, from, fn, flags); |
@@ -2356,9 +2180,9 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, | |||
2356 | struct bkey *k; | 2180 | struct bkey *k; |
2357 | struct btree_iter iter; | 2181 | struct btree_iter iter; |
2358 | 2182 | ||
2359 | bch_btree_iter_init(b, &iter, from); | 2183 | bch_btree_iter_init(&b->keys, &iter, from); |
2360 | 2184 | ||
2361 | while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad))) { | 2185 | while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { |
2362 | ret = !b->level | 2186 | ret = !b->level |
2363 | ? fn(op, b, k) | 2187 | ? fn(op, b, k) |
2364 | : btree(map_keys_recurse, k, b, op, from, fn, flags); | 2188 | : btree(map_keys_recurse, k, b, op, from, fn, flags); |