Diffstat (limited to 'drivers')
-rw-r--r--  drivers/md/bcache/bcache.h  |   5
-rw-r--r--  drivers/md/bcache/btree.c   | 274
-rw-r--r--  drivers/md/bcache/btree.h   |  19
-rw-r--r--  drivers/md/bcache/debug.c   |   4
-rw-r--r--  drivers/md/bcache/journal.c |   2
-rw-r--r--  drivers/md/bcache/super.c   |  12
6 files changed, 140 insertions(+), 176 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 166c8ddc0be4..ad4957b52f10 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -819,10 +819,9 @@ struct cache_set {
 
 	/*
 	 * A btree node on disk could have too many bsets for an iterator to fit
-	 * on the stack - this is a single element mempool for btree_read_work()
+	 * on the stack - have to dynamically allocate them
 	 */
-	struct mutex		fill_lock;
-	struct btree_iter	*fill_iter;
+	mempool_t		*fill_iter;
 
 	/*
 	 * btree_sort() is a merge sort and requires temporary space - single
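The cache_set change above replaces a mutex-guarded, statically allocated iterator with a kmalloc-backed mempool, so concurrent btree node reads no longer serialize on fill_lock. A minimal sketch of that allocation pattern using the stock mempool API (iter_pool and the helper names are illustrative, not from the patch):

	#include <linux/mempool.h>
	#include <linux/slab.h>

	static mempool_t *iter_pool;

	static int iter_pool_init(size_t iter_size)
	{
		/* min_nr = 1: one element is kept in reserve, so at least
		 * one reader can always make progress without a global lock */
		iter_pool = mempool_create_kmalloc_pool(1, iter_size);
		return iter_pool ? 0 : -ENOMEM;
	}

	static void iter_pool_example(void)
	{
		/* GFP_NOWAIT can return NULL once the reserve is taken;
		 * the patch does not check for that and leans on the reserve */
		void *iter = mempool_alloc(iter_pool, GFP_NOWAIT);

		if (iter) {
			/* ... iterate over the node's bsets ... */
			mempool_free(iter, iter_pool);
		}
	}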
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 45b88fbffbe0..aaec186f7ba6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -134,44 +134,17 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i)
 	return crc ^ 0xffffffffffffffffULL;
 }
 
-static void btree_bio_endio(struct bio *bio, int error)
+void bch_btree_node_read_done(struct btree *b)
 {
-	struct closure *cl = bio->bi_private;
-	struct btree *b = container_of(cl, struct btree, io.cl);
-
-	if (error)
-		set_btree_node_io_error(b);
-
-	bch_bbio_count_io_errors(b->c, bio, error, (bio->bi_rw & WRITE)
-				 ? "writing btree" : "reading btree");
-	closure_put(cl);
-}
-
-static void btree_bio_init(struct btree *b)
-{
-	BUG_ON(b->bio);
-	b->bio = bch_bbio_alloc(b->c);
-
-	b->bio->bi_end_io	= btree_bio_endio;
-	b->bio->bi_private	= &b->io.cl;
-}
-
-void bch_btree_read_done(struct closure *cl)
-{
-	struct btree *b = container_of(cl, struct btree, io.cl);
-	struct bset *i = b->sets[0].data;
-	struct btree_iter *iter = b->c->fill_iter;
 	const char *err = "bad btree header";
-	BUG_ON(b->nsets || b->written);
-
-	bch_bbio_free(b->bio, b->c);
-	b->bio = NULL;
+	struct bset *i = b->sets[0].data;
+	struct btree_iter *iter;
 
-	mutex_lock(&b->c->fill_lock);
+	iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
+	iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
 	iter->used = 0;
 
-	if (btree_node_io_error(b) ||
-	    !i->seq)
+	if (!i->seq)
 		goto err;
 
 	for (;
@@ -228,17 +201,8 @@ void bch_btree_read_done(struct closure *cl)
 	if (b->written < btree_blocks(b))
 		bch_bset_init_next(b);
 out:
-
-	mutex_unlock(&b->c->fill_lock);
-
-	spin_lock(&b->c->btree_read_time_lock);
-	bch_time_stats_update(&b->c->btree_read_time, b->io_start_time);
-	spin_unlock(&b->c->btree_read_time_lock);
-
-	smp_wmb(); /* read_done is our write lock */
-	set_btree_node_read_done(b);
-
-	closure_return(cl);
+	mempool_free(iter, b->c->fill_iter);
+	return;
 err:
 	set_btree_node_io_error(b);
 	bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys",
@@ -247,26 +211,51 @@ err:
 	goto out;
 }
 
-void bch_btree_read(struct btree *b)
+static void btree_node_read_endio(struct bio *bio, int error)
 {
-	BUG_ON(b->nsets || b->written);
+	struct closure *cl = bio->bi_private;
+	closure_put(cl);
+}
 
-	if (!closure_trylock(&b->io.cl, &b->c->cl))
-		BUG();
+void bch_btree_node_read(struct btree *b)
+{
+	uint64_t start_time = local_clock();
+	struct closure cl;
+	struct bio *bio;
 
-	b->io_start_time	= local_clock();
+	closure_init_stack(&cl);
+	pr_debug("%s", pbtree(b));
 
-	btree_bio_init(b);
-	b->bio->bi_rw	= REQ_META|READ_SYNC;
-	b->bio->bi_size	= KEY_SIZE(&b->key) << 9;
+	bio = bch_bbio_alloc(b->c);
+	bio->bi_rw	= REQ_META|READ_SYNC;
+	bio->bi_size	= KEY_SIZE(&b->key) << 9;
+	bio->bi_end_io	= btree_node_read_endio;
+	bio->bi_private	= &cl;
 
-	bch_bio_map(b->bio, b->sets[0].data);
+	bch_bio_map(bio, b->sets[0].data);
 
-	pr_debug("%s", pbtree(b));
-	trace_bcache_btree_read(b->bio);
-	bch_submit_bbio(b->bio, b->c, &b->key, 0);
+	trace_bcache_btree_read(bio);
+	bch_submit_bbio(bio, b->c, &b->key, 0);
+	closure_sync(&cl);
 
-	continue_at(&b->io.cl, bch_btree_read_done, system_wq);
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		set_btree_node_io_error(b);
+
+	bch_bbio_free(bio, b->c);
+
+	if (btree_node_io_error(b))
+		goto err;
+
+	bch_btree_node_read_done(b);
+
+	spin_lock(&b->c->btree_read_time_lock);
+	bch_time_stats_update(&b->c->btree_read_time, start_time);
+	spin_unlock(&b->c->btree_read_time_lock);
+
+	return;
+err:
+	bch_cache_set_error(b->c, "io error reading bucket %lu",
+			    PTR_BUCKET_NR(b->c, &b->key, 0));
 }
 
 static void btree_complete_write(struct btree *b, struct btree_write *w)
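With the hunk above, reading a btree node becomes fully synchronous: the bio completes against a closure on the caller's stack and the submitting thread sleeps in closure_sync(), which is what lets fill_lock, io_start_time and the read_done flag disappear elsewhere in this patch. The shape of the pattern, condensed from the new bch_btree_node_read() (this is bcache's closure API, not a generic kernel facility):

	static void read_endio(struct bio *bio, int error)
	{
		closure_put(bio->bi_private);	/* drops the ref, wakes the waiter */
	}

	static void read_node_sync(struct btree *b, struct bio *bio)
	{
		struct closure cl;

		closure_init_stack(&cl);	/* one ref, owned by this stack frame */

		bio->bi_end_io  = read_endio;
		bio->bi_private = &cl;

		bch_submit_bbio(bio, b->c, &b->key, 0);
		closure_sync(&cl);		/* sleep until read_endio runs */
	}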
@@ -280,15 +269,11 @@ static void btree_complete_write(struct btree *b, struct btree_write *w)
 		__closure_wake_up(&b->c->journal.wait);
 	}
 
-	if (w->owner)
-		closure_put(w->owner);
-
 	w->prio_blocked = 0;
 	w->journal	= NULL;
-	w->owner	= NULL;
 }
 
-static void __btree_write_done(struct closure *cl)
+static void __btree_node_write_done(struct closure *cl)
 {
 	struct btree *b = container_of(cl, struct btree, io.cl);
 	struct btree_write *w = btree_prev_write(b);
@@ -304,7 +289,7 @@ static void __btree_write_done(struct closure *cl)
 	closure_return(cl);
 }
 
-static void btree_write_done(struct closure *cl)
+static void btree_node_write_done(struct closure *cl)
 {
 	struct btree *b = container_of(cl, struct btree, io.cl);
 	struct bio_vec *bv;
@@ -313,10 +298,22 @@ static void btree_write_done(struct closure *cl)
 	__bio_for_each_segment(bv, b->bio, n, 0)
 		__free_page(bv->bv_page);
 
-	__btree_write_done(cl);
+	__btree_node_write_done(cl);
 }
 
-static void do_btree_write(struct btree *b)
+static void btree_node_write_endio(struct bio *bio, int error)
+{
+	struct closure *cl = bio->bi_private;
+	struct btree *b = container_of(cl, struct btree, io.cl);
+
+	if (error)
+		set_btree_node_io_error(b);
+
+	bch_bbio_count_io_errors(b->c, bio, error, "writing btree");
+	closure_put(cl);
+}
+
+static void do_btree_node_write(struct btree *b)
 {
 	struct closure *cl = &b->io.cl;
 	struct bset *i = b->sets[b->nsets].data;
@@ -325,7 +322,11 @@ static void do_btree_write(struct btree *b)
 	i->version	= BCACHE_BSET_VERSION;
 	i->csum		= btree_csum_set(b, i);
 
-	btree_bio_init(b);
+	BUG_ON(b->bio);
+	b->bio = bch_bbio_alloc(b->c);
+
+	b->bio->bi_end_io	= btree_node_write_endio;
+	b->bio->bi_private	= &b->io.cl;
 	b->bio->bi_rw		= REQ_META|WRITE_SYNC;
 	b->bio->bi_size		= set_blocks(i, b->c) * block_bytes(b->c);
 	bch_bio_map(b->bio, i);
@@ -345,7 +346,7 @@ static void do_btree_write(struct btree *b)
 		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
-		continue_at(cl, btree_write_done, NULL);
+		continue_at(cl, btree_node_write_done, NULL);
 	} else {
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);
@@ -354,26 +355,30 @@ static void do_btree_write(struct btree *b)
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		closure_sync(cl);
-		__btree_write_done(cl);
+		__btree_node_write_done(cl);
 	}
 }
 
-static void __btree_write(struct btree *b)
+void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = b->sets[b->nsets].data;
 
 	BUG_ON(current->bio_list);
+	BUG_ON(b->written >= btree_blocks(b));
+	BUG_ON(b->written && !i->keys);
+	BUG_ON(b->sets->data->seq != i->seq);
 
-	closure_lock(&b->io, &b->c->cl);
 	cancel_delayed_work(&b->work);
 
+	/* If caller isn't waiting for write, parent refcount is cache set */
+	closure_lock(&b->io, parent ?: &b->c->cl);
+
 	clear_bit(BTREE_NODE_dirty,	 &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);
 
 	bch_check_key_order(b, i);
-	BUG_ON(b->written && !i->keys);
 
-	do_btree_write(b);
+	do_btree_node_write(b);
 
 	pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
 
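bch_btree_node_write() now takes the waiter explicitly: pass NULL and the write is parented on the cache set (fire and forget); pass a closure and the write holds a reference on it until completion. A hedged sketch of the two call patterns, following the usage visible later in this patch (btree_split() and friends):

	/* Fire and forget - nobody waits, the cache set keeps the I/O alive: */
	bch_btree_node_write(b, NULL);

	/* Block on one or more writes, as btree_split() does below: */
	bch_btree_node_write(n1, &op->cl);
	bch_btree_node_write(n2, &op->cl);
	closure_sync(&op->cl);	/* returns once both writes have completed */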
@@ -387,37 +392,31 @@ static void __btree_write(struct btree *b)
 	bch_bset_init_next(b);
 }
 
-static void btree_write_work(struct work_struct *w)
+static void btree_node_write_work(struct work_struct *w)
 {
 	struct btree *b = container_of(to_delayed_work(w), struct btree, work);
 
-	down_write(&b->lock);
+	rw_lock(true, b, b->level);
 
 	if (btree_node_dirty(b))
-		__btree_write(b);
-	up_write(&b->lock);
+		bch_btree_node_write(b, NULL);
+	rw_unlock(true, b);
 }
 
-void bch_btree_write(struct btree *b, bool now, struct btree_op *op)
+static void bch_btree_leaf_dirty(struct btree *b, struct btree_op *op)
 {
 	struct bset *i = b->sets[b->nsets].data;
 	struct btree_write *w = btree_current_write(b);
 
-	BUG_ON(b->written &&
-	       (b->written >= btree_blocks(b) ||
-		i->seq != b->sets[0].data->seq ||
-		!i->keys));
+	BUG_ON(!b->written);
+	BUG_ON(!i->keys);
 
-	if (!btree_node_dirty(b)) {
-		set_btree_node_dirty(b);
-		queue_delayed_work(btree_io_wq, &b->work,
-				   msecs_to_jiffies(30000));
-	}
+	if (!btree_node_dirty(b))
+		queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
 
-	w->prio_blocked += b->prio_blocked;
-	b->prio_blocked  = 0;
+	set_btree_node_dirty(b);
 
-	if (op && op->journal && !b->level) {
+	if (op && op->journal) {
 		if (w->journal &&
 		    journal_pin_cmp(b->c, w, op)) {
 			atomic_dec_bug(w->journal);
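The two functions above form the only deferred write path left after this patch: the first insert that dirties a leaf arms a 30-second flush (30 * HZ is simply the jiffies spelling of the old msecs_to_jiffies(30000); the timeout is unchanged). A condensed sketch of why the work item re-checks the dirty bit, inferred from the code rather than stated by the patch:

	/* An explicit bch_btree_node_write(b, ...) may already have flushed
	 * the node and cleared BTREE_NODE_dirty before the 30s timer fires
	 * (cancel_delayed_work() does not wait for a running work item),
	 * so the work item must re-check under the node lock: */
	rw_lock(true, b, b->level);
	if (btree_node_dirty(b))	/* may have been written already */
		bch_btree_node_write(b, NULL);
	rw_unlock(true, b);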
@@ -430,23 +429,10 @@ void bch_btree_write(struct btree *b, bool now, struct btree_op *op)
 		}
 	}
 
-	if (current->bio_list)
-		return;
-
 	/* Force write if set is too big */
-	if (now ||
-	    b->level ||
-	    set_bytes(i) > PAGE_SIZE - 48) {
-		if (op && now) {
-			/* Must wait on multiple writes */
-			BUG_ON(w->owner);
-			w->owner = &op->cl;
-			closure_get(&op->cl);
-		}
-
-		__btree_write(b);
-	}
-	BUG_ON(!b->written);
+	if (set_bytes(i) > PAGE_SIZE - 48 &&
+	    !current->bio_list)
+		bch_btree_node_write(b, NULL);
 }
 
 /*
@@ -559,7 +545,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
 	init_rwsem(&b->lock);
 	lockdep_set_novalidate_class(&b->lock);
 	INIT_LIST_HEAD(&b->list);
-	INIT_DELAYED_WORK(&b->work, btree_write_work);
+	INIT_DELAYED_WORK(&b->work, btree_node_write_work);
 	b->c = c;
 	closure_init_unlocked(&b->io);
 
@@ -582,7 +568,7 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
 	BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
 
 	if (cl && btree_node_dirty(b))
-		bch_btree_write(b, true, NULL);
+		bch_btree_node_write(b, NULL);
 
 	if (cl)
 		closure_wait_event_async(&b->io.wait, cl,
@@ -905,6 +891,9 @@ retry:
 	b = mca_find(c, k);
 
 	if (!b) {
+		if (current->bio_list)
+			return ERR_PTR(-EAGAIN);
+
 		mutex_lock(&c->bucket_lock);
 		b = mca_alloc(c, k, level, &op->cl);
 		mutex_unlock(&c->bucket_lock);
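The early return added above is deadlock avoidance made necessary by the now-synchronous read path; a hedged reading of it, since the patch carries no comment here:

	/* bch_btree_node_read() now sleeps in closure_sync().  If the
	 * current task is inside generic_make_request() (current->bio_list
	 * is non-NULL), the bio we would sleep on may still be sitting,
	 * unsubmitted, on that very list - so punt with -EAGAIN and let a
	 * context that may safely block retry the lookup: */
	if (current->bio_list)
		return ERR_PTR(-EAGAIN);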
@@ -914,7 +903,7 @@ retry:
 		if (IS_ERR(b))
 			return b;
 
-		bch_btree_read(b);
+		bch_btree_node_read(b);
 
 		if (!write)
 			downgrade_write(&b->lock);
@@ -937,15 +926,12 @@ retry:
 	for (; i <= b->nsets; i++)
 		prefetch(b->sets[i].data);
 
-	if (!closure_wait_event(&b->io.wait, &op->cl,
-				btree_node_read_done(b))) {
-		rw_unlock(write, b);
-		b = ERR_PTR(-EAGAIN);
-	} else if (btree_node_io_error(b)) {
+	if (btree_node_io_error(b)) {
 		rw_unlock(write, b);
-		b = ERR_PTR(-EIO);
-	} else
-		BUG_ON(!b->written);
+		return ERR_PTR(-EIO);
+	}
+
+	BUG_ON(!b->written);
 
 	return b;
 }
@@ -959,7 +945,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
 	mutex_unlock(&c->bucket_lock);
 
 	if (!IS_ERR_OR_NULL(b)) {
-		bch_btree_read(b);
+		bch_btree_node_read(b);
 		rw_unlock(true, b);
 	}
 }
@@ -982,12 +968,6 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 		btree_complete_write(b, btree_current_write(b));
 	clear_bit(BTREE_NODE_dirty, &b->flags);
 
-	if (b->prio_blocked &&
-	    !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked))
-		wake_up_allocators(b->c);
-
-	b->prio_blocked = 0;
-
 	cancel_delayed_work(&b->work);
 
 	mutex_lock(&b->c->bucket_lock);
@@ -1028,7 +1008,6 @@ retry:
 		goto retry;
 	}
 
-	set_btree_node_read_done(b);
 	b->accessed = 1;
 	bch_bset_init_next(b);
 
@@ -1166,14 +1145,11 @@ static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
 
 	if (!IS_ERR_OR_NULL(n)) {
 		swap(b, n);
+		__bkey_put(b->c, &b->key);
 
 		memcpy(k->ptr, b->key.ptr,
 		       sizeof(uint64_t) * KEY_PTRS(&b->key));
 
-		__bkey_put(b->c, &b->key);
-		atomic_inc(&b->c->prio_blocked);
-		b->prio_blocked++;
-
 		btree_node_free(n, op);
 		up_write(&n->lock);
 	}
@@ -1293,14 +1269,9 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 	void write(struct btree *r)
 	{
 		if (!r->written)
-			bch_btree_write(r, true, op);
-		else if (btree_node_dirty(r)) {
-			BUG_ON(btree_current_write(r)->owner);
-			btree_current_write(r)->owner = writes;
-			closure_get(writes);
-
-			bch_btree_write(r, true, NULL);
-		}
+			bch_btree_node_write(r, &op->cl);
+		else if (btree_node_dirty(r))
+			bch_btree_node_write(r, writes);
 
 		up_write(&r->lock);
 	}
@@ -1386,9 +1357,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
 	ret = btree_gc_recurse(b, op, writes, gc);
 
 	if (!b->written || btree_node_dirty(b)) {
-		atomic_inc(&b->c->prio_blocked);
-		b->prio_blocked++;
-		bch_btree_write(b, true, n ? op : NULL);
+		bch_btree_node_write(b, n ? &op->cl : NULL);
 	}
 
 	if (!IS_ERR_OR_NULL(n)) {
@@ -1508,8 +1477,8 @@ static void bch_btree_gc(struct closure *cl)
 	struct gc_stat stats;
 	struct closure writes;
 	struct btree_op op;
-
 	uint64_t start_time = local_clock();
+
 	trace_bcache_gc_start(c->sb.set_uuid);
 	blktrace_msg_all(c, "Starting gc");
 
@@ -1520,6 +1489,8 @@ static void bch_btree_gc(struct closure *cl)
 
 	btree_gc_start(c);
 
+	atomic_inc(&c->prio_blocked);
+
 	ret = btree_root(gc_root, c, &op, &writes, &stats);
 	closure_sync(&op.cl);
 	closure_sync(&writes);
@@ -1537,6 +1508,9 @@ static void bch_btree_gc(struct closure *cl)
 
 	available = bch_btree_gc_finish(c);
 
+	atomic_dec(&c->prio_blocked);
+	wake_up_allocators(c);
+
 	bch_time_stats_update(&c->btree_gc_time, start_time);
 
 	stats.key_bytes *= sizeof(uint64_t);
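With the per-node prio_blocked counting gone (see btree_node_free() and btree_gc_alloc() above), a single cache-set-wide reference now spans the whole GC run. Reduced to its core, the gate looks like this (illustrative condensation, not new code):

	atomic_inc(&c->prio_blocked);	/* allocator must not reuse freed buckets */

	/* ... btree_root(gc_root, ...) rewrites and frees nodes ... */

	atomic_dec(&c->prio_blocked);
	wake_up_allocators(c);		/* buckets freed during gc are fair game */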
1542 | stats.key_bytes *= sizeof(uint64_t); | 1516 | stats.key_bytes *= sizeof(uint64_t); |
@@ -1544,10 +1518,9 @@ static void bch_btree_gc(struct closure *cl) | |||
1544 | stats.data <<= 9; | 1518 | stats.data <<= 9; |
1545 | stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; | 1519 | stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; |
1546 | memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); | 1520 | memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); |
1547 | blktrace_msg_all(c, "Finished gc"); | ||
1548 | 1521 | ||
1522 | blktrace_msg_all(c, "Finished gc"); | ||
1549 | trace_bcache_gc_end(c->sb.set_uuid); | 1523 | trace_bcache_gc_end(c->sb.set_uuid); |
1550 | wake_up_allocators(c); | ||
1551 | 1524 | ||
1552 | continue_at(cl, bch_moving_gc, bch_gc_wq); | 1525 | continue_at(cl, bch_moving_gc, bch_gc_wq); |
1553 | } | 1526 | } |
@@ -1857,7 +1830,7 @@ merged:
 		   op_type(op), pbtree(b), pkey(k));
 
 	if (b->level && !KEY_OFFSET(k))
-		b->prio_blocked++;
+		btree_current_write(b)->prio_blocked++;
 
 	pr_debug("%s for %s at %s: %s", status,
 		 op_type(op), pbtree(b), pkey(k));
@@ -1907,7 +1880,6 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
 
 	BUG_ON(op->type != BTREE_INSERT);
 	BUG_ON(!btree_insert_key(b, op, &tmp.k));
-	bch_btree_write(b, false, NULL);
 	ret = true;
 out:
 	downgrade_write(&b->lock);
@@ -1967,18 +1939,18 @@ static int btree_split(struct btree *b, struct btree_op *op)
 		bkey_copy_key(&n2->key, &b->key);
 
 		bch_keylist_add(&op->keys, &n2->key);
-		bch_btree_write(n2, true, op);
+		bch_btree_node_write(n2, &op->cl);
 		rw_unlock(true, n2);
 	} else
 		bch_btree_insert_keys(n1, op);
 
 	bch_keylist_add(&op->keys, &n1->key);
-	bch_btree_write(n1, true, op);
+	bch_btree_node_write(n1, &op->cl);
 
 	if (n3) {
 		bkey_copy_key(&n3->key, &MAX_KEY);
 		bch_btree_insert_keys(n3, op);
-		bch_btree_write(n3, true, op);
+		bch_btree_node_write(n3, &op->cl);
 
 		closure_sync(&op->cl);
 		bch_btree_set_root(n3);
@@ -2082,8 +2054,12 @@ static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op,
 
 		BUG_ON(write_block(b) != b->sets[b->nsets].data);
 
-		if (bch_btree_insert_keys(b, op))
-			bch_btree_write(b, false, op);
+		if (bch_btree_insert_keys(b, op)) {
+			if (!b->level)
+				bch_btree_leaf_dirty(b, op);
+			else
+				bch_btree_node_write(b, &op->cl);
+		}
 	}
 
 	return 0;
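The insert path now splits cleanly by level, and the routing is worth spelling out (this reading is inferred from the code above rather than stated by the patch):

	/* The two write disciplines side by side (names from the patch): */
	if (bch_btree_insert_keys(b, op)) {
		if (!b->level)			/* leaf: journaled, flush can wait */
			bch_btree_leaf_dirty(b, op);
		else				/* interior: written immediately, so
						 * the parent's pointer never leads
						 * to an unwritten child */
			bch_btree_node_write(b, &op->cl);
	}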
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index af4a7092a28c..809bd77847a2 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -102,7 +102,6 @@
 #include "debug.h"
 
 struct btree_write {
-	struct closure		*owner;
 	atomic_t		*journal;
 
 	/* If btree_split() frees a btree node, it writes a new pointer to that
@@ -142,16 +141,12 @@ struct btree {
 	 */
 	struct bset_tree	sets[MAX_BSETS];
 
-	/* Used to refcount bio splits, also protects b->bio */
+	/* For outstanding btree writes, used as a lock - protects write_idx */
 	struct closure_with_waitlist io;
 
-	/* Gets transferred to w->prio_blocked - see the comment there */
-	int			prio_blocked;
-
 	struct list_head	list;
 	struct delayed_work	work;
 
-	uint64_t		io_start_time;
 	struct btree_write	writes[2];
 	struct bio		*bio;
 };
@@ -164,13 +159,11 @@ static inline void set_btree_node_ ## flag(struct btree *b)	\
 { set_bit(BTREE_NODE_ ## flag, &b->flags); }			\
 
 enum btree_flags {
-	BTREE_NODE_read_done,
 	BTREE_NODE_io_error,
 	BTREE_NODE_dirty,
 	BTREE_NODE_write_idx,
 };
 
-BTREE_FLAG(read_done);
 BTREE_FLAG(io_error);
 BTREE_FLAG(dirty);
 BTREE_FLAG(write_idx);
@@ -293,9 +286,7 @@ static inline void rw_unlock(bool w, struct btree *b)
 #ifdef CONFIG_BCACHE_EDEBUG
 	unsigned i;
 
-	if (w &&
-	    b->key.ptr[0] &&
-	    btree_node_read_done(b))
+	if (w && b->key.ptr[0])
 		for (i = 0; i <= b->nsets; i++)
 			bch_check_key_order(b, b->sets[i].data);
 #endif
@@ -370,9 +361,9 @@ static inline bool should_split(struct btree *b)
 		 > btree_blocks(b));
 }
 
-void bch_btree_read_done(struct closure *);
-void bch_btree_read(struct btree *);
-void bch_btree_write(struct btree *b, bool now, struct btree_op *op);
+void bch_btree_node_read(struct btree *);
+void bch_btree_node_read_done(struct btree *);
+void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_cannibalize_unlock(struct cache_set *, struct closure *);
 void bch_btree_set_root(struct btree *);
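For reference, the BTREE_FLAG() macro near the top of this file generates the accessors the rest of the diff keeps using; with read_done gone, e.g. BTREE_FLAG(dirty) still expands to roughly the following (the setter half is visible in the macro fragment above, the getter half is inferred from usage):

	static inline bool btree_node_dirty(struct btree *b)
	{
		return test_bit(BTREE_NODE_dirty, &b->flags);
	}

	static inline void set_btree_node_dirty(struct btree *b)
	{
		set_bit(BTREE_NODE_dirty, &b->flags);
	}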
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 89fd5204924e..ae6096c6845d 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -144,7 +144,7 @@ void bch_btree_verify(struct btree *b, struct bset *new)
 	v->written = 0;
 	v->level = b->level;
 
-	bch_btree_read(v);
+	bch_btree_node_read(v);
 	closure_wait_event(&v->io.wait, &cl,
 			   atomic_read(&b->io.cl.remaining) == -1);
 
@@ -512,7 +512,7 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
 
 		bch_btree_sort(b);
 		fill->written = 0;
-		bch_btree_read_done(&fill->io.cl);
+		bch_btree_node_read_done(fill);
 
 		if (b->sets[0].data->keys != fill->sets[0].data->keys ||
 		    memcmp(b->sets[0].data->start,
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 8c8dfdcd9d4c..970d819d4350 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -384,7 +384,7 @@ out:
 	return;
 found:
 	if (btree_node_dirty(best))
-		bch_btree_write(best, true, NULL);
+		bch_btree_node_write(best, NULL);
 	rw_unlock(true, best);
 }
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index aaeda235fc75..e53f89988b08 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1255,9 +1255,10 @@ static void cache_set_free(struct closure *cl)
 	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
 	free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
 
-	kfree(c->fill_iter);
 	if (c->bio_split)
 		bioset_free(c->bio_split);
+	if (c->fill_iter)
+		mempool_destroy(c->fill_iter);
 	if (c->bio_meta)
 		mempool_destroy(c->bio_meta);
 	if (c->search)
@@ -1295,7 +1296,7 @@ static void cache_set_flush(struct closure *cl)
 	/* Should skip this if we're unregistering because of an error */
 	list_for_each_entry(b, &c->btree_cache, list)
 		if (btree_node_dirty(b))
-			bch_btree_write(b, true, NULL);
+			bch_btree_node_write(b, NULL);
 
 	closure_return(cl);
 }
@@ -1374,7 +1375,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 			BTREE_MAX_PAGES);
 
 	mutex_init(&c->bucket_lock);
-	mutex_init(&c->fill_lock);
 	mutex_init(&c->sort_lock);
 	spin_lock_init(&c->sort_time_lock);
 	closure_init_unlocked(&c->sb_write);
@@ -1400,8 +1400,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	    !(c->bio_meta = mempool_create_kmalloc_pool(2,
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
+	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
 	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
-	    !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) ||
 	    !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    bch_journal_alloc(c) ||
@@ -1409,8 +1409,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	    bch_open_buckets_alloc(c))
 		goto err;
 
-	c->fill_iter->size = sb->bucket_size / sb->block_size;
-
 	c->congested_read_threshold_us	= 2000;
 	c->congested_write_threshold_us	= 20000;
 	c->error_limit	= 8 << IO_ERROR_SHIFT;
@@ -1551,7 +1549,7 @@ static void run_cache_set(struct cache_set *c)
 		goto err_unlock_gc;
 
 	bkey_copy_key(&c->root->key, &MAX_KEY);
-	bch_btree_write(c->root, true, &op);
+	bch_btree_node_write(c->root, &op.cl);
 
 	bch_btree_set_root(c->root);
 	rw_unlock(true, c->root);
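One teardown detail worth noting: cache_set_free() also runs on the partially-constructed error path of bch_cache_set_alloc(), which is presumably why the new mempool is destroyed behind a NULL check, matching its neighbours:

	if (c->fill_iter)
		mempool_destroy(c->fill_iter);	/* may never have been created */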