author:    Kent Overstreet <kmo@daterainc.com>  2013-09-10 22:02:45 -0400
committer: Kent Overstreet <kmo@daterainc.com>  2013-11-11 00:56:32 -0500
commit:    220bb38c21b83e2f7b842f33220bf727093eca89
tree:      7388a855ca920ca88c0b9a4d66839946ba4cd848 /drivers/md/bcache/request.c
parent:    cc7b8819212f437fc82f0f9cdc24deb0fb5d775f
bcache: Break up struct search
With all the recent refactoring around struct btree_op, struct search has
gotten rather large.

But we can now easily break it up in a different way: we break out
struct data_insert_op, which is for inserting data into the cache and is
now what the copying gc code uses - struct search is now specific to
request.c.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Diffstat (limited to 'drivers/md/bcache/request.c')
-rw-r--r--  drivers/md/bcache/request.c | 580
1 file changed, 303 insertions(+), 277 deletions(-)
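Before the diff itself, a rough sketch of the struct being split out. The field list below is reconstructed purely from the op-> accesses visible in the hunks that follow; the names match the diff, but the types, ordering and comments are guesses, and the authoritative definition is the one this patch adds to the bcache headers rather than anything shown on this page:

	struct data_insert_op {
		struct closure		cl;		/* bch_data_insert() and friends run out of this */
		struct cache_set	*c;
		struct task_struct	*task;
		struct bio		*bio;

		unsigned		inode;
		unsigned		write_prio;	/* selects the allocation watermark */
		int			error;

		unsigned		bypass:1;	/* invalidate the range instead of caching it */
		unsigned		writeback:1;	/* insert dirty keys; allocation may block */
		unsigned		flush_journal:1;
		unsigned		csum:1;

		unsigned		replace:1;	/* cache-miss insert: fails on collision */
		unsigned		replace_collision:1;
		unsigned		insert_data_done:1;

		/* search_alloc() only zeroes up to this point */
		struct keylist		insert_keys;
		BKEY_PADDED(replace_key);
	};

struct search then embeds one of these as s->iop (see the new struct search definition in the -631,15 +629,169 hunk below), so the write path carries only the insert state instead of the whole search.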
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6cee2ae1d87f..05c7c216f65e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -215,9 +215,9 @@ static void bio_csum(struct bio *bio, struct bkey *k) | |||
215 | 215 | ||
216 | static void bch_data_insert_keys(struct closure *cl) | 216 | static void bch_data_insert_keys(struct closure *cl) |
217 | { | 217 | { |
218 | struct search *s = container_of(cl, struct search, btree); | 218 | struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); |
219 | atomic_t *journal_ref = NULL; | 219 | atomic_t *journal_ref = NULL; |
220 | struct bkey *replace_key = s->replace ? &s->replace_key : NULL; | 220 | struct bkey *replace_key = op->replace ? &op->replace_key : NULL; |
221 | int ret; | 221 | int ret; |
222 | 222 | ||
223 | /* | 223 | /* |
@@ -232,27 +232,26 @@ static void bch_data_insert_keys(struct closure *cl) | |||
232 | closure_sync(&s->cl); | 232 | closure_sync(&s->cl); |
233 | #endif | 233 | #endif |
234 | 234 | ||
235 | if (s->write) | 235 | if (!op->replace) |
236 | journal_ref = bch_journal(s->c, &s->insert_keys, | 236 | journal_ref = bch_journal(op->c, &op->insert_keys, |
237 | s->flush_journal | 237 | op->flush_journal ? cl : NULL); |
238 | ? &s->cl : NULL); | ||
239 | 238 | ||
240 | ret = bch_btree_insert(s->c, &s->insert_keys, | 239 | ret = bch_btree_insert(op->c, &op->insert_keys, |
241 | journal_ref, replace_key); | 240 | journal_ref, replace_key); |
242 | if (ret == -ESRCH) { | 241 | if (ret == -ESRCH) { |
243 | s->insert_collision = true; | 242 | op->replace_collision = true; |
244 | } else if (ret) { | 243 | } else if (ret) { |
245 | s->error = -ENOMEM; | 244 | op->error = -ENOMEM; |
246 | s->insert_data_done = true; | 245 | op->insert_data_done = true; |
247 | } | 246 | } |
248 | 247 | ||
249 | if (journal_ref) | 248 | if (journal_ref) |
250 | atomic_dec_bug(journal_ref); | 249 | atomic_dec_bug(journal_ref); |
251 | 250 | ||
252 | if (!s->insert_data_done) | 251 | if (!op->insert_data_done) |
253 | continue_at(cl, bch_data_insert_start, bcache_wq); | 252 | continue_at(cl, bch_data_insert_start, bcache_wq); |
254 | 253 | ||
255 | bch_keylist_free(&s->insert_keys); | 254 | bch_keylist_free(&op->insert_keys); |
256 | closure_return(cl); | 255 | closure_return(cl); |
257 | } | 256 | } |
258 | 257 | ||
@@ -349,10 +348,10 @@ found: | |||
349 | * | 348 | * |
350 | * If s->writeback is true, will not fail. | 349 | * If s->writeback is true, will not fail. |
351 | */ | 350 | */ |
352 | static bool bch_alloc_sectors(struct bkey *k, unsigned sectors, | 351 | static bool bch_alloc_sectors(struct data_insert_op *op, |
353 | struct search *s) | 352 | struct bkey *k, unsigned sectors) |
354 | { | 353 | { |
355 | struct cache_set *c = s->c; | 354 | struct cache_set *c = op->c; |
356 | struct open_bucket *b; | 355 | struct open_bucket *b; |
357 | BKEY_PADDED(key) alloc; | 356 | BKEY_PADDED(key) alloc; |
358 | unsigned i; | 357 | unsigned i; |
@@ -367,15 +366,15 @@ static bool bch_alloc_sectors(struct bkey *k, unsigned sectors, | |||
367 | bkey_init(&alloc.key); | 366 | bkey_init(&alloc.key); |
368 | spin_lock(&c->data_bucket_lock); | 367 | spin_lock(&c->data_bucket_lock); |
369 | 368 | ||
370 | while (!(b = pick_data_bucket(c, k, s->task, &alloc.key))) { | 369 | while (!(b = pick_data_bucket(c, k, op->task, &alloc.key))) { |
371 | unsigned watermark = s->write_prio | 370 | unsigned watermark = op->write_prio |
372 | ? WATERMARK_MOVINGGC | 371 | ? WATERMARK_MOVINGGC |
373 | : WATERMARK_NONE; | 372 | : WATERMARK_NONE; |
374 | 373 | ||
375 | spin_unlock(&c->data_bucket_lock); | 374 | spin_unlock(&c->data_bucket_lock); |
376 | 375 | ||
377 | if (bch_bucket_alloc_set(c, watermark, &alloc.key, | 376 | if (bch_bucket_alloc_set(c, watermark, &alloc.key, |
378 | 1, s->writeback)) | 377 | 1, op->writeback)) |
379 | return false; | 378 | return false; |
380 | 379 | ||
381 | spin_lock(&c->data_bucket_lock); | 380 | spin_lock(&c->data_bucket_lock); |
@@ -409,7 +408,7 @@ static bool bch_alloc_sectors(struct bkey *k, unsigned sectors, | |||
409 | */ | 408 | */ |
410 | list_move_tail(&b->list, &c->data_buckets); | 409 | list_move_tail(&b->list, &c->data_buckets); |
411 | bkey_copy_key(&b->key, k); | 410 | bkey_copy_key(&b->key, k); |
412 | b->last = s->task; | 411 | b->last = op->task; |
413 | 412 | ||
414 | b->sectors_free -= sectors; | 413 | b->sectors_free -= sectors; |
415 | 414 | ||
@@ -438,8 +437,8 @@ static bool bch_alloc_sectors(struct bkey *k, unsigned sectors, | |||
438 | 437 | ||
439 | static void bch_data_invalidate(struct closure *cl) | 438 | static void bch_data_invalidate(struct closure *cl) |
440 | { | 439 | { |
441 | struct search *s = container_of(cl, struct search, btree); | 440 | struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); |
442 | struct bio *bio = s->cache_bio; | 441 | struct bio *bio = op->bio; |
443 | 442 | ||
444 | pr_debug("invalidating %i sectors from %llu", | 443 | pr_debug("invalidating %i sectors from %llu", |
445 | bio_sectors(bio), (uint64_t) bio->bi_sector); | 444 | bio_sectors(bio), (uint64_t) bio->bi_sector); |
@@ -447,17 +446,17 @@ static void bch_data_invalidate(struct closure *cl) | |||
447 | while (bio_sectors(bio)) { | 446 | while (bio_sectors(bio)) { |
448 | unsigned len = min(bio_sectors(bio), 1U << 14); | 447 | unsigned len = min(bio_sectors(bio), 1U << 14); |
449 | 448 | ||
450 | if (bch_keylist_realloc(&s->insert_keys, 0, s->c)) | 449 | if (bch_keylist_realloc(&op->insert_keys, 0, op->c)) |
451 | goto out; | 450 | goto out; |
452 | 451 | ||
453 | bio->bi_sector += len; | 452 | bio->bi_sector += len; |
454 | bio->bi_size -= len << 9; | 453 | bio->bi_size -= len << 9; |
455 | 454 | ||
456 | bch_keylist_add(&s->insert_keys, | 455 | bch_keylist_add(&op->insert_keys, |
457 | &KEY(s->inode, bio->bi_sector, len)); | 456 | &KEY(op->inode, bio->bi_sector, len)); |
458 | } | 457 | } |
459 | 458 | ||
460 | s->insert_data_done = true; | 459 | op->insert_data_done = true; |
461 | bio_put(bio); | 460 | bio_put(bio); |
462 | out: | 461 | out: |
463 | continue_at(cl, bch_data_insert_keys, bcache_wq); | 462 | continue_at(cl, bch_data_insert_keys, bcache_wq); |
@@ -465,7 +464,7 @@ out: | |||
465 | 464 | ||
466 | static void bch_data_insert_error(struct closure *cl) | 465 | static void bch_data_insert_error(struct closure *cl) |
467 | { | 466 | { |
468 | struct search *s = container_of(cl, struct search, btree); | 467 | struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); |
469 | 468 | ||
470 | /* | 469 | /* |
471 | * Our data write just errored, which means we've got a bunch of keys to | 470 | * Our data write just errored, which means we've got a bunch of keys to |
@@ -476,9 +475,9 @@ static void bch_data_insert_error(struct closure *cl) | |||
476 | * from the keys we'll accomplish just that. | 475 | * from the keys we'll accomplish just that. |
477 | */ | 476 | */ |
478 | 477 | ||
479 | struct bkey *src = s->insert_keys.keys, *dst = s->insert_keys.keys; | 478 | struct bkey *src = op->insert_keys.keys, *dst = op->insert_keys.keys; |
480 | 479 | ||
481 | while (src != s->insert_keys.top) { | 480 | while (src != op->insert_keys.top) { |
482 | struct bkey *n = bkey_next(src); | 481 | struct bkey *n = bkey_next(src); |
483 | 482 | ||
484 | SET_KEY_PTRS(src, 0); | 483 | SET_KEY_PTRS(src, 0); |
@@ -488,7 +487,7 @@ static void bch_data_insert_error(struct closure *cl) | |||
488 | src = n; | 487 | src = n; |
489 | } | 488 | } |
490 | 489 | ||
491 | s->insert_keys.top = dst; | 490 | op->insert_keys.top = dst; |
492 | 491 | ||
493 | bch_data_insert_keys(cl); | 492 | bch_data_insert_keys(cl); |
494 | } | 493 | } |
@@ -496,32 +495,32 @@ static void bch_data_insert_error(struct closure *cl) | |||
496 | static void bch_data_insert_endio(struct bio *bio, int error) | 495 | static void bch_data_insert_endio(struct bio *bio, int error) |
497 | { | 496 | { |
498 | struct closure *cl = bio->bi_private; | 497 | struct closure *cl = bio->bi_private; |
499 | struct search *s = container_of(cl, struct search, btree); | 498 | struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); |
500 | 499 | ||
501 | if (error) { | 500 | if (error) { |
502 | /* TODO: We could try to recover from this. */ | 501 | /* TODO: We could try to recover from this. */ |
503 | if (s->writeback) | 502 | if (op->writeback) |
504 | s->error = error; | 503 | op->error = error; |
505 | else if (s->write) | 504 | else if (!op->replace) |
506 | set_closure_fn(cl, bch_data_insert_error, bcache_wq); | 505 | set_closure_fn(cl, bch_data_insert_error, bcache_wq); |
507 | else | 506 | else |
508 | set_closure_fn(cl, NULL, NULL); | 507 | set_closure_fn(cl, NULL, NULL); |
509 | } | 508 | } |
510 | 509 | ||
511 | bch_bbio_endio(s->c, bio, error, "writing data to cache"); | 510 | bch_bbio_endio(op->c, bio, error, "writing data to cache"); |
512 | } | 511 | } |
513 | 512 | ||
514 | static void bch_data_insert_start(struct closure *cl) | 513 | static void bch_data_insert_start(struct closure *cl) |
515 | { | 514 | { |
516 | struct search *s = container_of(cl, struct search, btree); | 515 | struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); |
517 | struct bio *bio = s->cache_bio, *n; | 516 | struct bio *bio = op->bio, *n; |
518 | 517 | ||
519 | if (s->bypass) | 518 | if (op->bypass) |
520 | return bch_data_invalidate(cl); | 519 | return bch_data_invalidate(cl); |
521 | 520 | ||
522 | if (atomic_sub_return(bio_sectors(bio), &s->c->sectors_to_gc) < 0) { | 521 | if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { |
523 | set_gc_sectors(s->c); | 522 | set_gc_sectors(op->c); |
524 | wake_up_gc(s->c); | 523 | wake_up_gc(op->c); |
525 | } | 524 | } |
526 | 525 | ||
527 | /* | 526 | /* |
@@ -533,21 +532,20 @@ static void bch_data_insert_start(struct closure *cl) | |||
533 | do { | 532 | do { |
534 | unsigned i; | 533 | unsigned i; |
535 | struct bkey *k; | 534 | struct bkey *k; |
536 | struct bio_set *split = s->d | 535 | struct bio_set *split = op->c->bio_split; |
537 | ? s->d->bio_split : s->c->bio_split; | ||
538 | 536 | ||
539 | /* 1 for the device pointer and 1 for the chksum */ | 537 | /* 1 for the device pointer and 1 for the chksum */ |
540 | if (bch_keylist_realloc(&s->insert_keys, | 538 | if (bch_keylist_realloc(&op->insert_keys, |
541 | 1 + (s->csum ? 1 : 0), | 539 | 1 + (op->csum ? 1 : 0), |
542 | s->c)) | 540 | op->c)) |
543 | continue_at(cl, bch_data_insert_keys, bcache_wq); | 541 | continue_at(cl, bch_data_insert_keys, bcache_wq); |
544 | 542 | ||
545 | k = s->insert_keys.top; | 543 | k = op->insert_keys.top; |
546 | bkey_init(k); | 544 | bkey_init(k); |
547 | SET_KEY_INODE(k, s->inode); | 545 | SET_KEY_INODE(k, op->inode); |
548 | SET_KEY_OFFSET(k, bio->bi_sector); | 546 | SET_KEY_OFFSET(k, bio->bi_sector); |
549 | 547 | ||
550 | if (!bch_alloc_sectors(k, bio_sectors(bio), s)) | 548 | if (!bch_alloc_sectors(op, k, bio_sectors(bio))) |
551 | goto err; | 549 | goto err; |
552 | 550 | ||
553 | n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); | 551 | n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split); |
@@ -555,30 +553,30 @@ static void bch_data_insert_start(struct closure *cl) | |||
555 | n->bi_end_io = bch_data_insert_endio; | 553 | n->bi_end_io = bch_data_insert_endio; |
556 | n->bi_private = cl; | 554 | n->bi_private = cl; |
557 | 555 | ||
558 | if (s->writeback) { | 556 | if (op->writeback) { |
559 | SET_KEY_DIRTY(k, true); | 557 | SET_KEY_DIRTY(k, true); |
560 | 558 | ||
561 | for (i = 0; i < KEY_PTRS(k); i++) | 559 | for (i = 0; i < KEY_PTRS(k); i++) |
562 | SET_GC_MARK(PTR_BUCKET(s->c, k, i), | 560 | SET_GC_MARK(PTR_BUCKET(op->c, k, i), |
563 | GC_MARK_DIRTY); | 561 | GC_MARK_DIRTY); |
564 | } | 562 | } |
565 | 563 | ||
566 | SET_KEY_CSUM(k, s->csum); | 564 | SET_KEY_CSUM(k, op->csum); |
567 | if (KEY_CSUM(k)) | 565 | if (KEY_CSUM(k)) |
568 | bio_csum(n, k); | 566 | bio_csum(n, k); |
569 | 567 | ||
570 | trace_bcache_cache_insert(k); | 568 | trace_bcache_cache_insert(k); |
571 | bch_keylist_push(&s->insert_keys); | 569 | bch_keylist_push(&op->insert_keys); |
572 | 570 | ||
573 | n->bi_rw |= REQ_WRITE; | 571 | n->bi_rw |= REQ_WRITE; |
574 | bch_submit_bbio(n, s->c, k, 0); | 572 | bch_submit_bbio(n, op->c, k, 0); |
575 | } while (n != bio); | 573 | } while (n != bio); |
576 | 574 | ||
577 | s->insert_data_done = true; | 575 | op->insert_data_done = true; |
578 | continue_at(cl, bch_data_insert_keys, bcache_wq); | 576 | continue_at(cl, bch_data_insert_keys, bcache_wq); |
579 | err: | 577 | err: |
580 | /* bch_alloc_sectors() blocks if s->writeback = true */ | 578 | /* bch_alloc_sectors() blocks if s->writeback = true */ |
581 | BUG_ON(s->writeback); | 579 | BUG_ON(op->writeback); |
582 | 580 | ||
583 | /* | 581 | /* |
584 | * But if it's not a writeback write we'd rather just bail out if | 582 | * But if it's not a writeback write we'd rather just bail out if |
@@ -586,24 +584,24 @@ err: | |||
586 | * we might be starving btree writes for gc or something. | 584 | * we might be starving btree writes for gc or something. |
587 | */ | 585 | */ |
588 | 586 | ||
589 | if (s->write) { | 587 | if (!op->replace) { |
590 | /* | 588 | /* |
591 | * Writethrough write: We can't complete the write until we've | 589 | * Writethrough write: We can't complete the write until we've |
592 | * updated the index. But we don't want to delay the write while | 590 | * updated the index. But we don't want to delay the write while |
593 | * we wait for buckets to be freed up, so just invalidate the | 591 | * we wait for buckets to be freed up, so just invalidate the |
594 | * rest of the write. | 592 | * rest of the write. |
595 | */ | 593 | */ |
596 | s->bypass = true; | 594 | op->bypass = true; |
597 | return bch_data_invalidate(cl); | 595 | return bch_data_invalidate(cl); |
598 | } else { | 596 | } else { |
599 | /* | 597 | /* |
600 | * From a cache miss, we can just insert the keys for the data | 598 | * From a cache miss, we can just insert the keys for the data |
601 | * we have written or bail out if we didn't do anything. | 599 | * we have written or bail out if we didn't do anything. |
602 | */ | 600 | */ |
603 | s->insert_data_done = true; | 601 | op->insert_data_done = true; |
604 | bio_put(bio); | 602 | bio_put(bio); |
605 | 603 | ||
606 | if (!bch_keylist_empty(&s->insert_keys)) | 604 | if (!bch_keylist_empty(&op->insert_keys)) |
607 | continue_at(cl, bch_data_insert_keys, bcache_wq); | 605 | continue_at(cl, bch_data_insert_keys, bcache_wq); |
608 | else | 606 | else |
609 | closure_return(cl); | 607 | closure_return(cl); |
@@ -631,15 +629,169 @@ err: | |||
631 | */ | 629 | */ |
632 | void bch_data_insert(struct closure *cl) | 630 | void bch_data_insert(struct closure *cl) |
633 | { | 631 | { |
634 | struct search *s = container_of(cl, struct search, btree); | 632 | struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); |
635 | 633 | ||
636 | bch_keylist_init(&s->insert_keys); | 634 | trace_bcache_write(op->bio, op->writeback, op->bypass); |
637 | bio_get(s->cache_bio); | 635 | |
636 | bch_keylist_init(&op->insert_keys); | ||
637 | bio_get(op->bio); | ||
638 | bch_data_insert_start(cl); | 638 | bch_data_insert_start(cl); |
639 | } | 639 | } |
640 | 640 | ||
641 | /* Congested? */ | ||
642 | |||
643 | unsigned bch_get_congested(struct cache_set *c) | ||
644 | { | ||
645 | int i; | ||
646 | long rand; | ||
647 | |||
648 | if (!c->congested_read_threshold_us && | ||
649 | !c->congested_write_threshold_us) | ||
650 | return 0; | ||
651 | |||
652 | i = (local_clock_us() - c->congested_last_us) / 1024; | ||
653 | if (i < 0) | ||
654 | return 0; | ||
655 | |||
656 | i += atomic_read(&c->congested); | ||
657 | if (i >= 0) | ||
658 | return 0; | ||
659 | |||
660 | i += CONGESTED_MAX; | ||
661 | |||
662 | if (i > 0) | ||
663 | i = fract_exp_two(i, 6); | ||
664 | |||
665 | rand = get_random_int(); | ||
666 | i -= bitmap_weight(&rand, BITS_PER_LONG); | ||
667 | |||
668 | return i > 0 ? i : 1; | ||
669 | } | ||
670 | |||
671 | static void add_sequential(struct task_struct *t) | ||
672 | { | ||
673 | ewma_add(t->sequential_io_avg, | ||
674 | t->sequential_io, 8, 0); | ||
675 | |||
676 | t->sequential_io = 0; | ||
677 | } | ||
678 | |||
679 | static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) | ||
680 | { | ||
681 | return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; | ||
682 | } | ||
683 | |||
684 | static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) | ||
685 | { | ||
686 | struct cache_set *c = dc->disk.c; | ||
687 | unsigned mode = cache_mode(dc, bio); | ||
688 | unsigned sectors, congested = bch_get_congested(c); | ||
689 | struct task_struct *task = current; | ||
690 | |||
691 | if (atomic_read(&dc->disk.detaching) || | ||
692 | c->gc_stats.in_use > CUTOFF_CACHE_ADD || | ||
693 | (bio->bi_rw & REQ_DISCARD)) | ||
694 | goto skip; | ||
695 | |||
696 | if (mode == CACHE_MODE_NONE || | ||
697 | (mode == CACHE_MODE_WRITEAROUND && | ||
698 | (bio->bi_rw & REQ_WRITE))) | ||
699 | goto skip; | ||
700 | |||
701 | if (bio->bi_sector & (c->sb.block_size - 1) || | ||
702 | bio_sectors(bio) & (c->sb.block_size - 1)) { | ||
703 | pr_debug("skipping unaligned io"); | ||
704 | goto skip; | ||
705 | } | ||
706 | |||
707 | if (!congested && !dc->sequential_cutoff) | ||
708 | goto rescale; | ||
709 | |||
710 | if (!congested && | ||
711 | mode == CACHE_MODE_WRITEBACK && | ||
712 | (bio->bi_rw & REQ_WRITE) && | ||
713 | (bio->bi_rw & REQ_SYNC)) | ||
714 | goto rescale; | ||
715 | |||
716 | if (dc->sequential_merge) { | ||
717 | struct io *i; | ||
718 | |||
719 | spin_lock(&dc->io_lock); | ||
720 | |||
721 | hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash) | ||
722 | if (i->last == bio->bi_sector && | ||
723 | time_before(jiffies, i->jiffies)) | ||
724 | goto found; | ||
725 | |||
726 | i = list_first_entry(&dc->io_lru, struct io, lru); | ||
727 | |||
728 | add_sequential(task); | ||
729 | i->sequential = 0; | ||
730 | found: | ||
731 | if (i->sequential + bio->bi_size > i->sequential) | ||
732 | i->sequential += bio->bi_size; | ||
733 | |||
734 | i->last = bio_end_sector(bio); | ||
735 | i->jiffies = jiffies + msecs_to_jiffies(5000); | ||
736 | task->sequential_io = i->sequential; | ||
737 | |||
738 | hlist_del(&i->hash); | ||
739 | hlist_add_head(&i->hash, iohash(dc, i->last)); | ||
740 | list_move_tail(&i->lru, &dc->io_lru); | ||
741 | |||
742 | spin_unlock(&dc->io_lock); | ||
743 | } else { | ||
744 | task->sequential_io = bio->bi_size; | ||
745 | |||
746 | add_sequential(task); | ||
747 | } | ||
748 | |||
749 | sectors = max(task->sequential_io, | ||
750 | task->sequential_io_avg) >> 9; | ||
751 | |||
752 | if (dc->sequential_cutoff && | ||
753 | sectors >= dc->sequential_cutoff >> 9) { | ||
754 | trace_bcache_bypass_sequential(bio); | ||
755 | goto skip; | ||
756 | } | ||
757 | |||
758 | if (congested && sectors >= congested) { | ||
759 | trace_bcache_bypass_congested(bio); | ||
760 | goto skip; | ||
761 | } | ||
762 | |||
763 | rescale: | ||
764 | bch_rescale_priorities(c, bio_sectors(bio)); | ||
765 | return false; | ||
766 | skip: | ||
767 | bch_mark_sectors_bypassed(c, dc, bio_sectors(bio)); | ||
768 | return true; | ||
769 | } | ||
770 | |||
641 | /* Cache lookup */ | 771 | /* Cache lookup */ |
642 | 772 | ||
773 | struct search { | ||
774 | /* Stack frame for bio_complete */ | ||
775 | struct closure cl; | ||
776 | |||
777 | struct bcache_device *d; | ||
778 | |||
779 | struct bbio bio; | ||
780 | struct bio *orig_bio; | ||
781 | struct bio *cache_miss; | ||
782 | |||
783 | unsigned insert_bio_sectors; | ||
784 | |||
785 | unsigned recoverable:1; | ||
786 | unsigned unaligned_bvec:1; | ||
787 | unsigned write:1; | ||
788 | |||
789 | unsigned long start_time; | ||
790 | |||
791 | struct btree_op op; | ||
792 | struct data_insert_op iop; | ||
793 | }; | ||
794 | |||
643 | static void bch_cache_read_endio(struct bio *bio, int error) | 795 | static void bch_cache_read_endio(struct bio *bio, int error) |
644 | { | 796 | { |
645 | struct bbio *b = container_of(bio, struct bbio, bio); | 797 | struct bbio *b = container_of(bio, struct bbio, bio); |
@@ -654,13 +806,13 @@ static void bch_cache_read_endio(struct bio *bio, int error) | |||
654 | */ | 806 | */ |
655 | 807 | ||
656 | if (error) | 808 | if (error) |
657 | s->error = error; | 809 | s->iop.error = error; |
658 | else if (ptr_stale(s->c, &b->key, 0)) { | 810 | else if (ptr_stale(s->iop.c, &b->key, 0)) { |
659 | atomic_long_inc(&s->c->cache_read_races); | 811 | atomic_long_inc(&s->iop.c->cache_read_races); |
660 | s->error = -EINTR; | 812 | s->iop.error = -EINTR; |
661 | } | 813 | } |
662 | 814 | ||
663 | bch_bbio_endio(s->c, bio, error, "reading from cache"); | 815 | bch_bbio_endio(s->iop.c, bio, error, "reading from cache"); |
664 | } | 816 | } |
665 | 817 | ||
666 | /* | 818 | /* |
@@ -674,13 +826,13 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k) | |||
674 | struct bkey *bio_key; | 826 | struct bkey *bio_key; |
675 | unsigned ptr; | 827 | unsigned ptr; |
676 | 828 | ||
677 | if (bkey_cmp(k, &KEY(s->inode, bio->bi_sector, 0)) <= 0) | 829 | if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_sector, 0)) <= 0) |
678 | return MAP_CONTINUE; | 830 | return MAP_CONTINUE; |
679 | 831 | ||
680 | if (KEY_INODE(k) != s->inode || | 832 | if (KEY_INODE(k) != s->iop.inode || |
681 | KEY_START(k) > bio->bi_sector) { | 833 | KEY_START(k) > bio->bi_sector) { |
682 | unsigned bio_sectors = bio_sectors(bio); | 834 | unsigned bio_sectors = bio_sectors(bio); |
683 | unsigned sectors = KEY_INODE(k) == s->inode | 835 | unsigned sectors = KEY_INODE(k) == s->iop.inode |
684 | ? min_t(uint64_t, INT_MAX, | 836 | ? min_t(uint64_t, INT_MAX, |
685 | KEY_START(k) - bio->bi_sector) | 837 | KEY_START(k) - bio->bi_sector) |
686 | : INT_MAX; | 838 | : INT_MAX; |
@@ -708,8 +860,8 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k) | |||
708 | bio_key = &container_of(n, struct bbio, bio)->key; | 860 | bio_key = &container_of(n, struct bbio, bio)->key; |
709 | bch_bkey_copy_single_ptr(bio_key, k, ptr); | 861 | bch_bkey_copy_single_ptr(bio_key, k, ptr); |
710 | 862 | ||
711 | bch_cut_front(&KEY(s->inode, n->bi_sector, 0), bio_key); | 863 | bch_cut_front(&KEY(s->iop.inode, n->bi_sector, 0), bio_key); |
712 | bch_cut_back(&KEY(s->inode, bio_end_sector(n), 0), bio_key); | 864 | bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key); |
713 | 865 | ||
714 | n->bi_end_io = bch_cache_read_endio; | 866 | n->bi_end_io = bch_cache_read_endio; |
715 | n->bi_private = &s->cl; | 867 | n->bi_private = &s->cl; |
@@ -731,11 +883,11 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k) | |||
731 | 883 | ||
732 | static void cache_lookup(struct closure *cl) | 884 | static void cache_lookup(struct closure *cl) |
733 | { | 885 | { |
734 | struct search *s = container_of(cl, struct search, btree); | 886 | struct search *s = container_of(cl, struct search, iop.cl); |
735 | struct bio *bio = &s->bio.bio; | 887 | struct bio *bio = &s->bio.bio; |
736 | 888 | ||
737 | int ret = bch_btree_map_keys(&s->op, s->c, | 889 | int ret = bch_btree_map_keys(&s->op, s->iop.c, |
738 | &KEY(s->inode, bio->bi_sector, 0), | 890 | &KEY(s->iop.inode, bio->bi_sector, 0), |
739 | cache_lookup_fn, MAP_END_KEY); | 891 | cache_lookup_fn, MAP_END_KEY); |
740 | if (ret == -EAGAIN) | 892 | if (ret == -EAGAIN) |
741 | continue_at(cl, cache_lookup, bcache_wq); | 893 | continue_at(cl, cache_lookup, bcache_wq); |
@@ -751,7 +903,7 @@ static void request_endio(struct bio *bio, int error) | |||
751 | 903 | ||
752 | if (error) { | 904 | if (error) { |
753 | struct search *s = container_of(cl, struct search, cl); | 905 | struct search *s = container_of(cl, struct search, cl); |
754 | s->error = error; | 906 | s->iop.error = error; |
755 | /* Only cache read errors are recoverable */ | 907 | /* Only cache read errors are recoverable */ |
756 | s->recoverable = false; | 908 | s->recoverable = false; |
757 | } | 909 | } |
@@ -771,8 +923,8 @@ static void bio_complete(struct search *s) | |||
771 | part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration); | 923 | part_stat_add(cpu, &s->d->disk->part0, ticks[rw], duration); |
772 | part_stat_unlock(); | 924 | part_stat_unlock(); |
773 | 925 | ||
774 | trace_bcache_request_end(s, s->orig_bio); | 926 | trace_bcache_request_end(s->d, s->orig_bio); |
775 | bio_endio(s->orig_bio, s->error); | 927 | bio_endio(s->orig_bio, s->iop.error); |
776 | s->orig_bio = NULL; | 928 | s->orig_bio = NULL; |
777 | } | 929 | } |
778 | } | 930 | } |
@@ -792,8 +944,8 @@ static void search_free(struct closure *cl) | |||
792 | struct search *s = container_of(cl, struct search, cl); | 944 | struct search *s = container_of(cl, struct search, cl); |
793 | bio_complete(s); | 945 | bio_complete(s); |
794 | 946 | ||
795 | if (s->cache_bio) | 947 | if (s->iop.bio) |
796 | bio_put(s->cache_bio); | 948 | bio_put(s->iop.bio); |
797 | 949 | ||
798 | if (s->unaligned_bvec) | 950 | if (s->unaligned_bvec) |
799 | mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec); | 951 | mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec); |
@@ -808,18 +960,18 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d) | |||
808 | struct bio_vec *bv; | 960 | struct bio_vec *bv; |
809 | 961 | ||
810 | s = mempool_alloc(d->c->search, GFP_NOIO); | 962 | s = mempool_alloc(d->c->search, GFP_NOIO); |
811 | memset(s, 0, offsetof(struct search, insert_keys)); | 963 | memset(s, 0, offsetof(struct search, iop.insert_keys)); |
812 | 964 | ||
813 | __closure_init(&s->cl, NULL); | 965 | __closure_init(&s->cl, NULL); |
814 | 966 | ||
815 | s->inode = d->id; | 967 | s->iop.inode = d->id; |
816 | s->c = d->c; | 968 | s->iop.c = d->c; |
817 | s->d = d; | 969 | s->d = d; |
818 | s->op.lock = -1; | 970 | s->op.lock = -1; |
819 | s->task = current; | 971 | s->iop.task = current; |
820 | s->orig_bio = bio; | 972 | s->orig_bio = bio; |
821 | s->write = (bio->bi_rw & REQ_WRITE) != 0; | 973 | s->write = (bio->bi_rw & REQ_WRITE) != 0; |
822 | s->flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; | 974 | s->iop.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; |
823 | s->recoverable = 1; | 975 | s->recoverable = 1; |
824 | s->start_time = jiffies; | 976 | s->start_time = jiffies; |
825 | do_bio_hook(s); | 977 | do_bio_hook(s); |
@@ -847,148 +999,20 @@ static void cached_dev_bio_complete(struct closure *cl) | |||
847 | cached_dev_put(dc); | 999 | cached_dev_put(dc); |
848 | } | 1000 | } |
849 | 1001 | ||
850 | unsigned bch_get_congested(struct cache_set *c) | ||
851 | { | ||
852 | int i; | ||
853 | long rand; | ||
854 | |||
855 | if (!c->congested_read_threshold_us && | ||
856 | !c->congested_write_threshold_us) | ||
857 | return 0; | ||
858 | |||
859 | i = (local_clock_us() - c->congested_last_us) / 1024; | ||
860 | if (i < 0) | ||
861 | return 0; | ||
862 | |||
863 | i += atomic_read(&c->congested); | ||
864 | if (i >= 0) | ||
865 | return 0; | ||
866 | |||
867 | i += CONGESTED_MAX; | ||
868 | |||
869 | if (i > 0) | ||
870 | i = fract_exp_two(i, 6); | ||
871 | |||
872 | rand = get_random_int(); | ||
873 | i -= bitmap_weight(&rand, BITS_PER_LONG); | ||
874 | |||
875 | return i > 0 ? i : 1; | ||
876 | } | ||
877 | |||
878 | static void add_sequential(struct task_struct *t) | ||
879 | { | ||
880 | ewma_add(t->sequential_io_avg, | ||
881 | t->sequential_io, 8, 0); | ||
882 | |||
883 | t->sequential_io = 0; | ||
884 | } | ||
885 | |||
886 | static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) | ||
887 | { | ||
888 | return &dc->io_hash[hash_64(k, RECENT_IO_BITS)]; | ||
889 | } | ||
890 | |||
891 | static bool check_should_bypass(struct cached_dev *dc, struct search *s) | ||
892 | { | ||
893 | struct cache_set *c = s->c; | ||
894 | struct bio *bio = &s->bio.bio; | ||
895 | unsigned mode = cache_mode(dc, bio); | ||
896 | unsigned sectors, congested = bch_get_congested(c); | ||
897 | |||
898 | if (atomic_read(&dc->disk.detaching) || | ||
899 | c->gc_stats.in_use > CUTOFF_CACHE_ADD || | ||
900 | (bio->bi_rw & REQ_DISCARD)) | ||
901 | goto skip; | ||
902 | |||
903 | if (mode == CACHE_MODE_NONE || | ||
904 | (mode == CACHE_MODE_WRITEAROUND && | ||
905 | (bio->bi_rw & REQ_WRITE))) | ||
906 | goto skip; | ||
907 | |||
908 | if (bio->bi_sector & (c->sb.block_size - 1) || | ||
909 | bio_sectors(bio) & (c->sb.block_size - 1)) { | ||
910 | pr_debug("skipping unaligned io"); | ||
911 | goto skip; | ||
912 | } | ||
913 | |||
914 | if (!congested && !dc->sequential_cutoff) | ||
915 | goto rescale; | ||
916 | |||
917 | if (!congested && | ||
918 | mode == CACHE_MODE_WRITEBACK && | ||
919 | (bio->bi_rw & REQ_WRITE) && | ||
920 | (bio->bi_rw & REQ_SYNC)) | ||
921 | goto rescale; | ||
922 | |||
923 | if (dc->sequential_merge) { | ||
924 | struct io *i; | ||
925 | |||
926 | spin_lock(&dc->io_lock); | ||
927 | |||
928 | hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash) | ||
929 | if (i->last == bio->bi_sector && | ||
930 | time_before(jiffies, i->jiffies)) | ||
931 | goto found; | ||
932 | |||
933 | i = list_first_entry(&dc->io_lru, struct io, lru); | ||
934 | |||
935 | add_sequential(s->task); | ||
936 | i->sequential = 0; | ||
937 | found: | ||
938 | if (i->sequential + bio->bi_size > i->sequential) | ||
939 | i->sequential += bio->bi_size; | ||
940 | |||
941 | i->last = bio_end_sector(bio); | ||
942 | i->jiffies = jiffies + msecs_to_jiffies(5000); | ||
943 | s->task->sequential_io = i->sequential; | ||
944 | |||
945 | hlist_del(&i->hash); | ||
946 | hlist_add_head(&i->hash, iohash(dc, i->last)); | ||
947 | list_move_tail(&i->lru, &dc->io_lru); | ||
948 | |||
949 | spin_unlock(&dc->io_lock); | ||
950 | } else { | ||
951 | s->task->sequential_io = bio->bi_size; | ||
952 | |||
953 | add_sequential(s->task); | ||
954 | } | ||
955 | |||
956 | sectors = max(s->task->sequential_io, | ||
957 | s->task->sequential_io_avg) >> 9; | ||
958 | |||
959 | if (dc->sequential_cutoff && | ||
960 | sectors >= dc->sequential_cutoff >> 9) { | ||
961 | trace_bcache_bypass_sequential(s->orig_bio); | ||
962 | goto skip; | ||
963 | } | ||
964 | |||
965 | if (congested && sectors >= congested) { | ||
966 | trace_bcache_bypass_congested(s->orig_bio); | ||
967 | goto skip; | ||
968 | } | ||
969 | |||
970 | rescale: | ||
971 | bch_rescale_priorities(c, bio_sectors(bio)); | ||
972 | return false; | ||
973 | skip: | ||
974 | bch_mark_sectors_bypassed(s, bio_sectors(bio)); | ||
975 | return true; | ||
976 | } | ||
977 | |||
978 | /* Process reads */ | 1002 | /* Process reads */ |
979 | 1003 | ||
980 | static void cached_dev_cache_miss_done(struct closure *cl) | 1004 | static void cached_dev_cache_miss_done(struct closure *cl) |
981 | { | 1005 | { |
982 | struct search *s = container_of(cl, struct search, cl); | 1006 | struct search *s = container_of(cl, struct search, cl); |
983 | 1007 | ||
984 | if (s->insert_collision) | 1008 | if (s->iop.replace_collision) |
985 | bch_mark_cache_miss_collision(s); | 1009 | bch_mark_cache_miss_collision(s->iop.c, s->d); |
986 | 1010 | ||
987 | if (s->cache_bio) { | 1011 | if (s->iop.bio) { |
988 | int i; | 1012 | int i; |
989 | struct bio_vec *bv; | 1013 | struct bio_vec *bv; |
990 | 1014 | ||
991 | bio_for_each_segment_all(bv, s->cache_bio, i) | 1015 | bio_for_each_segment_all(bv, s->iop.bio, i) |
992 | __free_page(bv->bv_page); | 1016 | __free_page(bv->bv_page); |
993 | } | 1017 | } |
994 | 1018 | ||
@@ -1006,7 +1030,7 @@ static void cached_dev_read_error(struct closure *cl) | |||
1006 | /* Retry from the backing device: */ | 1030 | /* Retry from the backing device: */ |
1007 | trace_bcache_read_retry(s->orig_bio); | 1031 | trace_bcache_read_retry(s->orig_bio); |
1008 | 1032 | ||
1009 | s->error = 0; | 1033 | s->iop.error = 0; |
1010 | bv = s->bio.bio.bi_io_vec; | 1034 | bv = s->bio.bio.bi_io_vec; |
1011 | do_bio_hook(s); | 1035 | do_bio_hook(s); |
1012 | s->bio.bio.bi_io_vec = bv; | 1036 | s->bio.bio.bi_io_vec = bv; |
@@ -1041,29 +1065,28 @@ static void cached_dev_read_done(struct closure *cl) | |||
1041 | * to the buffers the original bio pointed to: | 1065 | * to the buffers the original bio pointed to: |
1042 | */ | 1066 | */ |
1043 | 1067 | ||
1044 | if (s->cache_bio) { | 1068 | if (s->iop.bio) { |
1045 | bio_reset(s->cache_bio); | 1069 | bio_reset(s->iop.bio); |
1046 | s->cache_bio->bi_sector = | 1070 | s->iop.bio->bi_sector = s->cache_miss->bi_sector; |
1047 | s->cache_miss->bi_sector; | 1071 | s->iop.bio->bi_bdev = s->cache_miss->bi_bdev; |
1048 | s->cache_bio->bi_bdev = s->cache_miss->bi_bdev; | 1072 | s->iop.bio->bi_size = s->insert_bio_sectors << 9; |
1049 | s->cache_bio->bi_size = s->cache_bio_sectors << 9; | 1073 | bch_bio_map(s->iop.bio, NULL); |
1050 | bch_bio_map(s->cache_bio, NULL); | ||
1051 | 1074 | ||
1052 | bio_copy_data(s->cache_miss, s->cache_bio); | 1075 | bio_copy_data(s->cache_miss, s->iop.bio); |
1053 | 1076 | ||
1054 | bio_put(s->cache_miss); | 1077 | bio_put(s->cache_miss); |
1055 | s->cache_miss = NULL; | 1078 | s->cache_miss = NULL; |
1056 | } | 1079 | } |
1057 | 1080 | ||
1058 | if (verify(dc, &s->bio.bio) && s->recoverable) | 1081 | if (verify(dc, &s->bio.bio) && s->recoverable && !s->unaligned_bvec) |
1059 | bch_data_verify(s); | 1082 | bch_data_verify(dc, s->orig_bio); |
1060 | 1083 | ||
1061 | bio_complete(s); | 1084 | bio_complete(s); |
1062 | 1085 | ||
1063 | if (s->cache_bio && | 1086 | if (s->iop.bio && |
1064 | !test_bit(CACHE_SET_STOPPING, &s->c->flags)) { | 1087 | !test_bit(CACHE_SET_STOPPING, &s->iop.c->flags)) { |
1065 | BUG_ON(!s->replace); | 1088 | BUG_ON(!s->iop.replace); |
1066 | closure_call(&s->btree, bch_data_insert, NULL, cl); | 1089 | closure_call(&s->iop.cl, bch_data_insert, NULL, cl); |
1067 | } | 1090 | } |
1068 | 1091 | ||
1069 | continue_at(cl, cached_dev_cache_miss_done, NULL); | 1092 | continue_at(cl, cached_dev_cache_miss_done, NULL); |
@@ -1074,12 +1097,13 @@ static void cached_dev_read_done_bh(struct closure *cl) | |||
1074 | struct search *s = container_of(cl, struct search, cl); | 1097 | struct search *s = container_of(cl, struct search, cl); |
1075 | struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); | 1098 | struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); |
1076 | 1099 | ||
1077 | bch_mark_cache_accounting(s, !s->cache_miss, s->bypass); | 1100 | bch_mark_cache_accounting(s->iop.c, s->d, |
1078 | trace_bcache_read(s->orig_bio, !s->cache_miss, s->bypass); | 1101 | !s->cache_miss, s->iop.bypass); |
1102 | trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); | ||
1079 | 1103 | ||
1080 | if (s->error) | 1104 | if (s->iop.error) |
1081 | continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); | 1105 | continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); |
1082 | else if (s->cache_bio || verify(dc, &s->bio.bio)) | 1106 | else if (s->iop.bio || verify(dc, &s->bio.bio)) |
1083 | continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq); | 1107 | continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq); |
1084 | else | 1108 | else |
1085 | continue_at_nobarrier(cl, cached_dev_bio_complete, NULL); | 1109 | continue_at_nobarrier(cl, cached_dev_bio_complete, NULL); |
@@ -1093,7 +1117,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, | |||
1093 | struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); | 1117 | struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); |
1094 | struct bio *miss, *cache_bio; | 1118 | struct bio *miss, *cache_bio; |
1095 | 1119 | ||
1096 | if (s->cache_miss || s->bypass) { | 1120 | if (s->cache_miss || s->iop.bypass) { |
1097 | miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); | 1121 | miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); |
1098 | ret = miss == bio ? MAP_DONE : MAP_CONTINUE; | 1122 | ret = miss == bio ? MAP_DONE : MAP_CONTINUE; |
1099 | goto out_submit; | 1123 | goto out_submit; |
@@ -1101,20 +1125,21 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, | |||
1101 | 1125 | ||
1102 | if (!(bio->bi_rw & REQ_RAHEAD) && | 1126 | if (!(bio->bi_rw & REQ_RAHEAD) && |
1103 | !(bio->bi_rw & REQ_META) && | 1127 | !(bio->bi_rw & REQ_META) && |
1104 | s->c->gc_stats.in_use < CUTOFF_CACHE_READA) | 1128 | s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA) |
1105 | reada = min_t(sector_t, dc->readahead >> 9, | 1129 | reada = min_t(sector_t, dc->readahead >> 9, |
1106 | bdev_sectors(bio->bi_bdev) - bio_end_sector(bio)); | 1130 | bdev_sectors(bio->bi_bdev) - bio_end_sector(bio)); |
1107 | 1131 | ||
1108 | s->cache_bio_sectors = min(sectors, bio_sectors(bio) + reada); | 1132 | s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada); |
1109 | 1133 | ||
1110 | s->replace_key = KEY(s->inode, bio->bi_sector + | 1134 | s->iop.replace_key = KEY(s->iop.inode, |
1111 | s->cache_bio_sectors, s->cache_bio_sectors); | 1135 | bio->bi_sector + s->insert_bio_sectors, |
1136 | s->insert_bio_sectors); | ||
1112 | 1137 | ||
1113 | ret = bch_btree_insert_check_key(b, &s->op, &s->replace_key); | 1138 | ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key); |
1114 | if (ret) | 1139 | if (ret) |
1115 | return ret; | 1140 | return ret; |
1116 | 1141 | ||
1117 | s->replace = true; | 1142 | s->iop.replace = true; |
1118 | 1143 | ||
1119 | miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); | 1144 | miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split); |
1120 | 1145 | ||
@@ -1122,14 +1147,14 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, | |||
1122 | ret = miss == bio ? MAP_DONE : -EINTR; | 1147 | ret = miss == bio ? MAP_DONE : -EINTR; |
1123 | 1148 | ||
1124 | cache_bio = bio_alloc_bioset(GFP_NOWAIT, | 1149 | cache_bio = bio_alloc_bioset(GFP_NOWAIT, |
1125 | DIV_ROUND_UP(s->cache_bio_sectors, PAGE_SECTORS), | 1150 | DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS), |
1126 | dc->disk.bio_split); | 1151 | dc->disk.bio_split); |
1127 | if (!cache_bio) | 1152 | if (!cache_bio) |
1128 | goto out_submit; | 1153 | goto out_submit; |
1129 | 1154 | ||
1130 | cache_bio->bi_sector = miss->bi_sector; | 1155 | cache_bio->bi_sector = miss->bi_sector; |
1131 | cache_bio->bi_bdev = miss->bi_bdev; | 1156 | cache_bio->bi_bdev = miss->bi_bdev; |
1132 | cache_bio->bi_size = s->cache_bio_sectors << 9; | 1157 | cache_bio->bi_size = s->insert_bio_sectors << 9; |
1133 | 1158 | ||
1134 | cache_bio->bi_end_io = request_endio; | 1159 | cache_bio->bi_end_io = request_endio; |
1135 | cache_bio->bi_private = &s->cl; | 1160 | cache_bio->bi_private = &s->cl; |
@@ -1138,8 +1163,11 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, | |||
1138 | if (bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO)) | 1163 | if (bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO)) |
1139 | goto out_put; | 1164 | goto out_put; |
1140 | 1165 | ||
1166 | if (reada) | ||
1167 | bch_mark_cache_readahead(s->iop.c, s->d); | ||
1168 | |||
1141 | s->cache_miss = miss; | 1169 | s->cache_miss = miss; |
1142 | s->cache_bio = cache_bio; | 1170 | s->iop.bio = cache_bio; |
1143 | bio_get(cache_bio); | 1171 | bio_get(cache_bio); |
1144 | closure_bio_submit(cache_bio, &s->cl, s->d); | 1172 | closure_bio_submit(cache_bio, &s->cl, s->d); |
1145 | 1173 | ||
@@ -1157,7 +1185,7 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s) | |||
1157 | { | 1185 | { |
1158 | struct closure *cl = &s->cl; | 1186 | struct closure *cl = &s->cl; |
1159 | 1187 | ||
1160 | closure_call(&s->btree, cache_lookup, NULL, cl); | 1188 | closure_call(&s->iop.cl, cache_lookup, NULL, cl); |
1161 | continue_at(cl, cached_dev_read_done_bh, NULL); | 1189 | continue_at(cl, cached_dev_read_done_bh, NULL); |
1162 | } | 1190 | } |
1163 | 1191 | ||
@@ -1179,7 +1207,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) | |||
1179 | struct bkey start = KEY(dc->disk.id, bio->bi_sector, 0); | 1207 | struct bkey start = KEY(dc->disk.id, bio->bi_sector, 0); |
1180 | struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0); | 1208 | struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0); |
1181 | 1209 | ||
1182 | bch_keybuf_check_overlapping(&s->c->moving_gc_keys, &start, &end); | 1210 | bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end); |
1183 | 1211 | ||
1184 | down_read_non_owner(&dc->writeback_lock); | 1212 | down_read_non_owner(&dc->writeback_lock); |
1185 | if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) { | 1213 | if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) { |
@@ -1187,8 +1215,8 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) | |||
1187 | * We overlap with some dirty data undergoing background | 1215 | * We overlap with some dirty data undergoing background |
1188 | * writeback, force this write to writeback | 1216 | * writeback, force this write to writeback |
1189 | */ | 1217 | */ |
1190 | s->bypass = false; | 1218 | s->iop.bypass = false; |
1191 | s->writeback = true; | 1219 | s->iop.writeback = true; |
1192 | } | 1220 | } |
1193 | 1221 | ||
1194 | /* | 1222 | /* |
@@ -1199,27 +1227,25 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) | |||
1199 | * so we still want to call it. | 1227 | * so we still want to call it. |
1200 | */ | 1228 | */ |
1201 | if (bio->bi_rw & REQ_DISCARD) | 1229 | if (bio->bi_rw & REQ_DISCARD) |
1202 | s->bypass = true; | 1230 | s->iop.bypass = true; |
1203 | 1231 | ||
1204 | if (should_writeback(dc, s->orig_bio, | 1232 | if (should_writeback(dc, s->orig_bio, |
1205 | cache_mode(dc, bio), | 1233 | cache_mode(dc, bio), |
1206 | s->bypass)) { | 1234 | s->iop.bypass)) { |
1207 | s->bypass = false; | 1235 | s->iop.bypass = false; |
1208 | s->writeback = true; | 1236 | s->iop.writeback = true; |
1209 | } | 1237 | } |
1210 | 1238 | ||
1211 | trace_bcache_write(s->orig_bio, s->writeback, s->bypass); | 1239 | if (s->iop.bypass) { |
1212 | 1240 | s->iop.bio = s->orig_bio; | |
1213 | if (s->bypass) { | 1241 | bio_get(s->iop.bio); |
1214 | s->cache_bio = s->orig_bio; | ||
1215 | bio_get(s->cache_bio); | ||
1216 | 1242 | ||
1217 | if (!(bio->bi_rw & REQ_DISCARD) || | 1243 | if (!(bio->bi_rw & REQ_DISCARD) || |
1218 | blk_queue_discard(bdev_get_queue(dc->bdev))) | 1244 | blk_queue_discard(bdev_get_queue(dc->bdev))) |
1219 | closure_bio_submit(bio, cl, s->d); | 1245 | closure_bio_submit(bio, cl, s->d); |
1220 | } else if (s->writeback) { | 1246 | } else if (s->iop.writeback) { |
1221 | bch_writeback_add(dc); | 1247 | bch_writeback_add(dc); |
1222 | s->cache_bio = bio; | 1248 | s->iop.bio = bio; |
1223 | 1249 | ||
1224 | if (bio->bi_rw & REQ_FLUSH) { | 1250 | if (bio->bi_rw & REQ_FLUSH) { |
1225 | /* Also need to send a flush to the backing device */ | 1251 | /* Also need to send a flush to the backing device */ |
@@ -1234,13 +1260,13 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) | |||
1234 | closure_bio_submit(flush, cl, s->d); | 1260 | closure_bio_submit(flush, cl, s->d); |
1235 | } | 1261 | } |
1236 | } else { | 1262 | } else { |
1237 | s->cache_bio = bio_clone_bioset(bio, GFP_NOIO, | 1263 | s->iop.bio = bio_clone_bioset(bio, GFP_NOIO, |
1238 | dc->disk.bio_split); | 1264 | dc->disk.bio_split); |
1239 | 1265 | ||
1240 | closure_bio_submit(bio, cl, s->d); | 1266 | closure_bio_submit(bio, cl, s->d); |
1241 | } | 1267 | } |
1242 | 1268 | ||
1243 | closure_call(&s->btree, bch_data_insert, NULL, cl); | 1269 | closure_call(&s->iop.cl, bch_data_insert, NULL, cl); |
1244 | continue_at(cl, cached_dev_write_complete, NULL); | 1270 | continue_at(cl, cached_dev_write_complete, NULL); |
1245 | } | 1271 | } |
1246 | 1272 | ||
@@ -1249,8 +1275,8 @@ static void cached_dev_nodata(struct closure *cl) | |||
1249 | struct search *s = container_of(cl, struct search, cl); | 1275 | struct search *s = container_of(cl, struct search, cl); |
1250 | struct bio *bio = &s->bio.bio; | 1276 | struct bio *bio = &s->bio.bio; |
1251 | 1277 | ||
1252 | if (s->flush_journal) | 1278 | if (s->iop.flush_journal) |
1253 | bch_journal_meta(s->c, cl); | 1279 | bch_journal_meta(s->iop.c, cl); |
1254 | 1280 | ||
1255 | /* If it's a flush, we send the flush to the backing device too */ | 1281 | /* If it's a flush, we send the flush to the backing device too */ |
1256 | closure_bio_submit(bio, cl, s->d); | 1282 | closure_bio_submit(bio, cl, s->d); |
@@ -1277,7 +1303,7 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) | |||
1277 | 1303 | ||
1278 | if (cached_dev_get(dc)) { | 1304 | if (cached_dev_get(dc)) { |
1279 | s = search_alloc(bio, d); | 1305 | s = search_alloc(bio, d); |
1280 | trace_bcache_request_start(s, bio); | 1306 | trace_bcache_request_start(s->d, bio); |
1281 | 1307 | ||
1282 | if (!bio->bi_size) { | 1308 | if (!bio->bi_size) { |
1283 | /* | 1309 | /* |
@@ -1288,7 +1314,7 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) | |||
1288 | cached_dev_nodata, | 1314 | cached_dev_nodata, |
1289 | bcache_wq); | 1315 | bcache_wq); |
1290 | } else { | 1316 | } else { |
1291 | s->bypass = check_should_bypass(dc, s); | 1317 | s->iop.bypass = check_should_bypass(dc, bio); |
1292 | 1318 | ||
1293 | if (rw) | 1319 | if (rw) |
1294 | cached_dev_write(dc, s); | 1320 | cached_dev_write(dc, s); |
@@ -1378,8 +1404,8 @@ static void flash_dev_nodata(struct closure *cl) | |||
1378 | { | 1404 | { |
1379 | struct search *s = container_of(cl, struct search, cl); | 1405 | struct search *s = container_of(cl, struct search, cl); |
1380 | 1406 | ||
1381 | if (s->flush_journal) | 1407 | if (s->iop.flush_journal) |
1382 | bch_journal_meta(s->c, cl); | 1408 | bch_journal_meta(s->iop.c, cl); |
1383 | 1409 | ||
1384 | continue_at(cl, search_free, NULL); | 1410 | continue_at(cl, search_free, NULL); |
1385 | } | 1411 | } |
@@ -1400,7 +1426,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) | |||
1400 | cl = &s->cl; | 1426 | cl = &s->cl; |
1401 | bio = &s->bio.bio; | 1427 | bio = &s->bio.bio; |
1402 | 1428 | ||
1403 | trace_bcache_request_start(s, bio); | 1429 | trace_bcache_request_start(s->d, bio); |
1404 | 1430 | ||
1405 | if (!bio->bi_size) { | 1431 | if (!bio->bi_size) { |
1406 | /* | 1432 | /* |
@@ -1411,17 +1437,17 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) | |||
1411 | flash_dev_nodata, | 1437 | flash_dev_nodata, |
1412 | bcache_wq); | 1438 | bcache_wq); |
1413 | } else if (rw) { | 1439 | } else if (rw) { |
1414 | bch_keybuf_check_overlapping(&s->c->moving_gc_keys, | 1440 | bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, |
1415 | &KEY(d->id, bio->bi_sector, 0), | 1441 | &KEY(d->id, bio->bi_sector, 0), |
1416 | &KEY(d->id, bio_end_sector(bio), 0)); | 1442 | &KEY(d->id, bio_end_sector(bio), 0)); |
1417 | 1443 | ||
1418 | s->bypass = (bio->bi_rw & REQ_DISCARD) != 0; | 1444 | s->iop.bypass = (bio->bi_rw & REQ_DISCARD) != 0; |
1419 | s->writeback = true; | 1445 | s->iop.writeback = true; |
1420 | s->cache_bio = bio; | 1446 | s->iop.bio = bio; |
1421 | 1447 | ||
1422 | closure_call(&s->btree, bch_data_insert, NULL, cl); | 1448 | closure_call(&s->iop.cl, bch_data_insert, NULL, cl); |
1423 | } else { | 1449 | } else { |
1424 | closure_call(&s->btree, cache_lookup, NULL, cl); | 1450 | closure_call(&s->iop.cl, cache_lookup, NULL, cl); |
1425 | } | 1451 | } |
1426 | 1452 | ||
1427 | continue_at(cl, search_free, NULL); | 1453 | continue_at(cl, search_free, NULL); |
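The hunks above repeatedly recover the containing object from a bare struct closure pointer - container_of(cl, struct data_insert_op, cl) in bch_data_insert_keys(), and container_of(cl, struct search, iop.cl) in cache_lookup(). A minimal standalone illustration of that pattern, using hypothetical trimmed-down stand-ins rather than bcache's real structs:

	/* Hypothetical, trimmed-down stand-ins -- not the real bcache structs. */
	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct closure { int remaining; };

	struct data_insert_op {
		struct closure cl;	/* bch_data_insert() is invoked on this */
		int error;
	};

	struct search {
		struct closure cl;	/* "stack frame" for bio_complete */
		struct data_insert_op iop;
	};

	/* What cache_lookup()/bch_data_insert_keys() do with the closure they get: */
	static void lookup_demo(struct closure *cl)
	{
		struct search *s = container_of(cl, struct search, iop.cl);
		struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

		/* op and &s->iop are the same object reached two different ways */
		printf("same object: %d, error=%d\n", op == &s->iop, op->error);
	}

	int main(void)
	{
		struct search s = { .iop.error = -5 };

		lookup_demo(&s.iop.cl);	/* mirrors closure_call(&s->iop.cl, ...) */
		return 0;
	}

The point is that closure callbacks only ever see the embedded closure, and offsetof()-based arithmetic walks back out to whichever enclosing object registered them - which is what lets data_insert_op own its own closure independently of struct search.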