author		Kent Overstreet <koverstreet@google.com>	2013-04-26 18:39:55 -0400
committer	Kent Overstreet <koverstreet@google.com>	2013-06-26 20:09:15 -0400
commit		c37511b863f36c1cc6e18440717fd4cc0e881b8a (patch)
tree		64d82c648bd092f38c35c4b808411bc1cdb3a9f0 /drivers/md/bcache
parent		5794351146199b9ac67a5ab1beab82be8bfd7b5d (diff)
bcache: Fix/revamp tracepoints
The tracepoints were reworked to be more sensible, and a null pointer
deref in one of the tracepoints was fixed.

Converted some of the pr_debug()s to tracepoints - this is partly a
performance optimization; it used to be that without DEBUG or
CONFIG_DYNAMIC_DEBUG pr_debug() was an empty macro, but at some point it
was changed to an empty inline function.

Some of the pr_debug() statements had rather expensive function calls as
part of the arguments, so this code was getting run unnecessarily even on
non-debug kernels - in some fast paths, too.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
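[Editor's note] For context on the pr_debug() point above, here is a minimal, self-contained userspace C sketch (illustrative only; these are not the kernel's actual <linux/printk.h> definitions) showing why an empty macro costs nothing while an empty inline function still evaluates its arguments:

#include <stdio.h>

static int calls;

/* Stand-in for an expensive argument like pbtree(b) or pkey(k). */
static const char *expensive_format(void)
{
	calls++;
	return "key";
}

/* Old style: with DEBUG unset this expands to nothing, so the
 * arguments are never evaluated. */
#define pr_debug_macro(fmt, ...)	do { } while (0)

/* New style: an empty inline function.  The body is optimized away,
 * but the arguments are still evaluated at every call site. */
static inline void pr_debug_inline(const char *fmt, ...) { }

int main(void)
{
	pr_debug_macro("%s", expensive_format());	/* calls stays 0 */
	pr_debug_inline("%s", expensive_format());	/* calls becomes 1 */
	printf("expensive_format() ran %d time(s)\n", calls);
	return 0;
}

That extra evaluation on non-debug kernels, in fast paths, is the cost the tracepoint conversion removes.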
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r--	drivers/md/bcache/alloc.c	10
-rw-r--r--	drivers/md/bcache/bcache.h	20
-rw-r--r--	drivers/md/bcache/bset.h	4
-rw-r--r--	drivers/md/bcache/btree.c	47
-rw-r--r--	drivers/md/bcache/io.c	2
-rw-r--r--	drivers/md/bcache/journal.c	14
-rw-r--r--	drivers/md/bcache/movinggc.c	12
-rw-r--r--	drivers/md/bcache/request.c	65
-rw-r--r--	drivers/md/bcache/request.h	2
-rw-r--r--	drivers/md/bcache/super.c	2
-rw-r--r--	drivers/md/bcache/sysfs.c	1
-rw-r--r--	drivers/md/bcache/trace.c	45
-rw-r--r--	drivers/md/bcache/util.h	2
-rw-r--r--	drivers/md/bcache/writeback.c	10
14 files changed, 126 insertions(+), 110 deletions(-)
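[Editor's note] The format-string debug messages removed below are replaced by typed events declared in include/trace/events/bcache.h. As a rough, hypothetical sketch of the shape such a definition takes (the fields shown are illustrative assumptions, not the patch's actual event definitions), a bcache tracepoint built with the kernel's TRACE_EVENT() macro looks like:

/* Hypothetical sketch only -- the real events in
 * include/trace/events/bcache.h differ in their fields. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM bcache

#include <linux/tracepoint.h>

TRACE_EVENT(bcache_alloc_invalidate,

	TP_PROTO(struct cache *ca),

	TP_ARGS(ca),

	TP_STRUCT__entry(
		__field(unsigned,	free	)
		__field(unsigned,	free_inc)
	),

	/* Only runs when the event is enabled, so the fifo_used() calls
	 * cost nothing on the fast path when tracing is off. */
	TP_fast_assign(
		__entry->free		= fifo_used(&ca->free);
		__entry->free_inc	= fifo_used(&ca->free_inc);
	),

	TP_printk("free %u free_inc %u", __entry->free, __entry->free_inc)
);

A disabled tracepoint is a static-branch no-op at the call site, which is the property the pr_debug() calls lost when pr_debug() became an empty inline function.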
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 38428f46ea74..b54b73b9b2b7 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -65,6 +65,7 @@
 
 #include <linux/kthread.h>
 #include <linux/random.h>
+#include <trace/events/bcache.h>
 
 #define MAX_IN_FLIGHT_DISCARDS		8U
 
@@ -351,10 +352,7 @@ static void invalidate_buckets(struct cache *ca)
 		break;
 	}
 
-	pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
-		 fifo_used(&ca->free), ca->free.size,
-		 fifo_used(&ca->free_inc), ca->free_inc.size,
-		 fifo_used(&ca->unused), ca->unused.size);
+	trace_bcache_alloc_invalidate(ca);
 }
 
 #define allocator_wait(ca, cond)					\
@@ -473,9 +471,7 @@ again:
 		return r;
 	}
 
-	pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu",
-		 atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free),
-		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+	trace_bcache_alloc_fail(ca);
 
 	if (cl) {
 		closure_wait(&ca->set->bucket_wait, cl);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index ad4957b52f10..59c15e09e4dd 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -178,7 +178,6 @@
 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 
 #include <linux/bio.h>
-#include <linux/blktrace_api.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void)
 	return local_clock() >> 10;
 }
 
-#define MAX_BSETS		4U
-
 #define BTREE_PRIO		USHRT_MAX
 #define INITIAL_PRIO		32768
 
@@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k)
 		atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
 }
 
-/* Blktrace macros */
-
-#define blktrace_msg(c, fmt, ...)					\
-do {									\
-	struct request_queue *q = bdev_get_queue(c->bdev);		\
-	if (q)								\
-		blk_add_trace_msg(q, fmt, ##__VA_ARGS__);		\
-} while (0)
-
-#define blktrace_msg_all(s, fmt, ...)					\
-do {									\
-	struct cache *_c;						\
-	unsigned i;							\
-	for_each_cache(_c, (s), i)					\
-		blktrace_msg(_c, fmt, ##__VA_ARGS__);			\
-} while (0)
-
 static inline void cached_dev_put(struct cached_dev *dc)
 {
 	if (atomic_dec_and_test(&dc->count))
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 57a9cff41546..ae115a253d73 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -1,6 +1,8 @@
1#ifndef _BCACHE_BSET_H 1#ifndef _BCACHE_BSET_H
2#define _BCACHE_BSET_H 2#define _BCACHE_BSET_H
3 3
4#include <linux/slab.h>
5
4/* 6/*
5 * BKEYS: 7 * BKEYS:
6 * 8 *
@@ -142,6 +144,8 @@
142 144
143/* Btree key comparison/iteration */ 145/* Btree key comparison/iteration */
144 146
147#define MAX_BSETS 4U
148
145struct btree_iter { 149struct btree_iter {
146 size_t size, used; 150 size_t size, used;
147 struct btree_iter_set { 151 struct btree_iter_set {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index aaec186f7ba6..218d486259a3 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -223,8 +223,9 @@ void bch_btree_node_read(struct btree *b)
 	struct closure cl;
 	struct bio *bio;
 
+	trace_bcache_btree_read(b);
+
 	closure_init_stack(&cl);
-	pr_debug("%s", pbtree(b));
 
 	bio = bch_bbio_alloc(b->c);
 	bio->bi_rw	= REQ_META|READ_SYNC;
@@ -234,7 +235,6 @@ void bch_btree_node_read(struct btree *b)
 
 	bch_bio_map(bio, b->sets[0].data);
 
-	trace_bcache_btree_read(bio);
 	bch_submit_bbio(bio, b->c, &b->key, 0);
 	closure_sync(&cl);
 
@@ -343,7 +343,6 @@ static void do_btree_node_write(struct btree *b)
 			memcpy(page_address(bv->bv_page),
 			       base + j * PAGE_SIZE, PAGE_SIZE);
 
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		continue_at(cl, btree_node_write_done, NULL);
@@ -351,7 +350,6 @@ static void do_btree_node_write(struct btree *b)
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);
 
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		closure_sync(cl);
@@ -363,10 +361,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = b->sets[b->nsets].data;
 
+	trace_bcache_btree_write(b);
+
 	BUG_ON(current->bio_list);
 	BUG_ON(b->written >= btree_blocks(b));
 	BUG_ON(b->written && !i->keys);
 	BUG_ON(b->sets->data->seq != i->seq);
+	bch_check_key_order(b, i);
 
 	cancel_delayed_work(&b->work);
 
@@ -376,12 +377,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 	clear_bit(BTREE_NODE_dirty,	 &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);
 
-	bch_check_key_order(b, i);
-
 	do_btree_node_write(b);
 
-	pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
-
 	b->written += set_blocks(i, b->c);
 	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
 			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
@@ -752,6 +749,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
 	int ret = -ENOMEM;
 	struct btree *i;
 
+	trace_bcache_btree_cache_cannibalize(c);
+
 	if (!cl)
 		return ERR_PTR(-ENOMEM);
 
@@ -770,7 +769,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
 		return ERR_PTR(-EAGAIN);
 	}
 
-	/* XXX: tracepoint */
 	c->try_harder = cl;
 	c->try_harder_start = local_clock();
 retry:
@@ -956,13 +954,14 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 {
 	unsigned i;
 
+	trace_bcache_btree_node_free(b);
+
 	/*
 	 * The BUG_ON() in btree_node_get() implies that we must have a write
 	 * lock on parent to free or even invalidate a node
 	 */
 	BUG_ON(op->lock <= b->level);
 	BUG_ON(b == b->c->root);
-	pr_debug("bucket %s", pbtree(b));
 
 	if (btree_node_dirty(b))
 		btree_complete_write(b, btree_current_write(b));
@@ -1012,12 +1011,16 @@ retry:
 	bch_bset_init_next(b);
 
 	mutex_unlock(&c->bucket_lock);
+
+	trace_bcache_btree_node_alloc(b);
 	return b;
err_free:
 	bch_bucket_free(c, &k.key);
 	__bkey_put(c, &k.key);
err:
 	mutex_unlock(&c->bucket_lock);
+
+	trace_bcache_btree_node_alloc_fail(b);
 	return b;
 }
 
@@ -1254,7 +1257,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	btree_node_free(r->b, op);
 	up_write(&r->b->lock);
 
-	pr_debug("coalesced %u nodes", nodes);
+	trace_bcache_btree_gc_coalesce(nodes);
 
 	gc->nodes--;
 	nodes--;
@@ -1479,8 +1482,7 @@ static void bch_btree_gc(struct closure *cl)
 	struct btree_op op;
 	uint64_t start_time = local_clock();
 
-	trace_bcache_gc_start(c->sb.set_uuid);
-	blktrace_msg_all(c, "Starting gc");
+	trace_bcache_gc_start(c);
 
 	memset(&stats, 0, sizeof(struct gc_stat));
 	closure_init_stack(&writes);
@@ -1496,9 +1498,7 @@ static void bch_btree_gc(struct closure *cl)
 	closure_sync(&writes);
 
 	if (ret) {
-		blktrace_msg_all(c, "Stopped gc");
 		pr_warn("gc failed!");
-
 		continue_at(cl, bch_btree_gc, bch_gc_wq);
 	}
 
@@ -1519,8 +1519,7 @@ static void bch_btree_gc(struct closure *cl)
 	stats.in_use	= (c->nbuckets - available) * 100 / c->nbuckets;
 	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
 
-	blktrace_msg_all(c, "Finished gc");
-	trace_bcache_gc_end(c->sb.set_uuid);
+	trace_bcache_gc_end(c);
 
 	continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
@@ -1901,12 +1900,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 
 	split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
 
-	pr_debug("%ssplitting at %s keys %i", split ? "" : "not ",
-		 pbtree(b), n1->sets[0].data->keys);
-
 	if (split) {
 		unsigned keys = 0;
 
+		trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
+
 		n2 = bch_btree_node_alloc(b->c, b->level, &op->cl);
 		if (IS_ERR(n2))
 			goto err_free1;
@@ -1941,8 +1939,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 		bch_keylist_add(&op->keys, &n2->key);
 		bch_btree_node_write(n2, &op->cl);
 		rw_unlock(true, n2);
-	} else
+	} else {
+		trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
+
 		bch_btree_insert_keys(n1, op);
+	}
 
 	bch_keylist_add(&op->keys, &n1->key);
 	bch_btree_node_write(n1, &op->cl);
@@ -2117,6 +2118,8 @@ void bch_btree_set_root(struct btree *b)
 {
 	unsigned i;
 
+	trace_bcache_btree_set_root(b);
+
 	BUG_ON(!b->written);
 
 	for (i = 0; i < KEY_PTRS(&b->key); i++)
@@ -2130,7 +2133,6 @@ void bch_btree_set_root(struct btree *b)
 	__bkey_put(b->c, &b->key);
 
 	bch_journal_meta(b->c, NULL);
-	pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
 }
 
 /* Cache lookup */
@@ -2216,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
 		n->bi_end_io	= bch_cache_read_endio;
 		n->bi_private	= &s->cl;
 
-		trace_bcache_cache_hit(n);
 		__bch_submit_bbio(n, b->c);
 	}
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index d285cd49104c..0f6d69658b61 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -9,6 +9,8 @@
 #include "bset.h"
 #include "debug.h"
 
+#include <linux/blkdev.h>
+
 static void bch_bi_idx_hack_endio(struct bio *bio, int error)
 {
 	struct bio *p = bio->bi_private;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 970d819d4350..5ca22149b749 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 /*
  * Journal replay/recovery:
  *
@@ -300,7 +302,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list,
 		for (k = i->j.start;
 		     k < end(&i->j);
 		     k = bkey_next(k)) {
-			pr_debug("%s", pkey(k));
+			trace_bcache_journal_replay_key(k);
+
 			bkey_copy(op->keys.top, k);
 			bch_keylist_push(&op->keys);
 
@@ -712,7 +715,8 @@ void bch_journal(struct closure *cl)
 	spin_lock(&c->journal.lock);
 
 	if (journal_full(&c->journal)) {
-		/* XXX: tracepoint */
+		trace_bcache_journal_full(c);
+
 		closure_wait(&c->journal.wait, cl);
 
 		journal_reclaim(c);
@@ -728,13 +732,15 @@ void bch_journal(struct closure *cl)
 
 	if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS ||
 	    b > c->journal.blocks_free) {
-		/* XXX: If we were inserting so many keys that they won't fit in
+		trace_bcache_journal_entry_full(c);
+
+		/*
+		 * XXX: If we were inserting so many keys that they won't fit in
 		 * an _empty_ journal write, we'll deadlock. For now, handle
 		 * this in bch_keylist_realloc() - but something to think about.
 		 */
 		BUG_ON(!w->data->keys);
 
-		/* XXX: tracepoint */
 		BUG_ON(!closure_wait(&w->wait, cl));
 
 		closure_flush(&c->journal.io);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 8589512c972e..04f6b97ffda6 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 struct moving_io {
 	struct keybuf_key	*w;
 	struct search		s;
@@ -49,9 +51,8 @@ static void write_moving_finish(struct closure *cl)
 	while (bv-- != bio->bi_io_vec)
 		__free_page(bv->bv_page);
 
-	pr_debug("%s %s", io->s.op.insert_collision
-		 ? "collision moving" : "moved",
-		 pkey(&io->w->key));
+	if (io->s.op.insert_collision)
+		trace_bcache_gc_copy_collision(&io->w->key);
 
 	bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
 
@@ -94,8 +95,6 @@ static void write_moving(struct closure *cl)
 	struct moving_io *io = container_of(s, struct moving_io, s);
 
 	if (!s->error) {
-		trace_bcache_write_moving(&io->bio.bio);
-
 		moving_init(io);
 
 		io->bio.bio.bi_sector	= KEY_START(&io->w->key);
@@ -122,7 +121,6 @@ static void read_moving_submit(struct closure *cl)
 	struct moving_io *io = container_of(s, struct moving_io, s);
 	struct bio *bio = &io->bio.bio;
 
-	trace_bcache_read_moving(bio);
 	bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
 
 	continue_at(cl, write_moving, bch_gc_wq);
@@ -162,7 +160,7 @@ static void read_moving(struct closure *cl)
 		if (bch_bio_alloc_pages(bio, GFP_KERNEL))
 			goto err;
 
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_gc_copy(&w->key);
 
 		closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index e5ff12e52d5b..695469958c1e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -530,10 +530,9 @@ static void bch_insert_data_loop(struct closure *cl)
 		if (KEY_CSUM(k))
 			bio_csum(n, k);
 
-		pr_debug("%s", pkey(k));
+		trace_bcache_cache_insert(k);
 		bch_keylist_push(&op->keys);
 
-		trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
 		n->bi_rw |= REQ_WRITE;
 		bch_submit_bbio(n, op->c, k, 0);
 	} while (n != bio);
@@ -784,11 +783,8 @@ static void request_read_error(struct closure *cl)
 	int i;
 
 	if (s->recoverable) {
-		/* The cache read failed, but we can retry from the backing
-		 * device.
-		 */
-		pr_debug("recovering at sector %llu",
-			 (uint64_t) s->orig_bio->bi_sector);
+		/* Retry from the backing device: */
+		trace_bcache_read_retry(s->orig_bio);
 
 		s->error = 0;
 		bv = s->bio.bio.bi_io_vec;
@@ -806,7 +802,6 @@ static void request_read_error(struct closure *cl)
 
 		/* XXX: invalidate cache */
 
-		trace_bcache_read_retry(&s->bio.bio);
 		closure_bio_submit(&s->bio.bio, &s->cl, s->d);
 	}
 
@@ -899,6 +894,7 @@ static void request_read_done_bh(struct closure *cl)
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
+	trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
 
 	if (s->error)
 		continue_at_nobarrier(cl, request_read_error, bcache_wq);
@@ -969,7 +965,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	s->cache_miss	= miss;
 	bio_get(s->op.cache_bio);
 
-	trace_bcache_cache_miss(s->orig_bio);
 	closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
 
 	return ret;
@@ -1040,15 +1035,15 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	if (should_writeback(dc, s->orig_bio))
 		s->writeback = true;
 
+	trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
+
 	if (!s->writeback) {
 		s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
 						   dc->disk.bio_split);
 
-		trace_bcache_writethrough(s->orig_bio);
 		closure_bio_submit(bio, cl, s->d);
 	} else {
 		s->op.cache_bio = bio;
-		trace_bcache_writeback(s->orig_bio);
 		bch_writeback_add(dc, bio_sectors(bio));
 	}
 out:
@@ -1058,7 +1053,6 @@ skip:
 	s->op.skip = true;
 	s->op.cache_bio = s->orig_bio;
 	bio_get(s->op.cache_bio);
-	trace_bcache_write_skip(s->orig_bio);
 
 	if ((bio->bi_rw & REQ_DISCARD) &&
 	    !blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -1088,9 +1082,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s)
 
 /* Cached devices - read & write stuff */
 
-int bch_get_congested(struct cache_set *c)
+unsigned bch_get_congested(struct cache_set *c)
 {
 	int i;
+	long rand;
 
 	if (!c->congested_read_threshold_us &&
 	    !c->congested_write_threshold_us)
@@ -1106,7 +1101,13 @@ int bch_get_congested(struct cache_set *c)
 
 	i += CONGESTED_MAX;
 
-	return i <= 0 ? 1 : fract_exp_two(i, 6);
+	if (i > 0)
+		i = fract_exp_two(i, 6);
+
+	rand = get_random_int();
+	i -= bitmap_weight(&rand, BITS_PER_LONG);
+
+	return i > 0 ? i : 1;
 }
 
 static void add_sequential(struct task_struct *t)
@@ -1126,10 +1127,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 {
 	struct cache_set *c = s->op.c;
 	struct bio *bio = &s->bio.bio;
-
-	long rand;
-	int cutoff = bch_get_congested(c);
 	unsigned mode = cache_mode(dc, bio);
+	unsigned sectors, congested = bch_get_congested(c);
 
 	if (atomic_read(&dc->disk.detaching) ||
 	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -1147,17 +1146,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 		goto skip;
 	}
 
-	if (!cutoff) {
-		cutoff = dc->sequential_cutoff >> 9;
+	if (!congested && !dc->sequential_cutoff)
+		goto rescale;
 
-		if (!cutoff)
-			goto rescale;
-
-		if (mode == CACHE_MODE_WRITEBACK &&
-		    (bio->bi_rw & REQ_WRITE) &&
-		    (bio->bi_rw & REQ_SYNC))
-			goto rescale;
-	}
+	if (!congested &&
+	    mode == CACHE_MODE_WRITEBACK &&
+	    (bio->bi_rw & REQ_WRITE) &&
+	    (bio->bi_rw & REQ_SYNC))
+		goto rescale;
 
 	if (dc->sequential_merge) {
 		struct io *i;
@@ -1192,12 +1188,19 @@ found:
 		add_sequential(s->task);
 	}
 
-	rand = get_random_int();
-	cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
+	sectors = max(s->task->sequential_io,
+		      s->task->sequential_io_avg) >> 9;
 
-	if (cutoff <= (int) (max(s->task->sequential_io,
-				 s->task->sequential_io_avg) >> 9))
+	if (dc->sequential_cutoff &&
+	    sectors >= dc->sequential_cutoff >> 9) {
+		trace_bcache_bypass_sequential(s->orig_bio);
 		goto skip;
+	}
+
+	if (congested && sectors >= congested) {
+		trace_bcache_bypass_congested(s->orig_bio);
+		goto skip;
+	}
 
 rescale:
 	bch_rescale_priorities(c, bio_sectors(bio));
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 254d9ab5707c..57dc4784f4f4 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -30,7 +30,7 @@ struct search {
 };
 
 void bch_cache_read_endio(struct bio *, int);
-int bch_get_congested(struct cache_set *);
+unsigned bch_get_congested(struct cache_set *);
 void bch_insert_data(struct closure *cl);
 void bch_btree_insert_async(struct closure *);
 void bch_cache_read_endio(struct bio *, int);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e53f89988b08..47bc13745068 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -11,6 +11,7 @@
 #include "debug.h"
 #include "request.h"
 
+#include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
@@ -543,7 +544,6 @@ void bch_prio_write(struct cache *ca)
 
 	pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
 		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-	blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
 
 	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
 		long bucket;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 29228b8a6ffe..f5c2d8695230 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blkdev.h>
 #include <linux/sort.h>
 
 static const char * const cache_replacement_policies[] = {
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
index 983f9bb411bc..7f4f38aa16ae 100644
--- a/drivers/md/bcache/trace.c
+++ b/drivers/md/bcache/trace.c
@@ -2,6 +2,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blktrace_api.h>
 #include <linux/module.h>
 
 #define CREATE_TRACE_POINTS
@@ -9,18 +10,42 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision);
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 577393e38c3a..e02780545f12 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -15,8 +15,6 @@
 
 struct closure;
 
-#include <trace/events/bcache.h>
-
 #ifdef CONFIG_BCACHE_EDEBUG
 
 #define atomic_dec_bug(v)	BUG_ON(atomic_dec_return(v) < 0)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 2714ed3991d1..82f6d4577be2 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -10,6 +10,8 @@
 #include "btree.h"
 #include "debug.h"
 
+#include <trace/events/bcache.h>
+
 static struct workqueue_struct *dirty_wq;
 
 static void read_dirty(struct closure *);
@@ -236,10 +238,12 @@ static void write_dirty_finish(struct closure *cl)
 		for (i = 0; i < KEY_PTRS(&w->key); i++)
 			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
 
-		pr_debug("clearing %s", pkey(&w->key));
 		bch_btree_insert(&op, dc->disk.c);
 		closure_sync(&op.cl);
 
+		if (op.insert_collision)
+			trace_bcache_writeback_collision(&w->key);
+
 		atomic_long_inc(op.insert_collision
 				? &dc->disk.c->writeback_keys_failed
 				: &dc->disk.c->writeback_keys_done);
@@ -275,7 +279,6 @@ static void write_dirty(struct closure *cl)
 	io->bio.bi_bdev		= io->dc->bdev;
 	io->bio.bi_end_io	= dirty_endio;
 
-	trace_bcache_write_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
 	continue_at(cl, write_dirty_finish, dirty_wq);
@@ -296,7 +299,6 @@ static void read_dirty_submit(struct closure *cl)
 {
 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 
-	trace_bcache_read_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
 	continue_at(cl, write_dirty, dirty_wq);
@@ -352,7 +354,7 @@ static void read_dirty(struct closure *cl)
 		if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
 			goto err_free;
 
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_writeback(&w->key);
 
 		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
 