author    Linus Torvalds <torvalds@linux-foundation.org>  2014-08-14 11:10:21 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-08-14 11:10:21 -0400
commit    d429a3639ca967ce2f35e3e8d4e70caec7149ded
tree      cad1e5602551b6a744f63ef062de2c2e21cfe39a
parent    4a319a490ca59a746b3d36768c0e29ee19832366
parent    99d540018caa920b7a54e2d3048f1dff530b294b
Merge branch 'for-3.17/drivers' of git://git.kernel.dk/linux-block
Pull block driver changes from Jens Axboe:
 "Nothing out of the ordinary here, this pull request contains:

  - A big round of fixes for bcache from Kent Overstreet, Slava Pestov,
    and Surbhi Palande.  No new features, just a lot of fixes.

  - The usual round of drbd updates from Andreas Gruenbacher, Lars
    Ellenberg, and Philipp Reisner.

  - virtio_blk was converted to blk-mq back in 3.13, but now Ming Lei
    has taken it one step further and added support for actually using
    more than one queue.

  - Addition of an explicit SG_FLAG_Q_AT_HEAD for block/bsg, to
    complement the default behavior of adding to the tail of the
    queue.  From Douglas Gilbert"

* 'for-3.17/drivers' of git://git.kernel.dk/linux-block: (86 commits)
  bcache: Drop unneeded blk_sync_queue() calls
  bcache: add mutex lock for bch_is_open
  bcache: Correct printing of btree_gc_max_duration_ms
  bcache: try to set b->parent properly
  bcache: fix memory corruption in init error path
  bcache: fix crash with incomplete cache set
  bcache: Fix more early shutdown bugs
  bcache: fix use-after-free in btree_gc_coalesce()
  bcache: Fix an infinite loop in journal replay
  bcache: fix crash in bcache_btree_node_alloc_fail tracepoint
  bcache: bcache_write tracepoint was crashing
  bcache: fix typo in bch_bkey_equal_header
  bcache: Allocate bounce buffers with GFP_NOWAIT
  bcache: Make sure to pass GFP_WAIT to mempool_alloc()
  bcache: fix uninterruptible sleep in writeback thread
  bcache: wait for buckets when allocating new btree root
  bcache: fix crash on shutdown in passthrough mode
  bcache: fix lockdep warnings on shutdown
  bcache allocator: send discards with correct size
  bcache: Fix to remove the rcu_sched stalls.
  ...
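The bsg change is the only user-visible API addition in the list: SG v4 requests default to tail-of-queue insertion, and SG_FLAG_Q_AT_HEAD now asks for head insertion explicitly. A rough userspace sketch of passing the flag through a bsg node (the flag name is what this series exposes via scsi/sg.h; the surrounding SG_IO setup is ordinary, and the TEST UNIT READY command is only an illustration):

    /* Issue a TEST UNIT READY at the head of the queue via bsg. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/bsg.h>
    #include <scsi/sg.h>

    int tur_at_head(const char *bsg_path)
    {
            unsigned char cdb[6] = { 0 };   /* TEST UNIT READY */
            unsigned char sense[32];
            struct sg_io_v4 hdr;
            int fd, ret;

            fd = open(bsg_path, O_RDWR);
            if (fd < 0)
                    return -1;

            memset(&hdr, 0, sizeof(hdr));
            hdr.guard = 'Q';                /* SG v4 marker */
            hdr.protocol = BSG_PROTOCOL_SCSI;
            hdr.subprotocol = BSG_SUB_PROTOCOL_SCSI_CMD;
            hdr.request_len = sizeof(cdb);
            hdr.request = (uintptr_t) cdb;
            hdr.max_response_len = sizeof(sense);
            hdr.response = (uintptr_t) sense;
            hdr.timeout = 10000;            /* milliseconds */
            hdr.flags = SG_FLAG_Q_AT_HEAD;  /* the new explicit flag */

            ret = ioctl(fd, SG_IO, &hdr);
            close(fd);
            return ret;
    }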
Diffstat (limited to 'drivers/md')
 drivers/md/bcache/alloc.c     |  2
 drivers/md/bcache/bcache.h    |  4
 drivers/md/bcache/bset.c      |  2
 drivers/md/bcache/bset.h      |  2
 drivers/md/bcache/btree.c     | 50
 drivers/md/bcache/btree.h     |  5
 drivers/md/bcache/extents.c   | 13
 drivers/md/bcache/extents.h   |  1
 drivers/md/bcache/journal.c   | 24
 drivers/md/bcache/request.c   |  3
 drivers/md/bcache/super.c     | 57
 drivers/md/bcache/util.h      |  4
 drivers/md/bcache/writeback.c | 14
 drivers/md/bcache/writeback.h |  3
 14 files changed, 119 insertions(+), 65 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 443d03fbac47..8eeab72b93e2 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -331,7 +331,7 @@ static int bch_allocator_thread(void *arg)
                        mutex_unlock(&ca->set->bucket_lock);
                        blkdev_issue_discard(ca->bdev,
                                bucket_to_sector(ca->set, bucket),
-                               ca->sb.block_size, GFP_KERNEL, 0);
+                               ca->sb.bucket_size, GFP_KERNEL, 0);
                        mutex_lock(&ca->set->bucket_lock);
                }
 
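The allocator fix above is a units bug: blkdev_issue_discard() takes a start sector and a sector count, and ca->sb.bucket_size is the bucket's size in sectors while ca->sb.block_size is a single block. With the old argument, only the first block of each freed bucket was actually discarded. The corrected call shape, for reference (kernel-side, matching the hunk):

    blkdev_issue_discard(ca->bdev,
                         bucket_to_sector(ca->set, bucket), /* start sector */
                         ca->sb.bucket_size,                /* whole bucket, in sectors */
                         GFP_KERNEL, 0);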
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d2ebcf323094..04f7bc28ef83 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -477,9 +477,13 @@ struct gc_stat {
  * CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
  * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
  * flushing dirty data).
+ *
+ * CACHE_SET_RUNNING means all cache devices have been registered and journal
+ * replay is complete.
  */
 #define CACHE_SET_UNREGISTERING        0
 #define CACHE_SET_STOPPING             1
+#define CACHE_SET_RUNNING              2
 
 struct cache_set {
        struct closure cl;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 545416415305..646fe85261c1 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1182,7 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 {
        uint64_t start_time;
        bool used_mempool = false;
-       struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
+       struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
                                                     order);
        if (!out) {
                struct page *outp;
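This hunk is half of a pair with the "Make sure to pass GFP_WAIT to mempool_alloc()" commit: the opportunistic bounce-buffer allocation becomes non-sleeping (GFP_NOWAIT), and the mempool stays the fallback that may block but is guaranteed to produce a buffer. A minimal sketch of that try-fast-then-mempool pattern (not the bcache code itself):

    /* Try a non-sleeping page allocation first; fall back to the mempool. */
    static void *alloc_bounce(mempool_t *pool, unsigned order,
                              bool *used_mempool)
    {
            void *p = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
                                                order);
            if (p) {
                    *used_mempool = false;
                    return p;
            }

            *used_mempool = true;
            /* A waiting mempool_alloc() blocks until an element frees up. */
            return mempool_alloc(pool, GFP_NOIO);
    }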
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 5f6728d5d4dd..ae964624efb2 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -453,7 +453,7 @@ static inline bool bch_bkey_equal_header(const struct bkey *l,
 {
        return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
                KEY_PTRS(l) == KEY_PTRS(r) &&
-               KEY_CSUM(l) == KEY_CSUM(l));
+               KEY_CSUM(l) == KEY_CSUM(r));
 }
 
 /* Keylists */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 7347b6100961..00cde40db572 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -117,9 +117,9 @@
 ({                                                                     \
        int _r, l = (b)->level - 1;                                     \
        bool _w = l <= (op)->lock;                                      \
-       struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\
+       struct btree *_child = bch_btree_node_get((b)->c, op, key, l,  \
+                                                 _w, b);               \
        if (!IS_ERR(_child)) {                                          \
-               _child->parent = (b);                                   \
                _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__);       \
                rw_unlock(_w, _child);                                  \
        } else                                                          \
@@ -142,7 +142,6 @@
                rw_lock(_w, _b, _b->level);                             \
                if (_b == (c)->root &&                                  \
                    _w == insert_lock(op, _b)) {                        \
-                       _b->parent = NULL;                              \
                        _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__);   \
                }                                                       \
                rw_unlock(_w, _b);                                      \
@@ -202,7 +201,7 @@ void bch_btree_node_read_done(struct btree *b)
        struct bset *i = btree_bset_first(b);
        struct btree_iter *iter;
 
-       iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
+       iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
        iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
        iter->used = 0;
 
@@ -421,7 +420,7 @@ static void do_btree_node_write(struct btree *b)
        SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
                       bset_sector_offset(&b->keys, i));
 
-       if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
+       if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
                int j;
                struct bio_vec *bv;
                void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@@ -967,7 +966,8 @@ err:
  * level and op->lock.
  */
 struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
-                                struct bkey *k, int level, bool write)
+                                struct bkey *k, int level, bool write,
+                                struct btree *parent)
 {
        int i = 0;
        struct btree *b;
@@ -1002,6 +1002,7 @@ retry:
                BUG_ON(b->level != level);
        }
 
+       b->parent = parent;
        b->accessed = 1;
 
        for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
@@ -1022,15 +1023,16 @@ retry:
        return b;
 }
 
-static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
+static void btree_node_prefetch(struct btree *parent, struct bkey *k)
 {
        struct btree *b;
 
-       mutex_lock(&c->bucket_lock);
-       b = mca_alloc(c, NULL, k, level);
-       mutex_unlock(&c->bucket_lock);
+       mutex_lock(&parent->c->bucket_lock);
+       b = mca_alloc(parent->c, NULL, k, parent->level - 1);
+       mutex_unlock(&parent->c->bucket_lock);
 
        if (!IS_ERR_OR_NULL(b)) {
+               b->parent = parent;
                bch_btree_node_read(b);
                rw_unlock(true, b);
        }
@@ -1060,15 +1062,16 @@ static void btree_node_free(struct btree *b)
        mutex_unlock(&b->c->bucket_lock);
 }
 
-struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
-                                  int level)
+struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+                                    int level, bool wait,
+                                    struct btree *parent)
 {
        BKEY_PADDED(key) k;
        struct btree *b = ERR_PTR(-EAGAIN);
 
        mutex_lock(&c->bucket_lock);
 retry:
-       if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL))
+       if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
                goto err;
 
        bkey_put(c, &k.key);
@@ -1085,6 +1088,7 @@ retry:
        }
 
        b->accessed = 1;
+       b->parent = parent;
        bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
 
        mutex_unlock(&c->bucket_lock);
@@ -1096,14 +1100,21 @@ err_free:
 err:
        mutex_unlock(&c->bucket_lock);
 
-       trace_bcache_btree_node_alloc_fail(b);
+       trace_bcache_btree_node_alloc_fail(c);
        return b;
 }
 
+static struct btree *bch_btree_node_alloc(struct cache_set *c,
+                                          struct btree_op *op, int level,
+                                          struct btree *parent)
+{
+       return __bch_btree_node_alloc(c, op, level, op != NULL, parent);
+}
+
 static struct btree *btree_node_alloc_replacement(struct btree *b,
                                                   struct btree_op *op)
 {
-       struct btree *n = bch_btree_node_alloc(b->c, op, b->level);
+       struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
        if (!IS_ERR_OR_NULL(n)) {
                mutex_lock(&n->write_lock);
                bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1403,6 +1414,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
        BUG_ON(btree_bset_first(new_nodes[0])->keys);
        btree_node_free(new_nodes[0]);
        rw_unlock(true, new_nodes[0]);
+       new_nodes[0] = NULL;
 
        for (i = 0; i < nodes; i++) {
                if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
@@ -1516,7 +1528,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
                k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
                if (k) {
                        r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
-                                                 true);
+                                                 true, b);
                        if (IS_ERR(r->b)) {
                                ret = PTR_ERR(r->b);
                                break;
@@ -1811,7 +1823,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
                        k = bch_btree_iter_next_filter(&iter, &b->keys,
                                                       bch_ptr_bad);
                        if (k)
-                               btree_node_prefetch(b->c, k, b->level - 1);
+                               btree_node_prefetch(b, k);
 
                        if (p)
                                ret = btree(check_recurse, p, b, op);
@@ -1976,12 +1988,12 @@ static int btree_split(struct btree *b, struct btree_op *op,
 
        trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
 
-       n2 = bch_btree_node_alloc(b->c, op, b->level);
+       n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent);
        if (IS_ERR(n2))
                goto err_free1;
 
        if (!b->parent) {
-               n3 = bch_btree_node_alloc(b->c, op, b->level + 1);
+               n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL);
                if (IS_ERR(n3))
                        goto err_free2;
        }
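The thread running through most of these btree.c hunks is "try to set b->parent properly": instead of callers patching _child->parent after a lookup (or clearing it for the root), the parent now rides along on bch_btree_node_get() and __bch_btree_node_alloc() and is assigned while the node is still locked. In call-site terms, taken from the hunks above:

    /* Before: the field was patched after the fact, racily. */
    child = bch_btree_node_get(b->c, op, k, b->level - 1, true);
    child->parent = b;

    /* After: the parent travels with the lookup itself. */
    child = bch_btree_node_get(b->c, op, k, b->level - 1, true, b);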
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 91dfa5e69685..5c391fa01bed 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -242,9 +242,10 @@ void __bch_btree_node_write(struct btree *, struct closure *);
 void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_btree_set_root(struct btree *);
-struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
+struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *,
+                                    int, bool, struct btree *);
 struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
-                                struct bkey *, int, bool);
+                                struct bkey *, int, bool, struct btree *);
 
 int bch_btree_insert_check_key(struct btree *, struct btree_op *,
                               struct bkey *);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 3a0de4cf9771..243de0bf15cd 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -474,9 +474,8 @@ out:
        return false;
 }
 
-static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
 {
-       struct btree *b = container_of(bk, struct btree, keys);
        char buf[80];
 
        if (!KEY_SIZE(k))
@@ -485,16 +484,22 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
        if (KEY_SIZE(k) > KEY_OFFSET(k))
                goto bad;
 
-       if (__ptr_invalid(b->c, k))
+       if (__ptr_invalid(c, k))
                goto bad;
 
        return false;
 bad:
        bch_extent_to_text(buf, sizeof(buf), k);
-       cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
+       cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
        return true;
 }
 
+static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+{
+       struct btree *b = container_of(bk, struct btree, keys);
+       return __bch_extent_invalid(b->c, k);
+}
+
 static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
                                     unsigned ptr)
 {
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index e4e23409782d..e2ed54054e7a 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -9,5 +9,6 @@ struct cache_set;
 
 void bch_extent_to_text(char *, size_t, const struct bkey *);
 bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
+bool __bch_extent_invalid(struct cache_set *, const struct bkey *);
 
 #endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 59e82021b5bb..fe080ad0e558 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -7,6 +7,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "extents.h"
 
 #include <trace/events/bcache.h>
 
@@ -189,11 +190,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
                if (read_bucket(l))
                        goto bsearch;
 
-               if (list_empty(list))
+               /* no journal entries on this device? */
+               if (l == ca->sb.njournal_buckets)
                        continue;
 bsearch:
+               BUG_ON(list_empty(list));
+
                /* Binary search */
-               m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
+               m = l;
+               r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
                pr_debug("starting binary search, l %u r %u", l, r);
 
                while (l + 1 < r) {
@@ -291,15 +296,16 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
 
                for (k = i->j.start;
                     k < bset_bkey_last(&i->j);
-                    k = bkey_next(k)) {
-                       unsigned j;
+                    k = bkey_next(k))
+                       if (!__bch_extent_invalid(c, k)) {
+                               unsigned j;
 
-                       for (j = 0; j < KEY_PTRS(k); j++)
-                               if (ptr_available(c, k, j))
-                                       atomic_inc(&PTR_BUCKET(c, k, j)->pin);
+                               for (j = 0; j < KEY_PTRS(k); j++)
+                                       if (ptr_available(c, k, j))
+                                               atomic_inc(&PTR_BUCKET(c, k, j)->pin);
 
-                       bch_initial_mark_key(c, 0, k);
-               }
+                               bch_initial_mark_key(c, 0, k);
+                       }
        }
 }
 
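bch_journal_mark() now runs every journaled key through the newly exported __bch_extent_invalid() before touching the buckets it points at, so a corrupt extent in the journal is skipped rather than having its bogus pointers pinned during replay. The guard reduces to this shape (a sketch of the loop above, using an early continue instead of the nested block):

    for (k = i->j.start; k < bset_bkey_last(&i->j); k = bkey_next(k)) {
            unsigned j;

            if (__bch_extent_invalid(c, k))
                    continue;       /* skip garbage instead of pinning it */

            for (j = 0; j < KEY_PTRS(k); j++)
                    if (ptr_available(c, k, j))
                            atomic_inc(&PTR_BUCKET(c, k, j)->pin);

            bch_initial_mark_key(c, 0, k);
    }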
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 15fff4f68a7c..62e6e98186b5 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -311,7 +311,8 @@ void bch_data_insert(struct closure *cl)
 {
        struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 
-       trace_bcache_write(op->bio, op->writeback, op->bypass);
+       trace_bcache_write(op->c, op->inode, op->bio,
+                          op->writeback, op->bypass);
 
        bch_keylist_init(&op->insert_keys);
        bio_get(op->bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 926ded8ccbf5..d4713d098a39 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -733,8 +733,6 @@ static void bcache_device_detach(struct bcache_device *d)
 static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
                                 unsigned id)
 {
-       BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags));
-
        d->id = id;
        d->c = c;
        c->devices[id] = d;
@@ -927,6 +925,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
        list_move(&dc->list, &uncached_devices);
 
        clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
+       clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
 
        mutex_unlock(&bch_register_lock);
 
@@ -1041,6 +1040,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
         */
        atomic_set(&dc->count, 1);
 
+       if (bch_cached_dev_writeback_start(dc))
+               return -ENOMEM;
+
        if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
                bch_sectors_dirty_init(dc);
                atomic_set(&dc->has_dirty, 1);
@@ -1070,7 +1072,8 @@ static void cached_dev_free(struct closure *cl)
        struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
 
        cancel_delayed_work_sync(&dc->writeback_rate_update);
-       kthread_stop(dc->writeback_thread);
+       if (!IS_ERR_OR_NULL(dc->writeback_thread))
+               kthread_stop(dc->writeback_thread);
 
        mutex_lock(&bch_register_lock);
 
@@ -1081,12 +1084,8 @@ static void cached_dev_free(struct closure *cl)
 
        mutex_unlock(&bch_register_lock);
 
-       if (!IS_ERR_OR_NULL(dc->bdev)) {
-               if (dc->bdev->bd_disk)
-                       blk_sync_queue(bdev_get_queue(dc->bdev));
-
+       if (!IS_ERR_OR_NULL(dc->bdev))
                blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-       }
 
        wake_up(&unregister_wait);
 
@@ -1213,7 +1212,9 @@ void bch_flash_dev_release(struct kobject *kobj)
 static void flash_dev_free(struct closure *cl)
 {
        struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+       mutex_lock(&bch_register_lock);
        bcache_device_free(d);
+       mutex_unlock(&bch_register_lock);
        kobject_put(&d->kobj);
 }
 
@@ -1221,7 +1222,9 @@ static void flash_dev_flush(struct closure *cl)
 {
        struct bcache_device *d = container_of(cl, struct bcache_device, cl);
 
+       mutex_lock(&bch_register_lock);
        bcache_device_unlink(d);
+       mutex_unlock(&bch_register_lock);
        kobject_del(&d->kobj);
        continue_at(cl, flash_dev_free, system_wq);
 }
@@ -1277,6 +1280,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
        if (test_bit(CACHE_SET_STOPPING, &c->flags))
                return -EINTR;
 
+       if (!test_bit(CACHE_SET_RUNNING, &c->flags))
+               return -EPERM;
+
        u = uuid_find_empty(c);
        if (!u) {
                pr_err("Can't create volume, no room for UUID");
@@ -1346,8 +1352,11 @@ static void cache_set_free(struct closure *cl)
        bch_journal_free(c);
 
        for_each_cache(ca, c, i)
-               if (ca)
+               if (ca) {
+                       ca->set = NULL;
+                       c->cache[ca->sb.nr_this_dev] = NULL;
                        kobject_put(&ca->kobj);
+               }
 
        bch_bset_sort_state_free(&c->sort);
        free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
@@ -1405,9 +1414,11 @@ static void cache_set_flush(struct closure *cl)
                if (ca->alloc_thread)
                        kthread_stop(ca->alloc_thread);
 
-       cancel_delayed_work_sync(&c->journal.work);
-       /* flush last journal entry if needed */
-       c->journal.work.work.func(&c->journal.work.work);
+       if (c->journal.cur) {
+               cancel_delayed_work_sync(&c->journal.work);
+               /* flush last journal entry if needed */
+               c->journal.work.work.func(&c->journal.work.work);
+       }
 
        closure_return(cl);
 }
@@ -1586,7 +1597,7 @@ static void run_cache_set(struct cache_set *c)
                goto err;
 
        err = "error reading btree root";
-       c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
+       c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL);
        if (IS_ERR_OR_NULL(c->root))
                goto err;
 
@@ -1661,7 +1672,7 @@ static void run_cache_set(struct cache_set *c)
                goto err;
 
                err = "cannot allocate new btree root";
-               c->root = bch_btree_node_alloc(c, NULL, 0);
+               c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
                if (IS_ERR_OR_NULL(c->root))
                        goto err;
 
@@ -1697,6 +1708,7 @@ static void run_cache_set(struct cache_set *c)
 
        flash_devs_run(c);
 
+       set_bit(CACHE_SET_RUNNING, &c->flags);
        return;
 err:
        closure_sync(&cl);
@@ -1760,6 +1772,7 @@ found:
                pr_debug("set version = %llu", c->sb.version);
        }
 
+       kobject_get(&ca->kobj);
        ca->set = c;
        ca->set->cache[ca->sb.nr_this_dev] = ca;
        c->cache_by_alloc[c->caches_loaded++] = ca;
@@ -1780,8 +1793,10 @@ void bch_cache_release(struct kobject *kobj)
        struct cache *ca = container_of(kobj, struct cache, kobj);
        unsigned i;
 
-       if (ca->set)
+       if (ca->set) {
+               BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
                ca->set->cache[ca->sb.nr_this_dev] = NULL;
+       }
 
        bio_split_pool_free(&ca->bio_split_hook);
 
@@ -1798,10 +1813,8 @@ void bch_cache_release(struct kobject *kobj)
        if (ca->sb_bio.bi_inline_vecs[0].bv_page)
                put_page(ca->sb_bio.bi_io_vec[0].bv_page);
 
-       if (!IS_ERR_OR_NULL(ca->bdev)) {
-               blk_sync_queue(bdev_get_queue(ca->bdev));
+       if (!IS_ERR_OR_NULL(ca->bdev))
                blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-       }
 
        kfree(ca);
        module_put(THIS_MODULE);
@@ -1844,7 +1857,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
 }
 
 static void register_cache(struct cache_sb *sb, struct page *sb_page,
-                          struct block_device *bdev, struct cache *ca)
+                               struct block_device *bdev, struct cache *ca)
 {
        char name[BDEVNAME_SIZE];
        const char *err = "cannot allocate memory";
@@ -1877,10 +1890,12 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
                goto err;
 
        pr_info("registered cache device %s", bdevname(bdev, name));
+out:
+       kobject_put(&ca->kobj);
        return;
 err:
        pr_notice("error opening %s: %s", bdevname(bdev, name), err);
-       kobject_put(&ca->kobj);
+       goto out;
 }
1885 1900
1886/* Global interfaces/init */ 1901/* Global interfaces/init */
@@ -1945,10 +1960,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
        if (IS_ERR(bdev)) {
                if (bdev == ERR_PTR(-EBUSY)) {
                        bdev = lookup_bdev(strim(path));
+                       mutex_lock(&bch_register_lock);
                        if (!IS_ERR(bdev) && bch_is_open(bdev))
                                err = "device already registered";
                        else
                                err = "device busy";
+                       mutex_unlock(&bch_register_lock);
                }
                goto err;
        }
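Two refcounting repairs sit in this file: register_cache_set() now takes a kobject_get() when it stores the cache in the set, and register_cache() funnels both its success and error paths through a single kobject_put(), so the allocation-time reference is dropped exactly once. The single-exit shape, sketched generically (do_setup() is hypothetical, not a bcache function):

    static void register_example(struct kobject *kobj)
    {
            const char *err = NULL;

            if (do_setup(kobj) < 0) {       /* do_setup() is hypothetical */
                    err = "setup failed";
                    goto out;
            }

            pr_info("registered");
    out:
            if (err)
                    pr_notice("error: %s", err);
            kobject_put(kobj);              /* one drop for every path */
    }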
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index ac7d0d1f70d7..98df7572b5f7 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -416,8 +416,8 @@ do { \
                        average_frequency,      frequency_units);       \
        __print_time_stat(stats, name,                                  \
                        average_duration,       duration_units);        \
-       __print_time_stat(stats, name,                                  \
-                       max_duration,           duration_units);        \
+       sysfs_print(name ## _ ##max_duration ## _ ## duration_units,   \
+                       div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\
                                                                        \
        sysfs_print(name ## _last_ ## frequency_units, (stats)->last    \
                ? div_s64(local_clock() - (stats)->last,                \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f4300e4c0114..f1986bcd1bf0 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -239,7 +239,7 @@ static void read_dirty(struct cached_dev *dc)
                if (KEY_START(&w->key) != dc->last_read ||
                    jiffies_to_msecs(delay) > 50)
                        while (!kthread_should_stop() && delay)
-                               delay = schedule_timeout_uninterruptible(delay);
+                               delay = schedule_timeout_interruptible(delay);
 
                dc->last_read   = KEY_OFFSET(&w->key);
 
@@ -436,7 +436,7 @@ static int bch_writeback_thread(void *arg)
                        while (delay &&
                               !kthread_should_stop() &&
                               !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-                               delay = schedule_timeout_uninterruptible(delay);
+                               delay = schedule_timeout_interruptible(delay);
                }
        }
 
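Both writeback delay loops switch to schedule_timeout_interruptible(): an uninterruptible sleep counts toward load average and cannot be cut short, while the interruptible form returns as soon as the kthread is woken, e.g. by kthread_stop(). The resulting idiom for a stoppable kthread delay, in miniature:

    /* Sleep in interruptible chunks so kthread_stop() can end the wait. */
    while (delay && !kthread_should_stop())
            delay = schedule_timeout_interruptible(delay);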
@@ -478,7 +478,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
        dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
 }
 
-int bch_cached_dev_writeback_init(struct cached_dev *dc)
+void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
        sema_init(&dc->in_flight, 64);
        init_rwsem(&dc->writeback_lock);
@@ -494,14 +494,20 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
        dc->writeback_rate_d_term       = 30;
        dc->writeback_rate_p_term_inverse = 6000;
 
+       INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
+}
+
+int bch_cached_dev_writeback_start(struct cached_dev *dc)
+{
        dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
                                              "bcache_writeback");
        if (IS_ERR(dc->writeback_thread))
                return PTR_ERR(dc->writeback_thread);
 
-       INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
        schedule_delayed_work(&dc->writeback_rate_update,
                              dc->writeback_rate_update_seconds * HZ);
 
+       bch_writeback_queue(dc);
+
        return 0;
 }
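This init/start split is the fix for shutdown crashes on devices that never attached: bch_cached_dev_writeback_init() keeps only infallible setup and becomes void, while the fallible kthread_create() moves into bch_cached_dev_writeback_start(), called from bch_cached_dev_attach(). Teardown then only needs the IS_ERR_OR_NULL() check seen in cached_dev_free(). A minimal sketch of the pattern under hypothetical names (not the bcache functions):

    struct worker {
            struct task_struct *thread;
    };

    void worker_init(struct worker *w)
    {
            w->thread = NULL;               /* infallible setup only */
    }

    int worker_start(struct worker *w, int (*fn)(void *), void *arg)
    {
            w->thread = kthread_create(fn, arg, "worker");
            return IS_ERR(w->thread) ? PTR_ERR(w->thread) : 0;
    }

    void worker_destroy(struct worker *w)
    {
            /* Safe whether start ran, failed, or was never called. */
            if (!IS_ERR_OR_NULL(w->thread))
                    kthread_stop(w->thread);
    }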
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index e2f8598937ac..0a9dab187b79 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -85,6 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
 void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
 
 void bch_sectors_dirty_init(struct cached_dev *dc);
-int bch_cached_dev_writeback_init(struct cached_dev *);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+int bch_cached_dev_writeback_start(struct cached_dev *);
 
 #endif