summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kent Overstreet <koverstreet@google.com> 2013-06-26 20:25:38 -0400
committer Kent Overstreet <kmo@daterainc.com> 2013-07-01 17:42:47 -0400
commit e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 (patch)
tree 3382c6a5512878494e9dfda787e142c3cddae470
parent cecd628d9a9966ed0af1237df5cc5818945fe9f2 (diff)
bcache: FUA fixes
Journal writes need to be marked FUA, not just REQ_FLUSH. And btree node writes have... weird ordering requirements.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
-rw-r--r-- drivers/md/bcache/btree.c 25
-rw-r--r-- drivers/md/bcache/journal.c 2
-rw-r--r-- drivers/md/bcache/request.c 13
3 files changed, 35 insertions, 5 deletions
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 09fb8a2f43da..a6ad49ac5f2b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -328,10 +328,25 @@ static void do_btree_node_write(struct btree *b)
328 328
329 b->bio->bi_end_io = btree_node_write_endio; 329 b->bio->bi_end_io = btree_node_write_endio;
330 b->bio->bi_private = &b->io.cl; 330 b->bio->bi_private = &b->io.cl;
331 b->bio->bi_rw = REQ_META|WRITE_SYNC; 331 b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA;
332 b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); 332 b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c);
333 bch_bio_map(b->bio, i); 333 bch_bio_map(b->bio, i);
334 334
335 /*
336 * If we're appending to a leaf node, we don't technically need FUA -
337 * this write just needs to be persisted before the next journal write,
338 * which will be marked FLUSH|FUA.
339 *
340 * Similarly if we're writing a new btree root - the pointer is going to
341 * be in the next journal entry.
342 *
343 * But if we're writing a new btree node (that isn't a root) or
344 * appending to a non leaf btree node, we need either FUA or a flush
345 * when we write the parent with the new pointer. FUA is cheaper than a
346 * flush, and writes appending to leaf nodes aren't blocking anything so
347 * just make all btree node writes FUA to keep things sane.
348 */
349
335 bkey_copy(&k.key, &b->key); 350 bkey_copy(&k.key, &b->key);
336 SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); 351 SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
337 352
@@ -2092,6 +2107,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c)
2092void bch_btree_set_root(struct btree *b) 2107void bch_btree_set_root(struct btree *b)
2093{ 2108{
2094 unsigned i; 2109 unsigned i;
2110 struct closure cl;
2111
2112 closure_init_stack(&cl);
2095 2113
2096 trace_bcache_btree_set_root(b); 2114 trace_bcache_btree_set_root(b);
2097 2115
@@ -2107,7 +2125,8 @@ void bch_btree_set_root(struct btree *b)
2107 b->c->root = b; 2125 b->c->root = b;
2108 __bkey_put(b->c, &b->key); 2126 __bkey_put(b->c, &b->key);
2109 2127
2110 bch_journal_meta(b->c, NULL); 2128 bch_journal_meta(b->c, &cl);
2129 closure_sync(&cl);
2111} 2130}
2112 2131
2113/* Cache lookup */ 2132/* Cache lookup */
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 5ca22149b749..4b250667bb7f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -620,7 +620,7 @@ static void journal_write_unlocked(struct closure *cl)
620 bio_reset(bio); 620 bio_reset(bio);
621 bio->bi_sector = PTR_OFFSET(k, i); 621 bio->bi_sector = PTR_OFFSET(k, i);
622 bio->bi_bdev = ca->bdev; 622 bio->bi_bdev = ca->bdev;
623 bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; 623 bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA;
624 bio->bi_size = sectors << 9; 624 bio->bi_size = sectors << 9;
625 625
626 bio->bi_end_io = journal_write_endio; 626 bio->bi_end_io = journal_write_endio;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 17bd59704eba..bcdf1f782c3e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1035,8 +1035,19 @@ static void request_write(struct cached_dev *dc, struct search *s)
1035 1035
1036 closure_bio_submit(bio, cl, s->d); 1036 closure_bio_submit(bio, cl, s->d);
1037 } else { 1037 } else {
1038 s->op.cache_bio = bio;
1039 bch_writeback_add(dc); 1038 bch_writeback_add(dc);
1039
1040 if (s->op.flush_journal) {
1041 /* Also need to send a flush to the backing device */
1042 s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
1043 dc->disk.bio_split);
1044
1045 bio->bi_size = 0;
1046 bio->bi_vcnt = 0;
1047 closure_bio_submit(bio, cl, s->d);
1048 } else {
1049 s->op.cache_bio = bio;
1050 }
1040 } 1051 }
1041out: 1052out:
1042 closure_call(&s->op.cl, bch_insert_data, NULL, cl); 1053 closure_call(&s->op.cl, bch_insert_data, NULL, cl);