diff options
author | Kent Overstreet <koverstreet@google.com> | 2013-06-26 20:25:38 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2013-08-29 12:47:40 -0400 |
commit | ae61fd4496f9d9290b9e84fc373818b2ee780137 (patch) | |
tree | 0b84deba412f79533e43c20601dad2d9d6eb2909 | |
parent | ba5c60fc8f5f574c7cec70740ca19d358b780c57 (diff) |
bcache: FUA fixes
commit e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 upstream.
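Journal writes need to be marked REQ_FUA, not just REQ_FLUSH. Btree node
writes also have subtle ordering requirements: a new non-root node, or an append to a non-leaf node, must be persistent before its parent is written with the new pointer, so all btree node writes are now marked FUA as well (see the comment in do_btree_write()).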
Signed-off-by: Kent Overstreet <koverstreet@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | drivers/md/bcache/btree.c | 23 | ||||
-rw-r--r-- | drivers/md/bcache/journal.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 13 |
3 files changed, 34 insertions, 4 deletions
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7b687a6f3dec..833c590806ba 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
@@ -326,10 +326,25 @@ static void do_btree_write(struct btree *b) | |||
326 | i->csum = btree_csum_set(b, i); | 326 | i->csum = btree_csum_set(b, i); |
327 | 327 | ||
328 | btree_bio_init(b); | 328 | btree_bio_init(b); |
329 | b->bio->bi_rw = REQ_META|WRITE_SYNC; | 329 | b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; |
330 | b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); | 330 | b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); |
331 | bch_bio_map(b->bio, i); | 331 | bch_bio_map(b->bio, i); |
332 | 332 | ||
333 | /* | ||
334 | * If we're appending to a leaf node, we don't technically need FUA - | ||
335 | * this write just needs to be persisted before the next journal write, | ||
336 | * which will be marked FLUSH|FUA. | ||
337 | * | ||
338 | * Similarly if we're writing a new btree root - the pointer is going to | ||
339 | * be in the next journal entry. | ||
340 | * | ||
341 | * But if we're writing a new btree node (that isn't a root) or | ||
342 | * appending to a non leaf btree node, we need either FUA or a flush | ||
343 | * when we write the parent with the new pointer. FUA is cheaper than a | ||
344 | * flush, and writes appending to leaf nodes aren't blocking anything so | ||
345 | * just make all btree node writes FUA to keep things sane. | ||
346 | */ | ||
347 | |||
333 | bkey_copy(&k.key, &b->key); | 348 | bkey_copy(&k.key, &b->key); |
334 | SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); | 349 | SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); |
335 | 350 | ||
@@ -2142,6 +2157,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c) | |||
2142 | void bch_btree_set_root(struct btree *b) | 2157 | void bch_btree_set_root(struct btree *b) |
2143 | { | 2158 | { |
2144 | unsigned i; | 2159 | unsigned i; |
2160 | struct closure cl; | ||
2161 | |||
2162 | closure_init_stack(&cl); | ||
2145 | 2163 | ||
2146 | BUG_ON(!b->written); | 2164 | BUG_ON(!b->written); |
2147 | 2165 | ||
@@ -2155,8 +2173,9 @@ void bch_btree_set_root(struct btree *b) | |||
2155 | b->c->root = b; | 2173 | b->c->root = b; |
2156 | __bkey_put(b->c, &b->key); | 2174 | __bkey_put(b->c, &b->key); |
2157 | 2175 | ||
2158 | bch_journal_meta(b->c, NULL); | 2176 | bch_journal_meta(b->c, &cl); |
2159 | pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); | 2177 | pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); |
2178 | closure_sync(&cl); | ||
2160 | } | 2179 | } |
2161 | 2180 | ||
2162 | /* Cache lookup */ | 2181 | /* Cache lookup */ |
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8a54d3b4f517..b49abb246bb6 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c | |||
@@ -622,7 +622,7 @@ static void journal_write_unlocked(struct closure *cl) | |||
622 | bio_reset(bio); | 622 | bio_reset(bio); |
623 | bio->bi_sector = PTR_OFFSET(k, i); | 623 | bio->bi_sector = PTR_OFFSET(k, i); |
624 | bio->bi_bdev = ca->bdev; | 624 | bio->bi_bdev = ca->bdev; |
625 | bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; | 625 | bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; |
626 | bio->bi_size = sectors << 9; | 626 | bio->bi_size = sectors << 9; |
627 | 627 | ||
628 | bio->bi_end_io = journal_write_endio; | 628 | bio->bi_end_io = journal_write_endio; |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 2f36743ce708..afb9a998a737 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
@@ -1053,9 +1053,20 @@ static void request_write(struct cached_dev *dc, struct search *s) | |||
1053 | trace_bcache_writethrough(s->orig_bio); | 1053 | trace_bcache_writethrough(s->orig_bio); |
1054 | closure_bio_submit(bio, cl, s->d); | 1054 | closure_bio_submit(bio, cl, s->d); |
1055 | } else { | 1055 | } else { |
1056 | s->op.cache_bio = bio; | ||
1057 | trace_bcache_writeback(s->orig_bio); | 1056 | trace_bcache_writeback(s->orig_bio); |
1058 | bch_writeback_add(dc, bio_sectors(bio)); | 1057 | bch_writeback_add(dc, bio_sectors(bio)); |
1058 | |||
1059 | if (s->op.flush_journal) { | ||
1060 | /* Also need to send a flush to the backing device */ | ||
1061 | s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, | ||
1062 | dc->disk.bio_split); | ||
1063 | |||
1064 | bio->bi_size = 0; | ||
1065 | bio->bi_vcnt = 0; | ||
1066 | closure_bio_submit(bio, cl, s->d); | ||
1067 | } else { | ||
1068 | s->op.cache_bio = bio; | ||
1069 | } | ||
1059 | } | 1070 | } |
1060 | out: | 1071 | out: |
1061 | closure_call(&s->op.cl, bch_insert_data, NULL, cl); | 1072 | closure_call(&s->op.cl, bch_insert_data, NULL, cl); |