aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/bcache/bset.c
diff options
context:
space:
mode:
authorKent Overstreet <kmo@daterainc.com>2013-12-17 20:51:02 -0500
committerKent Overstreet <kmo@daterainc.com>2014-01-08 16:05:06 -0500
commitef71ec00002d92a08eb27e9d036e3d48835b6597 (patch)
tree0a7f8e6a199076bcd4524cc326776085dd52e7bd /drivers/md/bcache/bset.c
parent54a387cb9e600256e50cb9e2209e7e4f06f464de (diff)
bcache: Data corruption fix
The code that handles overlapping extents that we've just read back in from disk was depending on the behaviour of the code that handles overlapping extents as we're inserting into a btree node in the case of an insert that forced an existing extent to be split: on insert, if we had to split we'd also insert a new extent to represent the top part of the old extent - and then that new extent would get written out. The code that read the extents back in thus not bother with splitting extents - if it saw an extent that ovelapped in the middle of an older extent, it would trim the old extent to only represent the bottom part, assuming that the original insert would've inserted a new extent to represent the top part. I still haven't figured out _how_ it can happen, but I'm now pretty convinced (and testing has confirmed) that there's some kind of an obscure corner case (probably involving extent merging, and multiple overwrites in different sets) that breaks this. The fix is to change the mergesort fixup code to split extents itself when required. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
Diffstat (limited to 'drivers/md/bcache/bset.c')
-rw-r--r--drivers/md/bcache/bset.c26
1 files changed, 22 insertions, 4 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 7d388b8bb50e..16958704af3b 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -955,7 +955,7 @@ static void sort_key_next(struct btree_iter *iter,
955 *i = iter->data[--iter->used]; 955 *i = iter->data[--iter->used];
956} 956}
957 957
958static void btree_sort_fixup(struct btree_iter *iter) 958static struct bkey *btree_sort_fixup(struct btree_iter *iter, struct bkey *tmp)
959{ 959{
960 while (iter->used > 1) { 960 while (iter->used > 1) {
961 struct btree_iter_set *top = iter->data, *i = top + 1; 961 struct btree_iter_set *top = iter->data, *i = top + 1;
@@ -983,9 +983,22 @@ static void btree_sort_fixup(struct btree_iter *iter)
983 } else { 983 } else {
984 /* can't happen because of comparison func */ 984 /* can't happen because of comparison func */
985 BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); 985 BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
986 bch_cut_back(&START_KEY(i->k), top->k); 986
987 if (bkey_cmp(i->k, top->k) < 0) {
988 bkey_copy(tmp, top->k);
989
990 bch_cut_back(&START_KEY(i->k), tmp);
991 bch_cut_front(i->k, top->k);
992 heap_sift(iter, 0, btree_iter_cmp);
993
994 return tmp;
995 } else {
996 bch_cut_back(&START_KEY(i->k), top->k);
997 }
987 } 998 }
988 } 999 }
1000
1001 return NULL;
989} 1002}
990 1003
991static void btree_mergesort(struct btree *b, struct bset *out, 1004static void btree_mergesort(struct btree *b, struct bset *out,
@@ -993,15 +1006,20 @@ static void btree_mergesort(struct btree *b, struct bset *out,
993 bool fixup, bool remove_stale) 1006 bool fixup, bool remove_stale)
994{ 1007{
995 struct bkey *k, *last = NULL; 1008 struct bkey *k, *last = NULL;
1009 BKEY_PADDED(k) tmp;
996 bool (*bad)(struct btree *, const struct bkey *) = remove_stale 1010 bool (*bad)(struct btree *, const struct bkey *) = remove_stale
997 ? bch_ptr_bad 1011 ? bch_ptr_bad
998 : bch_ptr_invalid; 1012 : bch_ptr_invalid;
999 1013
1000 while (!btree_iter_end(iter)) { 1014 while (!btree_iter_end(iter)) {
1001 if (fixup && !b->level) 1015 if (fixup && !b->level)
1002 btree_sort_fixup(iter); 1016 k = btree_sort_fixup(iter, &tmp.k);
1017 else
1018 k = NULL;
1019
1020 if (!k)
1021 k = bch_btree_iter_next(iter);
1003 1022
1004 k = bch_btree_iter_next(iter);
1005 if (bad(b, k)) 1023 if (bad(b, k))
1006 continue; 1024 continue;
1007 1025