author	Kent Overstreet <kmo@daterainc.com>	2013-12-20 20:22:05 -0500
committer	Kent Overstreet <kmo@daterainc.com>	2014-01-08 16:05:12 -0500
commit	65d45231b56efb3db51eb441e2c68f8252ecdd12 (patch)
tree	b862e6fa72d076373c79841b555ef525d3b0f41b /drivers/md
parent	ee811287c9f241641899788cbfc9d70ed96ba3a5 (diff)
bcache: Abstract out stuff needed for sorting
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
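
The change replaces "if (b->level)" branches with an ops table: each btree node carries a "const struct btree_keys_ops *ops" pointer, set to bch_extent_keys_ops for leaf nodes (level 0) and bch_btree_keys_ops for interior nodes, and callers such as bch_ptr_invalid(), bch_ptr_bad(), bch_bkey_try_merge(), and btree_mergesort() dispatch through it. Below is a minimal standalone sketch of that dispatch pattern; the types, stub bodies, and names like ptr_invalid()/try_merge() are simplified stand-ins for illustration, not the kernel code.

/*
 * Minimal sketch of the per-node ops-table dispatch this patch introduces.
 * Simplified stand-in types; not the kernel implementation.
 */
#include <stdbool.h>
#include <stdio.h>

struct bkey { int dummy; };
struct btree;

struct btree_keys_ops {
	bool (*key_invalid)(struct btree *, const struct bkey *);
	bool (*key_merge)(struct btree *, struct bkey *, struct bkey *);
	bool is_extents;
};

struct btree {
	const struct btree_keys_ops *ops;
	int level;
};

/* Stand-in for bch_btree_ptr_invalid(): validates interior-node keys */
static bool btree_key_invalid(struct btree *b, const struct bkey *k)
{
	return false;
}

/* Stand-in for bch_extent_invalid(): validates leaf (extent) keys */
static bool extent_key_invalid(struct btree *b, const struct bkey *k)
{
	return false;
}

/* Btree-pointer nodes never merge keys, so .key_merge stays NULL */
static const struct btree_keys_ops btree_ops = {
	.key_invalid	= btree_key_invalid,
};

static const struct btree_keys_ops extent_ops = {
	.key_invalid	= extent_key_invalid,
	.is_extents	= true,
};

/* Callers dispatch through b->ops, as the new bch_ptr_invalid() does */
static bool ptr_invalid(struct btree *b, const struct bkey *k)
{
	return b->ops->key_invalid(b, k);
}

/* Optional hooks are NULL-checked, as the new bch_bkey_try_merge() does */
static bool try_merge(struct btree *b, struct bkey *l, struct bkey *r)
{
	return b->ops->key_merge ? b->ops->key_merge(b, l, r) : false;
}

int main(void)
{
	struct bkey l = { 0 }, r = { 0 };
	struct btree node = { .level = 0 };

	/* Same selection the patch makes at node-allocation time:
	 * leaves get extent ops, interior nodes get btree-pointer ops. */
	node.ops = node.level ? &btree_ops : &extent_ops;

	printf("invalid=%d merged=%d\n",
	       ptr_invalid(&node, &l), try_merge(&node, &l, &r));
	return 0;
}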
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/bcache/Makefile	|   5
-rw-r--r--	drivers/md/bcache/bset.c	| 279
-rw-r--r--	drivers/md/bcache/bset.h	|   8
-rw-r--r--	drivers/md/bcache/btree.c	|   6
-rw-r--r--	drivers/md/bcache/btree.h	|  42
-rw-r--r--	drivers/md/bcache/debug.c	|   1
-rw-r--r--	drivers/md/bcache/extents.c	| 354
-rw-r--r--	drivers/md/bcache/extents.h	|  12
-rw-r--r--	drivers/md/bcache/super.c	|   5
9 files changed, 423 insertions(+), 289 deletions(-)
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 0e9c82523be6..c488b846f831 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -1,7 +1,8 @@
 
 obj-$(CONFIG_BCACHE)	+= bcache.o
 
-bcache-y		:= alloc.o btree.o bset.o io.o journal.o writeback.o\
-	movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o
+bcache-y		:= alloc.o bset.o btree.o closure.o debug.o extents.o\
+	io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
+	util.o writeback.o
 
 CFLAGS_request.o	+= -Iblock
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e04e5908e29f..c2c42cbbe885 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -63,140 +63,6 @@ void bch_keylist_pop_front(struct keylist *l)
 		bch_keylist_bytes(l));
 }
 
-/* Pointer validation */
-
-static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	unsigned i;
-
-	for (i = 0; i < KEY_PTRS(k); i++)
-		if (ptr_available(c, k, i)) {
-			struct cache *ca = PTR_CACHE(c, k, i);
-			size_t bucket = PTR_BUCKET_NR(c, k, i);
-			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
-
-			if (KEY_SIZE(k) + r > c->sb.bucket_size ||
-			    bucket <  ca->sb.first_bucket ||
-			    bucket >= ca->sb.nbuckets)
-				return true;
-		}
-
-	return false;
-}
-
-bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	char buf[80];
-
-	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
-		goto bad;
-
-	if (__ptr_invalid(c, k))
-		goto bad;
-
-	return false;
-bad:
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
-	return true;
-}
-
-bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	char buf[80];
-
-	if (!KEY_SIZE(k))
-		return true;
-
-	if (KEY_SIZE(k) > KEY_OFFSET(k))
-		goto bad;
-
-	if (__ptr_invalid(c, k))
-		goto bad;
-
-	return false;
-bad:
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
-	return true;
-}
-
-static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k,
-				     unsigned ptr)
-{
-	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
-	char buf[80];
-
-	if (mutex_trylock(&b->c->bucket_lock)) {
-		if (b->level) {
-			if (KEY_DIRTY(k) ||
-			    g->prio != BTREE_PRIO ||
-			    (b->c->gc_mark_valid &&
-			     GC_MARK(g) != GC_MARK_METADATA))
-				goto err;
-
-		} else {
-			if (g->prio == BTREE_PRIO)
-				goto err;
-
-			if (KEY_DIRTY(k) &&
-			    b->c->gc_mark_valid &&
-			    GC_MARK(g) != GC_MARK_DIRTY)
-				goto err;
-		}
-		mutex_unlock(&b->c->bucket_lock);
-	}
-
-	return false;
-err:
-	mutex_unlock(&b->c->bucket_lock);
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	btree_bug(b,
-"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
-		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
-		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
-	return true;
-}
-
-bool bch_ptr_bad(struct btree *b, const struct bkey *k)
-{
-	struct bucket *g;
-	unsigned i, stale;
-
-	if (!bkey_cmp(k, &ZERO_KEY) ||
-	    !KEY_PTRS(k) ||
-	    bch_ptr_invalid(b, k))
-		return true;
-
-	for (i = 0; i < KEY_PTRS(k); i++)
-		if (!ptr_available(b->c, k, i))
-			return true;
-
-	if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
-		return false;
-
-	for (i = 0; i < KEY_PTRS(k); i++) {
-		g = PTR_BUCKET(b->c, k, i);
-		stale = ptr_stale(b->c, k, i);
-
-		btree_bug_on(stale > 96, b,
-			     "key too stale: %i, need_gc %u",
-			     stale, b->c->need_gc);
-
-		btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
-			     b, "stale dirty pointer");
-
-		if (stale)
-			return true;
-
-		if (expensive_debug_checks(b->c) &&
-		    ptr_bad_expensive_checks(b, k, i))
-			return true;
-	}
-
-	return false;
-}
-
 /* Key/pointer manipulation */
 
 void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
@@ -251,57 +117,6 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k)
 	return true;
 }
 
-static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
-{
-	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
-		~((uint64_t)1 << 63);
-}
-
-/* Tries to merge l and r: l should be lower than r
- * Returns true if we were able to merge. If we did merge, l will be the merged
- * key, r will be untouched.
- */
-bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
-{
-	unsigned i;
-
-	if (key_merging_disabled(b->c))
-		return false;
-
-	if (KEY_PTRS(l) != KEY_PTRS(r) ||
-	    KEY_DIRTY(l) != KEY_DIRTY(r) ||
-	    bkey_cmp(l, &START_KEY(r)))
-		return false;
-
-	for (i = 0; i < KEY_PTRS(l); i++)
-		if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
-		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
-			return false;
-
-	/* Keys with no pointers aren't restricted to one bucket and could
-	 * overflow KEY_SIZE
-	 */
-	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
-		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
-		SET_KEY_SIZE(l, USHRT_MAX);
-
-		bch_cut_front(l, r);
-		return false;
-	}
-
-	if (KEY_CSUM(l)) {
-		if (KEY_CSUM(r))
-			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
-		else
-			SET_KEY_CSUM(l, 0);
-	}
-
-	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
-	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
-
-	return true;
-}
-
 /* Auxiliary search trees */
 
 /* 32 bits total: */
@@ -1099,85 +914,6 @@ int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
 	return 0;
 }
 
-static void sort_key_next(struct btree_iter *iter,
-			  struct btree_iter_set *i)
-{
-	i->k = bkey_next(i->k);
-
-	if (i->k == i->end)
-		*i = iter->data[--iter->used];
-}
-
-/*
- * Returns true if l > r - unless l == r, in which case returns true if l is
- * older than r.
- *
- * Necessary for btree_sort_fixup() - if there are multiple keys that compare
- * equal in different sets, we have to process them newest to oldest.
- */
-static inline bool sort_extent_cmp(struct btree_iter_set l,
-				   struct btree_iter_set r)
-{
-	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
-
-	return c ? c > 0 : l.k < r.k;
-}
-
-static inline bool sort_cmp(struct btree_iter_set l,
-			    struct btree_iter_set r)
-{
-	int64_t c = bkey_cmp(l.k, r.k);
-
-	return c ? c > 0 : l.k < r.k;
-}
-
-static struct bkey *btree_sort_fixup_extents(struct btree_iter *iter,
-					     struct bkey *tmp)
-{
-	while (iter->used > 1) {
-		struct btree_iter_set *top = iter->data, *i = top + 1;
-
-		if (iter->used > 2 &&
-		    sort_extent_cmp(i[0], i[1]))
-			i++;
-
-		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
-			break;
-
-		if (!KEY_SIZE(i->k)) {
-			sort_key_next(iter, i);
-			heap_sift(iter, i - top, sort_extent_cmp);
-			continue;
-		}
-
-		if (top->k > i->k) {
-			if (bkey_cmp(top->k, i->k) >= 0)
-				sort_key_next(iter, i);
-			else
-				bch_cut_front(top->k, i->k);
-
-			heap_sift(iter, i - top, sort_extent_cmp);
-		} else {
-			/* can't happen because of comparison func */
-			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
-
-			if (bkey_cmp(i->k, top->k) < 0) {
-				bkey_copy(tmp, top->k);
-
-				bch_cut_back(&START_KEY(i->k), tmp);
-				bch_cut_front(i->k, top->k);
-				heap_sift(iter, 0, btree_iter_cmp);
-
-				return tmp;
-			} else {
-				bch_cut_back(&START_KEY(i->k), top->k);
-			}
-		}
-	}
-
-	return NULL;
-}
-
 static void btree_mergesort(struct btree *b, struct bset *out,
 			    struct btree_iter *iter,
 			    bool fixup, bool remove_stale)
@@ -1185,25 +921,22 @@ static void btree_mergesort(struct btree *b, struct bset *out,
 	int i;
 	struct bkey *k, *last = NULL;
 	BKEY_PADDED(k) tmp;
-	btree_iter_cmp_fn *cmp = b->level
-		? sort_cmp
-		: sort_extent_cmp;
 	bool (*bad)(struct btree *, const struct bkey *) = remove_stale
 		? bch_ptr_bad
 		: bch_ptr_invalid;
 
 	/* Heapify the iterator, using our comparison function */
 	for (i = iter->used / 2 - 1; i >= 0; --i)
-		heap_sift(iter, i, cmp);
+		heap_sift(iter, i, b->ops->sort_cmp);
 
 	while (!btree_iter_end(iter)) {
-		if (fixup && !b->level)
-			k = btree_sort_fixup_extents(iter, &tmp.k);
+		if (b->ops->sort_fixup && fixup)
+			k = b->ops->sort_fixup(iter, &tmp.k);
 		else
 			k = NULL;
 
 		if (!k)
-			k = __bch_btree_iter_next(iter, cmp);
+			k = __bch_btree_iter_next(iter, b->ops->sort_cmp);
 
 		if (bad(b, k))
 			continue;
@@ -1211,8 +944,7 @@ static void btree_mergesort(struct btree *b, struct bset *out,
 		if (!last) {
 			last = out->start;
 			bkey_copy(last, k);
-		} else if (b->level ||
-			   !bch_bkey_try_merge(b, last, k)) {
+		} else if (!bch_bkey_try_merge(b, last, k)) {
 			last = bkey_next(last);
 			bkey_copy(last, k);
 		}
@@ -1300,6 +1032,7 @@ void bch_btree_sort_partial(struct btree *b, unsigned start,
 
 	EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
 }
+EXPORT_SYMBOL(bch_btree_sort_partial);
 
 void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter,
 				    struct bset_sort_state *state)
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index ab31f3fc8d43..b5797129e919 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -376,14 +376,6 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
 
 struct cache_set;
 const char *bch_ptr_status(struct cache_set *, const struct bkey *);
-bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
-bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
-bool bch_btree_ptr_bad(struct btree *, const struct bkey *);
-bool bch_extent_ptr_bad(struct btree *, const struct bkey *);
-
-bool bch_ptr_bad(struct btree *, const struct bkey *);
-
-bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
 
 int bch_bset_print_stats(struct cache_set *, char *);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 89252e7f2879..6734e2759b93 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -23,6 +23,7 @@
23#include "bcache.h" 23#include "bcache.h"
24#include "btree.h" 24#include "btree.h"
25#include "debug.h" 25#include "debug.h"
26#include "extents.h"
26#include "writeback.h" 27#include "writeback.h"
27 28
28#include <linux/slab.h> 29#include <linux/slab.h>
@@ -931,6 +932,11 @@ out:
 	b->level	= level;
 	b->parent	= (void *) ~0UL;
 
+	if (!b->level)
+		b->ops	= &bch_extent_keys_ops;
+	else
+		b->ops	= &bch_btree_keys_ops;
+
 	mca_reinit(b);
 
 	return b;
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index b3541173ccf2..0b436079db71 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -113,7 +113,28 @@ struct btree_write {
 	int			prio_blocked;
 };
 
+struct btree_keys_ops {
+	bool		(*sort_cmp)(struct btree_iter_set,
+				    struct btree_iter_set);
+	struct bkey	*(*sort_fixup)(struct btree_iter *,
+				       struct bkey *);
+	bool		(*key_invalid)(struct btree *,
+				       const struct bkey *);
+	bool		(*key_bad)(struct btree *,
+				   const struct bkey *);
+	bool		(*key_merge)(struct btree *,
+				     struct bkey *, struct bkey *);
+
+
+	/*
+	 * Only used for deciding whether to use START_KEY(k) or just the key
+	 * itself in a couple places
+	 */
+	bool		is_extents;
+};
+
 struct btree {
+	const struct btree_keys_ops	*ops;
 	/* Hottest entries first */
 	struct hlist_node	hash;
 
@@ -232,10 +253,23 @@ static inline void set_gc_sectors(struct cache_set *c)
 
 static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
 {
-	if (b->level)
-		return bch_btree_ptr_invalid(b->c, k);
-	else
-		return bch_extent_ptr_invalid(b->c, k);
+	return b->ops->key_invalid(b, k);
+}
+
+static inline bool bch_ptr_bad(struct btree *b, const struct bkey *k)
+{
+	return b->ops->key_bad(b, k);
+}
+
+/*
+ * Tries to merge l and r: l should be lower than r
+ * Returns true if we were able to merge. If we did merge, l will be the merged
+ * key, r will be untouched.
+ */
+static inline bool bch_bkey_try_merge(struct btree *b,
+				      struct bkey *l, struct bkey *r)
+{
+	return b->ops->key_merge ? b->ops->key_merge(b, l, r) : false;
 }
 
 void bkey_put(struct cache_set *c, struct bkey *k);
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 5a78137c420d..2c6587d016db 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -145,6 +145,7 @@ void bch_btree_verify(struct btree *b)
 	bkey_copy(&v->key, &b->key);
 	v->written = 0;
 	v->level = b->level;
+	v->ops = b->ops;
 
 	bio = bch_bbio_alloc(b->c);
 	bio->bi_bdev		= PTR_CACHE(b->c, &b->key, 0)->bdev;
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
new file mode 100644
index 000000000000..8fe6aaece41d
--- /dev/null
+++ b/drivers/md/bcache/extents.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * Uses a block device as cache for other block devices; optimized for SSDs.
+ * All allocation is done in buckets, which should match the erase block size
+ * of the device.
+ *
+ * Buckets containing cached data are kept on a heap sorted by priority;
+ * bucket priority is increased on cache hit, and periodically all the buckets
+ * on the heap have their priority scaled down. This currently is just used as
+ * an LRU but in the future should allow for more intelligent heuristics.
+ *
+ * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
+ * counter. Garbage collection is used to remove stale pointers.
+ *
+ * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
+ * as keys are inserted we only sort the pages that have not yet been written.
+ * When garbage collection is run, we resort the entire node.
+ *
+ * All configuration is done via sysfs; see Documentation/bcache.txt.
+ */
+
+#include "bcache.h"
+#include "btree.h"
+#include "debug.h"
+#include "extents.h"
+#include "writeback.h"
+
+static void sort_key_next(struct btree_iter *iter,
+			  struct btree_iter_set *i)
+{
+	i->k = bkey_next(i->k);
+
+	if (i->k == i->end)
+		*i = iter->data[--iter->used];
+}
+
+static bool bch_key_sort_cmp(struct btree_iter_set l,
+			     struct btree_iter_set r)
+{
+	int64_t c = bkey_cmp(l.k, r.k);
+
+	return c ? c > 0 : l.k < r.k;
+}
+
+static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
+{
+	unsigned i;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (ptr_available(c, k, i)) {
+			struct cache *ca = PTR_CACHE(c, k, i);
+			size_t bucket = PTR_BUCKET_NR(c, k, i);
+			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+			if (KEY_SIZE(k) + r > c->sb.bucket_size ||
+			    bucket <  ca->sb.first_bucket ||
+			    bucket >= ca->sb.nbuckets)
+				return true;
+		}
+
+	return false;
+}
+
+/* Btree ptrs */
+
+bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
+{
+	char buf[80];
+
+	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
+		goto bad;
+
+	if (__ptr_invalid(c, k))
+		goto bad;
+
+	return false;
+bad:
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
+	return true;
+}
+
+static bool bch_btree_ptr_invalid(struct btree *b, const struct bkey *k)
+{
+	return __bch_btree_ptr_invalid(b->c, k);
+}
+
+static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
+{
+	unsigned i;
+	char buf[80];
+	struct bucket *g;
+
+	if (mutex_trylock(&b->c->bucket_lock)) {
+		for (i = 0; i < KEY_PTRS(k); i++)
+			if (ptr_available(b->c, k, i)) {
+				g = PTR_BUCKET(b->c, k, i);
+
+				if (KEY_DIRTY(k) ||
+				    g->prio != BTREE_PRIO ||
+				    (b->c->gc_mark_valid &&
+				     GC_MARK(g) != GC_MARK_METADATA))
+					goto err;
+			}
+
+		mutex_unlock(&b->c->bucket_lock);
+	}
+
+	return false;
+err:
+	mutex_unlock(&b->c->bucket_lock);
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	btree_bug(b,
+"inconsistent btree pointer %s: bucket %li pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
+		  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
+		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	return true;
+}
+
+static bool bch_btree_ptr_bad(struct btree *b, const struct bkey *k)
+{
+	unsigned i;
+
+	if (!bkey_cmp(k, &ZERO_KEY) ||
+	    !KEY_PTRS(k) ||
+	    bch_ptr_invalid(b, k))
+		return true;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (!ptr_available(b->c, k, i) ||
+		    ptr_stale(b->c, k, i))
+			return true;
+
+	if (expensive_debug_checks(b->c) &&
+	    btree_ptr_bad_expensive(b, k))
+		return true;
+
+	return false;
+}
+
+const struct btree_keys_ops bch_btree_keys_ops = {
+	.sort_cmp	= bch_key_sort_cmp,
+	.key_invalid	= bch_btree_ptr_invalid,
+	.key_bad	= bch_btree_ptr_bad,
+};
+
+/* Extents */
+
+/*
+ * Returns true if l > r - unless l == r, in which case returns true if l is
+ * older than r.
+ *
+ * Necessary for btree_sort_fixup() - if there are multiple keys that compare
+ * equal in different sets, we have to process them newest to oldest.
+ */
+static bool bch_extent_sort_cmp(struct btree_iter_set l,
+				struct btree_iter_set r)
+{
+	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
+
+	return c ? c > 0 : l.k < r.k;
+}
+
+static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
+					  struct bkey *tmp)
+{
+	while (iter->used > 1) {
+		struct btree_iter_set *top = iter->data, *i = top + 1;
+
+		if (iter->used > 2 &&
+		    bch_extent_sort_cmp(i[0], i[1]))
+			i++;
+
+		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
+			break;
+
+		if (!KEY_SIZE(i->k)) {
+			sort_key_next(iter, i);
+			heap_sift(iter, i - top, bch_extent_sort_cmp);
+			continue;
+		}
+
+		if (top->k > i->k) {
+			if (bkey_cmp(top->k, i->k) >= 0)
+				sort_key_next(iter, i);
+			else
+				bch_cut_front(top->k, i->k);
+
+			heap_sift(iter, i - top, bch_extent_sort_cmp);
+		} else {
+			/* can't happen because of comparison func */
+			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
+
+			if (bkey_cmp(i->k, top->k) < 0) {
+				bkey_copy(tmp, top->k);
+
+				bch_cut_back(&START_KEY(i->k), tmp);
+				bch_cut_front(i->k, top->k);
+				heap_sift(iter, 0, bch_extent_sort_cmp);
+
+				return tmp;
+			} else {
+				bch_cut_back(&START_KEY(i->k), top->k);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static bool bch_extent_invalid(struct btree *b, const struct bkey *k)
+{
+	char buf[80];
+
+	if (!KEY_SIZE(k))
+		return true;
+
+	if (KEY_SIZE(k) > KEY_OFFSET(k))
+		goto bad;
+
+	if (__ptr_invalid(b->c, k))
+		goto bad;
+
+	return false;
+bad:
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
+	return true;
+}
+
+static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
+				     unsigned ptr)
+{
+	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
+	char buf[80];
+
+	if (mutex_trylock(&b->c->bucket_lock)) {
+		if (b->c->gc_mark_valid &&
+		    ((GC_MARK(g) != GC_MARK_DIRTY &&
+		      KEY_DIRTY(k)) ||
+		     GC_MARK(g) == GC_MARK_METADATA))
+			goto err;
+
+		if (g->prio == BTREE_PRIO)
+			goto err;
+
+		mutex_unlock(&b->c->bucket_lock);
+	}
+
+	return false;
+err:
+	mutex_unlock(&b->c->bucket_lock);
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	btree_bug(b,
+"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
+		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
+		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	return true;
+}
+
+static bool bch_extent_bad(struct btree *b, const struct bkey *k)
+{
+	struct bucket *g;
+	unsigned i, stale;
+
+	if (!KEY_PTRS(k) ||
+	    bch_extent_invalid(b, k))
+		return true;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (!ptr_available(b->c, k, i))
+			return true;
+
+	if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
+		return false;
+
+	for (i = 0; i < KEY_PTRS(k); i++) {
+		g = PTR_BUCKET(b->c, k, i);
+		stale = ptr_stale(b->c, k, i);
+
+		btree_bug_on(stale > 96, b,
+			     "key too stale: %i, need_gc %u",
+			     stale, b->c->need_gc);
+
+		btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
+			     b, "stale dirty pointer");
+
+		if (stale)
+			return true;
+
+		if (expensive_debug_checks(b->c) &&
+		    bch_extent_bad_expensive(b, k, i))
+			return true;
+	}
+
+	return false;
+}
+
+static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
+{
+	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
+		~((uint64_t)1 << 63);
+}
+
+static bool bch_extent_merge(struct btree *b, struct bkey *l, struct bkey *r)
+{
+	unsigned i;
+
+	if (key_merging_disabled(b->c))
+		return false;
+
+	if (KEY_PTRS(l) != KEY_PTRS(r) ||
+	    KEY_DIRTY(l) != KEY_DIRTY(r) ||
+	    bkey_cmp(l, &START_KEY(r)))
+		return false;
+
+	for (i = 0; i < KEY_PTRS(l); i++)
+		if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
+			return false;
+
+	/* Keys with no pointers aren't restricted to one bucket and could
+	 * overflow KEY_SIZE
+	 */
+	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
+		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
+		SET_KEY_SIZE(l, USHRT_MAX);
+
+		bch_cut_front(l, r);
+		return false;
+	}
+
+	if (KEY_CSUM(l)) {
+		if (KEY_CSUM(r))
+			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
+		else
+			SET_KEY_CSUM(l, 0);
+	}
+
+	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
+	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
+
+	return true;
+}
+
+const struct btree_keys_ops bch_extent_keys_ops = {
+	.sort_cmp	= bch_extent_sort_cmp,
+	.sort_fixup	= bch_extent_sort_fixup,
+	.key_invalid	= bch_extent_invalid,
+	.key_bad	= bch_extent_bad,
+	.key_merge	= bch_extent_merge,
+	.is_extents	= true,
+};
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
new file mode 100644
index 000000000000..e0c0b685d2f7
--- /dev/null
+++ b/drivers/md/bcache/extents.h
@@ -0,0 +1,12 @@
+#ifndef _BCACHE_EXTENTS_H
+#define _BCACHE_EXTENTS_H
+
+extern const struct btree_keys_ops bch_btree_keys_ops;
+extern const struct btree_keys_ops bch_extent_keys_ops;
+
+struct bkey;
+struct cache_set;
+
+bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
+
+#endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 12057a43e01f..6d6a7a15043e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -9,6 +9,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "extents.h"
 #include "request.h"
 #include "writeback.h"
 
@@ -399,7 +400,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
 {
 	struct bkey *k = &j->uuid_bucket;
 
-	if (bch_btree_ptr_invalid(c, k))
+	if (__bch_btree_ptr_invalid(c, k))
 		return "bad uuid pointer";
 
 	bkey_copy(&c->uuid_bucket, k);
@@ -1575,7 +1576,7 @@ static void run_cache_set(struct cache_set *c)
 		k = &j->btree_root;
 
 		err = "bad btree root";
-		if (__bch_btree_ptr_invalid(c, k))
+		if (__bch_btree_ptr_invalid(c, k))
 			goto err;
 
 		err = "error reading btree root";