| author | Kent Overstreet <kmo@daterainc.com> | 2013-12-20 20:22:05 -0500 |
|---|---|---|
| committer | Kent Overstreet <kmo@daterainc.com> | 2014-01-08 16:05:12 -0500 |
| commit | 65d45231b56efb3db51eb441e2c68f8252ecdd12 (patch) | |
| tree | b862e6fa72d076373c79841b555ef525d3b0f41b /drivers/md | |
| parent | ee811287c9f241641899788cbfc9d70ed96ba3a5 (diff) | |
bcache: Abstract out stuff needed for sorting
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
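In brief, the patch moves the key-validation, merge, and sort-comparison code that used to branch on `b->level` behind a per-node operations table. As a hedged orientation sketch (simplified, stubbed types; the real definitions are in the btree.h and extents.c hunks below):

```c
/* Minimal sketch of the ops-table pattern this patch introduces.
 * Types are stubbed out for illustration; see the btree.h hunk
 * below for the real struct btree_keys_ops. */
#include <stdbool.h>

struct bkey;
struct btree;

struct btree_keys_ops {
	bool (*key_invalid)(struct btree *, const struct bkey *);
	bool (*key_bad)(struct btree *, const struct bkey *);
	bool (*key_merge)(struct btree *, struct bkey *, struct bkey *);
};

struct btree {
	const struct btree_keys_ops *ops;	/* chosen once, by node level */
	int level;
};

/* Callers stop branching on b->level and just dispatch; an op left
 * NULL (e.g. key_merge for interior nodes) simply means "unsupported": */
static inline bool try_merge(struct btree *b, struct bkey *l, struct bkey *r)
{
	return b->ops->key_merge ? b->ops->key_merge(b, l, r) : false;
}
```

Leaf nodes get `bch_extent_keys_ops`, interior nodes `bch_btree_keys_ops`; ops with no meaningful implementation for a node type (`key_merge` and `sort_fixup` for btree pointers) are left NULL.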
Diffstat (limited to 'drivers/md')

 drivers/md/bcache/Makefile  |   5
 drivers/md/bcache/bset.c    | 279
 drivers/md/bcache/bset.h    |   8
 drivers/md/bcache/btree.c   |   6
 drivers/md/bcache/btree.h   |  42
 drivers/md/bcache/debug.c   |   1
 drivers/md/bcache/extents.c | 354
 drivers/md/bcache/extents.h |  12
 drivers/md/bcache/super.c   |   5

 9 files changed, 423 insertions(+), 289 deletions(-)
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 0e9c82523be6..c488b846f831 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -1,7 +1,8 @@
 
 obj-$(CONFIG_BCACHE)	+= bcache.o
 
-bcache-y		:= alloc.o btree.o bset.o io.o journal.o writeback.o\
-	movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o
+bcache-y		:= alloc.o bset.o btree.o closure.o debug.o extents.o\
+	io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
+	util.o writeback.o
 
 CFLAGS_request.o	+= -Iblock
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e04e5908e29f..c2c42cbbe885 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -63,140 +63,6 @@ void bch_keylist_pop_front(struct keylist *l)
 		bch_keylist_bytes(l));
 }
 
-/* Pointer validation */
-
-static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	unsigned i;
-
-	for (i = 0; i < KEY_PTRS(k); i++)
-		if (ptr_available(c, k, i)) {
-			struct cache *ca = PTR_CACHE(c, k, i);
-			size_t bucket = PTR_BUCKET_NR(c, k, i);
-			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
-
-			if (KEY_SIZE(k) + r > c->sb.bucket_size ||
-			    bucket < ca->sb.first_bucket ||
-			    bucket >= ca->sb.nbuckets)
-				return true;
-		}
-
-	return false;
-}
-
-bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	char buf[80];
-
-	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
-		goto bad;
-
-	if (__ptr_invalid(c, k))
-		goto bad;
-
-	return false;
-bad:
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
-	return true;
-}
-
-bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	char buf[80];
-
-	if (!KEY_SIZE(k))
-		return true;
-
-	if (KEY_SIZE(k) > KEY_OFFSET(k))
-		goto bad;
-
-	if (__ptr_invalid(c, k))
-		goto bad;
-
-	return false;
-bad:
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
-	return true;
-}
-
-static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k,
-				     unsigned ptr)
-{
-	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
-	char buf[80];
-
-	if (mutex_trylock(&b->c->bucket_lock)) {
-		if (b->level) {
-			if (KEY_DIRTY(k) ||
-			    g->prio != BTREE_PRIO ||
-			    (b->c->gc_mark_valid &&
-			     GC_MARK(g) != GC_MARK_METADATA))
-				goto err;
-
-		} else {
-			if (g->prio == BTREE_PRIO)
-				goto err;
-
-			if (KEY_DIRTY(k) &&
-			    b->c->gc_mark_valid &&
-			    GC_MARK(g) != GC_MARK_DIRTY)
-				goto err;
-		}
-		mutex_unlock(&b->c->bucket_lock);
-	}
-
-	return false;
-err:
-	mutex_unlock(&b->c->bucket_lock);
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	btree_bug(b,
-"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
-		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
-		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
-	return true;
-}
-
-bool bch_ptr_bad(struct btree *b, const struct bkey *k)
-{
-	struct bucket *g;
-	unsigned i, stale;
-
-	if (!bkey_cmp(k, &ZERO_KEY) ||
-	    !KEY_PTRS(k) ||
-	    bch_ptr_invalid(b, k))
-		return true;
-
-	for (i = 0; i < KEY_PTRS(k); i++)
-		if (!ptr_available(b->c, k, i))
-			return true;
-
-	if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
-		return false;
-
-	for (i = 0; i < KEY_PTRS(k); i++) {
-		g = PTR_BUCKET(b->c, k, i);
-		stale = ptr_stale(b->c, k, i);
-
-		btree_bug_on(stale > 96, b,
-			     "key too stale: %i, need_gc %u",
-			     stale, b->c->need_gc);
-
-		btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
-			     b, "stale dirty pointer");
-
-		if (stale)
-			return true;
-
-		if (expensive_debug_checks(b->c) &&
-		    ptr_bad_expensive_checks(b, k, i))
-			return true;
-	}
-
-	return false;
-}
-
 /* Key/pointer manipulation */
 
 void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
@@ -251,57 +117,6 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k)
 	return true;
 }
 
-static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
-{
-	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
-		~((uint64_t)1 << 63);
-}
-
-/* Tries to merge l and r: l should be lower than r
- * Returns true if we were able to merge. If we did merge, l will be the merged
- * key, r will be untouched.
- */
-bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
-{
-	unsigned i;
-
-	if (key_merging_disabled(b->c))
-		return false;
-
-	if (KEY_PTRS(l) != KEY_PTRS(r) ||
-	    KEY_DIRTY(l) != KEY_DIRTY(r) ||
-	    bkey_cmp(l, &START_KEY(r)))
-		return false;
-
-	for (i = 0; i < KEY_PTRS(l); i++)
-		if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
-		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
-			return false;
-
-	/* Keys with no pointers aren't restricted to one bucket and could
-	 * overflow KEY_SIZE
-	 */
-	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
-		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
-		SET_KEY_SIZE(l, USHRT_MAX);
-
-		bch_cut_front(l, r);
-		return false;
-	}
-
-	if (KEY_CSUM(l)) {
-		if (KEY_CSUM(r))
-			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
-		else
-			SET_KEY_CSUM(l, 0);
-	}
-
-	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
-	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
-
-	return true;
-}
-
 /* Auxiliary search trees */
 
 /* 32 bits total: */
@@ -1099,85 +914,6 @@ int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
 	return 0;
 }
 
-static void sort_key_next(struct btree_iter *iter,
-			  struct btree_iter_set *i)
-{
-	i->k = bkey_next(i->k);
-
-	if (i->k == i->end)
-		*i = iter->data[--iter->used];
-}
-
-/*
- * Returns true if l > r - unless l == r, in which case returns true if l is
- * older than r.
- *
- * Necessary for btree_sort_fixup() - if there are multiple keys that compare
- * equal in different sets, we have to process them newest to oldest.
- */
-static inline bool sort_extent_cmp(struct btree_iter_set l,
-				   struct btree_iter_set r)
-{
-	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
-
-	return c ? c > 0 : l.k < r.k;
-}
-
-static inline bool sort_cmp(struct btree_iter_set l,
-			    struct btree_iter_set r)
-{
-	int64_t c = bkey_cmp(l.k, r.k);
-
-	return c ? c > 0 : l.k < r.k;
-}
-
-static struct bkey *btree_sort_fixup_extents(struct btree_iter *iter,
-					     struct bkey *tmp)
-{
-	while (iter->used > 1) {
-		struct btree_iter_set *top = iter->data, *i = top + 1;
-
-		if (iter->used > 2 &&
-		    sort_extent_cmp(i[0], i[1]))
-			i++;
-
-		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
-			break;
-
-		if (!KEY_SIZE(i->k)) {
-			sort_key_next(iter, i);
-			heap_sift(iter, i - top, sort_extent_cmp);
-			continue;
-		}
-
-		if (top->k > i->k) {
-			if (bkey_cmp(top->k, i->k) >= 0)
-				sort_key_next(iter, i);
-			else
-				bch_cut_front(top->k, i->k);
-
-			heap_sift(iter, i - top, sort_extent_cmp);
-		} else {
-			/* can't happen because of comparison func */
-			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
-
-			if (bkey_cmp(i->k, top->k) < 0) {
-				bkey_copy(tmp, top->k);
-
-				bch_cut_back(&START_KEY(i->k), tmp);
-				bch_cut_front(i->k, top->k);
-				heap_sift(iter, 0, btree_iter_cmp);
-
-				return tmp;
-			} else {
-				bch_cut_back(&START_KEY(i->k), top->k);
-			}
-		}
-	}
-
-	return NULL;
-}
-
 static void btree_mergesort(struct btree *b, struct bset *out,
 			    struct btree_iter *iter,
 			    bool fixup, bool remove_stale)
@@ -1185,25 +921,22 @@ static void btree_mergesort(struct btree *b, struct bset *out,
 	int i;
 	struct bkey *k, *last = NULL;
 	BKEY_PADDED(k) tmp;
-	btree_iter_cmp_fn *cmp = b->level
-		? sort_cmp
-		: sort_extent_cmp;
 	bool (*bad)(struct btree *, const struct bkey *) = remove_stale
 		? bch_ptr_bad
 		: bch_ptr_invalid;
 
 	/* Heapify the iterator, using our comparison function */
 	for (i = iter->used / 2 - 1; i >= 0; --i)
-		heap_sift(iter, i, cmp);
+		heap_sift(iter, i, b->ops->sort_cmp);
 
 	while (!btree_iter_end(iter)) {
-		if (fixup && !b->level)
-			k = btree_sort_fixup_extents(iter, &tmp.k);
+		if (b->ops->sort_fixup && fixup)
+			k = b->ops->sort_fixup(iter, &tmp.k);
 		else
 			k = NULL;
 
 		if (!k)
-			k = __bch_btree_iter_next(iter, cmp);
+			k = __bch_btree_iter_next(iter, b->ops->sort_cmp);
 
 		if (bad(b, k))
 			continue;
@@ -1211,8 +944,7 @@ static void btree_mergesort(struct btree *b, struct bset *out,
 		if (!last) {
 			last = out->start;
 			bkey_copy(last, k);
-		} else if (b->level ||
-			   !bch_bkey_try_merge(b, last, k)) {
+		} else if (!bch_bkey_try_merge(b, last, k)) {
 			last = bkey_next(last);
 			bkey_copy(last, k);
 		}
@@ -1300,6 +1032,7 @@ void bch_btree_sort_partial(struct btree *b, unsigned start,
 
 	EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
 }
+EXPORT_SYMBOL(bch_btree_sort_partial);
 
 void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter,
 				    struct bset_sort_state *state)
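The heapify loop in `btree_mergesort()` above is standard bottom-up heap construction, with the comparator now supplied by `b->ops->sort_cmp` rather than picked by `b->level`. A self-contained sketch of that pattern (illustrative only; bcache's actual `heap_sift` is a macro in `util.h` that operates on the iterator's `data[]` array):

```c
/* Sketch of bottom-up heap construction with a pluggable comparator,
 * mirroring the heapify loop in btree_mergesort(). Illustrative, not
 * bcache's real heap_sift. */
#include <stddef.h>
#include <stdbool.h>

typedef bool (*cmp_fn)(int l, int r);	/* true if l should sit above r */

static void sift_down(int *h, size_t i, size_t used, cmp_fn cmp)
{
	while (i * 2 + 1 < used) {
		size_t child = i * 2 + 1;

		/* pick whichever child the comparator ranks higher */
		if (child + 1 < used && cmp(h[child + 1], h[child]))
			child++;

		if (!cmp(h[child], h[i]))
			break;	/* heap property already holds here */

		int tmp = h[i]; h[i] = h[child]; h[child] = tmp;
		i = child;
	}
}

static void heapify(int *h, size_t used, cmp_fn cmp)
{
	/* same shape as btree_mergesort(): sift down each internal node */
	for (size_t i = used / 2; i-- > 0; )
		sift_down(h, i, used, cmp);
}
```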
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index ab31f3fc8d43..b5797129e919 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -376,14 +376,6 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
 
 struct cache_set;
 const char *bch_ptr_status(struct cache_set *, const struct bkey *);
-bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
-bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
-bool bch_btree_ptr_bad(struct btree *, const struct bkey *);
-bool bch_extent_ptr_bad(struct btree *, const struct bkey *);
-
-bool bch_ptr_bad(struct btree *, const struct bkey *);
-
-bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
 
 int bch_bset_print_stats(struct cache_set *, char *);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 89252e7f2879..6734e2759b93 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -23,6 +23,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "extents.h"
 #include "writeback.h"
 
 #include <linux/slab.h>
@@ -931,6 +932,11 @@ out:
 	b->level	= level;
 	b->parent	= (void *) ~0UL;
 
+	if (!b->level)
+		b->ops	= &bch_extent_keys_ops;
+	else
+		b->ops	= &bch_btree_keys_ops;
+
 	mca_reinit(b);
 
 	return b;
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index b3541173ccf2..0b436079db71 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -113,7 +113,28 @@ struct btree_write {
 	int			prio_blocked;
 };
 
+struct btree_keys_ops {
+	bool		(*sort_cmp)(struct btree_iter_set,
+				    struct btree_iter_set);
+	struct bkey	*(*sort_fixup)(struct btree_iter *,
+				       struct bkey *);
+	bool		(*key_invalid)(struct btree *,
+				       const struct bkey *);
+	bool		(*key_bad)(struct btree *,
+				   const struct bkey *);
+	bool		(*key_merge)(struct btree *,
+				     struct bkey *, struct bkey *);
+
+
+	/*
+	 * Only used for deciding whether to use START_KEY(k) or just the key
+	 * itself in a couple places
+	 */
+	bool		is_extents;
+};
+
 struct btree {
+	const struct btree_keys_ops	*ops;
 	/* Hottest entries first */
 	struct hlist_node	hash;
 
@@ -232,10 +253,23 @@ static inline void set_gc_sectors(struct cache_set *c)
 
 static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
 {
-	if (b->level)
-		return bch_btree_ptr_invalid(b->c, k);
-	else
-		return bch_extent_ptr_invalid(b->c, k);
+	return b->ops->key_invalid(b, k);
+}
+
+static inline bool bch_ptr_bad(struct btree *b, const struct bkey *k)
+{
+	return b->ops->key_bad(b, k);
+}
+
+/*
+ * Tries to merge l and r: l should be lower than r
+ * Returns true if we were able to merge. If we did merge, l will be the merged
+ * key, r will be untouched.
+ */
+static inline bool bch_bkey_try_merge(struct btree *b,
+				      struct bkey *l, struct bkey *r)
+{
+	return b->ops->key_merge ? b->ops->key_merge(b, l, r) : false;
 }
 
 void bkey_put(struct cache_set *c, struct bkey *k);
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 5a78137c420d..2c6587d016db 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -145,6 +145,7 @@ void bch_btree_verify(struct btree *b)
 	bkey_copy(&v->key, &b->key);
 	v->written = 0;
 	v->level = b->level;
+	v->ops = b->ops;
 
 	bio = bch_bbio_alloc(b->c);
 	bio->bi_bdev		= PTR_CACHE(b->c, &b->key, 0)->bdev;
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
new file mode 100644
index 000000000000..8fe6aaece41d
--- /dev/null
+++ b/drivers/md/bcache/extents.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * Uses a block device as cache for other block devices; optimized for SSDs.
+ * All allocation is done in buckets, which should match the erase block size
+ * of the device.
+ *
+ * Buckets containing cached data are kept on a heap sorted by priority;
+ * bucket priority is increased on cache hit, and periodically all the buckets
+ * on the heap have their priority scaled down. This currently is just used as
+ * an LRU but in the future should allow for more intelligent heuristics.
+ *
+ * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
+ * counter. Garbage collection is used to remove stale pointers.
+ *
+ * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
+ * as keys are inserted we only sort the pages that have not yet been written.
+ * When garbage collection is run, we resort the entire node.
+ *
+ * All configuration is done via sysfs; see Documentation/bcache.txt.
+ */
+
+#include "bcache.h"
+#include "btree.h"
+#include "debug.h"
+#include "extents.h"
+#include "writeback.h"
+
+static void sort_key_next(struct btree_iter *iter,
+			  struct btree_iter_set *i)
+{
+	i->k = bkey_next(i->k);
+
+	if (i->k == i->end)
+		*i = iter->data[--iter->used];
+}
+
+static bool bch_key_sort_cmp(struct btree_iter_set l,
+			     struct btree_iter_set r)
+{
+	int64_t c = bkey_cmp(l.k, r.k);
+
+	return c ? c > 0 : l.k < r.k;
+}
+
+static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
+{
+	unsigned i;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (ptr_available(c, k, i)) {
+			struct cache *ca = PTR_CACHE(c, k, i);
+			size_t bucket = PTR_BUCKET_NR(c, k, i);
+			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+			if (KEY_SIZE(k) + r > c->sb.bucket_size ||
+			    bucket < ca->sb.first_bucket ||
+			    bucket >= ca->sb.nbuckets)
+				return true;
+		}
+
+	return false;
+}
+
+/* Btree ptrs */
+
+bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
+{
+	char buf[80];
+
+	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
+		goto bad;
+
+	if (__ptr_invalid(c, k))
+		goto bad;
+
+	return false;
+bad:
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
+	return true;
+}
+
+static bool bch_btree_ptr_invalid(struct btree *b, const struct bkey *k)
+{
+	return __bch_btree_ptr_invalid(b->c, k);
+}
+
+static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
+{
+	unsigned i;
+	char buf[80];
+	struct bucket *g;
+
+	if (mutex_trylock(&b->c->bucket_lock)) {
+		for (i = 0; i < KEY_PTRS(k); i++)
+			if (ptr_available(b->c, k, i)) {
+				g = PTR_BUCKET(b->c, k, i);
+
+				if (KEY_DIRTY(k) ||
+				    g->prio != BTREE_PRIO ||
+				    (b->c->gc_mark_valid &&
+				     GC_MARK(g) != GC_MARK_METADATA))
+					goto err;
+			}
+
+		mutex_unlock(&b->c->bucket_lock);
+	}
+
+	return false;
+err:
+	mutex_unlock(&b->c->bucket_lock);
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	btree_bug(b,
+"inconsistent btree pointer %s: bucket %li pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
+		  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
+		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	return true;
+}
+
+static bool bch_btree_ptr_bad(struct btree *b, const struct bkey *k)
+{
+	unsigned i;
+
+	if (!bkey_cmp(k, &ZERO_KEY) ||
+	    !KEY_PTRS(k) ||
+	    bch_ptr_invalid(b, k))
+		return true;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (!ptr_available(b->c, k, i) ||
+		    ptr_stale(b->c, k, i))
+			return true;
+
+	if (expensive_debug_checks(b->c) &&
+	    btree_ptr_bad_expensive(b, k))
+		return true;
+
+	return false;
+}
+
+const struct btree_keys_ops bch_btree_keys_ops = {
+	.sort_cmp	= bch_key_sort_cmp,
+	.key_invalid	= bch_btree_ptr_invalid,
+	.key_bad	= bch_btree_ptr_bad,
+};
+
+/* Extents */
+
+/*
+ * Returns true if l > r - unless l == r, in which case returns true if l is
+ * older than r.
+ *
+ * Necessary for btree_sort_fixup() - if there are multiple keys that compare
+ * equal in different sets, we have to process them newest to oldest.
+ */
+static bool bch_extent_sort_cmp(struct btree_iter_set l,
+				struct btree_iter_set r)
+{
+	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
+
+	return c ? c > 0 : l.k < r.k;
+}
+
+static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
+					  struct bkey *tmp)
+{
+	while (iter->used > 1) {
+		struct btree_iter_set *top = iter->data, *i = top + 1;
+
+		if (iter->used > 2 &&
+		    bch_extent_sort_cmp(i[0], i[1]))
+			i++;
+
+		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
+			break;
+
+		if (!KEY_SIZE(i->k)) {
+			sort_key_next(iter, i);
+			heap_sift(iter, i - top, bch_extent_sort_cmp);
+			continue;
+		}
+
+		if (top->k > i->k) {
+			if (bkey_cmp(top->k, i->k) >= 0)
+				sort_key_next(iter, i);
+			else
+				bch_cut_front(top->k, i->k);
+
+			heap_sift(iter, i - top, bch_extent_sort_cmp);
+		} else {
+			/* can't happen because of comparison func */
+			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
+
+			if (bkey_cmp(i->k, top->k) < 0) {
+				bkey_copy(tmp, top->k);
+
+				bch_cut_back(&START_KEY(i->k), tmp);
+				bch_cut_front(i->k, top->k);
+				heap_sift(iter, 0, bch_extent_sort_cmp);
+
+				return tmp;
+			} else {
+				bch_cut_back(&START_KEY(i->k), top->k);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static bool bch_extent_invalid(struct btree *b, const struct bkey *k)
+{
+	char buf[80];
+
+	if (!KEY_SIZE(k))
+		return true;
+
+	if (KEY_SIZE(k) > KEY_OFFSET(k))
+		goto bad;
+
+	if (__ptr_invalid(b->c, k))
+		goto bad;
+
+	return false;
+bad:
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
+	return true;
+}
+
+static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
+				     unsigned ptr)
+{
+	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
+	char buf[80];
+
+	if (mutex_trylock(&b->c->bucket_lock)) {
+		if (b->c->gc_mark_valid &&
+		    ((GC_MARK(g) != GC_MARK_DIRTY &&
+		      KEY_DIRTY(k)) ||
+		     GC_MARK(g) == GC_MARK_METADATA))
+			goto err;
+
+		if (g->prio == BTREE_PRIO)
+			goto err;
+
+		mutex_unlock(&b->c->bucket_lock);
+	}
+
+	return false;
+err:
+	mutex_unlock(&b->c->bucket_lock);
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	btree_bug(b,
+"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
+		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
+		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	return true;
+}
+
+static bool bch_extent_bad(struct btree *b, const struct bkey *k)
+{
+	struct bucket *g;
+	unsigned i, stale;
+
+	if (!KEY_PTRS(k) ||
+	    bch_extent_invalid(b, k))
+		return true;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (!ptr_available(b->c, k, i))
+			return true;
+
+	if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
+		return false;
+
+	for (i = 0; i < KEY_PTRS(k); i++) {
+		g = PTR_BUCKET(b->c, k, i);
+		stale = ptr_stale(b->c, k, i);
+
+		btree_bug_on(stale > 96, b,
+			     "key too stale: %i, need_gc %u",
+			     stale, b->c->need_gc);
+
+		btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
+			     b, "stale dirty pointer");
+
+		if (stale)
+			return true;
+
+		if (expensive_debug_checks(b->c) &&
+		    bch_extent_bad_expensive(b, k, i))
+			return true;
+	}
+
+	return false;
+}
+
+static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
+{
+	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
+		~((uint64_t)1 << 63);
+}
+
+static bool bch_extent_merge(struct btree *b, struct bkey *l, struct bkey *r)
+{
+	unsigned i;
+
+	if (key_merging_disabled(b->c))
+		return false;
+
+	if (KEY_PTRS(l) != KEY_PTRS(r) ||
+	    KEY_DIRTY(l) != KEY_DIRTY(r) ||
+	    bkey_cmp(l, &START_KEY(r)))
+		return false;
+
+	for (i = 0; i < KEY_PTRS(l); i++)
+		if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
+			return false;
+
+	/* Keys with no pointers aren't restricted to one bucket and could
+	 * overflow KEY_SIZE
+	 */
+	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
+		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
+		SET_KEY_SIZE(l, USHRT_MAX);
+
+		bch_cut_front(l, r);
+		return false;
+	}
+
+	if (KEY_CSUM(l)) {
+		if (KEY_CSUM(r))
+			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
+		else
+			SET_KEY_CSUM(l, 0);
+	}
+
+	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
+	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
+
+	return true;
+}
+
+const struct btree_keys_ops bch_extent_keys_ops = {
+	.sort_cmp	= bch_extent_sort_cmp,
+	.sort_fixup	= bch_extent_sort_fixup,
+	.key_invalid	= bch_extent_invalid,
+	.key_bad	= bch_extent_bad,
+	.key_merge	= bch_extent_merge,
+	.is_extents	= true,
+};
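For `bch_extent_merge()` above, recall that a bcache extent key stores the *end* offset of the range it covers, and `START_KEY(k)` works out to `KEY_OFFSET(k) - KEY_SIZE(k)`; `!bkey_cmp(l, &START_KEY(r))` therefore means l ends exactly where r begins. A small worked example of the offset/size bookkeeping, with invented numbers:

```c
/* Hypothetical worked example of the merge arithmetic in
 * bch_extent_merge(): extent keys store their END offset, so two
 * extents are adjacent when l's offset equals r's start.
 * Numbers are invented for illustration. */
#include <assert.h>
#include <stdint.h>

struct ext { uint64_t offset; uint64_t size; };	/* offset = end sector */

static uint64_t ext_start(struct ext e) { return e.offset - e.size; }

int main(void)
{
	struct ext l = { .offset = 16, .size = 8 };	/* sectors [8, 16) */
	struct ext r = { .offset = 24, .size = 8 };	/* sectors [16, 24) */

	/* the adjacency test, i.e. the bkey_cmp(l, &START_KEY(r)) == 0 case */
	assert(l.offset == ext_start(r));

	/* the merge itself, as at the end of bch_extent_merge() */
	l.offset += r.size;
	l.size   += r.size;

	assert(ext_start(l) == 8 && l.offset == 24);	/* now covers [8, 24) */
	return 0;
}
```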
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
new file mode 100644
index 000000000000..e0c0b685d2f7
--- /dev/null
+++ b/drivers/md/bcache/extents.h
@@ -0,0 +1,12 @@
+#ifndef _BCACHE_EXTENTS_H
+#define _BCACHE_EXTENTS_H
+
+extern const struct btree_keys_ops bch_btree_keys_ops;
+extern const struct btree_keys_ops bch_extent_keys_ops;
+
+struct bkey;
+struct cache_set;
+
+bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
+
+#endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 12057a43e01f..6d6a7a15043e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -9,6 +9,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "extents.h"
 #include "request.h"
 #include "writeback.h"
 
@@ -399,7 +400,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
 {
 	struct bkey *k = &j->uuid_bucket;
 
-	if (bch_btree_ptr_invalid(c, k))
+	if (__bch_btree_ptr_invalid(c, k))
 		return "bad uuid pointer";
 
 	bkey_copy(&c->uuid_bucket, k);
@@ -1575,7 +1576,7 @@ static void run_cache_set(struct cache_set *c)
 		k = &j->btree_root;
 
 		err = "bad btree root";
-		if (bch_btree_ptr_invalid(c, k))
+		if (__bch_btree_ptr_invalid(c, k))
 			goto err;
 
 		err = "error reading btree root";