diff options
author | Kent Overstreet <kmo@daterainc.com> | 2013-10-31 18:46:42 -0400 |
---|---|---|
committer | Kent Overstreet <kmo@daterainc.com> | 2013-11-11 00:56:33 -0500 |
commit | 81ab4190ac17df41686a37c97f701623276b652a (patch) | |
tree | be7f48b5ad6d36fadbac4658d37953d324d760d0 | |
parent | 2599b53b7b0ea6103d1661dca74d35480cb8fa1f (diff) |
bcache: Pull on disk data structures out into a separate header
Now, the on disk data structures are in a header that can be exported to
userspace - and having them all centralized is nice too.
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
-rw-r--r-- | drivers/md/bcache/bcache.h | 244 | ||||
-rw-r--r-- | drivers/md/bcache/bset.c | 4 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 31 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/journal.c | 4 | ||||
-rw-r--r-- | drivers/md/bcache/journal.h | 37 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 9 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 13 | ||||
-rw-r--r-- | drivers/md/bcache/util.h | 10 | ||||
-rw-r--r-- | include/uapi/linux/bcache.h | 373 |
10 files changed, 387 insertions, 340 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index e32f6fd91755..045cb99f1ca6 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h | |||
@@ -177,6 +177,7 @@ | |||
177 | 177 | ||
178 | #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ | 178 | #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ |
179 | 179 | ||
180 | #include <linux/bcache.h> | ||
180 | #include <linux/bio.h> | 181 | #include <linux/bio.h> |
181 | #include <linux/kobject.h> | 182 | #include <linux/kobject.h> |
182 | #include <linux/list.h> | 183 | #include <linux/list.h> |
@@ -210,168 +211,6 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); | |||
210 | #define GC_MARK_METADATA 2 | 211 | #define GC_MARK_METADATA 2 |
211 | BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14); | 212 | BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14); |
212 | 213 | ||
213 | struct bkey { | ||
214 | uint64_t high; | ||
215 | uint64_t low; | ||
216 | uint64_t ptr[]; | ||
217 | }; | ||
218 | |||
219 | /* Enough for a key with 6 pointers */ | ||
220 | #define BKEY_PAD 8 | ||
221 | |||
222 | #define BKEY_PADDED(key) \ | ||
223 | union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; } | ||
224 | |||
225 | /* Version 0: Cache device | ||
226 | * Version 1: Backing device | ||
227 | * Version 2: Seed pointer into btree node checksum | ||
228 | * Version 3: Cache device with new UUID format | ||
229 | * Version 4: Backing device with data offset | ||
230 | */ | ||
231 | #define BCACHE_SB_VERSION_CDEV 0 | ||
232 | #define BCACHE_SB_VERSION_BDEV 1 | ||
233 | #define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 | ||
234 | #define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 | ||
235 | #define BCACHE_SB_MAX_VERSION 4 | ||
236 | |||
237 | #define SB_SECTOR 8 | ||
238 | #define SB_SIZE 4096 | ||
239 | #define SB_LABEL_SIZE 32 | ||
240 | #define SB_JOURNAL_BUCKETS 256U | ||
241 | /* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ | ||
242 | #define MAX_CACHES_PER_SET 8 | ||
243 | |||
244 | #define BDEV_DATA_START_DEFAULT 16 /* sectors */ | ||
245 | |||
246 | struct cache_sb { | ||
247 | uint64_t csum; | ||
248 | uint64_t offset; /* sector where this sb was written */ | ||
249 | uint64_t version; | ||
250 | |||
251 | uint8_t magic[16]; | ||
252 | |||
253 | uint8_t uuid[16]; | ||
254 | union { | ||
255 | uint8_t set_uuid[16]; | ||
256 | uint64_t set_magic; | ||
257 | }; | ||
258 | uint8_t label[SB_LABEL_SIZE]; | ||
259 | |||
260 | uint64_t flags; | ||
261 | uint64_t seq; | ||
262 | uint64_t pad[8]; | ||
263 | |||
264 | union { | ||
265 | struct { | ||
266 | /* Cache devices */ | ||
267 | uint64_t nbuckets; /* device size */ | ||
268 | |||
269 | uint16_t block_size; /* sectors */ | ||
270 | uint16_t bucket_size; /* sectors */ | ||
271 | |||
272 | uint16_t nr_in_set; | ||
273 | uint16_t nr_this_dev; | ||
274 | }; | ||
275 | struct { | ||
276 | /* Backing devices */ | ||
277 | uint64_t data_offset; | ||
278 | |||
279 | /* | ||
280 | * block_size from the cache device section is still used by | ||
281 | * backing devices, so don't add anything here until we fix | ||
282 | * things to not need it for backing devices anymore | ||
283 | */ | ||
284 | }; | ||
285 | }; | ||
286 | |||
287 | uint32_t last_mount; /* time_t */ | ||
288 | |||
289 | uint16_t first_bucket; | ||
290 | union { | ||
291 | uint16_t njournal_buckets; | ||
292 | uint16_t keys; | ||
293 | }; | ||
294 | uint64_t d[SB_JOURNAL_BUCKETS]; /* journal buckets */ | ||
295 | }; | ||
296 | |||
297 | BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); | ||
298 | BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); | ||
299 | BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); | ||
300 | #define CACHE_REPLACEMENT_LRU 0U | ||
301 | #define CACHE_REPLACEMENT_FIFO 1U | ||
302 | #define CACHE_REPLACEMENT_RANDOM 2U | ||
303 | |||
304 | BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); | ||
305 | #define CACHE_MODE_WRITETHROUGH 0U | ||
306 | #define CACHE_MODE_WRITEBACK 1U | ||
307 | #define CACHE_MODE_WRITEAROUND 2U | ||
308 | #define CACHE_MODE_NONE 3U | ||
309 | BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); | ||
310 | #define BDEV_STATE_NONE 0U | ||
311 | #define BDEV_STATE_CLEAN 1U | ||
312 | #define BDEV_STATE_DIRTY 2U | ||
313 | #define BDEV_STATE_STALE 3U | ||
314 | |||
315 | /* Version 1: Seed pointer into btree node checksum | ||
316 | */ | ||
317 | #define BCACHE_BSET_VERSION 1 | ||
318 | |||
319 | /* | ||
320 | * This is the on disk format for btree nodes - a btree node on disk is a list | ||
321 | * of these; within each set the keys are sorted | ||
322 | */ | ||
323 | struct bset { | ||
324 | uint64_t csum; | ||
325 | uint64_t magic; | ||
326 | uint64_t seq; | ||
327 | uint32_t version; | ||
328 | uint32_t keys; | ||
329 | |||
330 | union { | ||
331 | struct bkey start[0]; | ||
332 | uint64_t d[0]; | ||
333 | }; | ||
334 | }; | ||
335 | |||
336 | /* | ||
337 | * On disk format for priorities and gens - see super.c near prio_write() for | ||
338 | * more. | ||
339 | */ | ||
340 | struct prio_set { | ||
341 | uint64_t csum; | ||
342 | uint64_t magic; | ||
343 | uint64_t seq; | ||
344 | uint32_t version; | ||
345 | uint32_t pad; | ||
346 | |||
347 | uint64_t next_bucket; | ||
348 | |||
349 | struct bucket_disk { | ||
350 | uint16_t prio; | ||
351 | uint8_t gen; | ||
352 | } __attribute((packed)) data[]; | ||
353 | }; | ||
354 | |||
355 | struct uuid_entry { | ||
356 | union { | ||
357 | struct { | ||
358 | uint8_t uuid[16]; | ||
359 | uint8_t label[32]; | ||
360 | uint32_t first_reg; | ||
361 | uint32_t last_reg; | ||
362 | uint32_t invalidated; | ||
363 | |||
364 | uint32_t flags; | ||
365 | /* Size of flash only volumes */ | ||
366 | uint64_t sectors; | ||
367 | }; | ||
368 | |||
369 | uint8_t pad[128]; | ||
370 | }; | ||
371 | }; | ||
372 | |||
373 | BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); | ||
374 | |||
375 | #include "journal.h" | 214 | #include "journal.h" |
376 | #include "stats.h" | 215 | #include "stats.h" |
377 | struct search; | 216 | struct search; |
@@ -868,12 +707,6 @@ static inline bool key_merging_disabled(struct cache_set *c) | |||
868 | #endif | 707 | #endif |
869 | } | 708 | } |
870 | 709 | ||
871 | static inline bool SB_IS_BDEV(const struct cache_sb *sb) | ||
872 | { | ||
873 | return sb->version == BCACHE_SB_VERSION_BDEV | ||
874 | || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; | ||
875 | } | ||
876 | |||
877 | struct bbio { | 710 | struct bbio { |
878 | unsigned submit_time_us; | 711 | unsigned submit_time_us; |
879 | union { | 712 | union { |
@@ -927,59 +760,6 @@ static inline unsigned local_clock_us(void) | |||
927 | #define prio_buckets(c) \ | 760 | #define prio_buckets(c) \ |
928 | DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) | 761 | DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) |
929 | 762 | ||
930 | #define JSET_MAGIC 0x245235c1a3625032ULL | ||
931 | #define PSET_MAGIC 0x6750e15f87337f91ULL | ||
932 | #define BSET_MAGIC 0x90135c78b99e07f5ULL | ||
933 | |||
934 | #define jset_magic(c) ((c)->sb.set_magic ^ JSET_MAGIC) | ||
935 | #define pset_magic(c) ((c)->sb.set_magic ^ PSET_MAGIC) | ||
936 | #define bset_magic(c) ((c)->sb.set_magic ^ BSET_MAGIC) | ||
937 | |||
938 | /* Bkey fields: all units are in sectors */ | ||
939 | |||
940 | #define KEY_FIELD(name, field, offset, size) \ | ||
941 | BITMASK(name, struct bkey, field, offset, size) | ||
942 | |||
943 | #define PTR_FIELD(name, offset, size) \ | ||
944 | static inline uint64_t name(const struct bkey *k, unsigned i) \ | ||
945 | { return (k->ptr[i] >> offset) & ~(((uint64_t) ~0) << size); } \ | ||
946 | \ | ||
947 | static inline void SET_##name(struct bkey *k, unsigned i, uint64_t v)\ | ||
948 | { \ | ||
949 | k->ptr[i] &= ~(~((uint64_t) ~0 << size) << offset); \ | ||
950 | k->ptr[i] |= v << offset; \ | ||
951 | } | ||
952 | |||
953 | KEY_FIELD(KEY_PTRS, high, 60, 3) | ||
954 | KEY_FIELD(HEADER_SIZE, high, 58, 2) | ||
955 | KEY_FIELD(KEY_CSUM, high, 56, 2) | ||
956 | KEY_FIELD(KEY_PINNED, high, 55, 1) | ||
957 | KEY_FIELD(KEY_DIRTY, high, 36, 1) | ||
958 | |||
959 | KEY_FIELD(KEY_SIZE, high, 20, 16) | ||
960 | KEY_FIELD(KEY_INODE, high, 0, 20) | ||
961 | |||
962 | /* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ | ||
963 | |||
964 | static inline uint64_t KEY_OFFSET(const struct bkey *k) | ||
965 | { | ||
966 | return k->low; | ||
967 | } | ||
968 | |||
969 | static inline void SET_KEY_OFFSET(struct bkey *k, uint64_t v) | ||
970 | { | ||
971 | k->low = v; | ||
972 | } | ||
973 | |||
974 | PTR_FIELD(PTR_DEV, 51, 12) | ||
975 | PTR_FIELD(PTR_OFFSET, 8, 43) | ||
976 | PTR_FIELD(PTR_GEN, 0, 8) | ||
977 | |||
978 | #define PTR_CHECK_DEV ((1 << 12) - 1) | ||
979 | |||
980 | #define PTR(gen, offset, dev) \ | ||
981 | ((((uint64_t) dev) << 51) | ((uint64_t) offset) << 8 | gen) | ||
982 | |||
983 | static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) | 763 | static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) |
984 | { | 764 | { |
985 | return s >> c->bucket_bits; | 765 | return s >> c->bucket_bits; |
@@ -1018,31 +798,11 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c, | |||
1018 | 798 | ||
1019 | /* Btree key macros */ | 799 | /* Btree key macros */ |
1020 | 800 | ||
1021 | /* | ||
1022 | * The high bit being set is a relic from when we used it to do binary | ||
1023 | * searches - it told you where a key started. It's not used anymore, | ||
1024 | * and can probably be safely dropped. | ||
1025 | */ | ||
1026 | #define KEY(dev, sector, len) \ | ||
1027 | ((struct bkey) { \ | ||
1028 | .high = (1ULL << 63) | ((uint64_t) (len) << 20) | (dev), \ | ||
1029 | .low = (sector) \ | ||
1030 | }) | ||
1031 | |||
1032 | static inline void bkey_init(struct bkey *k) | 801 | static inline void bkey_init(struct bkey *k) |
1033 | { | 802 | { |
1034 | *k = KEY(0, 0, 0); | 803 | *k = ZERO_KEY; |
1035 | } | 804 | } |
1036 | 805 | ||
1037 | #define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) | ||
1038 | #define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) | ||
1039 | |||
1040 | #define MAX_KEY_INODE (~(~0 << 20)) | ||
1041 | #define MAX_KEY_OFFSET (((uint64_t) ~0) >> 1) | ||
1042 | #define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0) | ||
1043 | |||
1044 | #define ZERO_KEY KEY(0, 0, 0) | ||
1045 | |||
1046 | /* | 806 | /* |
1047 | * This is used for various on disk data structures - cache_sb, prio_set, bset, | 807 | * This is used for various on disk data structures - cache_sb, prio_set, bset, |
1048 | * jset: The checksum is _always_ the first 8 bytes of these structs | 808 | * jset: The checksum is _always_ the first 8 bytes of these structs |
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index f7b5525ddafa..7b8713c66050 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c | |||
@@ -684,7 +684,7 @@ void bch_bset_init_next(struct btree *b) | |||
684 | } else | 684 | } else |
685 | get_random_bytes(&i->seq, sizeof(uint64_t)); | 685 | get_random_bytes(&i->seq, sizeof(uint64_t)); |
686 | 686 | ||
687 | i->magic = bset_magic(b->c); | 687 | i->magic = bset_magic(&b->c->sb); |
688 | i->version = 0; | 688 | i->version = 0; |
689 | i->keys = 0; | 689 | i->keys = 0; |
690 | 690 | ||
@@ -1034,7 +1034,7 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter, | |||
1034 | * memcpy() | 1034 | * memcpy() |
1035 | */ | 1035 | */ |
1036 | 1036 | ||
1037 | out->magic = bset_magic(b->c); | 1037 | out->magic = bset_magic(&b->c->sb); |
1038 | out->seq = b->sets[0].data->seq; | 1038 | out->seq = b->sets[0].data->seq; |
1039 | out->version = b->sets[0].data->version; | 1039 | out->version = b->sets[0].data->version; |
1040 | swap(out, b->sets[0].data); | 1040 | swap(out, b->sets[0].data); |
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 8a9305685b7e..5cd90565dfe2 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h | |||
@@ -193,37 +193,6 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l, | |||
193 | : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); | 193 | : (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r); |
194 | } | 194 | } |
195 | 195 | ||
196 | static inline size_t bkey_u64s(const struct bkey *k) | ||
197 | { | ||
198 | BUG_ON(KEY_CSUM(k) > 1); | ||
199 | return 2 + KEY_PTRS(k) + (KEY_CSUM(k) ? 1 : 0); | ||
200 | } | ||
201 | |||
202 | static inline size_t bkey_bytes(const struct bkey *k) | ||
203 | { | ||
204 | return bkey_u64s(k) * sizeof(uint64_t); | ||
205 | } | ||
206 | |||
207 | static inline void bkey_copy(struct bkey *dest, const struct bkey *src) | ||
208 | { | ||
209 | memcpy(dest, src, bkey_bytes(src)); | ||
210 | } | ||
211 | |||
212 | static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) | ||
213 | { | ||
214 | if (!src) | ||
215 | src = &KEY(0, 0, 0); | ||
216 | |||
217 | SET_KEY_INODE(dest, KEY_INODE(src)); | ||
218 | SET_KEY_OFFSET(dest, KEY_OFFSET(src)); | ||
219 | } | ||
220 | |||
221 | static inline struct bkey *bkey_next(const struct bkey *k) | ||
222 | { | ||
223 | uint64_t *d = (void *) k; | ||
224 | return (struct bkey *) (d + bkey_u64s(k)); | ||
225 | } | ||
226 | |||
227 | /* Keylists */ | 196 | /* Keylists */ |
228 | 197 | ||
229 | struct keylist { | 198 | struct keylist { |
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index f5aa4adadf1d..aba787d954e5 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
@@ -231,7 +231,7 @@ static void bch_btree_node_read_done(struct btree *b) | |||
231 | goto err; | 231 | goto err; |
232 | 232 | ||
233 | err = "bad magic"; | 233 | err = "bad magic"; |
234 | if (i->magic != bset_magic(b->c)) | 234 | if (i->magic != bset_magic(&b->c->sb)) |
235 | goto err; | 235 | goto err; |
236 | 236 | ||
237 | err = "bad checksum"; | 237 | err = "bad checksum"; |
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 86de64a6bf26..ecdaa671bd50 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c | |||
@@ -74,7 +74,7 @@ reread: left = ca->sb.bucket_size - offset; | |||
74 | struct list_head *where; | 74 | struct list_head *where; |
75 | size_t blocks, bytes = set_bytes(j); | 75 | size_t blocks, bytes = set_bytes(j); |
76 | 76 | ||
77 | if (j->magic != jset_magic(ca->set)) | 77 | if (j->magic != jset_magic(&ca->sb)) |
78 | return ret; | 78 | return ret; |
79 | 79 | ||
80 | if (bytes > left << 9) | 80 | if (bytes > left << 9) |
@@ -596,7 +596,7 @@ static void journal_write_unlocked(struct closure *cl) | |||
596 | for_each_cache(ca, c, i) | 596 | for_each_cache(ca, c, i) |
597 | w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; | 597 | w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; |
598 | 598 | ||
599 | w->data->magic = jset_magic(c); | 599 | w->data->magic = jset_magic(&c->sb); |
600 | w->data->version = BCACHE_JSET_VERSION; | 600 | w->data->version = BCACHE_JSET_VERSION; |
601 | w->data->last_seq = last_seq(&c->journal); | 601 | w->data->last_seq = last_seq(&c->journal); |
602 | w->data->csum = csum_set(w->data); | 602 | w->data->csum = csum_set(w->data); |
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h index 5e9edb9ef376..a6472fda94b2 100644 --- a/drivers/md/bcache/journal.h +++ b/drivers/md/bcache/journal.h | |||
@@ -75,43 +75,6 @@ | |||
75 | * nodes that are pinning the oldest journal entries first. | 75 | * nodes that are pinning the oldest journal entries first. |
76 | */ | 76 | */ |
77 | 77 | ||
78 | #define BCACHE_JSET_VERSION_UUIDv1 1 | ||
79 | /* Always latest UUID format */ | ||
80 | #define BCACHE_JSET_VERSION_UUID 1 | ||
81 | #define BCACHE_JSET_VERSION 1 | ||
82 | |||
83 | /* | ||
84 | * On disk format for a journal entry: | ||
85 | * seq is monotonically increasing; every journal entry has its own unique | ||
86 | * sequence number. | ||
87 | * | ||
88 | * last_seq is the oldest journal entry that still has keys the btree hasn't | ||
89 | * flushed to disk yet. | ||
90 | * | ||
91 | * version is for on disk format changes. | ||
92 | */ | ||
93 | struct jset { | ||
94 | uint64_t csum; | ||
95 | uint64_t magic; | ||
96 | uint64_t seq; | ||
97 | uint32_t version; | ||
98 | uint32_t keys; | ||
99 | |||
100 | uint64_t last_seq; | ||
101 | |||
102 | BKEY_PADDED(uuid_bucket); | ||
103 | BKEY_PADDED(btree_root); | ||
104 | uint16_t btree_level; | ||
105 | uint16_t pad[3]; | ||
106 | |||
107 | uint64_t prio_bucket[MAX_CACHES_PER_SET]; | ||
108 | |||
109 | union { | ||
110 | struct bkey start[0]; | ||
111 | uint64_t d[0]; | ||
112 | }; | ||
113 | }; | ||
114 | |||
115 | /* | 78 | /* |
116 | * Only used for holding the journal entries we read in btree_journal_read() | 79 | * Only used for holding the journal entries we read in btree_journal_read() |
117 | * during cache_registration | 80 | * during cache_registration |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index cf7850a7592c..932300f18973 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
@@ -264,16 +264,17 @@ static void bch_data_invalidate(struct closure *cl) | |||
264 | bio_sectors(bio), (uint64_t) bio->bi_sector); | 264 | bio_sectors(bio), (uint64_t) bio->bi_sector); |
265 | 265 | ||
266 | while (bio_sectors(bio)) { | 266 | while (bio_sectors(bio)) { |
267 | unsigned len = min(bio_sectors(bio), 1U << 14); | 267 | unsigned sectors = min(bio_sectors(bio), |
268 | 1U << (KEY_SIZE_BITS - 1)); | ||
268 | 269 | ||
269 | if (bch_keylist_realloc(&op->insert_keys, 0, op->c)) | 270 | if (bch_keylist_realloc(&op->insert_keys, 0, op->c)) |
270 | goto out; | 271 | goto out; |
271 | 272 | ||
272 | bio->bi_sector += len; | 273 | bio->bi_sector += sectors; |
273 | bio->bi_size -= len << 9; | 274 | bio->bi_size -= sectors << 9; |
274 | 275 | ||
275 | bch_keylist_add(&op->insert_keys, | 276 | bch_keylist_add(&op->insert_keys, |
276 | &KEY(op->inode, bio->bi_sector, len)); | 277 | &KEY(op->inode, bio->bi_sector, sectors)); |
277 | } | 278 | } |
278 | 279 | ||
279 | op->insert_data_done = true; | 280 | op->insert_data_done = true; |
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a314c771263f..c67d19a8913d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c | |||
@@ -45,15 +45,6 @@ const char * const bch_cache_modes[] = { | |||
45 | NULL | 45 | NULL |
46 | }; | 46 | }; |
47 | 47 | ||
48 | struct uuid_entry_v0 { | ||
49 | uint8_t uuid[16]; | ||
50 | uint8_t label[32]; | ||
51 | uint32_t first_reg; | ||
52 | uint32_t last_reg; | ||
53 | uint32_t invalidated; | ||
54 | uint32_t pad; | ||
55 | }; | ||
56 | |||
57 | static struct kobject *bcache_kobj; | 48 | static struct kobject *bcache_kobj; |
58 | struct mutex bch_register_lock; | 49 | struct mutex bch_register_lock; |
59 | LIST_HEAD(bch_cache_sets); | 50 | LIST_HEAD(bch_cache_sets); |
@@ -562,7 +553,7 @@ void bch_prio_write(struct cache *ca) | |||
562 | } | 553 | } |
563 | 554 | ||
564 | p->next_bucket = ca->prio_buckets[i + 1]; | 555 | p->next_bucket = ca->prio_buckets[i + 1]; |
565 | p->magic = pset_magic(ca); | 556 | p->magic = pset_magic(&ca->sb); |
566 | p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8); | 557 | p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8); |
567 | 558 | ||
568 | bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, true); | 559 | bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, true); |
@@ -613,7 +604,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) | |||
613 | if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) | 604 | if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) |
614 | pr_warn("bad csum reading priorities"); | 605 | pr_warn("bad csum reading priorities"); |
615 | 606 | ||
616 | if (p->magic != pset_magic(ca)) | 607 | if (p->magic != pset_magic(&ca->sb)) |
617 | pr_warn("bad magic reading priorities"); | 608 | pr_warn("bad magic reading priorities"); |
618 | 609 | ||
619 | bucket = p->next_bucket; | 610 | bucket = p->next_bucket; |
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index ea345c6896f4..38ae7a4ce928 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h | |||
@@ -27,16 +27,6 @@ struct closure; | |||
27 | 27 | ||
28 | #endif | 28 | #endif |
29 | 29 | ||
30 | #define BITMASK(name, type, field, offset, size) \ | ||
31 | static inline uint64_t name(const type *k) \ | ||
32 | { return (k->field >> offset) & ~(((uint64_t) ~0) << size); } \ | ||
33 | \ | ||
34 | static inline void SET_##name(type *k, uint64_t v) \ | ||
35 | { \ | ||
36 | k->field &= ~(~((uint64_t) ~0 << size) << offset); \ | ||
37 | k->field |= v << offset; \ | ||
38 | } | ||
39 | |||
40 | #define DECLARE_HEAP(type, name) \ | 30 | #define DECLARE_HEAP(type, name) \ |
41 | struct { \ | 31 | struct { \ |
42 | size_t size, used; \ | 32 | size_t size, used; \ |
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h new file mode 100644 index 000000000000..164a7e263988 --- /dev/null +++ b/include/uapi/linux/bcache.h | |||
@@ -0,0 +1,373 @@ | |||
1 | #ifndef _LINUX_BCACHE_H | ||
2 | #define _LINUX_BCACHE_H | ||
3 | |||
4 | /* | ||
5 | * Bcache on disk data structures | ||
6 | */ | ||
7 | |||
8 | #include <asm/types.h> | ||
9 | |||
10 | #define BITMASK(name, type, field, offset, size) \ | ||
11 | static inline __u64 name(const type *k) \ | ||
12 | { return (k->field >> offset) & ~(~0ULL << size); } \ | ||
13 | \ | ||
14 | static inline void SET_##name(type *k, __u64 v) \ | ||
15 | { \ | ||
16 | k->field &= ~(~(~0ULL << size) << offset); \ | ||
17 | k->field |= (v & ~(~0ULL << size)) << offset; \ | ||
18 | } | ||
19 | |||
20 | /* Btree keys - all units are in sectors */ | ||
21 | |||
22 | struct bkey { | ||
23 | __u64 high; | ||
24 | __u64 low; | ||
25 | __u64 ptr[]; | ||
26 | }; | ||
27 | |||
28 | #define KEY_FIELD(name, field, offset, size) \ | ||
29 | BITMASK(name, struct bkey, field, offset, size) | ||
30 | |||
31 | #define PTR_FIELD(name, offset, size) \ | ||
32 | static inline __u64 name(const struct bkey *k, unsigned i) \ | ||
33 | { return (k->ptr[i] >> offset) & ~(~0ULL << size); } \ | ||
34 | \ | ||
35 | static inline void SET_##name(struct bkey *k, unsigned i, __u64 v) \ | ||
36 | { \ | ||
37 | k->ptr[i] &= ~(~(~0ULL << size) << offset); \ | ||
38 | k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \ | ||
39 | } | ||
40 | |||
41 | #define KEY_SIZE_BITS 16 | ||
42 | |||
43 | KEY_FIELD(KEY_PTRS, high, 60, 3) | ||
44 | KEY_FIELD(HEADER_SIZE, high, 58, 2) | ||
45 | KEY_FIELD(KEY_CSUM, high, 56, 2) | ||
46 | KEY_FIELD(KEY_PINNED, high, 55, 1) | ||
47 | KEY_FIELD(KEY_DIRTY, high, 36, 1) | ||
48 | |||
49 | KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) | ||
50 | KEY_FIELD(KEY_INODE, high, 0, 20) | ||
51 | |||
52 | /* Next time I change the on disk format, KEY_OFFSET() won't be 64 bits */ | ||
53 | |||
54 | static inline __u64 KEY_OFFSET(const struct bkey *k) | ||
55 | { | ||
56 | return k->low; | ||
57 | } | ||
58 | |||
59 | static inline void SET_KEY_OFFSET(struct bkey *k, __u64 v) | ||
60 | { | ||
61 | k->low = v; | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * The high bit being set is a relic from when we used it to do binary | ||
66 | * searches - it told you where a key started. It's not used anymore, | ||
67 | * and can probably be safely dropped. | ||
68 | */ | ||
69 | #define KEY(inode, offset, size) \ | ||
70 | ((struct bkey) { \ | ||
71 | .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode), \ | ||
72 | .low = (offset) \ | ||
73 | }) | ||
74 | |||
75 | #define ZERO_KEY KEY(0, 0, 0) | ||
76 | |||
77 | #define MAX_KEY_INODE (~(~0 << 20)) | ||
78 | #define MAX_KEY_OFFSET (~0ULL >> 1) | ||
79 | #define MAX_KEY KEY(MAX_KEY_INODE, MAX_KEY_OFFSET, 0) | ||
80 | |||
81 | #define KEY_START(k) (KEY_OFFSET(k) - KEY_SIZE(k)) | ||
82 | #define START_KEY(k) KEY(KEY_INODE(k), KEY_START(k), 0) | ||
83 | |||
84 | #define PTR_DEV_BITS 12 | ||
85 | |||
86 | PTR_FIELD(PTR_DEV, 51, PTR_DEV_BITS) | ||
87 | PTR_FIELD(PTR_OFFSET, 8, 43) | ||
88 | PTR_FIELD(PTR_GEN, 0, 8) | ||
89 | |||
90 | #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) | ||
91 | |||
92 | #define PTR(gen, offset, dev) \ | ||
93 | ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) | ||
94 | |||
95 | /* Bkey utility code */ | ||
96 | |||
97 | static inline unsigned long bkey_u64s(const struct bkey *k) | ||
98 | { | ||
99 | return (sizeof(struct bkey) / sizeof(__u64)) + KEY_PTRS(k); | ||
100 | } | ||
101 | |||
102 | static inline unsigned long bkey_bytes(const struct bkey *k) | ||
103 | { | ||
104 | return bkey_u64s(k) * sizeof(__u64); | ||
105 | } | ||
106 | |||
107 | #define bkey_copy(_dest, _src) memcpy(_dest, _src, bkey_bytes(_src)) | ||
108 | |||
109 | static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src) | ||
110 | { | ||
111 | SET_KEY_INODE(dest, KEY_INODE(src)); | ||
112 | SET_KEY_OFFSET(dest, KEY_OFFSET(src)); | ||
113 | } | ||
114 | |||
115 | static inline struct bkey *bkey_next(const struct bkey *k) | ||
116 | { | ||
117 | __u64 *d = (void *) k; | ||
118 | return (struct bkey *) (d + bkey_u64s(k)); | ||
119 | } | ||
120 | |||
121 | static inline struct bkey *bkey_last(const struct bkey *k, unsigned nr_keys) | ||
122 | { | ||
123 | __u64 *d = (void *) k; | ||
124 | return (struct bkey *) (d + nr_keys); | ||
125 | } | ||
126 | /* Enough for a key with 6 pointers */ | ||
127 | #define BKEY_PAD 8 | ||
128 | |||
129 | #define BKEY_PADDED(key) \ | ||
130 | union { struct bkey key; __u64 key ## _pad[BKEY_PAD]; } | ||
131 | |||
132 | /* Superblock */ | ||
133 | |||
134 | /* Version 0: Cache device | ||
135 | * Version 1: Backing device | ||
136 | * Version 2: Seed pointer into btree node checksum | ||
137 | * Version 3: Cache device with new UUID format | ||
138 | * Version 4: Backing device with data offset | ||
139 | */ | ||
140 | #define BCACHE_SB_VERSION_CDEV 0 | ||
141 | #define BCACHE_SB_VERSION_BDEV 1 | ||
142 | #define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 | ||
143 | #define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 | ||
144 | #define BCACHE_SB_MAX_VERSION 4 | ||
145 | |||
146 | #define SB_SECTOR 8 | ||
147 | #define SB_SIZE 4096 | ||
148 | #define SB_LABEL_SIZE 32 | ||
149 | #define SB_JOURNAL_BUCKETS 256U | ||
150 | /* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ | ||
151 | #define MAX_CACHES_PER_SET 8 | ||
152 | |||
153 | #define BDEV_DATA_START_DEFAULT 16 /* sectors */ | ||
154 | |||
155 | struct cache_sb { | ||
156 | __u64 csum; | ||
157 | __u64 offset; /* sector where this sb was written */ | ||
158 | __u64 version; | ||
159 | |||
160 | __u8 magic[16]; | ||
161 | |||
162 | __u8 uuid[16]; | ||
163 | union { | ||
164 | __u8 set_uuid[16]; | ||
165 | __u64 set_magic; | ||
166 | }; | ||
167 | __u8 label[SB_LABEL_SIZE]; | ||
168 | |||
169 | __u64 flags; | ||
170 | __u64 seq; | ||
171 | __u64 pad[8]; | ||
172 | |||
173 | union { | ||
174 | struct { | ||
175 | /* Cache devices */ | ||
176 | __u64 nbuckets; /* device size */ | ||
177 | |||
178 | __u16 block_size; /* sectors */ | ||
179 | __u16 bucket_size; /* sectors */ | ||
180 | |||
181 | __u16 nr_in_set; | ||
182 | __u16 nr_this_dev; | ||
183 | }; | ||
184 | struct { | ||
185 | /* Backing devices */ | ||
186 | __u64 data_offset; | ||
187 | |||
188 | /* | ||
189 | * block_size from the cache device section is still used by | ||
190 | * backing devices, so don't add anything here until we fix | ||
191 | * things to not need it for backing devices anymore | ||
192 | */ | ||
193 | }; | ||
194 | }; | ||
195 | |||
196 | __u32 last_mount; /* time_t */ | ||
197 | |||
198 | __u16 first_bucket; | ||
199 | union { | ||
200 | __u16 njournal_buckets; | ||
201 | __u16 keys; | ||
202 | }; | ||
203 | __u64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ | ||
204 | }; | ||
205 | |||
206 | static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) | ||
207 | { | ||
208 | return sb->version == BCACHE_SB_VERSION_BDEV | ||
209 | || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; | ||
210 | } | ||
211 | |||
212 | BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); | ||
213 | BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); | ||
214 | BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); | ||
215 | #define CACHE_REPLACEMENT_LRU 0U | ||
216 | #define CACHE_REPLACEMENT_FIFO 1U | ||
217 | #define CACHE_REPLACEMENT_RANDOM 2U | ||
218 | |||
219 | BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); | ||
220 | #define CACHE_MODE_WRITETHROUGH 0U | ||
221 | #define CACHE_MODE_WRITEBACK 1U | ||
222 | #define CACHE_MODE_WRITEAROUND 2U | ||
223 | #define CACHE_MODE_NONE 3U | ||
224 | BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); | ||
225 | #define BDEV_STATE_NONE 0U | ||
226 | #define BDEV_STATE_CLEAN 1U | ||
227 | #define BDEV_STATE_DIRTY 2U | ||
228 | #define BDEV_STATE_STALE 3U | ||
229 | |||
230 | /* | ||
231 | * Magic numbers | ||
232 | * | ||
233 | * The various other data structures have their own magic numbers, which are | ||
234 | * xored with the first part of the cache set's UUID | ||
235 | */ | ||
236 | |||
237 | #define JSET_MAGIC 0x245235c1a3625032ULL | ||
238 | #define PSET_MAGIC 0x6750e15f87337f91ULL | ||
239 | #define BSET_MAGIC 0x90135c78b99e07f5ULL | ||
240 | |||
241 | static inline __u64 jset_magic(struct cache_sb *sb) | ||
242 | { | ||
243 | return sb->set_magic ^ JSET_MAGIC; | ||
244 | } | ||
245 | |||
246 | static inline __u64 pset_magic(struct cache_sb *sb) | ||
247 | { | ||
248 | return sb->set_magic ^ PSET_MAGIC; | ||
249 | } | ||
250 | |||
251 | static inline __u64 bset_magic(struct cache_sb *sb) | ||
252 | { | ||
253 | return sb->set_magic ^ BSET_MAGIC; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Journal | ||
258 | * | ||
259 | * On disk format for a journal entry: | ||
260 | * seq is monotonically increasing; every journal entry has its own unique | ||
261 | * sequence number. | ||
262 | * | ||
263 | * last_seq is the oldest journal entry that still has keys the btree hasn't | ||
264 | * flushed to disk yet. | ||
265 | * | ||
266 | * version is for on disk format changes. | ||
267 | */ | ||
268 | |||
269 | #define BCACHE_JSET_VERSION_UUIDv1 1 | ||
270 | #define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ | ||
271 | #define BCACHE_JSET_VERSION 1 | ||
272 | |||
273 | struct jset { | ||
274 | __u64 csum; | ||
275 | __u64 magic; | ||
276 | __u64 seq; | ||
277 | __u32 version; | ||
278 | __u32 keys; | ||
279 | |||
280 | __u64 last_seq; | ||
281 | |||
282 | BKEY_PADDED(uuid_bucket); | ||
283 | BKEY_PADDED(btree_root); | ||
284 | __u16 btree_level; | ||
285 | __u16 pad[3]; | ||
286 | |||
287 | __u64 prio_bucket[MAX_CACHES_PER_SET]; | ||
288 | |||
289 | union { | ||
290 | struct bkey start[0]; | ||
291 | __u64 d[0]; | ||
292 | }; | ||
293 | }; | ||
294 | |||
295 | /* Bucket prios/gens */ | ||
296 | |||
297 | struct prio_set { | ||
298 | __u64 csum; | ||
299 | __u64 magic; | ||
300 | __u64 seq; | ||
301 | __u32 version; | ||
302 | __u32 pad; | ||
303 | |||
304 | __u64 next_bucket; | ||
305 | |||
306 | struct bucket_disk { | ||
307 | __u16 prio; | ||
308 | __u8 gen; | ||
309 | } __attribute((packed)) data[]; | ||
310 | }; | ||
311 | |||
312 | /* UUIDS - per backing device/flash only volume metadata */ | ||
313 | |||
314 | struct uuid_entry { | ||
315 | union { | ||
316 | struct { | ||
317 | __u8 uuid[16]; | ||
318 | __u8 label[32]; | ||
319 | __u32 first_reg; | ||
320 | __u32 last_reg; | ||
321 | __u32 invalidated; | ||
322 | |||
323 | __u32 flags; | ||
324 | /* Size of flash only volumes */ | ||
325 | __u64 sectors; | ||
326 | }; | ||
327 | |||
328 | __u8 pad[128]; | ||
329 | }; | ||
330 | }; | ||
331 | |||
332 | BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); | ||
333 | |||
334 | /* Btree nodes */ | ||
335 | |||
336 | /* Version 1: Seed pointer into btree node checksum | ||
337 | */ | ||
338 | #define BCACHE_BSET_CSUM 1 | ||
339 | #define BCACHE_BSET_VERSION 1 | ||
340 | |||
341 | /* | ||
342 | * Btree nodes | ||
343 | * | ||
344 | * On disk a btree node is a list/log of these; within each set the keys are | ||
345 | * sorted | ||
346 | */ | ||
347 | struct bset { | ||
348 | __u64 csum; | ||
349 | __u64 magic; | ||
350 | __u64 seq; | ||
351 | __u32 version; | ||
352 | __u32 keys; | ||
353 | |||
354 | union { | ||
355 | struct bkey start[0]; | ||
356 | __u64 d[0]; | ||
357 | }; | ||
358 | }; | ||
359 | |||
360 | /* OBSOLETE */ | ||
361 | |||
362 | /* UUIDS - per backing device/flash only volume metadata */ | ||
363 | |||
364 | struct uuid_entry_v0 { | ||
365 | __u8 uuid[16]; | ||
366 | __u8 label[32]; | ||
367 | __u32 first_reg; | ||
368 | __u32 last_reg; | ||
369 | __u32 invalidated; | ||
370 | __u32 pad; | ||
371 | }; | ||
372 | |||
373 | #endif /* _LINUX_BCACHE_H */ | ||