commit f50efd2fdbd9b35b11f5778ed85beb764184bda9
tree   ed92b40995d60136fb387d210886e4aae2c37231
parent 0821e904057505c7e25d72e1a282105d023b26c9
parent ee66850642efda91d04179cae2414310675a1f73
author    Jens Axboe <axboe@kernel.dk>  2013-05-01 03:23:05 -0400
committer Jens Axboe <axboe@kernel.dk>  2013-05-01 03:23:05 -0400

Merge branch 'bcache-for-upstream' of http://evilpiepirate.org/git/linux-bcache into for-3.10/drivers
Kent writes:
Hey Jens, this is everything I've got ready for 3.10 - there's _still_
one more bug I'm trying to track down.
Andrew - I've got patches that rip out the pkey() and pbtree() macros,
but they're somewhat tied up with some other nontrivial refactorings so
I think I'm going to wait a bit on those.
Diffstat:
 -rw-r--r--  drivers/md/bcache/alloc.c   |  72
 -rw-r--r--  drivers/md/bcache/bcache.h  |  47
 -rw-r--r--  drivers/md/bcache/btree.c   |   3
 -rw-r--r--  drivers/md/bcache/io.c      |  35
 -rw-r--r--  drivers/md/bcache/request.c |   2
 -rw-r--r--  drivers/md/bcache/super.c   | 166

 6 files changed, 213 insertions(+), 112 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 2879487d036a..048f2947e08b 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -243,31 +243,37 @@ static void invalidate_buckets_lru(struct cache *ca)
 	ca->heap.used = 0;
 
 	for_each_bucket(b, ca) {
+		/*
+		 * If we fill up the unused list, if we then return before
+		 * adding anything to the free_inc list we'll skip writing
+		 * prios/gens and just go back to allocating from the unused
+		 * list:
+		 */
+		if (fifo_full(&ca->unused))
+			return;
+
 		if (!can_invalidate_bucket(ca, b))
 			continue;
 
-		if (!GC_SECTORS_USED(b)) {
-			if (!bch_bucket_add_unused(ca, b))
-				return;
-		} else {
-			if (!heap_full(&ca->heap))
-				heap_add(&ca->heap, b, bucket_max_cmp);
-			else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
-				ca->heap.data[0] = b;
-				heap_sift(&ca->heap, 0, bucket_max_cmp);
-			}
+		if (!GC_SECTORS_USED(b) &&
+		    bch_bucket_add_unused(ca, b))
+			continue;
+
+		if (!heap_full(&ca->heap))
+			heap_add(&ca->heap, b, bucket_max_cmp);
+		else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
+			ca->heap.data[0] = b;
+			heap_sift(&ca->heap, 0, bucket_max_cmp);
 		}
 	}
 
-	if (ca->heap.used * 2 < ca->heap.size)
-		bch_queue_gc(ca->set);
-
 	for (i = ca->heap.used / 2 - 1; i >= 0; --i)
 		heap_sift(&ca->heap, i, bucket_min_cmp);
 
 	while (!fifo_full(&ca->free_inc)) {
 		if (!heap_pop(&ca->heap, b, bucket_min_cmp)) {
-			/* We don't want to be calling invalidate_buckets()
+			/*
+			 * We don't want to be calling invalidate_buckets()
 			 * multiple times when it can't do anything
 			 */
 			ca->invalidate_needs_gc = 1;
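
The restructured scan is the usual bounded-heap idiom for keeping the best N candidates from a stream: push until the heap is full, then replace the root whenever a new item beats it. A minimal sketch with placeholder names (heap, item, and cmp here are generic stand-ins, not bcache identifiers):

	if (!heap_full(&heap))
		heap_add(&heap, item, cmp);
	else if (cmp(item, heap_peek(&heap))) {
		/* new item beats the current root: replace it and re-sift */
		heap.data[0] = item;
		heap_sift(&heap, 0, cmp);
	}

Afterwards the code re-heapifies with bucket_min_cmp, so heap_pop() hands buckets back in the opposite order when refilling free_inc.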
@@ -343,15 +349,22 @@ static void invalidate_buckets(struct cache *ca)
 		invalidate_buckets_random(ca);
 		break;
 	}
+
+	pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
+		 fifo_used(&ca->free), ca->free.size,
+		 fifo_used(&ca->free_inc), ca->free_inc.size,
+		 fifo_used(&ca->unused), ca->unused.size);
 }
 
 #define allocator_wait(ca, cond)					\
 do {									\
 	DEFINE_WAIT(__wait);						\
 									\
-	while (!(cond)) {						\
+	while (1) {							\
 		prepare_to_wait(&ca->set->alloc_wait,			\
 				&__wait, TASK_INTERRUPTIBLE);		\
+		if (cond)						\
+			break;						\
 									\
 		mutex_unlock(&(ca)->set->bucket_lock);			\
 		if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) {	\
@@ -360,7 +373,6 @@ do {									\
 		}							\
 									\
 		schedule();						\
-		__set_current_state(TASK_RUNNING);			\
 		mutex_lock(&(ca)->set->bucket_lock);			\
 	}								\
 									\
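
Testing the condition only after prepare_to_wait() is the canonical lost-wakeup-safe wait loop: once the task is on the waitqueue with its state set, a wake_up() from the other side flips it back to TASK_RUNNING, so a wakeup that lands between the test and schedule() simply makes schedule() return at once. The generic shape (a sketch; wq and condition stand in for the real waitqueue and predicate):

	DEFINE_WAIT(wait);

	while (1) {
		prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
		if (condition)
			break;
		schedule();
	}
	finish_wait(&wq, &wait);

This is also why the explicit __set_current_state(TASK_RUNNING) could go: the next prepare_to_wait() or the final finish_wait() resets the task state anyway.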
@@ -374,6 +386,11 @@ void bch_allocator_thread(struct closure *cl)
 	mutex_lock(&ca->set->bucket_lock);
 
 	while (1) {
+		/*
+		 * First, we pull buckets off of the unused and free_inc lists,
+		 * possibly issue discards to them, then we add the bucket to
+		 * the free list:
+		 */
 		while (1) {
 			long bucket;
 
@@ -398,17 +415,26 @@ void bch_allocator_thread(struct closure *cl)
 			}
 		}
 
-		allocator_wait(ca, ca->set->gc_mark_valid);
-		invalidate_buckets(ca);
+		/*
+		 * We've run out of free buckets, we need to find some buckets
+		 * we can invalidate. First, invalidate them in memory and add
+		 * them to the free_inc list:
+		 */
 
-		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked) ||
-			       !CACHE_SYNC(&ca->set->sb));
+		allocator_wait(ca, ca->set->gc_mark_valid &&
+			       (ca->need_save_prio > 64 ||
+				!ca->invalidate_needs_gc));
+		invalidate_buckets(ca);
 
+		/*
+		 * Now, we write their new gens to disk so we can start writing
+		 * new stuff to them:
+		 */
+		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
 		if (CACHE_SYNC(&ca->set->sb) &&
 		    (!fifo_empty(&ca->free_inc) ||
-		     ca->need_save_prio > 64)) {
+		     ca->need_save_prio > 64))
 			bch_prio_write(ca);
-		}
 	}
 }
 
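
Condensed, each pass of the allocator thread's main loop now has three phases; a rough sketch (the phase-1 helper is a hypothetical stand-in for the fifo manipulation elided above):

	while (1) {
		/* 1: move already-invalidated buckets onto the free list */
		while (refill_free_list(ca))		/* hypothetical */
			;

		/* 2: pick victims and invalidate them in memory */
		allocator_wait(ca, ca->set->gc_mark_valid &&
			       (ca->need_save_prio > 64 ||
				!ca->invalidate_needs_gc));
		invalidate_buckets(ca);

		/* 3: persist the new generations before reusing buckets */
		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
		if (CACHE_SYNC(&ca->set->sb) &&
		    (!fifo_empty(&ca->free_inc) || ca->need_save_prio > 64))
			bch_prio_write(ca);
	}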
@@ -475,7 +501,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
 	for (i = 0; i < KEY_PTRS(k); i++) {
 		struct bucket *b = PTR_BUCKET(c, k, i);
 
-		SET_GC_MARK(b, 0);
+		SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
 		SET_GC_SECTORS_USED(b, 0);
 		bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
 	}
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index f05723565f17..340146d7c17f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -223,11 +223,17 @@ struct bkey {
 #define BKEY_PADDED(key)					\
 	union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; }
 
-/* Version 1: Backing device
+/* Version 0: Cache device
+ * Version 1: Backing device
  * Version 2: Seed pointer into btree node checksum
- * Version 3: New UUID format
+ * Version 3: Cache device with new UUID format
+ * Version 4: Backing device with data offset
  */
-#define BCACHE_SB_VERSION	3
+#define BCACHE_SB_VERSION_CDEV			0
+#define BCACHE_SB_VERSION_BDEV			1
+#define BCACHE_SB_VERSION_CDEV_WITH_UUID	3
+#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET	4
+#define BCACHE_SB_MAX_VERSION			4
 
 #define SB_SECTOR		8
 #define SB_SIZE			4096
@@ -236,13 +242,12 @@ struct bkey {
 /* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
 #define MAX_CACHES_PER_SET	8
 
-#define BDEV_DATA_START		16	/* sectors */
+#define BDEV_DATA_START_DEFAULT	16	/* sectors */
 
 struct cache_sb {
 	uint64_t		csum;
 	uint64_t		offset;	/* sector where this sb was written */
 	uint64_t		version;
-#define CACHE_BACKING_DEV	1
 
 	uint8_t			magic[16];
 
@@ -257,12 +262,28 @@ struct cache_sb {
 	uint64_t		seq;
 	uint64_t		pad[8];
 
-	uint64_t		nbuckets;	/* device size */
-	uint16_t		block_size;	/* sectors */
-	uint16_t		bucket_size;	/* sectors */
+	union {
+	struct {
+		/* Cache devices */
+		uint64_t	nbuckets;	/* device size */
+
+		uint16_t	block_size;	/* sectors */
+		uint16_t	bucket_size;	/* sectors */
 
-	uint16_t		nr_in_set;
-	uint16_t		nr_this_dev;
+		uint16_t	nr_in_set;
+		uint16_t	nr_this_dev;
+	};
+	struct {
+		/* Backing devices */
+		uint64_t	data_offset;
+
+		/*
+		 * block_size from the cache device section is still used by
+		 * backing devices, so don't add anything here until we fix
+		 * things to not need it for backing devices anymore
+		 */
+	};
+	};
 
 	uint32_t		last_mount;	/* time_t */
 
@@ -861,6 +882,12 @@ static inline bool key_merging_disabled(struct cache_set *c)
 #endif
 }
 
+static inline bool SB_IS_BDEV(const struct cache_sb *sb)
+{
+	return sb->version == BCACHE_SB_VERSION_BDEV
+		|| sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
+}
+
 struct bbio {
 	unsigned		submit_time_us;
 	union {
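
With the version split, code that needs to tell backing devices from cache devices goes through SB_IS_BDEV() instead of comparing against the removed CACHE_BACKING_DEV constant; register_bcache() in super.c below does exactly this. A sketch of the intended use (the two handler functions are hypothetical):

	if (SB_IS_BDEV(sb)) {
		/* backing device: data_offset (plus block_size) is valid */
		register_backing_dev(sb);	/* hypothetical */
	} else {
		/* cache device: the bucket-geometry fields are valid */
		register_cache_dev(sb);		/* hypothetical */
	}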
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 852340793777..7a5658f04e62 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -984,7 +984,7 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 
 	if (b->prio_blocked &&
 	    !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked))
-		closure_wake_up(&b->c->bucket_wait);
+		wake_up(&b->c->alloc_wait);
 
 	b->prio_blocked = 0;
 
@@ -1548,7 +1548,6 @@ static void bch_btree_gc(struct closure *cl)
 
 	trace_bcache_gc_end(c->sb.set_uuid);
 	wake_up(&c->alloc_wait);
-	closure_wake_up(&c->bucket_wait);
 
 	continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index f565512f6fac..48efd4dea645 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -38,6 +38,15 @@ static void bch_generic_make_request_hack(struct bio *bio)
 		bio = clone;
 	}
 
+	/*
+	 * Hack, since drivers that clone bios clone up to bi_max_vecs, but our
+	 * bios might have had more than that (before we split them per device
+	 * limitations).
+	 *
+	 * To be taken out once immutable bvec stuff is in.
+	 */
+	bio->bi_max_vecs = bio->bi_vcnt;
+
 	generic_make_request(bio);
 }
 
@@ -149,34 +158,32 @@ static unsigned bch_bio_max_sectors(struct bio *bio)
 {
 	unsigned ret = bio_sectors(bio);
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+	unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
+				      queue_max_segments(q));
 	struct bio_vec *bv, *end = bio_iovec(bio) +
-		min_t(int, bio_segments(bio), queue_max_segments(q));
-
-	struct bvec_merge_data bvm = {
-		.bi_bdev	= bio->bi_bdev,
-		.bi_sector	= bio->bi_sector,
-		.bi_size	= 0,
-		.bi_rw		= bio->bi_rw,
-	};
+		min_t(int, bio_segments(bio), max_segments);
 
 	if (bio->bi_rw & REQ_DISCARD)
 		return min(ret, q->limits.max_discard_sectors);
 
-	if (bio_segments(bio) > queue_max_segments(q) ||
+	if (bio_segments(bio) > max_segments ||
 	    q->merge_bvec_fn) {
 		ret = 0;
 
 		for (bv = bio_iovec(bio); bv < end; bv++) {
+			struct bvec_merge_data bvm = {
+				.bi_bdev	= bio->bi_bdev,
+				.bi_sector	= bio->bi_sector,
+				.bi_size	= ret << 9,
+				.bi_rw		= bio->bi_rw,
+			};
+
 			if (q->merge_bvec_fn &&
 			    q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
 				break;
 
 			ret += bv->bv_len >> 9;
-			bvm.bi_size += bv->bv_len;
 		}
-
-		if (ret >= (BIO_MAX_PAGES * PAGE_SIZE) >> 9)
-			return (BIO_MAX_PAGES * PAGE_SIZE) >> 9;
 	}
 
 	ret = min(ret, queue_max_sectors(q));
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 83731dc36f34..e5ff12e52d5b 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1220,7 +1220,7 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
 	part_stat_unlock();
 
 	bio->bi_bdev = dc->bdev;
-	bio->bi_sector += BDEV_DATA_START;
+	bio->bi_sector += dc->sb.data_offset;
 
 	if (cached_dev_get(dc)) {
 		s = search_alloc(bio, d);
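
The remap is a plain sector shift: I/O aimed at the bcache device lands past the superblock area of the backing device. A worked example with the default layout (numbers assumed):

	/* data starts BDEV_DATA_START_DEFAULT == 16 sectors into the bdev */
	bio->bi_bdev = dc->bdev;		/* redirect to backing device */
	bio->bi_sector += dc->sb.data_offset;	/* sector 0 -> sector 16 */

Version-4 superblocks may record a larger data_offset, which is why the fixed constant is gone from this path.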
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 5fa3cd2d9ff0..c8046bc4aa57 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -110,15 +110,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
 
 	sb->flags = le64_to_cpu(s->flags);
 	sb->seq = le64_to_cpu(s->seq);
-
-	sb->nbuckets = le64_to_cpu(s->nbuckets);
-	sb->block_size = le16_to_cpu(s->block_size);
-	sb->bucket_size = le16_to_cpu(s->bucket_size);
-
-	sb->nr_in_set = le16_to_cpu(s->nr_in_set);
-	sb->nr_this_dev = le16_to_cpu(s->nr_this_dev);
 	sb->last_mount = le32_to_cpu(s->last_mount);
-
 	sb->first_bucket = le16_to_cpu(s->first_bucket);
 	sb->keys = le16_to_cpu(s->keys);
 
@@ -147,53 +139,81 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
 	if (bch_is_zero(sb->uuid, 16))
 		goto err;
 
-	err = "Unsupported superblock version";
-	if (sb->version > BCACHE_SB_VERSION)
-		goto err;
+	sb->block_size = le16_to_cpu(s->block_size);
 
-	err = "Bad block/bucket size";
-	if (!is_power_of_2(sb->block_size) || sb->block_size > PAGE_SECTORS ||
-	    !is_power_of_2(sb->bucket_size) || sb->bucket_size < PAGE_SECTORS)
+	err = "Superblock block size smaller than device block size";
+	if (sb->block_size << 9 < bdev_logical_block_size(bdev))
 		goto err;
 
-	err = "Too many buckets";
-	if (sb->nbuckets > LONG_MAX)
-		goto err;
+	switch (sb->version) {
+	case BCACHE_SB_VERSION_BDEV:
+		sb->data_offset = BDEV_DATA_START_DEFAULT;
+		break;
+	case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
+		sb->data_offset = le64_to_cpu(s->data_offset);
 
-	err = "Not enough buckets";
-	if (sb->nbuckets < 1 << 7)
-		goto err;
+		err = "Bad data offset";
+		if (sb->data_offset < BDEV_DATA_START_DEFAULT)
+			goto err;
 
-	err = "Invalid superblock: device too small";
-	if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets)
-		goto err;
+		break;
+	case BCACHE_SB_VERSION_CDEV:
+	case BCACHE_SB_VERSION_CDEV_WITH_UUID:
+		sb->nbuckets = le64_to_cpu(s->nbuckets);
+		sb->block_size = le16_to_cpu(s->block_size);
+		sb->bucket_size = le16_to_cpu(s->bucket_size);
 
-	if (sb->version == CACHE_BACKING_DEV)
-		goto out;
+		sb->nr_in_set = le16_to_cpu(s->nr_in_set);
+		sb->nr_this_dev = le16_to_cpu(s->nr_this_dev);
 
-	err = "Bad UUID";
-	if (bch_is_zero(sb->set_uuid, 16))
-		goto err;
+		err = "Too many buckets";
+		if (sb->nbuckets > LONG_MAX)
+			goto err;
 
-	err = "Bad cache device number in set";
-	if (!sb->nr_in_set ||
-	    sb->nr_in_set <= sb->nr_this_dev ||
-	    sb->nr_in_set > MAX_CACHES_PER_SET)
-		goto err;
+		err = "Not enough buckets";
+		if (sb->nbuckets < 1 << 7)
+			goto err;
 
-	err = "Journal buckets not sequential";
-	for (i = 0; i < sb->keys; i++)
-		if (sb->d[i] != sb->first_bucket + i)
-			goto err;
+		err = "Bad block/bucket size";
+		if (!is_power_of_2(sb->block_size) ||
+		    sb->block_size > PAGE_SECTORS ||
+		    !is_power_of_2(sb->bucket_size) ||
+		    sb->bucket_size < PAGE_SECTORS)
+			goto err;
 
-	err = "Too many journal buckets";
-	if (sb->first_bucket + sb->keys > sb->nbuckets)
-		goto err;
+		err = "Invalid superblock: device too small";
+		if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets)
+			goto err;
+
+		err = "Bad UUID";
+		if (bch_is_zero(sb->set_uuid, 16))
+			goto err;
+
+		err = "Bad cache device number in set";
+		if (!sb->nr_in_set ||
+		    sb->nr_in_set <= sb->nr_this_dev ||
+		    sb->nr_in_set > MAX_CACHES_PER_SET)
+			goto err;
 
-	err = "Invalid superblock: first bucket comes before end of super";
-	if (sb->first_bucket * sb->bucket_size < 16)
+		err = "Journal buckets not sequential";
+		for (i = 0; i < sb->keys; i++)
+			if (sb->d[i] != sb->first_bucket + i)
+				goto err;
+
+		err = "Too many journal buckets";
+		if (sb->first_bucket + sb->keys > sb->nbuckets)
+			goto err;
+
+		err = "Invalid superblock: first bucket comes before end of super";
+		if (sb->first_bucket * sb->bucket_size < 16)
+			goto err;
+
+		break;
+	default:
+		err = "Unsupported superblock version";
 		goto err;
-out:
+	}
+
 	sb->last_mount = get_seconds();
 	err = NULL;
 
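
Note the behavioral change hidden in the switch: the old upper-bound test (sb->version > BCACHE_SB_VERSION) also accepted version-2 superblocks, while anything other than the four named versions now falls through to the default: case. The same logic as a standalone predicate (a hypothetical helper, not part of the patch):

	static bool sb_version_supported(uint64_t version)
	{
		switch (version) {
		case BCACHE_SB_VERSION_CDEV:
		case BCACHE_SB_VERSION_BDEV:
		case BCACHE_SB_VERSION_CDEV_WITH_UUID:
		case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
			return true;
		default:
			return false;	/* includes version 2, now unsupported */
		}
	}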
@@ -286,7 +306,7 @@ void bcache_write_super(struct cache_set *c)
 	for_each_cache(ca, c, i) {
 		struct bio *bio = &ca->sb_bio;
 
-		ca->sb.version = BCACHE_SB_VERSION;
+		ca->sb.version = BCACHE_SB_VERSION_CDEV_WITH_UUID;
 		ca->sb.seq = c->sb.seq;
 		ca->sb.last_mount = c->sb.last_mount;
 
@@ -641,6 +661,35 @@ void bcache_device_stop(struct bcache_device *d)
 	closure_queue(&d->cl);
 }
 
+static void bcache_device_unlink(struct bcache_device *d)
+{
+	unsigned i;
+	struct cache *ca;
+
+	sysfs_remove_link(&d->c->kobj, d->name);
+	sysfs_remove_link(&d->kobj, "cache");
+
+	for_each_cache(ca, d->c, i)
+		bd_unlink_disk_holder(ca->bdev, d->disk);
+}
+
+static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
+			       const char *name)
+{
+	unsigned i;
+	struct cache *ca;
+
+	for_each_cache(ca, d->c, i)
+		bd_link_disk_holder(ca->bdev, d->disk);
+
+	snprintf(d->name, BCACHEDEVNAME_SIZE,
+		 "%s%u", name, d->id);
+
+	WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
+	     sysfs_create_link(&c->kobj, &d->kobj, d->name),
+	     "Couldn't create device <-> cache set symlinks");
+}
+
 static void bcache_device_detach(struct bcache_device *d)
 {
 	lockdep_assert_held(&bch_register_lock);
@@ -656,6 +705,8 @@ static void bcache_device_detach(struct bcache_device *d)
 		atomic_set(&d->detaching, 0);
 	}
 
+	bcache_device_unlink(d);
+
 	d->c->devices[d->id] = NULL;
 	closure_put(&d->c->caching);
 	d->c = NULL;
@@ -673,17 +724,6 @@ static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
 	closure_get(&c->caching);
 }
 
-static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
-			       const char *name)
-{
-	snprintf(d->name, BCACHEDEVNAME_SIZE,
-		 "%s%u", name, d->id);
-
-	WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
-	     sysfs_create_link(&c->kobj, &d->kobj, d->name),
-	     "Couldn't create device <-> cache set symlinks");
-}
-
 static void bcache_device_free(struct bcache_device *d)
 {
 	lockdep_assert_held(&bch_register_lock);
@@ -784,6 +824,7 @@ void bch_cached_dev_run(struct cached_dev *dc)
 	}
 
 	add_disk(d->disk);
+	bd_link_disk_holder(dc->bdev, dc->disk.disk);
 #if 0
 	char *env[] = { "SYMLINK=label" , NULL };
 	kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
@@ -803,9 +844,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
 	BUG_ON(!atomic_read(&dc->disk.detaching));
 	BUG_ON(atomic_read(&dc->count));
 
-	sysfs_remove_link(&dc->disk.c->kobj, dc->disk.name);
-	sysfs_remove_link(&dc->disk.kobj, "cache");
-
 	mutex_lock(&bch_register_lock);
 
 	memset(&dc->sb.set_uuid, 0, 16);
@@ -920,7 +958,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 	}
 
 	bcache_device_attach(&dc->disk, c, u - c->uuids);
-	bcache_device_link(&dc->disk, c, "bdev");
 	list_move(&dc->list, &c->cached_devs);
 	calc_cached_dev_sectors(c);
 
@@ -938,6 +975,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 	}
 
 	bch_cached_dev_run(dc);
+	bcache_device_link(&dc->disk, c, "bdev");
 
 	pr_info("Caching %s as %s on set %pU",
 		bdevname(dc->bdev, buf), dc->disk.disk->disk_name,
@@ -961,6 +999,7 @@ static void cached_dev_free(struct closure *cl)
 
 	mutex_lock(&bch_register_lock);
 
+	bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
 	bcache_device_free(&dc->disk);
 	list_del(&dc->list);
 
@@ -1049,7 +1088,11 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
 
 	g = dc->disk.disk;
 
-	set_capacity(g, dc->bdev->bd_part->nr_sects - 16);
+	set_capacity(g, dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
+
+	g->queue->backing_dev_info.ra_pages =
+		max(g->queue->backing_dev_info.ra_pages,
+		    bdev->bd_queue->backing_dev_info.ra_pages);
 
 	bch_cached_dev_request_init(dc);
 
@@ -1099,8 +1142,7 @@ static void flash_dev_flush(struct closure *cl)
 {
 	struct bcache_device *d = container_of(cl, struct bcache_device, cl);
 
-	sysfs_remove_link(&d->c->kobj, d->name);
-	sysfs_remove_link(&d->kobj, "cache");
+	bcache_device_unlink(d);
 	kobject_del(&d->kobj);
 	continue_at(cl, flash_dev_free, system_wq);
 }
@@ -1802,7 +1844,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 	if (err)
 		goto err_close;
 
-	if (sb->version == CACHE_BACKING_DEV) {
+	if (SB_IS_BDEV(sb)) {
 		struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
 
 		err = register_bdev(sb, sb_page, bdev, dc);