aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jens Axboe <axboe@kernel.dk> 2013-05-01 03:23:05 -0400
committer Jens Axboe <axboe@kernel.dk> 2013-05-01 03:23:05 -0400
commit f50efd2fdbd9b35b11f5778ed85beb764184bda9 (patch)
tree ed92b40995d60136fb387d210886e4aae2c37231
parent 0821e904057505c7e25d72e1a282105d023b26c9 (diff)
parent ee66850642efda91d04179cae2414310675a1f73 (diff)
Merge branch 'bcache-for-upstream' of http://evilpiepirate.org/git/linux-bcache into for-3.10/drivers
Kent writes: Hey Jens, this is everything I've got ready for 3.10 - there's _still_ one more bug I'm trying to track down. Andrew - I've got patches that rip out the pkey() and pbtree() macros, but they're somewhat tied up with some other nontrivial refactorings so I think I'm going to wait a bit on those.
-rw-r--r-- drivers/md/bcache/alloc.c 72
-rw-r--r-- drivers/md/bcache/bcache.h 47
-rw-r--r-- drivers/md/bcache/btree.c 3
-rw-r--r-- drivers/md/bcache/io.c 35
-rw-r--r-- drivers/md/bcache/request.c 2
-rw-r--r-- drivers/md/bcache/super.c 166
6 files changed, 213 insertions, 112 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 2879487d036a..048f2947e08b 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -243,31 +243,37 @@ static void invalidate_buckets_lru(struct cache *ca)
243 ca->heap.used = 0; 243 ca->heap.used = 0;
244 244
245 for_each_bucket(b, ca) { 245 for_each_bucket(b, ca) {
246 /*
247 * If we fill up the unused list, if we then return before
248 * adding anything to the free_inc list we'll skip writing
249 * prios/gens and just go back to allocating from the unused
250 * list:
251 */
252 if (fifo_full(&ca->unused))
253 return;
254
246 if (!can_invalidate_bucket(ca, b)) 255 if (!can_invalidate_bucket(ca, b))
247 continue; 256 continue;
248 257
249 if (!GC_SECTORS_USED(b)) { 258 if (!GC_SECTORS_USED(b) &&
250 if (!bch_bucket_add_unused(ca, b)) 259 bch_bucket_add_unused(ca, b))
251 return; 260 continue;
252 } else { 261
253 if (!heap_full(&ca->heap)) 262 if (!heap_full(&ca->heap))
254 heap_add(&ca->heap, b, bucket_max_cmp); 263 heap_add(&ca->heap, b, bucket_max_cmp);
255 else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { 264 else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
256 ca->heap.data[0] = b; 265 ca->heap.data[0] = b;
257 heap_sift(&ca->heap, 0, bucket_max_cmp); 266 heap_sift(&ca->heap, 0, bucket_max_cmp);
258 }
259 } 267 }
260 } 268 }
261 269
262 if (ca->heap.used * 2 < ca->heap.size)
263 bch_queue_gc(ca->set);
264
265 for (i = ca->heap.used / 2 - 1; i >= 0; --i) 270 for (i = ca->heap.used / 2 - 1; i >= 0; --i)
266 heap_sift(&ca->heap, i, bucket_min_cmp); 271 heap_sift(&ca->heap, i, bucket_min_cmp);
267 272
268 while (!fifo_full(&ca->free_inc)) { 273 while (!fifo_full(&ca->free_inc)) {
269 if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { 274 if (!heap_pop(&ca->heap, b, bucket_min_cmp)) {
270 /* We don't want to be calling invalidate_buckets() 275 /*
276 * We don't want to be calling invalidate_buckets()
271 * multiple times when it can't do anything 277 * multiple times when it can't do anything
272 */ 278 */
273 ca->invalidate_needs_gc = 1; 279 ca->invalidate_needs_gc = 1;
@@ -343,15 +349,22 @@ static void invalidate_buckets(struct cache *ca)
343 invalidate_buckets_random(ca); 349 invalidate_buckets_random(ca);
344 break; 350 break;
345 } 351 }
352
353 pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
354 fifo_used(&ca->free), ca->free.size,
355 fifo_used(&ca->free_inc), ca->free_inc.size,
356 fifo_used(&ca->unused), ca->unused.size);
346} 357}
347 358
348#define allocator_wait(ca, cond) \ 359#define allocator_wait(ca, cond) \
349do { \ 360do { \
350 DEFINE_WAIT(__wait); \ 361 DEFINE_WAIT(__wait); \
351 \ 362 \
352 while (!(cond)) { \ 363 while (1) { \
353 prepare_to_wait(&ca->set->alloc_wait, \ 364 prepare_to_wait(&ca->set->alloc_wait, \
354 &__wait, TASK_INTERRUPTIBLE); \ 365 &__wait, TASK_INTERRUPTIBLE); \
366 if (cond) \
367 break; \
355 \ 368 \
356 mutex_unlock(&(ca)->set->bucket_lock); \ 369 mutex_unlock(&(ca)->set->bucket_lock); \
357 if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) { \ 370 if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) { \
@@ -360,7 +373,6 @@ do { \
360 } \ 373 } \
361 \ 374 \
362 schedule(); \ 375 schedule(); \
363 __set_current_state(TASK_RUNNING); \
364 mutex_lock(&(ca)->set->bucket_lock); \ 376 mutex_lock(&(ca)->set->bucket_lock); \
365 } \ 377 } \
366 \ 378 \
@@ -374,6 +386,11 @@ void bch_allocator_thread(struct closure *cl)
374 mutex_lock(&ca->set->bucket_lock); 386 mutex_lock(&ca->set->bucket_lock);
375 387
376 while (1) { 388 while (1) {
389 /*
390 * First, we pull buckets off of the unused and free_inc lists,
391 * possibly issue discards to them, then we add the bucket to
392 * the free list:
393 */
377 while (1) { 394 while (1) {
378 long bucket; 395 long bucket;
379 396
@@ -398,17 +415,26 @@ void bch_allocator_thread(struct closure *cl)
398 } 415 }
399 } 416 }
400 417
401 allocator_wait(ca, ca->set->gc_mark_valid); 418 /*
402 invalidate_buckets(ca); 419 * We've run out of free buckets, we need to find some buckets
420 * we can invalidate. First, invalidate them in memory and add
421 * them to the free_inc list:
422 */
403 423
404 allocator_wait(ca, !atomic_read(&ca->set->prio_blocked) || 424 allocator_wait(ca, ca->set->gc_mark_valid &&
405 !CACHE_SYNC(&ca->set->sb)); 425 (ca->need_save_prio > 64 ||
426 !ca->invalidate_needs_gc));
427 invalidate_buckets(ca);
406 428
429 /*
430 * Now, we write their new gens to disk so we can start writing
431 * new stuff to them:
432 */
433 allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
407 if (CACHE_SYNC(&ca->set->sb) && 434 if (CACHE_SYNC(&ca->set->sb) &&
408 (!fifo_empty(&ca->free_inc) || 435 (!fifo_empty(&ca->free_inc) ||
409 ca->need_save_prio > 64)) { 436 ca->need_save_prio > 64))
410 bch_prio_write(ca); 437 bch_prio_write(ca);
411 }
412 } 438 }
413} 439}
414 440
@@ -475,7 +501,7 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k)
475 for (i = 0; i < KEY_PTRS(k); i++) { 501 for (i = 0; i < KEY_PTRS(k); i++) {
476 struct bucket *b = PTR_BUCKET(c, k, i); 502 struct bucket *b = PTR_BUCKET(c, k, i);
477 503
478 SET_GC_MARK(b, 0); 504 SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
479 SET_GC_SECTORS_USED(b, 0); 505 SET_GC_SECTORS_USED(b, 0);
480 bch_bucket_add_unused(PTR_CACHE(c, k, i), b); 506 bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
481 } 507 }
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index f05723565f17..340146d7c17f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -223,11 +223,17 @@ struct bkey {
223#define BKEY_PADDED(key) \ 223#define BKEY_PADDED(key) \
224 union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; } 224 union { struct bkey key; uint64_t key ## _pad[BKEY_PAD]; }
225 225
226/* Version 1: Backing device 226/* Version 0: Cache device
227 * Version 1: Backing device
227 * Version 2: Seed pointer into btree node checksum 228 * Version 2: Seed pointer into btree node checksum
228 * Version 3: New UUID format 229 * Version 3: Cache device with new UUID format
230 * Version 4: Backing device with data offset
229 */ 231 */
230#define BCACHE_SB_VERSION 3 232#define BCACHE_SB_VERSION_CDEV 0
233#define BCACHE_SB_VERSION_BDEV 1
234#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
235#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
236#define BCACHE_SB_MAX_VERSION 4
231 237
232#define SB_SECTOR 8 238#define SB_SECTOR 8
233#define SB_SIZE 4096 239#define SB_SIZE 4096
@@ -236,13 +242,12 @@ struct bkey {
236/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */ 242/* SB_JOURNAL_BUCKETS must be divisible by BITS_PER_LONG */
237#define MAX_CACHES_PER_SET 8 243#define MAX_CACHES_PER_SET 8
238 244
239#define BDEV_DATA_START 16 /* sectors */ 245#define BDEV_DATA_START_DEFAULT 16 /* sectors */
240 246
241struct cache_sb { 247struct cache_sb {
242 uint64_t csum; 248 uint64_t csum;
243 uint64_t offset; /* sector where this sb was written */ 249 uint64_t offset; /* sector where this sb was written */
244 uint64_t version; 250 uint64_t version;
245#define CACHE_BACKING_DEV 1
246 251
247 uint8_t magic[16]; 252 uint8_t magic[16];
248 253
@@ -257,12 +262,28 @@ struct cache_sb {
257 uint64_t seq; 262 uint64_t seq;
258 uint64_t pad[8]; 263 uint64_t pad[8];
259 264
260 uint64_t nbuckets; /* device size */ 265 union {
261 uint16_t block_size; /* sectors */ 266 struct {
262 uint16_t bucket_size; /* sectors */ 267 /* Cache devices */
268 uint64_t nbuckets; /* device size */
269
270 uint16_t block_size; /* sectors */
271 uint16_t bucket_size; /* sectors */
263 272
264 uint16_t nr_in_set; 273 uint16_t nr_in_set;
265 uint16_t nr_this_dev; 274 uint16_t nr_this_dev;
275 };
276 struct {
277 /* Backing devices */
278 uint64_t data_offset;
279
280 /*
281 * block_size from the cache device section is still used by
282 * backing devices, so don't add anything here until we fix
283 * things to not need it for backing devices anymore
284 */
285 };
286 };
266 287
267 uint32_t last_mount; /* time_t */ 288 uint32_t last_mount; /* time_t */
268 289
@@ -861,6 +882,12 @@ static inline bool key_merging_disabled(struct cache_set *c)
861#endif 882#endif
862} 883}
863 884
885static inline bool SB_IS_BDEV(const struct cache_sb *sb)
886{
887 return sb->version == BCACHE_SB_VERSION_BDEV
888 || sb->version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
889}
890
864struct bbio { 891struct bbio {
865 unsigned submit_time_us; 892 unsigned submit_time_us;
866 union { 893 union {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 852340793777..7a5658f04e62 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -984,7 +984,7 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
984 984
985 if (b->prio_blocked && 985 if (b->prio_blocked &&
986 !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked)) 986 !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked))
987 closure_wake_up(&b->c->bucket_wait); 987 wake_up(&b->c->alloc_wait);
988 988
989 b->prio_blocked = 0; 989 b->prio_blocked = 0;
990 990
@@ -1548,7 +1548,6 @@ static void bch_btree_gc(struct closure *cl)
1548 1548
1549 trace_bcache_gc_end(c->sb.set_uuid); 1549 trace_bcache_gc_end(c->sb.set_uuid);
1550 wake_up(&c->alloc_wait); 1550 wake_up(&c->alloc_wait);
1551 closure_wake_up(&c->bucket_wait);
1552 1551
1553 continue_at(cl, bch_moving_gc, bch_gc_wq); 1552 continue_at(cl, bch_moving_gc, bch_gc_wq);
1554} 1553}
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index f565512f6fac..48efd4dea645 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -38,6 +38,15 @@ static void bch_generic_make_request_hack(struct bio *bio)
38 bio = clone; 38 bio = clone;
39 } 39 }
40 40
41 /*
42 * Hack, since drivers that clone bios clone up to bi_max_vecs, but our
43 * bios might have had more than that (before we split them per device
44 * limitations).
45 *
46 * To be taken out once immutable bvec stuff is in.
47 */
48 bio->bi_max_vecs = bio->bi_vcnt;
49
41 generic_make_request(bio); 50 generic_make_request(bio);
42} 51}
43 52
@@ -149,34 +158,32 @@ static unsigned bch_bio_max_sectors(struct bio *bio)
149{ 158{
150 unsigned ret = bio_sectors(bio); 159 unsigned ret = bio_sectors(bio);
151 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 160 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
161 unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
162 queue_max_segments(q));
152 struct bio_vec *bv, *end = bio_iovec(bio) + 163 struct bio_vec *bv, *end = bio_iovec(bio) +
153 min_t(int, bio_segments(bio), queue_max_segments(q)); 164 min_t(int, bio_segments(bio), max_segments);
154
155 struct bvec_merge_data bvm = {
156 .bi_bdev = bio->bi_bdev,
157 .bi_sector = bio->bi_sector,
158 .bi_size = 0,
159 .bi_rw = bio->bi_rw,
160 };
161 165
162 if (bio->bi_rw & REQ_DISCARD) 166 if (bio->bi_rw & REQ_DISCARD)
163 return min(ret, q->limits.max_discard_sectors); 167 return min(ret, q->limits.max_discard_sectors);
164 168
165 if (bio_segments(bio) > queue_max_segments(q) || 169 if (bio_segments(bio) > max_segments ||
166 q->merge_bvec_fn) { 170 q->merge_bvec_fn) {
167 ret = 0; 171 ret = 0;
168 172
169 for (bv = bio_iovec(bio); bv < end; bv++) { 173 for (bv = bio_iovec(bio); bv < end; bv++) {
174 struct bvec_merge_data bvm = {
175 .bi_bdev = bio->bi_bdev,
176 .bi_sector = bio->bi_sector,
177 .bi_size = ret << 9,
178 .bi_rw = bio->bi_rw,
179 };
180
170 if (q->merge_bvec_fn && 181 if (q->merge_bvec_fn &&
171 q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len) 182 q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
172 break; 183 break;
173 184
174 ret += bv->bv_len >> 9; 185 ret += bv->bv_len >> 9;
175 bvm.bi_size += bv->bv_len;
176 } 186 }
177
178 if (ret >= (BIO_MAX_PAGES * PAGE_SIZE) >> 9)
179 return (BIO_MAX_PAGES * PAGE_SIZE) >> 9;
180 } 187 }
181 188
182 ret = min(ret, queue_max_sectors(q)); 189 ret = min(ret, queue_max_sectors(q));
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 83731dc36f34..e5ff12e52d5b 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1220,7 +1220,7 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio)
1220 part_stat_unlock(); 1220 part_stat_unlock();
1221 1221
1222 bio->bi_bdev = dc->bdev; 1222 bio->bi_bdev = dc->bdev;
1223 bio->bi_sector += BDEV_DATA_START; 1223 bio->bi_sector += dc->sb.data_offset;
1224 1224
1225 if (cached_dev_get(dc)) { 1225 if (cached_dev_get(dc)) {
1226 s = search_alloc(bio, d); 1226 s = search_alloc(bio, d);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 5fa3cd2d9ff0..c8046bc4aa57 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -110,15 +110,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
110 110
111 sb->flags = le64_to_cpu(s->flags); 111 sb->flags = le64_to_cpu(s->flags);
112 sb->seq = le64_to_cpu(s->seq); 112 sb->seq = le64_to_cpu(s->seq);
113
114 sb->nbuckets = le64_to_cpu(s->nbuckets);
115 sb->block_size = le16_to_cpu(s->block_size);
116 sb->bucket_size = le16_to_cpu(s->bucket_size);
117
118 sb->nr_in_set = le16_to_cpu(s->nr_in_set);
119 sb->nr_this_dev = le16_to_cpu(s->nr_this_dev);
120 sb->last_mount = le32_to_cpu(s->last_mount); 113 sb->last_mount = le32_to_cpu(s->last_mount);
121
122 sb->first_bucket = le16_to_cpu(s->first_bucket); 114 sb->first_bucket = le16_to_cpu(s->first_bucket);
123 sb->keys = le16_to_cpu(s->keys); 115 sb->keys = le16_to_cpu(s->keys);
124 116
@@ -147,53 +139,81 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
147 if (bch_is_zero(sb->uuid, 16)) 139 if (bch_is_zero(sb->uuid, 16))
148 goto err; 140 goto err;
149 141
150 err = "Unsupported superblock version"; 142 sb->block_size = le16_to_cpu(s->block_size);
151 if (sb->version > BCACHE_SB_VERSION)
152 goto err;
153 143
154 err = "Bad block/bucket size"; 144 err = "Superblock block size smaller than device block size";
155 if (!is_power_of_2(sb->block_size) || sb->block_size > PAGE_SECTORS || 145 if (sb->block_size << 9 < bdev_logical_block_size(bdev))
156 !is_power_of_2(sb->bucket_size) || sb->bucket_size < PAGE_SECTORS)
157 goto err; 146 goto err;
158 147
159 err = "Too many buckets"; 148 switch (sb->version) {
160 if (sb->nbuckets > LONG_MAX) 149 case BCACHE_SB_VERSION_BDEV:
161 goto err; 150 sb->data_offset = BDEV_DATA_START_DEFAULT;
151 break;
152 case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
153 sb->data_offset = le64_to_cpu(s->data_offset);
162 154
163 err = "Not enough buckets"; 155 err = "Bad data offset";
164 if (sb->nbuckets < 1 << 7) 156 if (sb->data_offset < BDEV_DATA_START_DEFAULT)
165 goto err; 157 goto err;
166 158
167 err = "Invalid superblock: device too small"; 159 break;
168 if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets) 160 case BCACHE_SB_VERSION_CDEV:
169 goto err; 161 case BCACHE_SB_VERSION_CDEV_WITH_UUID:
162 sb->nbuckets = le64_to_cpu(s->nbuckets);
163 sb->block_size = le16_to_cpu(s->block_size);
164 sb->bucket_size = le16_to_cpu(s->bucket_size);
170 165
171 if (sb->version == CACHE_BACKING_DEV) 166 sb->nr_in_set = le16_to_cpu(s->nr_in_set);
172 goto out; 167 sb->nr_this_dev = le16_to_cpu(s->nr_this_dev);
173 168
174 err = "Bad UUID"; 169 err = "Too many buckets";
175 if (bch_is_zero(sb->set_uuid, 16)) 170 if (sb->nbuckets > LONG_MAX)
176 goto err; 171 goto err;
177 172
178 err = "Bad cache device number in set"; 173 err = "Not enough buckets";
179 if (!sb->nr_in_set || 174 if (sb->nbuckets < 1 << 7)
180 sb->nr_in_set <= sb->nr_this_dev || 175 goto err;
181 sb->nr_in_set > MAX_CACHES_PER_SET)
182 goto err;
183 176
184 err = "Journal buckets not sequential"; 177 err = "Bad block/bucket size";
185 for (i = 0; i < sb->keys; i++) 178 if (!is_power_of_2(sb->block_size) ||
186 if (sb->d[i] != sb->first_bucket + i) 179 sb->block_size > PAGE_SECTORS ||
180 !is_power_of_2(sb->bucket_size) ||
181 sb->bucket_size < PAGE_SECTORS)
187 goto err; 182 goto err;
188 183
189 err = "Too many journal buckets"; 184 err = "Invalid superblock: device too small";
190 if (sb->first_bucket + sb->keys > sb->nbuckets) 185 if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets)
191 goto err; 186 goto err;
187
188 err = "Bad UUID";
189 if (bch_is_zero(sb->set_uuid, 16))
190 goto err;
191
192 err = "Bad cache device number in set";
193 if (!sb->nr_in_set ||
194 sb->nr_in_set <= sb->nr_this_dev ||
195 sb->nr_in_set > MAX_CACHES_PER_SET)
196 goto err;
192 197
193 err = "Invalid superblock: first bucket comes before end of super"; 198 err = "Journal buckets not sequential";
194 if (sb->first_bucket * sb->bucket_size < 16) 199 for (i = 0; i < sb->keys; i++)
200 if (sb->d[i] != sb->first_bucket + i)
201 goto err;
202
203 err = "Too many journal buckets";
204 if (sb->first_bucket + sb->keys > sb->nbuckets)
205 goto err;
206
207 err = "Invalid superblock: first bucket comes before end of super";
208 if (sb->first_bucket * sb->bucket_size < 16)
209 goto err;
210
211 break;
212 default:
213 err = "Unsupported superblock version";
195 goto err; 214 goto err;
196out: 215 }
216
197 sb->last_mount = get_seconds(); 217 sb->last_mount = get_seconds();
198 err = NULL; 218 err = NULL;
199 219
@@ -286,7 +306,7 @@ void bcache_write_super(struct cache_set *c)
286 for_each_cache(ca, c, i) { 306 for_each_cache(ca, c, i) {
287 struct bio *bio = &ca->sb_bio; 307 struct bio *bio = &ca->sb_bio;
288 308
289 ca->sb.version = BCACHE_SB_VERSION; 309 ca->sb.version = BCACHE_SB_VERSION_CDEV_WITH_UUID;
290 ca->sb.seq = c->sb.seq; 310 ca->sb.seq = c->sb.seq;
291 ca->sb.last_mount = c->sb.last_mount; 311 ca->sb.last_mount = c->sb.last_mount;
292 312
@@ -641,6 +661,35 @@ void bcache_device_stop(struct bcache_device *d)
641 closure_queue(&d->cl); 661 closure_queue(&d->cl);
642} 662}
643 663
664static void bcache_device_unlink(struct bcache_device *d)
665{
666 unsigned i;
667 struct cache *ca;
668
669 sysfs_remove_link(&d->c->kobj, d->name);
670 sysfs_remove_link(&d->kobj, "cache");
671
672 for_each_cache(ca, d->c, i)
673 bd_unlink_disk_holder(ca->bdev, d->disk);
674}
675
676static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
677 const char *name)
678{
679 unsigned i;
680 struct cache *ca;
681
682 for_each_cache(ca, d->c, i)
683 bd_link_disk_holder(ca->bdev, d->disk);
684
685 snprintf(d->name, BCACHEDEVNAME_SIZE,
686 "%s%u", name, d->id);
687
688 WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
689 sysfs_create_link(&c->kobj, &d->kobj, d->name),
690 "Couldn't create device <-> cache set symlinks");
691}
692
644static void bcache_device_detach(struct bcache_device *d) 693static void bcache_device_detach(struct bcache_device *d)
645{ 694{
646 lockdep_assert_held(&bch_register_lock); 695 lockdep_assert_held(&bch_register_lock);
@@ -656,6 +705,8 @@ static void bcache_device_detach(struct bcache_device *d)
656 atomic_set(&d->detaching, 0); 705 atomic_set(&d->detaching, 0);
657 } 706 }
658 707
708 bcache_device_unlink(d);
709
659 d->c->devices[d->id] = NULL; 710 d->c->devices[d->id] = NULL;
660 closure_put(&d->c->caching); 711 closure_put(&d->c->caching);
661 d->c = NULL; 712 d->c = NULL;
@@ -673,17 +724,6 @@ static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
673 closure_get(&c->caching); 724 closure_get(&c->caching);
674} 725}
675 726
676static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
677 const char *name)
678{
679 snprintf(d->name, BCACHEDEVNAME_SIZE,
680 "%s%u", name, d->id);
681
682 WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
683 sysfs_create_link(&c->kobj, &d->kobj, d->name),
684 "Couldn't create device <-> cache set symlinks");
685}
686
687static void bcache_device_free(struct bcache_device *d) 727static void bcache_device_free(struct bcache_device *d)
688{ 728{
689 lockdep_assert_held(&bch_register_lock); 729 lockdep_assert_held(&bch_register_lock);
@@ -784,6 +824,7 @@ void bch_cached_dev_run(struct cached_dev *dc)
784 } 824 }
785 825
786 add_disk(d->disk); 826 add_disk(d->disk);
827 bd_link_disk_holder(dc->bdev, dc->disk.disk);
787#if 0 828#if 0
788 char *env[] = { "SYMLINK=label" , NULL }; 829 char *env[] = { "SYMLINK=label" , NULL };
789 kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env); 830 kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
@@ -803,9 +844,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
803 BUG_ON(!atomic_read(&dc->disk.detaching)); 844 BUG_ON(!atomic_read(&dc->disk.detaching));
804 BUG_ON(atomic_read(&dc->count)); 845 BUG_ON(atomic_read(&dc->count));
805 846
806 sysfs_remove_link(&dc->disk.c->kobj, dc->disk.name);
807 sysfs_remove_link(&dc->disk.kobj, "cache");
808
809 mutex_lock(&bch_register_lock); 847 mutex_lock(&bch_register_lock);
810 848
811 memset(&dc->sb.set_uuid, 0, 16); 849 memset(&dc->sb.set_uuid, 0, 16);
@@ -920,7 +958,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
920 } 958 }
921 959
922 bcache_device_attach(&dc->disk, c, u - c->uuids); 960 bcache_device_attach(&dc->disk, c, u - c->uuids);
923 bcache_device_link(&dc->disk, c, "bdev");
924 list_move(&dc->list, &c->cached_devs); 961 list_move(&dc->list, &c->cached_devs);
925 calc_cached_dev_sectors(c); 962 calc_cached_dev_sectors(c);
926 963
@@ -938,6 +975,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
938 } 975 }
939 976
940 bch_cached_dev_run(dc); 977 bch_cached_dev_run(dc);
978 bcache_device_link(&dc->disk, c, "bdev");
941 979
942 pr_info("Caching %s as %s on set %pU", 980 pr_info("Caching %s as %s on set %pU",
943 bdevname(dc->bdev, buf), dc->disk.disk->disk_name, 981 bdevname(dc->bdev, buf), dc->disk.disk->disk_name,
@@ -961,6 +999,7 @@ static void cached_dev_free(struct closure *cl)
961 999
962 mutex_lock(&bch_register_lock); 1000 mutex_lock(&bch_register_lock);
963 1001
1002 bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
964 bcache_device_free(&dc->disk); 1003 bcache_device_free(&dc->disk);
965 list_del(&dc->list); 1004 list_del(&dc->list);
966 1005
@@ -1049,7 +1088,11 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
1049 1088
1050 g = dc->disk.disk; 1089 g = dc->disk.disk;
1051 1090
1052 set_capacity(g, dc->bdev->bd_part->nr_sects - 16); 1091 set_capacity(g, dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
1092
1093 g->queue->backing_dev_info.ra_pages =
1094 max(g->queue->backing_dev_info.ra_pages,
1095 bdev->bd_queue->backing_dev_info.ra_pages);
1053 1096
1054 bch_cached_dev_request_init(dc); 1097 bch_cached_dev_request_init(dc);
1055 1098
@@ -1099,8 +1142,7 @@ static void flash_dev_flush(struct closure *cl)
1099{ 1142{
1100 struct bcache_device *d = container_of(cl, struct bcache_device, cl); 1143 struct bcache_device *d = container_of(cl, struct bcache_device, cl);
1101 1144
1102 sysfs_remove_link(&d->c->kobj, d->name); 1145 bcache_device_unlink(d);
1103 sysfs_remove_link(&d->kobj, "cache");
1104 kobject_del(&d->kobj); 1146 kobject_del(&d->kobj);
1105 continue_at(cl, flash_dev_free, system_wq); 1147 continue_at(cl, flash_dev_free, system_wq);
1106} 1148}
@@ -1802,7 +1844,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
1802 if (err) 1844 if (err)
1803 goto err_close; 1845 goto err_close;
1804 1846
1805 if (sb->version == CACHE_BACKING_DEV) { 1847 if (SB_IS_BDEV(sb)) {
1806 struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); 1848 struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
1807 1849
1808 err = register_bdev(sb, sb_page, bdev, dc); 1850 err = register_bdev(sb, sb_page, bdev, dc);