Diffstat (limited to 'drivers/md/bcache/alloc.c')
 -rw-r--r--   drivers/md/bcache/alloc.c | 173
 1 file changed, 68 insertions(+), 105 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index c0d37d082443..443d03fbac47 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
 	ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
 	WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);
 
-	if (CACHE_SYNC(&ca->set->sb)) {
-		ca->need_save_prio = max(ca->need_save_prio,
-					 bucket_disk_gen(b));
-		WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
-	}
-
 	return ret;
 }
 
@@ -120,51 +114,45 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
 	mutex_unlock(&c->bucket_lock);
 }
 
-/* Allocation */
+/*
+ * Background allocation thread: scans for buckets to be invalidated,
+ * invalidates them, rewrites prios/gens (marking them as invalidated on disk),
+ * then optionally issues discard commands to the newly free buckets, then puts
+ * them on the various freelists.
+ */
 
 static inline bool can_inc_bucket_gen(struct bucket *b)
 {
-	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX &&
-		bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
+	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
 }
 
-bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
+bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
 {
-	BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
-
-	if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
-		unsigned i;
-
-		for (i = 0; i < RESERVE_NONE; i++)
-			if (!fifo_full(&ca->free[i]))
-				goto add;
+	BUG_ON(!ca->set->gc_mark_valid);
 
-		return false;
-	}
-add:
-	b->prio = 0;
-
-	if (can_inc_bucket_gen(b) &&
-	    fifo_push(&ca->unused, b - ca->buckets)) {
-		atomic_inc(&b->pin);
-		return true;
-	}
-
-	return false;
-}
-
-static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
-{
-	return GC_MARK(b) == GC_MARK_RECLAIMABLE &&
+	return (!GC_MARK(b) ||
+		GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
 	       !atomic_read(&b->pin) &&
 	       can_inc_bucket_gen(b);
 }
 
-static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
+void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
 {
+	lockdep_assert_held(&ca->set->bucket_lock);
+	BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
+
+	if (GC_SECTORS_USED(b))
+		trace_bcache_invalidate(ca, b - ca->buckets);
+
 	bch_inc_gen(ca, b);
 	b->prio = INITIAL_PRIO;
 	atomic_inc(&b->pin);
+}
+
+static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
+{
+	__bch_invalidate_one_bucket(ca, b);
+
 	fifo_push(&ca->free_inc, b - ca->buckets);
 }
 
@@ -195,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
 	ca->heap.used = 0;
 
 	for_each_bucket(b, ca) {
-		/*
-		 * If we fill up the unused list, if we then return before
-		 * adding anything to the free_inc list we'll skip writing
-		 * prios/gens and just go back to allocating from the unused
-		 * list:
-		 */
-		if (fifo_full(&ca->unused))
-			return;
-
-		if (!can_invalidate_bucket(ca, b))
-			continue;
-
-		if (!GC_SECTORS_USED(b) &&
-		    bch_bucket_add_unused(ca, b))
+		if (!bch_can_invalidate_bucket(ca, b))
 			continue;
 
 		if (!heap_full(&ca->heap))
@@ -233,7 +208,7 @@ static void invalidate_buckets_lru(struct cache *ca)
 			return;
 		}
 
-		invalidate_one_bucket(ca, b);
+		bch_invalidate_one_bucket(ca, b);
 	}
 }
 
@@ -249,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)
 
 		b = ca->buckets + ca->fifo_last_bucket++;
 
-		if (can_invalidate_bucket(ca, b))
-			invalidate_one_bucket(ca, b);
+		if (bch_can_invalidate_bucket(ca, b))
+			bch_invalidate_one_bucket(ca, b);
 
 		if (++checked >= ca->sb.nbuckets) {
 			ca->invalidate_needs_gc = 1;
@@ -274,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)
 
 		b = ca->buckets + n;
 
-		if (can_invalidate_bucket(ca, b))
-			invalidate_one_bucket(ca, b);
+		if (bch_can_invalidate_bucket(ca, b))
+			bch_invalidate_one_bucket(ca, b);
 
 		if (++checked >= ca->sb.nbuckets / 2) {
 			ca->invalidate_needs_gc = 1;
@@ -287,8 +262,7 @@ static void invalidate_buckets_random(struct cache *ca)
 
 static void invalidate_buckets(struct cache *ca)
 {
-	if (ca->invalidate_needs_gc)
-		return;
+	BUG_ON(ca->invalidate_needs_gc);
 
 	switch (CACHE_REPLACEMENT(&ca->sb)) {
 	case CACHE_REPLACEMENT_LRU:
@@ -301,8 +275,6 @@ static void invalidate_buckets(struct cache *ca)
 		invalidate_buckets_random(ca);
 		break;
 	}
-
-	trace_bcache_alloc_invalidate(ca);
 }
 
 #define allocator_wait(ca, cond)					\
@@ -350,17 +322,10 @@ static int bch_allocator_thread(void *arg)
 		 * possibly issue discards to them, then we add the bucket to
 		 * the free list:
 		 */
-		while (1) {
+		while (!fifo_empty(&ca->free_inc)) {
 			long bucket;
 
-			if ((!atomic_read(&ca->set->prio_blocked) ||
-			     !CACHE_SYNC(&ca->set->sb)) &&
-			    !fifo_empty(&ca->unused))
-				fifo_pop(&ca->unused, bucket);
-			else if (!fifo_empty(&ca->free_inc))
-				fifo_pop(&ca->free_inc, bucket);
-			else
-				break;
+			fifo_pop(&ca->free_inc, bucket);
 
 			if (ca->discard) {
 				mutex_unlock(&ca->set->bucket_lock);
@@ -371,6 +336,7 @@ static int bch_allocator_thread(void *arg)
 			}
 
 			allocator_wait(ca, bch_allocator_push(ca, bucket));
+			wake_up(&ca->set->btree_cache_wait);
 			wake_up(&ca->set->bucket_wait);
 		}
 
@@ -380,9 +346,9 @@ static int bch_allocator_thread(void *arg)
 		 * them to the free_inc list:
 		 */
 
+retry_invalidate:
 		allocator_wait(ca, ca->set->gc_mark_valid &&
-			       (ca->need_save_prio > 64 ||
-				!ca->invalidate_needs_gc));
+			       !ca->invalidate_needs_gc);
 		invalidate_buckets(ca);
 
 		/*
@@ -390,13 +356,28 @@ static int bch_allocator_thread(void *arg)
 		 * new stuff to them:
 		 */
 		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
-		if (CACHE_SYNC(&ca->set->sb) &&
-		    (!fifo_empty(&ca->free_inc) ||
-		     ca->need_save_prio > 64))
+		if (CACHE_SYNC(&ca->set->sb)) {
+			/*
+			 * This could deadlock if an allocation with a btree
+			 * node locked ever blocked - having the btree node
+			 * locked would block garbage collection, but here we're
+			 * waiting on garbage collection before we invalidate
+			 * and free anything.
+			 *
+			 * But this should be safe since the btree code always
+			 * uses btree_check_reserve() before allocating now, and
+			 * if it fails it blocks without btree nodes locked.
+			 */
+			if (!fifo_full(&ca->free_inc))
+				goto retry_invalidate;
+
 			bch_prio_write(ca);
+		}
 	}
 }
 
+/* Allocation */
+
 long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 {
 	DEFINE_WAIT(w);
@@ -408,8 +389,10 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 	    fifo_pop(&ca->free[reserve], r))
 		goto out;
 
-	if (!wait)
+	if (!wait) {
+		trace_bcache_alloc_fail(ca, reserve);
 		return -1;
+	}
 
 	do {
 		prepare_to_wait(&ca->set->bucket_wait, &w,
@@ -425,6 +408,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 out:
 	wake_up_process(ca->alloc_thread);
 
+	trace_bcache_alloc(ca, reserve);
+
 	if (expensive_debug_checks(ca->set)) {
 		size_t iter;
 		long i;
@@ -438,8 +423,6 @@ out:
 			BUG_ON(i == r);
 		fifo_for_each(i, &ca->free_inc, iter)
 			BUG_ON(i == r);
-		fifo_for_each(i, &ca->unused, iter)
-			BUG_ON(i == r);
 	}
 
 	b = ca->buckets + r;
@@ -461,17 +444,19 @@ out:
 	return r;
 }
 
+void __bch_bucket_free(struct cache *ca, struct bucket *b)
+{
+	SET_GC_MARK(b, 0);
+	SET_GC_SECTORS_USED(b, 0);
+}
+
 void bch_bucket_free(struct cache_set *c, struct bkey *k)
 {
 	unsigned i;
 
-	for (i = 0; i < KEY_PTRS(k); i++) {
-		struct bucket *b = PTR_BUCKET(c, k, i);
-
-		SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
-		SET_GC_SECTORS_USED(b, 0);
-		bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
-	}
+	for (i = 0; i < KEY_PTRS(k); i++)
+		__bch_bucket_free(PTR_CACHE(c, k, i),
+				  PTR_BUCKET(c, k, i));
 }
 
 int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
@@ -709,25 +694,3 @@ int bch_cache_allocator_start(struct cache *ca)
 	ca->alloc_thread = k;
 	return 0;
 }
-
-int bch_cache_allocator_init(struct cache *ca)
-{
-	/*
-	 * Reserve:
-	 * Prio/gen writes first
-	 * Then 8 for btree allocations
-	 * Then half for the moving garbage collector
-	 */
-#if 0
-	ca->watermark[WATERMARK_PRIO] = 0;
-
-	ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
-
-	ca->watermark[WATERMARK_MOVINGGC] = 8 +
-		ca->watermark[WATERMARK_METADATA];
-
-	ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
-		ca->watermark[WATERMARK_MOVINGGC];
-#endif
-	return 0;
-}
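
The comment added at the top of the reworked section describes the allocator's new single pipeline: find buckets that can be invalidated, invalidate them onto free_inc, write prios/gens, optionally discard, then push the buckets onto the freelists. The following is a small standalone userspace sketch of that control flow only, for illustration: every identifier in it (N_BUCKETS, FREE_INC_CAP, can_invalidate(), and so on) is invented for the sketch, and none of the real driver's locking, allocator_wait() blocking, discards, or on-disk prio writes is modelled.

#include <stdbool.h>
#include <stdio.h>

#define N_BUCKETS    16   /* made-up pool size for the sketch */
#define FREE_INC_CAP 4    /* stands in for fifo capacity of ca->free_inc */
#define FREE_CAP     8    /* stands in for the ca->free[] reserves */

static int free_inc[FREE_INC_CAP], free_inc_used;
static int freelist[FREE_CAP], freelist_used;
static bool reclaimable[N_BUCKETS];

/* Roughly plays the role of bch_can_invalidate_bucket(). */
static bool can_invalidate(int b)
{
	return reclaimable[b];
}

/* Roughly plays the role of invalidate_buckets(): queue buckets on free_inc. */
static void invalidate_buckets(void)
{
	for (int b = 0; b < N_BUCKETS && free_inc_used < FREE_INC_CAP; b++)
		if (can_invalidate(b)) {
			reclaimable[b] = false;          /* bch_invalidate_one_bucket() */
			free_inc[free_inc_used++] = b;   /* fifo_push(&ca->free_inc, ...) */
		}
}

int main(void)
{
	for (int b = 0; b < N_BUCKETS; b++)
		reclaimable[b] = (b % 2 == 0);

	for (int pass = 0; pass < 3; pass++) {
		/* First drain free_inc: discard (omitted here) and move to a freelist. */
		while (free_inc_used && freelist_used < FREE_CAP)
			freelist[freelist_used++] = free_inc[--free_inc_used];

		/* Then look for more buckets to invalidate... */
		invalidate_buckets();

		/*
		 * ...and, like the retry_invalidate path in the patch, only
		 * fall through to writing prios once free_inc has been filled.
		 */
		if (free_inc_used == FREE_INC_CAP)
			printf("pass %d: free_inc full, would bch_prio_write()\n", pass);
		else
			printf("pass %d: only %d queued, would goto retry_invalidate\n",
			       pass, free_inc_used);
	}

	printf("freelist now holds %d buckets\n", freelist_used);
	return 0;
}

The sketch makes the intent of the retry_invalidate label visible: rewriting prios/gens is only worth the I/O once free_inc has actually been filled, so when it is not full the thread loops back and keeps invalidating instead of writing prios for a handful of buckets.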