 drivers/md/dm-cache-policy-mq.c | 543 ++++++++++++++++++----------------------
 1 file changed, 231 insertions(+), 312 deletions(-)
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 444f0bf10b21..782bf854666a 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -26,19 +26,6 @@ static unsigned next_power(unsigned n, unsigned min)
 
 /*----------------------------------------------------------------*/
 
-static unsigned long *alloc_bitset(unsigned nr_entries)
-{
-	size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
-	return vzalloc(s);
-}
-
-static void free_bitset(unsigned long *bits)
-{
-	vfree(bits);
-}
-
-/*----------------------------------------------------------------*/
-
 /*
  * Large, sequential ios are probably better left on the origin device since
  * spindles tend to have good bandwidth.
@@ -233,18 +220,107 @@ struct entry {
 	struct hlist_node hlist;
 	struct list_head list;
 	dm_oblock_t oblock;
-	dm_cblock_t cblock; /* valid iff in_cache */
 
 	/*
 	 * FIXME: pack these better
 	 */
-	bool in_cache:1;
 	bool dirty:1;
 	unsigned hit_count;
 	unsigned generation;
 	unsigned tick;
 };
 
+/*
+ * Rather than storing the cblock in an entry, we allocate all entries in
+ * an array, and infer the cblock from the entry position.
+ *
+ * Free entries are linked together into a list.
+ */
+struct entry_pool {
+	struct entry *entries, *entries_end;
+	struct list_head free;
+	unsigned nr_allocated;
+};
+
+static int epool_init(struct entry_pool *ep, unsigned nr_entries)
+{
+	unsigned i;
+
+	ep->entries = vzalloc(sizeof(struct entry) * nr_entries);
+	if (!ep->entries)
+		return -ENOMEM;
+
+	ep->entries_end = ep->entries + nr_entries;
+
+	INIT_LIST_HEAD(&ep->free);
+	for (i = 0; i < nr_entries; i++)
+		list_add(&ep->entries[i].list, &ep->free);
+
+	ep->nr_allocated = 0;
+
+	return 0;
+}
+
+static void epool_exit(struct entry_pool *ep)
+{
+	vfree(ep->entries);
+}
+
+static struct entry *alloc_entry(struct entry_pool *ep)
+{
+	struct entry *e;
+
+	if (list_empty(&ep->free))
+		return NULL;
+
+	e = list_entry(list_pop(&ep->free), struct entry, list);
+	INIT_LIST_HEAD(&e->list);
+	INIT_HLIST_NODE(&e->hlist);
+	ep->nr_allocated++;
+
+	return e;
+}
+
+/*
+ * This assumes the cblock hasn't already been allocated.
+ */
+static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock)
+{
+	struct entry *e = ep->entries + from_cblock(cblock);
+	list_del(&e->list);
+
+	INIT_LIST_HEAD(&e->list);
+	INIT_HLIST_NODE(&e->hlist);
+	ep->nr_allocated++;
+
+	return e;
+}
+
+static void free_entry(struct entry_pool *ep, struct entry *e)
+{
+	BUG_ON(!ep->nr_allocated);
+	ep->nr_allocated--;
+	INIT_HLIST_NODE(&e->hlist);
+	list_add(&e->list, &ep->free);
+}
+
+static bool epool_empty(struct entry_pool *ep)
+{
+	return list_empty(&ep->free);
+}
+
+static bool in_pool(struct entry_pool *ep, struct entry *e)
+{
+	return e >= ep->entries && e < ep->entries_end;
+}
+
+static dm_cblock_t infer_cblock(struct entry_pool *ep, struct entry *e)
+{
+	return to_cblock(e - ep->entries);
+}
+
+/*----------------------------------------------------------------*/
+
 struct mq_policy {
 	struct dm_cache_policy policy;
 
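
The entry pool above is the heart of the change: a single vzalloc'd array doubles as the cblock namespace, which is what lets the per-entry cblock field, the allocation bitset and the separate entry allocation all go away. A user-space sketch of the same idea, with simplified stand-ins for the kernel's list_head free list and dm_cblock_t (an illustration, not the kernel code):

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct entry {
		struct entry *next_free;	/* stand-in for the embedded list_head */
		unsigned hit_count;
	};

	struct entry_pool {
		struct entry *entries, *entries_end;
		struct entry *free;		/* free entries threaded through the array */
		unsigned nr_allocated;
	};

	static int epool_init(struct entry_pool *ep, unsigned nr_entries)
	{
		ep->entries = calloc(nr_entries, sizeof(struct entry));
		if (!ep->entries)
			return -1;

		ep->entries_end = ep->entries + nr_entries;
		ep->free = NULL;
		for (unsigned i = 0; i < nr_entries; i++) {
			ep->entries[i].next_free = ep->free;
			ep->free = &ep->entries[i];
		}
		ep->nr_allocated = 0;
		return 0;
	}

	static struct entry *ep_alloc(struct entry_pool *ep)
	{
		struct entry *e = ep->free;

		if (!e)
			return NULL;	/* pool exhausted */
		ep->free = e->next_free;
		ep->nr_allocated++;
		return e;
	}

	static void ep_free(struct entry_pool *ep, struct entry *e)
	{
		e->next_free = ep->free;
		ep->free = e;
		ep->nr_allocated--;
	}

	/* The cblock is not stored anywhere: it *is* the entry's array index. */
	static unsigned infer_cblock(struct entry_pool *ep, struct entry *e)
	{
		return (unsigned) (e - ep->entries);
	}

	/* Pool membership is a pointer-range test, replacing the in_cache flag. */
	static bool in_pool(struct entry_pool *ep, struct entry *e)
	{
		return e >= ep->entries && e < ep->entries_end;
	}

	int main(void)
	{
		struct entry_pool ep;

		if (epool_init(&ep, 16))
			return 1;

		struct entry *e = ep_alloc(&ep);
		printf("cblock %u, in pool: %d\n", infer_cblock(&ep, e), in_pool(&ep, e));
		ep_free(&ep, e);
		free(ep.entries);
		return 0;
	}

Allocating a specific slot, as alloc_particular_entry() does when saved mappings are reloaded, is then just indexing the array and unlinking that entry from the free list.
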
@@ -254,6 +330,13 @@ struct mq_policy {
 	struct io_tracker tracker;
 
 	/*
+	 * Entries come from two pools, one of pre-cache entries, and one
+	 * for the cache proper.
+	 */
+	struct entry_pool pre_cache_pool;
+	struct entry_pool cache_pool;
+
+	/*
 	 * We maintain three queues of entries.  The cache proper,
 	 * consisting of a clean and dirty queue, contains the currently
 	 * active mappings.  Whereas the pre_cache tracks blocks that
@@ -300,25 +383,6 @@ struct mq_policy {
 	unsigned promote_threshold;
 
 	/*
-	 * We need cache_size entries for the cache, and choose to have
-	 * cache_size entries for the pre_cache too.  One motivation for
-	 * using the same size is to make the hit counts directly
-	 * comparable between pre_cache and cache.
-	 */
-	unsigned nr_entries;
-	unsigned nr_entries_allocated;
-	struct list_head free;
-
-	/*
-	 * Cache blocks may be unallocated.  We store this info in a
-	 * bitset.
-	 */
-	unsigned long *allocation_bitset;
-	unsigned nr_cblocks_allocated;
-	unsigned find_free_nr_words;
-	unsigned find_free_last_word;
-
-	/*
 	 * The hash table allows us to quickly find an entry by origin
 	 * block.  Both pre_cache and cache entries are in here.
 	 */
@@ -328,50 +392,6 @@ struct mq_policy {
 };
 
 /*----------------------------------------------------------------*/
-/* Free/alloc mq cache entry structures. */
-static void concat_queue(struct list_head *lh, struct queue *q)
-{
-	unsigned level;
-
-	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_splice(q->qs + level, lh);
-}
-
-static void free_entries(struct mq_policy *mq)
-{
-	struct entry *e, *tmp;
-
-	concat_queue(&mq->free, &mq->pre_cache);
-	concat_queue(&mq->free, &mq->cache_clean);
-	concat_queue(&mq->free, &mq->cache_dirty);
-
-	list_for_each_entry_safe(e, tmp, &mq->free, list)
-		kmem_cache_free(mq_entry_cache, e);
-}
-
-static int alloc_entries(struct mq_policy *mq, unsigned elts)
-{
-	unsigned u = mq->nr_entries;
-
-	INIT_LIST_HEAD(&mq->free);
-	mq->nr_entries_allocated = 0;
-
-	while (u--) {
-		struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL);
-
-		if (!e) {
-			free_entries(mq);
-			return -ENOMEM;
-		}
-
-
-		list_add(&e->list, &mq->free);
-	}
-
-	return 0;
-}
-
-/*----------------------------------------------------------------*/
 
 /*
  * Simple hash table implementation.  Should replace with the standard hash
@@ -407,54 +427,9 @@ static void hash_remove(struct entry *e)
 
 /*----------------------------------------------------------------*/
 
-/*
- * Allocates a new entry structure.  The memory is allocated in one lump,
- * so we just handing it out here.  Returns NULL if all entries have
- * already been allocated.  Cannot fail otherwise.
- */
-static struct entry *alloc_entry(struct mq_policy *mq)
-{
-	struct entry *e;
-
-	if (mq->nr_entries_allocated >= mq->nr_entries) {
-		BUG_ON(!list_empty(&mq->free));
-		return NULL;
-	}
-
-	e = list_entry(list_pop(&mq->free), struct entry, list);
-	INIT_LIST_HEAD(&e->list);
-	INIT_HLIST_NODE(&e->hlist);
-
-	mq->nr_entries_allocated++;
-	return e;
-}
-
-/*----------------------------------------------------------------*/
-
-/*
- * Mark cache blocks allocated or not in the bitset.
- */
-static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock)
-{
-	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
-	BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset));
-
-	set_bit(from_cblock(cblock), mq->allocation_bitset);
-	mq->nr_cblocks_allocated++;
-}
-
-static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock)
-{
-	BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
-	BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset));
-
-	clear_bit(from_cblock(cblock), mq->allocation_bitset);
-	mq->nr_cblocks_allocated--;
-}
-
 static bool any_free_cblocks(struct mq_policy *mq)
 {
-	return mq->nr_cblocks_allocated < from_cblock(mq->cache_size);
+	return !epool_empty(&mq->cache_pool);
 }
 
 static bool any_clean_cblocks(struct mq_policy *mq)
@@ -462,48 +437,6 @@ static bool any_clean_cblocks(struct mq_policy *mq)
 	return !queue_empty(&mq->cache_clean);
 }
 
-/*
- * Fills result out with a cache block that isn't in use, or return
- * -ENOSPC.  This does _not_ mark the cblock as allocated, the caller is
- * reponsible for that.
- */
-static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end,
-			      dm_cblock_t *result, unsigned *last_word)
-{
-	int r = -ENOSPC;
-	unsigned w;
-
-	for (w = begin; w < end; w++) {
-		/*
-		 * ffz is undefined if no zero exists
-		 */
-		if (mq->allocation_bitset[w] != ~0UL) {
-			*last_word = w;
-			*result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w]));
-			if (from_cblock(*result) < from_cblock(mq->cache_size))
-				r = 0;
-
-			break;
-		}
-	}
-
-	return r;
-}
-
-static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result)
-{
-	int r;
-
-	if (!any_free_cblocks(mq))
-		return -ENOSPC;
-
-	r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word);
-	if (r == -ENOSPC && mq->find_free_last_word)
-		r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word);
-
-	return r;
-}
-
 /*----------------------------------------------------------------*/
 
 /*
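
For reference, the allocator deleted above scanned the bitset a word at a time, resuming from the word that last yielded a free bit and wrapping around at most once. A condensed user-space sketch of that search (first_zero() is a portable stand-in for the kernel's ffz(), which is undefined when no zero bit exists; the wraparound retry of find_free_cblock() is left to the caller):

	#include <limits.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

	/* Index of the first zero bit; only valid if at least one zero exists. */
	static unsigned first_zero(unsigned long word)
	{
		unsigned i;

		for (i = 0; i < BITS_PER_LONG; i++)
			if (!(word & (1UL << i)))
				return i;
		return 0;	/* unreachable when a zero bit exists */
	}

	/*
	 * Scan words [begin, end) for a zero bit, remembering where we stopped
	 * so the next search can resume there.  Bits past nr_bits are padding
	 * in the final word, so a hit there counts as "no space".
	 */
	static bool scan_for_free(const unsigned long *bitset, unsigned begin,
				  unsigned end, unsigned nr_bits,
				  unsigned *result, unsigned *last_word)
	{
		unsigned w;

		for (w = begin; w < end; w++) {
			if (bitset[w] != ~0UL) {
				unsigned bit = w * (unsigned) BITS_PER_LONG +
					       first_zero(bitset[w]);

				*last_word = w;
				if (bit < nr_bits) {
					*result = bit;
					return true;
				}
				break;
			}
		}
		return false;
	}

	int main(void)
	{
		unsigned long bitset[2] = { ~0UL, 0UL };	/* word 0 full, word 1 empty */
		unsigned result, last_word = 0;

		if (scan_for_free(bitset, 0, 2, 2 * (unsigned) BITS_PER_LONG,
				  &result, &last_word))
			printf("first free bit: %u (found in word %u)\n", result, last_word);
		return 0;
	}

With the entry pool, all of this bookkeeping reduces to list_empty() on the pool's free list.
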
@@ -520,34 +453,35 @@ static unsigned queue_level(struct entry *e)
 	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
 }
 
+static bool in_cache(struct mq_policy *mq, struct entry *e)
+{
+	return in_pool(&mq->cache_pool, e);
+}
+
 /*
  * Inserts the entry into the pre_cache or the cache.  Ensures the cache
- * block is marked as allocated if necc.  Inserts into the hash table.  Sets the
- * tick which records when the entry was last moved about.
+ * block is marked as allocated if necc.  Inserts into the hash table.
+ * Sets the tick which records when the entry was last moved about.
  */
 static void push(struct mq_policy *mq, struct entry *e)
 {
 	e->tick = mq->tick;
 	hash_insert(mq, e);
 
-	if (e->in_cache) {
-		alloc_cblock(mq, e->cblock);
+	if (in_cache(mq, e))
 		queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
 			   queue_level(e), &e->list);
-	} else
+	else
 		queue_push(&mq->pre_cache, queue_level(e), &e->list);
 }
 
 /*
  * Removes an entry from pre_cache or cache.  Removes from the hash table.
- * Frees off the cache block if necc.
  */
 static void del(struct mq_policy *mq, struct entry *e)
 {
 	queue_remove(&e->list);
 	hash_remove(e);
-	if (e->in_cache)
-		free_cblock(mq, e->cblock);
 }
 
 /*
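
push() dispatches on queue_level(), visible in the context at the top of this hunk: an entry is filed into one of NR_QUEUE_LEVELS lists by the log2 of its hit count, so frequently hit blocks sit in the higher levels. A small self-contained illustration (ilog2_u() is a portable stand-in for the kernel's ilog2(); hit counts start at 1, as in the policy):

	#include <stdio.h>

	#define NR_QUEUE_LEVELS 16u

	/* Floor of log2(n), for n >= 1. */
	static unsigned ilog2_u(unsigned n)
	{
		unsigned r = 0;

		while (n >>= 1)
			r++;
		return r;
	}

	/* Mirror of queue_level(): clamp the level to the last queue. */
	static unsigned queue_level(unsigned hit_count)
	{
		unsigned level = ilog2_u(hit_count);

		return level < NR_QUEUE_LEVELS - 1u ? level : NR_QUEUE_LEVELS - 1u;
	}

	int main(void)
	{
		unsigned hits[] = { 1, 2, 7, 100, 1u << 20 };

		for (unsigned i = 0; i < sizeof(hits) / sizeof(hits[0]); i++)
			printf("hit_count %7u -> level %u\n", hits[i], queue_level(hits[i]));
		return 0;
	}
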
@@ -564,8 +498,6 @@ static struct entry *pop(struct mq_policy *mq, struct queue *q)
 
 	e = container_of(h, struct entry, list);
 	hash_remove(e);
-	if (e->in_cache)
-		free_cblock(mq, e->cblock);
 
 	return e;
 }
@@ -599,9 +531,7 @@ static void check_generation(struct mq_policy *mq)
 	struct list_head *head;
 	struct entry *e;
 
-	if ((mq->hit_count >= mq->generation_period) &&
-	    (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) {
-
+	if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) {
 		mq->hit_count = 0;
 		mq->generation++;
 
@@ -668,7 +598,7 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
  * - set the hit count to a hard coded value other than 1, eg, is it better
  *   if it goes in at level 2?
  */
-static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t *cblock)
+static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
 {
 	struct entry *demoted = pop(mq, &mq->cache_clean);
 
@@ -682,12 +612,14 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t
 		 */
 		return -ENOSPC;
 
-	*cblock = demoted->cblock;
 	*oblock = demoted->oblock;
-	demoted->in_cache = false;
-	demoted->dirty = false;
-	demoted->hit_count = 1;
-	push(mq, demoted);
+	free_entry(&mq->cache_pool, demoted);
+
+	/*
+	 * We used to put the demoted block into the pre-cache, but I think
+	 * it's simpler to just let it work it's way up from zero again.
+	 * Stops blocks flickering in and out of the cache.
+	 */
 
 	return 0;
 }
@@ -735,9 +667,9 @@ static int cache_entry_found(struct mq_policy *mq,
 {
 	requeue_and_update_tick(mq, e);
 
-	if (e->in_cache) {
+	if (in_cache(mq, e)) {
 		result->op = POLICY_HIT;
-		result->cblock = e->cblock;
+		result->cblock = infer_cblock(&mq->cache_pool, e);
 	}
 
 	return 0;
@@ -751,11 +683,12 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 			      struct policy_result *result)
 {
 	int r;
-	dm_cblock_t cblock;
+	struct entry *new_e;
 
-	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
+	/* Ensure there's a free cblock in the cache */
+	if (epool_empty(&mq->cache_pool)) {
 		result->op = POLICY_REPLACE;
-		r = demote_cblock(mq, &result->old_oblock, &cblock);
+		r = demote_cblock(mq, &result->old_oblock);
 		if (r) {
 			result->op = POLICY_MISS;
 			return 0;
@@ -763,12 +696,20 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 	} else
 		result->op = POLICY_NEW;
 
-	result->cblock = e->cblock = cblock;
+	new_e = alloc_entry(&mq->cache_pool);
+	BUG_ON(!new_e);
+
+	new_e->oblock = e->oblock;
+	new_e->dirty = false;
+	new_e->hit_count = e->hit_count;
+	new_e->generation = e->generation;
+	new_e->tick = e->tick;
 
 	del(mq, e);
-	e->in_cache = true;
-	e->dirty = false;
-	push(mq, e);
+	free_entry(&mq->pre_cache_pool, e);
+	push(mq, new_e);
+
+	result->cblock = infer_cblock(&mq->cache_pool, new_e);
 
 	return 0;
 }
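
Since a cblock is now an array index, an entry's identity is its slot: it can never be relinked from the pre-cache pool into the cache pool. That is why the hunk above copies the statistics into a freshly allocated cache entry and frees the pre-cache entry, rather than moving it. A toy model of that copy-then-free shape (simplified types; a first-fit flag scan stands in for the real free list):

	#include <stdbool.h>
	#include <stdio.h>

	struct entry {
		unsigned long oblock;
		unsigned hit_count;
		bool dirty;
		bool allocated;
	};

	struct pool {
		struct entry *entries;
		unsigned nr;
	};

	/* Toy allocator: first unallocated slot (the real code pops a free list). */
	static struct entry *pool_alloc(struct pool *p)
	{
		for (unsigned i = 0; i < p->nr; i++)
			if (!p->entries[i].allocated) {
				p->entries[i].allocated = true;
				return &p->entries[i];
			}
		return NULL;
	}

	static void pool_free(struct pool *p, struct entry *e)
	{
		(void) p;
		e->allocated = false;
	}

	/* Copy-then-free, as in pre_cache_to_cache(): the block's statistics
	 * move across; the entry itself cannot. */
	static struct entry *promote(struct pool *pre_cache, struct pool *cache,
				     struct entry *old)
	{
		struct entry *new_e = pool_alloc(cache);

		if (!new_e)
			return NULL;	/* caller must demote something first */

		new_e->oblock = old->oblock;
		new_e->hit_count = old->hit_count;
		new_e->dirty = false;

		pool_free(pre_cache, old);
		return new_e;
	}

	int main(void)
	{
		struct entry pre[4] = {{ 0 }}, cache[4] = {{ 0 }};
		struct pool pre_pool = { pre, 4 }, cache_pool = { cache, 4 };

		pre[0] = (struct entry){ .oblock = 42, .hit_count = 9, .allocated = true };

		struct entry *e = promote(&pre_pool, &cache_pool, &pre[0]);
		printf("oblock %lu promoted to cblock %ld\n", e->oblock, (long) (e - cache));
		return 0;
	}
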
@@ -793,21 +734,10 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
 	return r;
 }
 
-static void insert_entry_in_pre_cache(struct mq_policy *mq,
-				      struct entry *e, dm_oblock_t oblock)
-{
-	e->in_cache = false;
-	e->dirty = false;
-	e->oblock = oblock;
-	e->hit_count = 1;
-	e->generation = mq->generation;
-	push(mq, e);
-}
-
 static void insert_in_pre_cache(struct mq_policy *mq,
 				dm_oblock_t oblock)
 {
-	struct entry *e = alloc_entry(mq);
+	struct entry *e = alloc_entry(&mq->pre_cache_pool);
 
 	if (!e)
 		/*
@@ -821,7 +751,11 @@ static void insert_in_pre_cache(struct mq_policy *mq,
 		return;
 	}
 
-	insert_entry_in_pre_cache(mq, e, oblock);
+	e->dirty = false;
+	e->oblock = oblock;
+	e->hit_count = 1;
+	e->generation = mq->generation;
+	push(mq, e);
 }
 
 static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
@@ -829,10 +763,10 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 {
 	int r;
 	struct entry *e;
-	dm_cblock_t cblock;
 
-	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
-		r = demote_cblock(mq, &result->old_oblock, &cblock);
+	if (epool_empty(&mq->cache_pool)) {
+		result->op = POLICY_REPLACE;
+		r = demote_cblock(mq, &result->old_oblock);
 		if (unlikely(r)) {
 			result->op = POLICY_MISS;
 			insert_in_pre_cache(mq, oblock);
@@ -842,31 +776,21 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 		/*
 		 * This will always succeed, since we've just demoted.
 		 */
-		e = pop(mq, &mq->pre_cache);
-		result->op = POLICY_REPLACE;
+		e = alloc_entry(&mq->cache_pool);
+		BUG_ON(!e);
 
 	} else {
-		e = alloc_entry(mq);
-		if (unlikely(!e))
-			e = pop(mq, &mq->pre_cache);
-
-		if (unlikely(!e)) {
-			result->op = POLICY_MISS;
-			return;
-		}
-
+		e = alloc_entry(&mq->cache_pool);
 		result->op = POLICY_NEW;
 	}
 
 	e->oblock = oblock;
-	e->cblock = cblock;
-	e->in_cache = true;
 	e->dirty = false;
 	e->hit_count = 1;
 	e->generation = mq->generation;
 	push(mq, e);
 
-	result->cblock = e->cblock;
+	result->cblock = infer_cblock(&mq->cache_pool, e);
 }
 
 static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
@@ -897,13 +821,16 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock,
 	int r = 0;
 	struct entry *e = hash_lookup(mq, oblock);
 
-	if (e && e->in_cache)
+	if (e && in_cache(mq, e))
 		r = cache_entry_found(mq, e, result);
+
 	else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
 		result->op = POLICY_MISS;
+
 	else if (e)
 		r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
 					  data_dir, result);
+
 	else
 		r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
 				   data_dir, result);
@@ -930,9 +857,9 @@ static void mq_destroy(struct dm_cache_policy *p)
 {
 	struct mq_policy *mq = to_mq_policy(p);
 
-	free_bitset(mq->allocation_bitset);
 	kfree(mq->table);
-	free_entries(mq);
+	epool_exit(&mq->cache_pool);
+	epool_exit(&mq->pre_cache_pool);
 	kfree(mq);
 }
 
@@ -980,8 +907,8 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
 		return -EWOULDBLOCK;
 
 	e = hash_lookup(mq, oblock);
-	if (e && e->in_cache) {
-		*cblock = e->cblock;
+	if (e && in_cache(mq, e)) {
+		*cblock = infer_cblock(&mq->cache_pool, e);
 		r = 0;
 	} else
 		r = -ENOENT;
@@ -991,38 +918,34 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
 	return r;
 }
 
-/*
- * FIXME: __mq_set_clear_dirty can block due to mutex.
- * Ideally a policy should not block in functions called
- * from the map() function.  Explore using RCU.
- */
-static void __mq_set_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock, bool set)
+static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set)
 {
-	struct mq_policy *mq = to_mq_policy(p);
 	struct entry *e;
 
-	mutex_lock(&mq->lock);
 	e = hash_lookup(mq, oblock);
-	if (!e)
-		DMWARN("__mq_set_clear_dirty called for a block that isn't in the cache");
-	else {
-		BUG_ON(!e->in_cache);
+	BUG_ON(!e || !in_cache(mq, e));
 
-		del(mq, e);
-		e->dirty = set;
-		push(mq, e);
-	}
-	mutex_unlock(&mq->lock);
+	del(mq, e);
+	e->dirty = set;
+	push(mq, e);
 }
 
 static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
-	__mq_set_clear_dirty(p, oblock, true);
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	__mq_set_clear_dirty(mq, oblock, true);
+	mutex_unlock(&mq->lock);
 }
 
 static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
-	__mq_set_clear_dirty(p, oblock, false);
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	__mq_set_clear_dirty(mq, oblock, false);
+	mutex_unlock(&mq->lock);
 }
 
 static int mq_load_mapping(struct dm_cache_policy *p,
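
The dirty-flag rework follows the common kernel idiom: the double-underscore helper assumes the lock is already held, and the exported entry points are thin lock/unlock wrappers, so each call path takes the mutex in exactly one obvious place. The same shape in a user-space sketch, with a pthread mutex standing in for the policy mutex:

	#include <pthread.h>
	#include <stdio.h>

	struct policy {
		pthread_mutex_t lock;
		int dirty;
	};

	/* Caller must hold p->lock. */
	static void __set_clear_dirty(struct policy *p, int set)
	{
		p->dirty = set;
	}

	static void set_dirty(struct policy *p)
	{
		pthread_mutex_lock(&p->lock);
		__set_clear_dirty(p, 1);
		pthread_mutex_unlock(&p->lock);
	}

	static void clear_dirty(struct policy *p)
	{
		pthread_mutex_lock(&p->lock);
		__set_clear_dirty(p, 0);
		pthread_mutex_unlock(&p->lock);
	}

	int main(void)
	{
		struct policy p = { PTHREAD_MUTEX_INITIALIZER, 0 };

		set_dirty(&p);
		printf("dirty: %d\n", p.dirty);
		clear_dirty(&p);
		printf("dirty: %d\n", p.dirty);
		return 0;
	}

The same split is applied below to __remove_mapping()/mq_remove_mapping() and __force_mapping()/mq_force_mapping().
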
@@ -1032,13 +955,8 @@ static int mq_load_mapping(struct dm_cache_policy *p,
 	struct mq_policy *mq = to_mq_policy(p);
 	struct entry *e;
 
-	e = alloc_entry(mq);
-	if (!e)
-		return -ENOMEM;
-
-	e->cblock = cblock;
+	e = alloc_particular_entry(&mq->cache_pool, cblock);
 	e->oblock = oblock;
-	e->in_cache = true;
 	e->dirty = false;	/* this gets corrected in a minute */
 	e->hit_count = hint_valid ? hint : 1;
 	e->generation = mq->generation;
@@ -1047,52 +965,58 @@ static int mq_load_mapping(struct dm_cache_policy *p,
 	return 0;
 }
 
+static int mq_save_hints(struct mq_policy *mq, struct queue *q,
+			 policy_walk_fn fn, void *context)
+{
+	int r;
+	unsigned level;
+	struct entry *e;
+
+	for (level = 0; level < NR_QUEUE_LEVELS; level++)
+		list_for_each_entry(e, q->qs + level, list) {
+			r = fn(context, infer_cblock(&mq->cache_pool, e),
+			       e->oblock, e->hit_count);
+			if (r)
+				return r;
+		}
+
+	return 0;
+}
+
 static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
 			    void *context)
 {
 	struct mq_policy *mq = to_mq_policy(p);
 	int r = 0;
-	struct entry *e;
-	unsigned level;
 
 	mutex_lock(&mq->lock);
 
-	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_for_each_entry(e, &mq->cache_clean.qs[level], list) {
-			r = fn(context, e->cblock, e->oblock, e->hit_count);
-			if (r)
-				goto out;
-		}
-
-	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_for_each_entry(e, &mq->cache_dirty.qs[level], list) {
-			r = fn(context, e->cblock, e->oblock, e->hit_count);
-			if (r)
-				goto out;
-		}
+	r = mq_save_hints(mq, &mq->cache_clean, fn, context);
+	if (!r)
+		r = mq_save_hints(mq, &mq->cache_dirty, fn, context);
 
-out:
 	mutex_unlock(&mq->lock);
 
 	return r;
 }
 
-static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
 {
-	struct mq_policy *mq = to_mq_policy(p);
 	struct entry *e;
 
-	mutex_lock(&mq->lock);
-
 	e = hash_lookup(mq, oblock);
-
-	BUG_ON(!e || !e->in_cache);
+	BUG_ON(!e || !in_cache(mq, e));
 
 	del(mq, e);
-	e->in_cache = false;
-	e->dirty = false;
-	push(mq, e);
+	free_entry(&mq->cache_pool, e);
+}
+
+static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	struct mq_policy *mq = to_mq_policy(p);
 
+	mutex_lock(&mq->lock);
+	__remove_mapping(mq, oblock);
 	mutex_unlock(&mq->lock);
 }
 
@@ -1105,7 +1029,7 @@ static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
 		return -ENODATA;
 
 	*oblock = e->oblock;
-	*cblock = e->cblock;
+	*cblock = infer_cblock(&mq->cache_pool, e);
 	e->dirty = false;
 	push(mq, e);
 
@@ -1125,17 +1049,17 @@ static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
 	return r;
 }
 
-static void force_mapping(struct mq_policy *mq,
-			  dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+static void __force_mapping(struct mq_policy *mq,
+			    dm_oblock_t current_oblock, dm_oblock_t new_oblock)
 {
 	struct entry *e = hash_lookup(mq, current_oblock);
 
-	BUG_ON(!e || !e->in_cache);
-
-	del(mq, e);
-	e->oblock = new_oblock;
-	e->dirty = true;
-	push(mq, e);
+	if (e && in_cache(mq, e)) {
+		del(mq, e);
+		e->oblock = new_oblock;
+		e->dirty = true;
+		push(mq, e);
+	}
 }
 
 static void mq_force_mapping(struct dm_cache_policy *p,
@@ -1144,7 +1068,7 @@ static void mq_force_mapping(struct dm_cache_policy *p,
 	struct mq_policy *mq = to_mq_policy(p);
 
 	mutex_lock(&mq->lock);
-	force_mapping(mq, current_oblock, new_oblock);
+	__force_mapping(mq, current_oblock, new_oblock);
 	mutex_unlock(&mq->lock);
 }
 
@@ -1154,7 +1078,7 @@ static dm_cblock_t mq_residency(struct dm_cache_policy *p)
 	struct mq_policy *mq = to_mq_policy(p);
 
 	mutex_lock(&mq->lock);
-	r = to_cblock(mq->nr_cblocks_allocated);
+	r = to_cblock(mq->cache_pool.nr_allocated);
 	mutex_unlock(&mq->lock);
 
 	return r;
@@ -1227,7 +1151,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 			  sector_t origin_size,
 			  sector_t cache_block_size)
 {
-	int r;
 	struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);
 
 	if (!mq)
@@ -1235,8 +1158,18 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 
 	init_policy_functions(mq);
 	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
-
 	mq->cache_size = cache_size;
+
+	if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) {
+		DMERR("couldn't initialize pool of pre-cache entries");
+		goto bad_pre_cache_init;
+	}
+
+	if (epool_init(&mq->cache_pool, from_cblock(cache_size))) {
+		DMERR("couldn't initialize pool of cache entries");
+		goto bad_cache_init;
+	}
+
 	mq->tick_protected = 0;
 	mq->tick = 0;
 	mq->hit_count = 0;
@@ -1244,8 +1177,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 	mq->promote_threshold = 0;
 	mutex_init(&mq->lock);
 	spin_lock_init(&mq->tick_lock);
-	mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG);
-	mq->find_free_last_word = 0;
 
 	queue_init(&mq->pre_cache);
 	queue_init(&mq->cache_clean);
@@ -1253,31 +1184,19 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 
 	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
 
-	mq->nr_entries = 2 * from_cblock(cache_size);
-	r = alloc_entries(mq, mq->nr_entries);
-	if (r)
-		goto bad_cache_alloc;
-
-	mq->nr_entries_allocated = 0;
-	mq->nr_cblocks_allocated = 0;
-
 	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
 	mq->hash_bits = ffs(mq->nr_buckets) - 1;
 	mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL);
 	if (!mq->table)
 		goto bad_alloc_table;
 
-	mq->allocation_bitset = alloc_bitset(from_cblock(cache_size));
-	if (!mq->allocation_bitset)
-		goto bad_alloc_bitset;
-
 	return &mq->policy;
 
-bad_alloc_bitset:
-	kfree(mq->table);
 bad_alloc_table:
-	free_entries(mq);
-bad_cache_alloc:
+	epool_exit(&mq->cache_pool);
+bad_cache_init:
+	epool_exit(&mq->pre_cache_pool);
+bad_pre_cache_init:
 	kfree(mq);
 
 	return NULL;
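
The constructor's error path keeps the classic goto-unwind ladder: each successful acquisition adds one label, and a failure jumps to the label that releases everything obtained so far, in reverse order. A minimal stand-alone sketch of the pattern (toy heap allocations in place of the entry pools and hash table):

	#include <stdio.h>
	#include <stdlib.h>

	struct policy {
		void *pre_cache_pool;
		void *cache_pool;
		void *table;
	};

	static struct policy *create(size_t n)
	{
		struct policy *p = calloc(1, sizeof(*p));

		if (!p)
			return NULL;

		p->pre_cache_pool = malloc(n);
		if (!p->pre_cache_pool)
			goto bad_pre_cache_init;

		p->cache_pool = malloc(n);
		if (!p->cache_pool)
			goto bad_cache_init;

		p->table = calloc(n, 1);
		if (!p->table)
			goto bad_alloc_table;

		return p;

	bad_alloc_table:	/* unwind in reverse order of acquisition */
		free(p->cache_pool);
	bad_cache_init:
		free(p->pre_cache_pool);
	bad_pre_cache_init:
		free(p);
		return NULL;
	}

	int main(void)
	{
		struct policy *p = create(16);

		if (!p)
			return 1;
		free(p->table);
		free(p->cache_pool);
		free(p->pre_cache_pool);
		free(p);
		return 0;
	}
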
@@ -1287,7 +1206,7 @@ bad_cache_alloc:
 
 static struct dm_cache_policy_type mq_policy_type = {
 	.name = "mq",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
@@ -1295,7 +1214,7 @@ static struct dm_cache_policy_type mq_policy_type = {
 
 static struct dm_cache_policy_type default_policy_type = {
 	.name = "default",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create