author    Joe Thornber <ejt@redhat.com>    2016-12-15 04:57:31 -0500
committer Mike Snitzer <snitzer@redhat.com>    2017-03-07 13:28:31 -0500
commit    b29d4986d0da1a27cd35917cdb433672f5c95d7f (patch)
tree      a5d94b86cf1eb759bfef5761015135d747e80561 /drivers/md/dm-cache-policy-smq.c
parent    742c8fdc31e820503f9267070311d894978d1349 (diff)
dm cache: significant rework to leverage dm-bio-prison-v2
The cache policy interfaces have been updated to work well with the new bio-prison v2 interface's ability to queue work immediately (promotion, demotion, etc) -- overriding benefit being reduced latency on processing IO through the cache. Previously such work would be left for the DM cache core to queue on various lists and then process in batches later -- this caused a serious delay in latency for IO driven by the cache.

The background tracker code was factored out so that all cache policies can make use of it.

Also, the "cleaner" policy has been removed and is now a variant of the smq policy that simply disallows migrations.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
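For context, here is a minimal caller-side sketch (not part of this patch, which only touches the smq policy) of how the cache core can drain the work a policy queues through the background tracker. get_background_work(), complete_background_work() and struct policy_work are part of the reworked policy interface shown in the diff below; process_background_work() and issue_migration() are hypothetical names standing in for the core's worker and copy step.

#include "dm-cache-policy.h"

/* Hypothetical stand-in for the actual promote/demote/writeback copy step. */
static bool issue_migration(struct policy_work *work)
{
	return true;
}

/* Drain whatever promotions, demotions and writebacks the policy has queued. */
static void process_background_work(struct dm_cache_policy *p, bool idle)
{
	struct policy_work *work;

	/* get_background_work() returns -ENODATA once nothing is queued. */
	while (!p->get_background_work(p, idle, &work)) {
		bool success = issue_migration(work);

		/* Lets the policy requeue the entry, or free the cblock on failure. */
		p->complete_background_work(p, work, success);
	}
}

Because the policy queues this work the moment it is identified (rather than leaving it for the core to batch up later), the IO that triggered it no longer waits behind list processing -- the latency benefit described above.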
Diffstat (limited to 'drivers/md/dm-cache-policy-smq.c')
-rw-r--r--  drivers/md/dm-cache-policy-smq.c  |  821
1 file changed, 461 insertions(+), 360 deletions(-)
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index f19c6930a67c..74436dc2122f 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -4,8 +4,9 @@
4 * This file is released under the GPL. 4 * This file is released under the GPL.
5 */ 5 */
6 6
7#include "dm-cache-policy.h" 7#include "dm-cache-background-tracker.h"
8#include "dm-cache-policy-internal.h" 8#include "dm-cache-policy-internal.h"
9#include "dm-cache-policy.h"
9#include "dm.h" 10#include "dm.h"
10 11
11#include <linux/hash.h> 12#include <linux/hash.h>
@@ -38,10 +39,11 @@ struct entry {
38 unsigned hash_next:28; 39 unsigned hash_next:28;
39 unsigned prev:28; 40 unsigned prev:28;
40 unsigned next:28; 41 unsigned next:28;
41 unsigned level:7; 42 unsigned level:6;
42 bool dirty:1; 43 bool dirty:1;
43 bool allocated:1; 44 bool allocated:1;
44 bool sentinel:1; 45 bool sentinel:1;
46 bool pending_work:1;
45 47
46 dm_oblock_t oblock; 48 dm_oblock_t oblock;
47}; 49};
@@ -279,14 +281,28 @@ static unsigned q_size(struct queue *q)
279 */ 281 */
280static void q_push(struct queue *q, struct entry *e) 282static void q_push(struct queue *q, struct entry *e)
281{ 283{
284 BUG_ON(e->pending_work);
285
282 if (!e->sentinel) 286 if (!e->sentinel)
283 q->nr_elts++; 287 q->nr_elts++;
284 288
285 l_add_tail(q->es, q->qs + e->level, e); 289 l_add_tail(q->es, q->qs + e->level, e);
286} 290}
287 291
292static void q_push_front(struct queue *q, struct entry *e)
293{
294 BUG_ON(e->pending_work);
295
296 if (!e->sentinel)
297 q->nr_elts++;
298
299 l_add_head(q->es, q->qs + e->level, e);
300}
301
288static void q_push_before(struct queue *q, struct entry *old, struct entry *e) 302static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
289{ 303{
304 BUG_ON(e->pending_work);
305
290 if (!e->sentinel) 306 if (!e->sentinel)
291 q->nr_elts++; 307 q->nr_elts++;
292 308
@@ -336,19 +352,6 @@ static struct entry *q_pop(struct queue *q)
336} 352}
337 353
338/* 354/*
339 * Pops an entry from a level that is not past a sentinel.
340 */
341static struct entry *q_pop_old(struct queue *q, unsigned max_level)
342{
343 struct entry *e = q_peek(q, max_level, false);
344
345 if (e)
346 q_del(q, e);
347
348 return e;
349}
350
351/*
352 * This function assumes there is a non-sentinel entry to pop. It's only 355 * This function assumes there is a non-sentinel entry to pop. It's only
353 * used by redistribute, so we know this is true. It also doesn't adjust 356 * used by redistribute, so we know this is true. It also doesn't adjust
354 * the q->nr_elts count. 357 * the q->nr_elts count.
@@ -446,45 +449,49 @@ static void q_redistribute(struct queue *q)
446 break; 449 break;
447 450
448 e->level = level + 1u; 451 e->level = level + 1u;
449 l_add_head(q->es, l_above, e); 452 l_add_tail(q->es, l_above, e);
450 } 453 }
451 } 454 }
452} 455}
453 456
454static void q_requeue_before(struct queue *q, struct entry *dest, struct entry *e, unsigned extra_levels) 457static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels,
458 struct entry *s1, struct entry *s2)
455{ 459{
456 struct entry *de; 460 struct entry *de;
457 unsigned new_level; 461 unsigned sentinels_passed = 0;
458 462 unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels);
459 q_del(q, e);
460 463
464 /* try and find an entry to swap with */
461 if (extra_levels && (e->level < q->nr_levels - 1u)) { 465 if (extra_levels && (e->level < q->nr_levels - 1u)) {
462 new_level = min(q->nr_levels - 1u, e->level + extra_levels); 466 for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de))
463 for (de = l_head(q->es, q->qs + new_level); de; de = l_next(q->es, de)) { 467 sentinels_passed++;
464 if (de->sentinel)
465 continue;
466 468
469 if (de) {
467 q_del(q, de); 470 q_del(q, de);
468 de->level = e->level; 471 de->level = e->level;
472 if (s1) {
473 switch (sentinels_passed) {
474 case 0:
475 q_push_before(q, s1, de);
476 break;
477
478 case 1:
479 q_push_before(q, s2, de);
480 break;
469 481
470 if (dest) 482 default:
471 q_push_before(q, dest, de); 483 q_push(q, de);
472 else 484 }
485 } else
473 q_push(q, de); 486 q_push(q, de);
474 break;
475 } 487 }
476
477 e->level = new_level;
478 } 488 }
479 489
490 q_del(q, e);
491 e->level = new_level;
480 q_push(q, e); 492 q_push(q, e);
481} 493}
482 494
483static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels)
484{
485 q_requeue_before(q, NULL, e, extra_levels);
486}
487
488/*----------------------------------------------------------------*/ 495/*----------------------------------------------------------------*/
489 496
490#define FP_SHIFT 8 497#define FP_SHIFT 8
@@ -550,7 +557,7 @@ static enum performance stats_assess(struct stats *s)
550 557
551/*----------------------------------------------------------------*/ 558/*----------------------------------------------------------------*/
552 559
553struct hash_table { 560struct smq_hash_table {
554 struct entry_space *es; 561 struct entry_space *es;
555 unsigned long long hash_bits; 562 unsigned long long hash_bits;
556 unsigned *buckets; 563 unsigned *buckets;
@@ -560,7 +567,7 @@ struct hash_table {
560 * All cache entries are stored in a chained hash table. To save space we 567 * All cache entries are stored in a chained hash table. To save space we
561 * use indexing again, and only store indexes to the next entry. 568 * use indexing again, and only store indexes to the next entry.
562 */ 569 */
563static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_entries) 570static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries)
564{ 571{
565 unsigned i, nr_buckets; 572 unsigned i, nr_buckets;
566 573
@@ -578,34 +585,34 @@ static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_ent
578 return 0; 585 return 0;
579} 586}
580 587
581static void h_exit(struct hash_table *ht) 588static void h_exit(struct smq_hash_table *ht)
582{ 589{
583 vfree(ht->buckets); 590 vfree(ht->buckets);
584} 591}
585 592
586static struct entry *h_head(struct hash_table *ht, unsigned bucket) 593static struct entry *h_head(struct smq_hash_table *ht, unsigned bucket)
587{ 594{
588 return to_entry(ht->es, ht->buckets[bucket]); 595 return to_entry(ht->es, ht->buckets[bucket]);
589} 596}
590 597
591static struct entry *h_next(struct hash_table *ht, struct entry *e) 598static struct entry *h_next(struct smq_hash_table *ht, struct entry *e)
592{ 599{
593 return to_entry(ht->es, e->hash_next); 600 return to_entry(ht->es, e->hash_next);
594} 601}
595 602
596static void __h_insert(struct hash_table *ht, unsigned bucket, struct entry *e) 603static void __h_insert(struct smq_hash_table *ht, unsigned bucket, struct entry *e)
597{ 604{
598 e->hash_next = ht->buckets[bucket]; 605 e->hash_next = ht->buckets[bucket];
599 ht->buckets[bucket] = to_index(ht->es, e); 606 ht->buckets[bucket] = to_index(ht->es, e);
600} 607}
601 608
602static void h_insert(struct hash_table *ht, struct entry *e) 609static void h_insert(struct smq_hash_table *ht, struct entry *e)
603{ 610{
604 unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits); 611 unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
605 __h_insert(ht, h, e); 612 __h_insert(ht, h, e);
606} 613}
607 614
608static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t oblock, 615static struct entry *__h_lookup(struct smq_hash_table *ht, unsigned h, dm_oblock_t oblock,
609 struct entry **prev) 616 struct entry **prev)
610{ 617{
611 struct entry *e; 618 struct entry *e;
@@ -621,7 +628,7 @@ static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t o
621 return NULL; 628 return NULL;
622} 629}
623 630
624static void __h_unlink(struct hash_table *ht, unsigned h, 631static void __h_unlink(struct smq_hash_table *ht, unsigned h,
625 struct entry *e, struct entry *prev) 632 struct entry *e, struct entry *prev)
626{ 633{
627 if (prev) 634 if (prev)
@@ -633,7 +640,7 @@ static void __h_unlink(struct hash_table *ht, unsigned h,
633/* 640/*
634 * Also moves each entry to the front of the bucket. 641 * Also moves each entry to the front of the bucket.
635 */ 642 */
636static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock) 643static struct entry *h_lookup(struct smq_hash_table *ht, dm_oblock_t oblock)
637{ 644{
638 struct entry *e, *prev; 645 struct entry *e, *prev;
639 unsigned h = hash_64(from_oblock(oblock), ht->hash_bits); 646 unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);
@@ -651,7 +658,7 @@ static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock)
651 return e; 658 return e;
652} 659}
653 660
654static void h_remove(struct hash_table *ht, struct entry *e) 661static void h_remove(struct smq_hash_table *ht, struct entry *e)
655{ 662{
656 unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits); 663 unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
657 struct entry *prev; 664 struct entry *prev;
@@ -699,7 +706,10 @@ static void init_entry(struct entry *e)
699 e->next = INDEXER_NULL; 706 e->next = INDEXER_NULL;
700 e->prev = INDEXER_NULL; 707 e->prev = INDEXER_NULL;
701 e->level = 0u; 708 e->level = 0u;
709 e->dirty = true; /* FIXME: audit */
702 e->allocated = true; 710 e->allocated = true;
711 e->sentinel = false;
712 e->pending_work = false;
703} 713}
704 714
705static struct entry *alloc_entry(struct entry_alloc *ea) 715static struct entry *alloc_entry(struct entry_alloc *ea)
@@ -762,11 +772,11 @@ static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
762#define NR_HOTSPOT_LEVELS 64u 772#define NR_HOTSPOT_LEVELS 64u
763#define NR_CACHE_LEVELS 64u 773#define NR_CACHE_LEVELS 64u
764 774
765#define WRITEBACK_PERIOD (10 * HZ) 775#define WRITEBACK_PERIOD (10ul * HZ)
766#define DEMOTE_PERIOD (60 * HZ) 776#define DEMOTE_PERIOD (60ul * HZ)
767 777
768#define HOTSPOT_UPDATE_PERIOD (HZ) 778#define HOTSPOT_UPDATE_PERIOD (HZ)
769#define CACHE_UPDATE_PERIOD (10u * HZ) 779#define CACHE_UPDATE_PERIOD (60ul * HZ)
770 780
771struct smq_policy { 781struct smq_policy {
772 struct dm_cache_policy policy; 782 struct dm_cache_policy policy;
@@ -814,8 +824,8 @@ struct smq_policy {
814 * The hash tables allows us to quickly find an entry by origin 824 * The hash tables allows us to quickly find an entry by origin
815 * block. 825 * block.
816 */ 826 */
817 struct hash_table table; 827 struct smq_hash_table table;
818 struct hash_table hotspot_table; 828 struct smq_hash_table hotspot_table;
819 829
820 bool current_writeback_sentinels; 830 bool current_writeback_sentinels;
821 unsigned long next_writeback_period; 831 unsigned long next_writeback_period;
@@ -828,6 +838,10 @@ struct smq_policy {
828 838
829 unsigned long next_hotspot_period; 839 unsigned long next_hotspot_period;
830 unsigned long next_cache_period; 840 unsigned long next_cache_period;
841
842 struct background_tracker *bg_work;
843
844 bool migrations_allowed;
831}; 845};
832 846
833/*----------------------------------------------------------------*/ 847/*----------------------------------------------------------------*/
@@ -876,15 +890,15 @@ static void __update_demote_sentinels(struct smq_policy *mq)
876static void update_sentinels(struct smq_policy *mq) 890static void update_sentinels(struct smq_policy *mq)
877{ 891{
878 if (time_after(jiffies, mq->next_writeback_period)) { 892 if (time_after(jiffies, mq->next_writeback_period)) {
879 __update_writeback_sentinels(mq);
880 mq->next_writeback_period = jiffies + WRITEBACK_PERIOD; 893 mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
881 mq->current_writeback_sentinels = !mq->current_writeback_sentinels; 894 mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
895 __update_writeback_sentinels(mq);
882 } 896 }
883 897
884 if (time_after(jiffies, mq->next_demote_period)) { 898 if (time_after(jiffies, mq->next_demote_period)) {
885 __update_demote_sentinels(mq);
886 mq->next_demote_period = jiffies + DEMOTE_PERIOD; 899 mq->next_demote_period = jiffies + DEMOTE_PERIOD;
887 mq->current_demote_sentinels = !mq->current_demote_sentinels; 900 mq->current_demote_sentinels = !mq->current_demote_sentinels;
901 __update_demote_sentinels(mq);
888 } 902 }
889} 903}
890 904
@@ -920,55 +934,40 @@ static void sentinels_init(struct smq_policy *mq)
920 934
921/*----------------------------------------------------------------*/ 935/*----------------------------------------------------------------*/
922 936
923/* 937static void del_queue(struct smq_policy *mq, struct entry *e)
924 * These methods tie together the dirty queue, clean queue and hash table.
925 */
926static void push_new(struct smq_policy *mq, struct entry *e)
927{ 938{
928 struct queue *q = e->dirty ? &mq->dirty : &mq->clean; 939 q_del(e->dirty ? &mq->dirty : &mq->clean, e);
929 h_insert(&mq->table, e);
930 q_push(q, e);
931} 940}
932 941
933static void push(struct smq_policy *mq, struct entry *e) 942static void push_queue(struct smq_policy *mq, struct entry *e)
934{ 943{
935 struct entry *sentinel; 944 if (e->dirty)
936 945 q_push(&mq->dirty, e);
937 h_insert(&mq->table, e); 946 else
938 947 q_push(&mq->clean, e);
939 /*
940 * Punch this into the queue just in front of the sentinel, to
941 * ensure it's cleaned straight away.
942 */
943 if (e->dirty) {
944 sentinel = writeback_sentinel(mq, e->level);
945 q_push_before(&mq->dirty, sentinel, e);
946 } else {
947 sentinel = demote_sentinel(mq, e->level);
948 q_push_before(&mq->clean, sentinel, e);
949 }
950} 948}
951 949
952/* 950// !h, !q, a -> h, q, a
953 * Removes an entry from cache. Removes from the hash table. 951static void push(struct smq_policy *mq, struct entry *e)
954 */
955static void __del(struct smq_policy *mq, struct queue *q, struct entry *e)
956{ 952{
957 q_del(q, e); 953 h_insert(&mq->table, e);
958 h_remove(&mq->table, e); 954 if (!e->pending_work)
955 push_queue(mq, e);
959} 956}
960 957
961static void del(struct smq_policy *mq, struct entry *e) 958static void push_queue_front(struct smq_policy *mq, struct entry *e)
962{ 959{
963 __del(mq, e->dirty ? &mq->dirty : &mq->clean, e); 960 if (e->dirty)
961 q_push_front(&mq->dirty, e);
962 else
963 q_push_front(&mq->clean, e);
964} 964}
965 965
966static struct entry *pop_old(struct smq_policy *mq, struct queue *q, unsigned max_level) 966static void push_front(struct smq_policy *mq, struct entry *e)
967{ 967{
968 struct entry *e = q_pop_old(q, max_level); 968 h_insert(&mq->table, e);
969 if (e) 969 if (!e->pending_work)
970 h_remove(&mq->table, e); 970 push_queue_front(mq, e);
971 return e;
972} 971}
973 972
974static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e) 973static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
@@ -978,16 +977,21 @@ static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
978 977
979static void requeue(struct smq_policy *mq, struct entry *e) 978static void requeue(struct smq_policy *mq, struct entry *e)
980{ 979{
981 struct entry *sentinel; 980 /*
981 * Pending work has temporarily been taken out of the queues.
982 */
983 if (e->pending_work)
984 return;
982 985
983 if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) { 986 if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
984 if (e->dirty) { 987 if (!e->dirty) {
985 sentinel = writeback_sentinel(mq, e->level); 988 q_requeue(&mq->clean, e, 1u, NULL, NULL);
986 q_requeue_before(&mq->dirty, sentinel, e, 1u); 989 return;
987 } else {
988 sentinel = demote_sentinel(mq, e->level);
989 q_requeue_before(&mq->clean, sentinel, e, 1u);
990 } 990 }
991
992 q_requeue(&mq->dirty, e, 1u,
993 get_sentinel(&mq->writeback_sentinel_alloc, e->level, !mq->current_writeback_sentinels),
994 get_sentinel(&mq->writeback_sentinel_alloc, e->level, mq->current_writeback_sentinels));
991 } 995 }
992} 996}
993 997
@@ -1026,6 +1030,8 @@ static void update_promote_levels(struct smq_policy *mq)
1026 unsigned threshold_level = allocator_empty(&mq->cache_alloc) ? 1030 unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
1027 default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u); 1031 default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);
1028 1032
1033 threshold_level = max(threshold_level, NR_HOTSPOT_LEVELS);
1034
1029 /* 1035 /*
1030 * If the hotspot queue is performing badly then we have little 1036 * If the hotspot queue is performing badly then we have little
1031 * confidence that we know which blocks to promote. So we cut down 1037 * confidence that we know which blocks to promote. So we cut down
@@ -1045,7 +1051,7 @@ static void update_promote_levels(struct smq_policy *mq)
1045 } 1051 }
1046 1052
1047 mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level; 1053 mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
1048 mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level) + 2u; 1054 mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level);
1049} 1055}
1050 1056
1051/* 1057/*
@@ -1095,34 +1101,142 @@ static void end_cache_period(struct smq_policy *mq)
1095 } 1101 }
1096} 1102}
1097 1103
1098static int demote_cblock(struct smq_policy *mq, 1104/*----------------------------------------------------------------*/
1099 struct policy_locker *locker, 1105
1100 dm_oblock_t *oblock) 1106/*
1107 * Targets are given as a percentage.
1108 */
1109#define CLEAN_TARGET 25u
1110#define FREE_TARGET 25u
1111
1112static unsigned percent_to_target(struct smq_policy *mq, unsigned p)
1101{ 1113{
1102 struct entry *demoted = q_peek(&mq->clean, mq->clean.nr_levels, false); 1114 return from_cblock(mq->cache_size) * p / 100u;
1103 if (!demoted) 1115}
1104 /* 1116
1105 * We could get a block from mq->dirty, but that 1117static bool clean_target_met(struct smq_policy *mq, bool idle)
1106 * would add extra latency to the triggering bio as it 1118{
1107 * waits for the writeback. Better to not promote this 1119 /*
1108 * time and hope there's a clean block next time this block 1120 * Cache entries may not be populated. So we cannot rely on the
1109 * is hit. 1121 * size of the clean queue.
1110 */ 1122 */
1111 return -ENOSPC; 1123 unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
1112 1124
1113 if (locker->fn(locker, demoted->oblock)) 1125 if (idle)
1114 /* 1126 /*
1115 * We couldn't lock this block. 1127 * We'd like to clean everything.
1116 */ 1128 */
1117 return -EBUSY; 1129 return q_size(&mq->dirty) == 0u;
1130 else
1131 return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
1132 percent_to_target(mq, CLEAN_TARGET);
1133}
1118 1134
1119 del(mq, demoted); 1135static bool free_target_met(struct smq_policy *mq, bool idle)
1120 *oblock = demoted->oblock; 1136{
1121 free_entry(&mq->cache_alloc, demoted); 1137 unsigned nr_free = from_cblock(mq->cache_size) -
1138 mq->cache_alloc.nr_allocated;
1122 1139
1123 return 0; 1140 if (idle)
1141 return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
1142 percent_to_target(mq, FREE_TARGET);
1143 else
1144 return true;
1124} 1145}
1125 1146
1147/*----------------------------------------------------------------*/
1148
1149static void mark_pending(struct smq_policy *mq, struct entry *e)
1150{
1151 BUG_ON(e->sentinel);
1152 BUG_ON(!e->allocated);
1153 BUG_ON(e->pending_work);
1154 e->pending_work = true;
1155}
1156
1157static void clear_pending(struct smq_policy *mq, struct entry *e)
1158{
1159 BUG_ON(!e->pending_work);
1160 e->pending_work = false;
1161}
1162
1163static void queue_writeback(struct smq_policy *mq)
1164{
1165 int r;
1166 struct policy_work work;
1167 struct entry *e;
1168
1169 e = q_peek(&mq->dirty, mq->dirty.nr_levels, false);
1170 if (e) {
1171 mark_pending(mq, e);
1172 q_del(&mq->dirty, e);
1173
1174 work.op = POLICY_WRITEBACK;
1175 work.oblock = e->oblock;
1176 work.cblock = infer_cblock(mq, e);
1177
1178 r = btracker_queue(mq->bg_work, &work, NULL);
1179 WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race.
1180 }
1181}
1182
1183static void queue_demotion(struct smq_policy *mq)
1184{
1185 struct policy_work work;
1186 struct entry *e;
1187
1188 if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
1189 return;
1190
1191 e = q_peek(&mq->clean, mq->clean.nr_levels, true);
1192 if (!e) {
1193 if (!clean_target_met(mq, false))
1194 queue_writeback(mq);
1195 return;
1196 }
1197
1198 mark_pending(mq, e);
1199 q_del(&mq->clean, e);
1200
1201 work.op = POLICY_DEMOTE;
1202 work.oblock = e->oblock;
1203 work.cblock = infer_cblock(mq, e);
1204 btracker_queue(mq->bg_work, &work, NULL);
1205}
1206
1207static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
1208 struct policy_work **workp)
1209{
1210 struct entry *e;
1211 struct policy_work work;
1212
1213 if (!mq->migrations_allowed)
1214 return;
1215
1216 if (allocator_empty(&mq->cache_alloc)) {
1217 if (!free_target_met(mq, false))
1218 queue_demotion(mq);
1219 return;
1220 }
1221
1222 if (btracker_promotion_already_present(mq->bg_work, oblock))
1223 return;
1224
1225 /*
1226 * We allocate the entry now to reserve the cblock. If the
1227 * background work is aborted we must remember to free it.
1228 */
1229 e = alloc_entry(&mq->cache_alloc);
1230 BUG_ON(!e);
1231 e->pending_work = true;
1232 work.op = POLICY_PROMOTE;
1233 work.oblock = oblock;
1234 work.cblock = infer_cblock(mq, e);
1235 btracker_queue(mq->bg_work, &work, workp);
1236}
1237
1238/*----------------------------------------------------------------*/
1239
1126enum promote_result { 1240enum promote_result {
1127 PROMOTE_NOT, 1241 PROMOTE_NOT,
1128 PROMOTE_TEMPORARY, 1242 PROMOTE_TEMPORARY,
@@ -1137,49 +1251,18 @@ static enum promote_result maybe_promote(bool promote)
1137 return promote ? PROMOTE_PERMANENT : PROMOTE_NOT; 1251 return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
1138} 1252}
1139 1253
1140static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, struct bio *bio, 1254static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e,
1141 bool fast_promote) 1255 int data_dir, bool fast_promote)
1142{ 1256{
1143 if (bio_data_dir(bio) == WRITE) { 1257 if (data_dir == WRITE) {
1144 if (!allocator_empty(&mq->cache_alloc) && fast_promote) 1258 if (!allocator_empty(&mq->cache_alloc) && fast_promote)
1145 return PROMOTE_TEMPORARY; 1259 return PROMOTE_TEMPORARY;
1146 1260
1147 else 1261 return maybe_promote(hs_e->level >= mq->write_promote_level);
1148 return maybe_promote(hs_e->level >= mq->write_promote_level);
1149 } else 1262 } else
1150 return maybe_promote(hs_e->level >= mq->read_promote_level); 1263 return maybe_promote(hs_e->level >= mq->read_promote_level);
1151} 1264}
1152 1265
1153static void insert_in_cache(struct smq_policy *mq, dm_oblock_t oblock,
1154 struct policy_locker *locker,
1155 struct policy_result *result, enum promote_result pr)
1156{
1157 int r;
1158 struct entry *e;
1159
1160 if (allocator_empty(&mq->cache_alloc)) {
1161 result->op = POLICY_REPLACE;
1162 r = demote_cblock(mq, locker, &result->old_oblock);
1163 if (r) {
1164 result->op = POLICY_MISS;
1165 return;
1166 }
1167
1168 } else
1169 result->op = POLICY_NEW;
1170
1171 e = alloc_entry(&mq->cache_alloc);
1172 BUG_ON(!e);
1173 e->oblock = oblock;
1174
1175 if (pr == PROMOTE_TEMPORARY)
1176 push(mq, e);
1177 else
1178 push_new(mq, e);
1179
1180 result->cblock = infer_cblock(mq, e);
1181}
1182
1183static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b) 1266static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
1184{ 1267{
1185 sector_t r = from_oblock(b); 1268 sector_t r = from_oblock(b);
@@ -1187,7 +1270,7 @@ static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
1187 return to_oblock(r); 1270 return to_oblock(r);
1188} 1271}
1189 1272
1190static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, struct bio *bio) 1273static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b)
1191{ 1274{
1192 unsigned hi; 1275 unsigned hi;
1193 dm_oblock_t hb = to_hblock(mq, b); 1276 dm_oblock_t hb = to_hblock(mq, b);
@@ -1199,7 +1282,8 @@ static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b,
1199 hi = get_index(&mq->hotspot_alloc, e); 1282 hi = get_index(&mq->hotspot_alloc, e);
1200 q_requeue(&mq->hotspot, e, 1283 q_requeue(&mq->hotspot, e,
1201 test_and_set_bit(hi, mq->hotspot_hit_bits) ? 1284 test_and_set_bit(hi, mq->hotspot_hit_bits) ?
1202 0u : mq->hotspot_level_jump); 1285 0u : mq->hotspot_level_jump,
1286 NULL, NULL);
1203 1287
1204 } else { 1288 } else {
1205 stats_miss(&mq->hotspot_stats); 1289 stats_miss(&mq->hotspot_stats);
@@ -1225,47 +1309,6 @@ static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b,
1225 return e; 1309 return e;
1226} 1310}
1227 1311
1228/*
1229 * Looks the oblock up in the hash table, then decides whether to put in
1230 * pre_cache, or cache etc.
1231 */
1232static int map(struct smq_policy *mq, struct bio *bio, dm_oblock_t oblock,
1233 bool can_migrate, bool fast_promote,
1234 struct policy_locker *locker, struct policy_result *result)
1235{
1236 struct entry *e, *hs_e;
1237 enum promote_result pr;
1238
1239 hs_e = update_hotspot_queue(mq, oblock, bio);
1240
1241 e = h_lookup(&mq->table, oblock);
1242 if (e) {
1243 stats_level_accessed(&mq->cache_stats, e->level);
1244
1245 requeue(mq, e);
1246 result->op = POLICY_HIT;
1247 result->cblock = infer_cblock(mq, e);
1248
1249 } else {
1250 stats_miss(&mq->cache_stats);
1251
1252 pr = should_promote(mq, hs_e, bio, fast_promote);
1253 if (pr == PROMOTE_NOT)
1254 result->op = POLICY_MISS;
1255
1256 else {
1257 if (!can_migrate) {
1258 result->op = POLICY_MISS;
1259 return -EWOULDBLOCK;
1260 }
1261
1262 insert_in_cache(mq, oblock, locker, result, pr);
1263 }
1264 }
1265
1266 return 0;
1267}
1268
1269/*----------------------------------------------------------------*/ 1312/*----------------------------------------------------------------*/
1270 1313
1271/* 1314/*
@@ -1282,6 +1325,7 @@ static void smq_destroy(struct dm_cache_policy *p)
1282{ 1325{
1283 struct smq_policy *mq = to_smq_policy(p); 1326 struct smq_policy *mq = to_smq_policy(p);
1284 1327
1328 btracker_destroy(mq->bg_work);
1285 h_exit(&mq->hotspot_table); 1329 h_exit(&mq->hotspot_table);
1286 h_exit(&mq->table); 1330 h_exit(&mq->table);
1287 free_bitset(mq->hotspot_hit_bits); 1331 free_bitset(mq->hotspot_hit_bits);
@@ -1290,234 +1334,247 @@ static void smq_destroy(struct dm_cache_policy *p)
1290 kfree(mq); 1334 kfree(mq);
1291} 1335}
1292 1336
1293static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock, 1337/*----------------------------------------------------------------*/
1294 bool can_block, bool can_migrate, bool fast_promote,
1295 struct bio *bio, struct policy_locker *locker,
1296 struct policy_result *result)
1297{
1298 int r;
1299 unsigned long flags;
1300 struct smq_policy *mq = to_smq_policy(p);
1301
1302 result->op = POLICY_MISS;
1303
1304 spin_lock_irqsave(&mq->lock, flags);
1305 r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
1306 spin_unlock_irqrestore(&mq->lock, flags);
1307
1308 return r;
1309}
1310 1338
1311static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) 1339static int __lookup(struct smq_policy *mq, dm_oblock_t oblock, dm_cblock_t *cblock,
1340 int data_dir, bool fast_copy,
1341 struct policy_work **work, bool *background_work)
1312{ 1342{
1313 int r; 1343 struct entry *e, *hs_e;
1314 unsigned long flags; 1344 enum promote_result pr;
1315 struct smq_policy *mq = to_smq_policy(p); 1345
1316 struct entry *e; 1346 *background_work = false;
1317 1347
1318 spin_lock_irqsave(&mq->lock, flags);
1319 e = h_lookup(&mq->table, oblock); 1348 e = h_lookup(&mq->table, oblock);
1320 if (e) { 1349 if (e) {
1350 stats_level_accessed(&mq->cache_stats, e->level);
1351
1352 requeue(mq, e);
1321 *cblock = infer_cblock(mq, e); 1353 *cblock = infer_cblock(mq, e);
1322 r = 0; 1354 return 0;
1323 } else
1324 r = -ENOENT;
1325 spin_unlock_irqrestore(&mq->lock, flags);
1326 1355
1327 return r; 1356 } else {
1328} 1357 stats_miss(&mq->cache_stats);
1329 1358
1330static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, bool set) 1359 /*
1331{ 1360 * The hotspot queue only gets updated with misses.
1332 struct entry *e; 1361 */
1362 hs_e = update_hotspot_queue(mq, oblock);
1333 1363
1334 e = h_lookup(&mq->table, oblock); 1364 pr = should_promote(mq, hs_e, data_dir, fast_copy);
1335 BUG_ON(!e); 1365 if (pr != PROMOTE_NOT) {
1366 queue_promotion(mq, oblock, work);
1367 *background_work = true;
1368 }
1336 1369
1337 del(mq, e); 1370 return -ENOENT;
1338 e->dirty = set; 1371 }
1339 push(mq, e);
1340} 1372}
1341 1373
1342static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) 1374static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
1375 int data_dir, bool fast_copy,
1376 bool *background_work)
1343{ 1377{
1378 int r;
1344 unsigned long flags; 1379 unsigned long flags;
1345 struct smq_policy *mq = to_smq_policy(p); 1380 struct smq_policy *mq = to_smq_policy(p);
1346 1381
1347 spin_lock_irqsave(&mq->lock, flags); 1382 spin_lock_irqsave(&mq->lock, flags);
1348 __smq_set_clear_dirty(mq, oblock, true); 1383 r = __lookup(mq, oblock, cblock,
1384 data_dir, fast_copy,
1385 NULL, background_work);
1349 spin_unlock_irqrestore(&mq->lock, flags); 1386 spin_unlock_irqrestore(&mq->lock, flags);
1387
1388 return r;
1350} 1389}
1351 1390
1352static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) 1391static int smq_lookup_with_work(struct dm_cache_policy *p,
1392 dm_oblock_t oblock, dm_cblock_t *cblock,
1393 int data_dir, bool fast_copy,
1394 struct policy_work **work)
1353{ 1395{
1354 struct smq_policy *mq = to_smq_policy(p); 1396 int r;
1397 bool background_queued;
1355 unsigned long flags; 1398 unsigned long flags;
1399 struct smq_policy *mq = to_smq_policy(p);
1356 1400
1357 spin_lock_irqsave(&mq->lock, flags); 1401 spin_lock_irqsave(&mq->lock, flags);
1358 __smq_set_clear_dirty(mq, oblock, false); 1402 r = __lookup(mq, oblock, cblock, data_dir, fast_copy, work, &background_queued);
1359 spin_unlock_irqrestore(&mq->lock, flags); 1403 spin_unlock_irqrestore(&mq->lock, flags);
1360}
1361 1404
1362static unsigned random_level(dm_cblock_t cblock) 1405 return r;
1363{
1364 return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
1365} 1406}
1366 1407
1367static int smq_load_mapping(struct dm_cache_policy *p, 1408static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
1368 dm_oblock_t oblock, dm_cblock_t cblock, 1409 struct policy_work **result)
1369 uint32_t hint, bool hint_valid)
1370{ 1410{
1411 int r;
1412 unsigned long flags;
1371 struct smq_policy *mq = to_smq_policy(p); 1413 struct smq_policy *mq = to_smq_policy(p);
1372 struct entry *e;
1373 1414
1374 e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock)); 1415 spin_lock_irqsave(&mq->lock, flags);
1375 e->oblock = oblock; 1416 r = btracker_issue(mq->bg_work, result);
1376 e->dirty = false; /* this gets corrected in a minute */ 1417 if (r == -ENODATA) {
1377 e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock); 1418 /* find some writeback work to do */
1378 push(mq, e); 1419 if (mq->migrations_allowed && !free_target_met(mq, idle))
1379 1420 queue_demotion(mq);
1380 return 0;
1381}
1382 1421
1383static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock) 1422 else if (!clean_target_met(mq, idle))
1384{ 1423 queue_writeback(mq);
1385 struct smq_policy *mq = to_smq_policy(p);
1386 struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
1387 1424
1388 if (!e->allocated) 1425 r = btracker_issue(mq->bg_work, result);
1389 return 0; 1426 }
1427 spin_unlock_irqrestore(&mq->lock, flags);
1390 1428
1391 return e->level; 1429 return r;
1392} 1430}
1393 1431
1394static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock) 1432/*
1395{ 1433 * We need to clear any pending work flags that have been set, and in the
1396 struct entry *e; 1434 * case of promotion free the entry for the destination cblock.
1435 */
1436static void __complete_background_work(struct smq_policy *mq,
1437 struct policy_work *work,
1438 bool success)
1439{
1440 struct entry *e = get_entry(&mq->cache_alloc,
1441 from_cblock(work->cblock));
1442
1443 switch (work->op) {
1444 case POLICY_PROMOTE:
1445 // !h, !q, a
1446 clear_pending(mq, e);
1447 if (success) {
1448 e->oblock = work->oblock;
1449 push(mq, e);
1450 // h, q, a
1451 } else {
1452 free_entry(&mq->cache_alloc, e);
1453 // !h, !q, !a
1454 }
1455 break;
1397 1456
1398 e = h_lookup(&mq->table, oblock); 1457 case POLICY_DEMOTE:
1399 BUG_ON(!e); 1458 // h, !q, a
1459 if (success) {
1460 h_remove(&mq->table, e);
1461 free_entry(&mq->cache_alloc, e);
1462 // !h, !q, !a
1463 } else {
1464 clear_pending(mq, e);
1465 push_queue(mq, e);
1466 // h, q, a
1467 }
1468 break;
1400 1469
1401 del(mq, e); 1470 case POLICY_WRITEBACK:
1402 free_entry(&mq->cache_alloc, e); 1471 // h, !q, a
1472 clear_pending(mq, e);
1473 push_queue(mq, e);
1474 // h, q, a
1475 break;
1476 }
1477
1478 btracker_complete(mq->bg_work, work);
1403} 1479}
1404 1480
1405static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) 1481static void smq_complete_background_work(struct dm_cache_policy *p,
1482 struct policy_work *work,
1483 bool success)
1406{ 1484{
1407 struct smq_policy *mq = to_smq_policy(p);
1408 unsigned long flags; 1485 unsigned long flags;
1486 struct smq_policy *mq = to_smq_policy(p);
1409 1487
1410 spin_lock_irqsave(&mq->lock, flags); 1488 spin_lock_irqsave(&mq->lock, flags);
1411 __remove_mapping(mq, oblock); 1489 __complete_background_work(mq, work, success);
1412 spin_unlock_irqrestore(&mq->lock, flags); 1490 spin_unlock_irqrestore(&mq->lock, flags);
1413} 1491}
1414 1492
1415static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock) 1493// in_hash(oblock) -> in_hash(oblock)
1494static void __smq_set_clear_dirty(struct smq_policy *mq, dm_cblock_t cblock, bool set)
1416{ 1495{
1417 struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock)); 1496 struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
1418 1497
1419 if (!e || !e->allocated) 1498 if (e->pending_work)
1420 return -ENODATA; 1499 e->dirty = set;
1421 1500 else {
1422 del(mq, e); 1501 del_queue(mq, e);
1423 free_entry(&mq->cache_alloc, e); 1502 e->dirty = set;
1424 1503 push_queue(mq, e);
1425 return 0; 1504 }
1426} 1505}
1427 1506
1428static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock) 1507static void smq_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
1429{ 1508{
1430 int r;
1431 unsigned long flags; 1509 unsigned long flags;
1432 struct smq_policy *mq = to_smq_policy(p); 1510 struct smq_policy *mq = to_smq_policy(p);
1433 1511
1434 spin_lock_irqsave(&mq->lock, flags); 1512 spin_lock_irqsave(&mq->lock, flags);
1435 r = __remove_cblock(mq, cblock); 1513 __smq_set_clear_dirty(mq, cblock, true);
1436 spin_unlock_irqrestore(&mq->lock, flags); 1514 spin_unlock_irqrestore(&mq->lock, flags);
1437
1438 return r;
1439} 1515}
1440 1516
1441 1517static void smq_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
1442#define CLEAN_TARGET_CRITICAL 5u /* percent */
1443
1444static bool clean_target_met(struct smq_policy *mq, bool critical)
1445{ 1518{
1446 if (critical) { 1519 struct smq_policy *mq = to_smq_policy(p);
1447 /* 1520 unsigned long flags;
1448 * Cache entries may not be populated. So we're cannot rely on the
1449 * size of the clean queue.
1450 */
1451 unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
1452 unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_CRITICAL / 100u;
1453 1521
1454 return nr_clean >= target; 1522 spin_lock_irqsave(&mq->lock, flags);
1455 } else 1523 __smq_set_clear_dirty(mq, cblock, false);
1456 return !q_size(&mq->dirty); 1524 spin_unlock_irqrestore(&mq->lock, flags);
1457} 1525}
1458 1526
1459static int __smq_writeback_work(struct smq_policy *mq, dm_oblock_t *oblock, 1527static unsigned random_level(dm_cblock_t cblock)
1460 dm_cblock_t *cblock, bool critical_only)
1461{ 1528{
1462 struct entry *e = NULL; 1529 return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
1463 bool target_met = clean_target_met(mq, critical_only); 1530}
1464
1465 if (critical_only)
1466 /*
1467 * Always try and keep the bottom level clean.
1468 */
1469 e = pop_old(mq, &mq->dirty, target_met ? 1u : mq->dirty.nr_levels);
1470 1531
1471 else 1532static int smq_load_mapping(struct dm_cache_policy *p,
1472 e = pop_old(mq, &mq->dirty, mq->dirty.nr_levels); 1533 dm_oblock_t oblock, dm_cblock_t cblock,
1534 bool dirty, uint32_t hint, bool hint_valid)
1535{
1536 struct smq_policy *mq = to_smq_policy(p);
1537 struct entry *e;
1473 1538
1474 if (!e) 1539 e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
1475 return -ENODATA; 1540 e->oblock = oblock;
1541 e->dirty = dirty;
1542 e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
1543 e->pending_work = false;
1476 1544
1477 *oblock = e->oblock; 1545 /*
1478 *cblock = infer_cblock(mq, e); 1546 * When we load mappings we push ahead of both sentinels in order to
1479 e->dirty = false; 1547 * allow demotions and cleaning to occur immediately.
1480 push_new(mq, e); 1548 */
1549 push_front(mq, e);
1481 1550
1482 return 0; 1551 return 0;
1483} 1552}
1484 1553
1485static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock, 1554static int smq_invalidate_mapping(struct dm_cache_policy *p, dm_cblock_t cblock)
1486 dm_cblock_t *cblock, bool critical_only)
1487{ 1555{
1488 int r;
1489 unsigned long flags;
1490 struct smq_policy *mq = to_smq_policy(p); 1556 struct smq_policy *mq = to_smq_policy(p);
1557 struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
1491 1558
1492 spin_lock_irqsave(&mq->lock, flags); 1559 if (!e->allocated)
1493 r = __smq_writeback_work(mq, oblock, cblock, critical_only); 1560 return -ENODATA;
1494 spin_unlock_irqrestore(&mq->lock, flags);
1495
1496 return r;
1497}
1498
1499static void __force_mapping(struct smq_policy *mq,
1500 dm_oblock_t current_oblock, dm_oblock_t new_oblock)
1501{
1502 struct entry *e = h_lookup(&mq->table, current_oblock);
1503 1561
1504 if (e) { 1562 // FIXME: what if this block has pending background work?
1505 del(mq, e); 1563 del_queue(mq, e);
1506 e->oblock = new_oblock; 1564 h_remove(&mq->table, e);
1507 e->dirty = true; 1565 free_entry(&mq->cache_alloc, e);
1508 push(mq, e); 1566 return 0;
1509 }
1510} 1567}
1511 1568
1512static void smq_force_mapping(struct dm_cache_policy *p, 1569static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
1513 dm_oblock_t current_oblock, dm_oblock_t new_oblock)
1514{ 1570{
1515 unsigned long flags;
1516 struct smq_policy *mq = to_smq_policy(p); 1571 struct smq_policy *mq = to_smq_policy(p);
1572 struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
1517 1573
1518 spin_lock_irqsave(&mq->lock, flags); 1574 if (!e->allocated)
1519 __force_mapping(mq, current_oblock, new_oblock); 1575 return 0;
1520 spin_unlock_irqrestore(&mq->lock, flags); 1576
1577 return e->level;
1521} 1578}
1522 1579
1523static dm_cblock_t smq_residency(struct dm_cache_policy *p) 1580static dm_cblock_t smq_residency(struct dm_cache_policy *p)
@@ -1546,6 +1603,12 @@ static void smq_tick(struct dm_cache_policy *p, bool can_block)
1546 spin_unlock_irqrestore(&mq->lock, flags); 1603 spin_unlock_irqrestore(&mq->lock, flags);
1547} 1604}
1548 1605
1606static void smq_allow_migrations(struct dm_cache_policy *p, bool allow)
1607{
1608 struct smq_policy *mq = to_smq_policy(p);
1609 mq->migrations_allowed = allow;
1610}
1611
1549/* 1612/*
1550 * smq has no config values, but the old mq policy did. To avoid breaking 1613 * smq has no config values, but the old mq policy did. To avoid breaking
1551 * software we continue to accept these configurables for the mq policy, 1614 * software we continue to accept these configurables for the mq policy,
@@ -1590,18 +1653,18 @@ static int mq_emit_config_values(struct dm_cache_policy *p, char *result,
1590static void init_policy_functions(struct smq_policy *mq, bool mimic_mq) 1653static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
1591{ 1654{
1592 mq->policy.destroy = smq_destroy; 1655 mq->policy.destroy = smq_destroy;
1593 mq->policy.map = smq_map;
1594 mq->policy.lookup = smq_lookup; 1656 mq->policy.lookup = smq_lookup;
1657 mq->policy.lookup_with_work = smq_lookup_with_work;
1658 mq->policy.get_background_work = smq_get_background_work;
1659 mq->policy.complete_background_work = smq_complete_background_work;
1595 mq->policy.set_dirty = smq_set_dirty; 1660 mq->policy.set_dirty = smq_set_dirty;
1596 mq->policy.clear_dirty = smq_clear_dirty; 1661 mq->policy.clear_dirty = smq_clear_dirty;
1597 mq->policy.load_mapping = smq_load_mapping; 1662 mq->policy.load_mapping = smq_load_mapping;
1663 mq->policy.invalidate_mapping = smq_invalidate_mapping;
1598 mq->policy.get_hint = smq_get_hint; 1664 mq->policy.get_hint = smq_get_hint;
1599 mq->policy.remove_mapping = smq_remove_mapping;
1600 mq->policy.remove_cblock = smq_remove_cblock;
1601 mq->policy.writeback_work = smq_writeback_work;
1602 mq->policy.force_mapping = smq_force_mapping;
1603 mq->policy.residency = smq_residency; 1665 mq->policy.residency = smq_residency;
1604 mq->policy.tick = smq_tick; 1666 mq->policy.tick = smq_tick;
1667 mq->policy.allow_migrations = smq_allow_migrations;
1605 1668
1606 if (mimic_mq) { 1669 if (mimic_mq) {
1607 mq->policy.set_config_value = mq_set_config_value; 1670 mq->policy.set_config_value = mq_set_config_value;
@@ -1633,7 +1696,8 @@ static void calc_hotspot_params(sector_t origin_size,
1633static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, 1696static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
1634 sector_t origin_size, 1697 sector_t origin_size,
1635 sector_t cache_block_size, 1698 sector_t cache_block_size,
1636 bool mimic_mq) 1699 bool mimic_mq,
1700 bool migrations_allowed)
1637{ 1701{
1638 unsigned i; 1702 unsigned i;
1639 unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS; 1703 unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
@@ -1658,11 +1722,11 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
1658 } 1722 }
1659 1723
1660 init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue); 1724 init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
1661 for (i = 0; i < nr_sentinels_per_queue; i++) 1725 for (i = 0; i < nr_sentinels_per_queue; i++)
1662 get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true; 1726 get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;
1663 1727
1664 init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels); 1728 init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
1665 for (i = 0; i < nr_sentinels_per_queue; i++) 1729 for (i = 0; i < nr_sentinels_per_queue; i++)
1666 get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true; 1730 get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;
1667 1731
1668 init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels, 1732 init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
@@ -1715,8 +1779,16 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
1715 mq->next_hotspot_period = jiffies; 1779 mq->next_hotspot_period = jiffies;
1716 mq->next_cache_period = jiffies; 1780 mq->next_cache_period = jiffies;
1717 1781
1782 mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */
1783 if (!mq->bg_work)
1784 goto bad_btracker;
1785
1786 mq->migrations_allowed = migrations_allowed;
1787
1718 return &mq->policy; 1788 return &mq->policy;
1719 1789
1790bad_btracker:
1791 h_exit(&mq->hotspot_table);
1720bad_alloc_hotspot_table: 1792bad_alloc_hotspot_table:
1721 h_exit(&mq->table); 1793 h_exit(&mq->table);
1722bad_alloc_table: 1794bad_alloc_table:
@@ -1735,21 +1807,28 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
1735 sector_t origin_size, 1807 sector_t origin_size,
1736 sector_t cache_block_size) 1808 sector_t cache_block_size)
1737{ 1809{
1738 return __smq_create(cache_size, origin_size, cache_block_size, false); 1810 return __smq_create(cache_size, origin_size, cache_block_size, false, true);
1739} 1811}
1740 1812
1741static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, 1813static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
1742 sector_t origin_size, 1814 sector_t origin_size,
1743 sector_t cache_block_size) 1815 sector_t cache_block_size)
1744{ 1816{
1745 return __smq_create(cache_size, origin_size, cache_block_size, true); 1817 return __smq_create(cache_size, origin_size, cache_block_size, true, true);
1818}
1819
1820static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
1821 sector_t origin_size,
1822 sector_t cache_block_size)
1823{
1824 return __smq_create(cache_size, origin_size, cache_block_size, false, false);
1746} 1825}
1747 1826
1748/*----------------------------------------------------------------*/ 1827/*----------------------------------------------------------------*/
1749 1828
1750static struct dm_cache_policy_type smq_policy_type = { 1829static struct dm_cache_policy_type smq_policy_type = {
1751 .name = "smq", 1830 .name = "smq",
1752 .version = {1, 5, 0}, 1831 .version = {2, 0, 0},
1753 .hint_size = 4, 1832 .hint_size = 4,
1754 .owner = THIS_MODULE, 1833 .owner = THIS_MODULE,
1755 .create = smq_create 1834 .create = smq_create
@@ -1757,15 +1836,23 @@ static struct dm_cache_policy_type smq_policy_type = {
1757 1836
1758static struct dm_cache_policy_type mq_policy_type = { 1837static struct dm_cache_policy_type mq_policy_type = {
1759 .name = "mq", 1838 .name = "mq",
1760 .version = {1, 5, 0}, 1839 .version = {2, 0, 0},
1761 .hint_size = 4, 1840 .hint_size = 4,
1762 .owner = THIS_MODULE, 1841 .owner = THIS_MODULE,
1763 .create = mq_create, 1842 .create = mq_create,
1764}; 1843};
1765 1844
1845static struct dm_cache_policy_type cleaner_policy_type = {
1846 .name = "cleaner",
1847 .version = {2, 0, 0},
1848 .hint_size = 4,
1849 .owner = THIS_MODULE,
1850 .create = cleaner_create,
1851};
1852
1766static struct dm_cache_policy_type default_policy_type = { 1853static struct dm_cache_policy_type default_policy_type = {
1767 .name = "default", 1854 .name = "default",
1768 .version = {1, 5, 0}, 1855 .version = {2, 0, 0},
1769 .hint_size = 4, 1856 .hint_size = 4,
1770 .owner = THIS_MODULE, 1857 .owner = THIS_MODULE,
1771 .create = smq_create, 1858 .create = smq_create,
@@ -1785,23 +1872,36 @@ static int __init smq_init(void)
1785 r = dm_cache_policy_register(&mq_policy_type); 1872 r = dm_cache_policy_register(&mq_policy_type);
1786 if (r) { 1873 if (r) {
1787 DMERR("register failed (as mq) %d", r); 1874 DMERR("register failed (as mq) %d", r);
1788 dm_cache_policy_unregister(&smq_policy_type); 1875 goto out_mq;
1789 return -ENOMEM; 1876 }
1877
1878 r = dm_cache_policy_register(&cleaner_policy_type);
1879 if (r) {
1880 DMERR("register failed (as cleaner) %d", r);
1881 goto out_cleaner;
1790 } 1882 }
1791 1883
1792 r = dm_cache_policy_register(&default_policy_type); 1884 r = dm_cache_policy_register(&default_policy_type);
1793 if (r) { 1885 if (r) {
1794 DMERR("register failed (as default) %d", r); 1886 DMERR("register failed (as default) %d", r);
1795 dm_cache_policy_unregister(&mq_policy_type); 1887 goto out_default;
1796 dm_cache_policy_unregister(&smq_policy_type);
1797 return -ENOMEM;
1798 } 1888 }
1799 1889
1800 return 0; 1890 return 0;
1891
1892out_default:
1893 dm_cache_policy_unregister(&cleaner_policy_type);
1894out_cleaner:
1895 dm_cache_policy_unregister(&mq_policy_type);
1896out_mq:
1897 dm_cache_policy_unregister(&smq_policy_type);
1898
1899 return -ENOMEM;
1801} 1900}
1802 1901
1803static void __exit smq_exit(void) 1902static void __exit smq_exit(void)
1804{ 1903{
1904 dm_cache_policy_unregister(&cleaner_policy_type);
1805 dm_cache_policy_unregister(&smq_policy_type); 1905 dm_cache_policy_unregister(&smq_policy_type);
1806 dm_cache_policy_unregister(&mq_policy_type); 1906 dm_cache_policy_unregister(&mq_policy_type);
1807 dm_cache_policy_unregister(&default_policy_type); 1907 dm_cache_policy_unregister(&default_policy_type);
@@ -1816,3 +1916,4 @@ MODULE_DESCRIPTION("smq cache policy");
1816 1916
1817MODULE_ALIAS("dm-cache-default"); 1917MODULE_ALIAS("dm-cache-default");
1818MODULE_ALIAS("dm-cache-mq"); 1918MODULE_ALIAS("dm-cache-mq");
1919MODULE_ALIAS("dm-cache-cleaner");