author		Joe Thornber <ejt@redhat.com>		2013-10-24 14:10:28 -0400
committer	Mike Snitzer <snitzer@redhat.com>	2013-11-09 18:20:25 -0500
commit		01911c19bea63b1a958b9d9024504c2e9079f155 (patch)
tree		613f9334c9d85702ff89168f587d4d90d24378b9 /drivers/md/dm-cache-policy-mq.c
parent		ffcbcb6720ab6a4bb6e0a51b3711e8c60872d281 (diff)
dm cache policy mq: implement writeback_work() and mq_{set,clear}_dirty()
There are now two multiqueues for in-cache blocks: a clean one and a
dirty one.  writeback_work comes from the dirty queue; demotions come
from the clean one.

There are two benefits:
- Performance improvement, since demoting a clean block is a noop.
- The cache cleans itself when io load is light.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
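To make the behaviour described above concrete, here is a minimal
user-space sketch (not the kernel code): in-cache entries are routed
onto a clean or a dirty queue by their dirty bit, writeback work is
drawn from the dirty queue, and a cleaned entry rejoins the clean
queue.  All names here (toy_queue, cache_push, and so on) are
illustrative stand-ins, not dm-cache identifiers.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_entry {
	unsigned block;
	bool dirty;
	struct toy_entry *next;
};

/* Single-level FIFO standing in for one multiqueue. */
struct toy_queue {
	struct toy_entry *head, *tail;
};

static void toy_push(struct toy_queue *q, struct toy_entry *e)
{
	e->next = NULL;
	if (q->tail)
		q->tail->next = e;
	else
		q->head = e;
	q->tail = e;
}

static struct toy_entry *toy_pop(struct toy_queue *q)
{
	struct toy_entry *e = q->head;

	if (e) {
		q->head = e->next;
		if (!q->head)
			q->tail = NULL;
	}
	return e;
}

static struct toy_queue cache_clean, cache_dirty;

/* Route an entry by its dirty bit, as the patched push() does. */
static void cache_push(struct toy_entry *e)
{
	toy_push(e->dirty ? &cache_dirty : &cache_clean, e);
}

/* Pop one dirty block for writeback; 0 on success, -1 if none left. */
static int writeback_work(unsigned *block)
{
	struct toy_entry *e = toy_pop(&cache_dirty);

	if (!e)
		return -1;
	*block = e->block;
	e->dirty = false;
	cache_push(e);		/* rejoins the clean queue */
	return 0;
}

int main(void)
{
	unsigned block;

	for (unsigned i = 0; i < 4; i++) {
		struct toy_entry *e = malloc(sizeof(*e));

		e->block = i;
		e->dirty = (i % 2) != 0;	/* odd blocks start dirty */
		cache_push(e);
	}

	/*
	 * With light io load the core keeps asking for writeback work,
	 * so the dirty queue drains and the cache cleans itself.
	 */
	while (!writeback_work(&block))
		printf("wrote back block %u\n", block);

	return 0;
}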
Diffstat (limited to 'drivers/md/dm-cache-policy-mq.c')
-rw-r--r--	drivers/md/dm-cache-policy-mq.c | 147
1 file changed, 128 insertions(+), 19 deletions(-)
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index a9a25de5b011..6710e038c730 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -224,6 +224,7 @@ struct entry {
 	 * FIXME: pack these better
 	 */
 	bool in_cache:1;
+	bool dirty:1;
 	unsigned hit_count;
 	unsigned generation;
 	unsigned tick;
@@ -238,13 +239,15 @@ struct mq_policy {
 	struct io_tracker tracker;
 
 	/*
-	 * We maintain two queues of entries.  The cache proper contains
-	 * the currently active mappings.  Whereas the pre_cache tracks
-	 * blocks that are being hit frequently and potential candidates
-	 * for promotion to the cache.
+	 * We maintain three queues of entries.  The cache proper,
+	 * consisting of a clean and dirty queue, contains the currently
+	 * active mappings.  Whereas the pre_cache tracks blocks that
+	 * are being hit frequently and potential candidates for promotion
+	 * to the cache.
 	 */
 	struct queue pre_cache;
-	struct queue cache;
+	struct queue cache_clean;
+	struct queue cache_dirty;
 
 	/*
 	 * Keeps track of time, incremented by the core. We use this to
@@ -324,7 +327,8 @@ static void free_entries(struct mq_policy *mq)
 	struct entry *e, *tmp;
 
 	concat_queue(&mq->free, &mq->pre_cache);
-	concat_queue(&mq->free, &mq->cache);
+	concat_queue(&mq->free, &mq->cache_clean);
+	concat_queue(&mq->free, &mq->cache_dirty);
 
 	list_for_each_entry_safe(e, tmp, &mq->free, list)
 		kmem_cache_free(mq_entry_cache, e);
@@ -508,7 +512,8 @@ static void push(struct mq_policy *mq, struct entry *e)
 
 	if (e->in_cache) {
 		alloc_cblock(mq, e->cblock);
-		queue_push(&mq->cache, queue_level(e), &e->list);
+		queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
+			   queue_level(e), &e->list);
 	} else
 		queue_push(&mq->pre_cache, queue_level(e), &e->list);
 }
@@ -558,7 +563,8 @@ static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
 * of the entries.
 *
 * At the moment the threshold is taken by averaging the hit counts of some
- * of the entries in the cache (the first 20 entries of the first level).
+ * of the entries in the cache (the first 20 entries across all levels in
+ * ascending order, giving preference to the clean entries at each level).
 *
 * We can be much cleverer than this though.  For example, each promotion
 * could bump up the threshold helping to prevent churn.  Much more to do
@@ -580,7 +586,16 @@ static void check_generation(struct mq_policy *mq)
 	mq->generation++;
 
 	for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
-		head = mq->cache.qs + level;
+		head = mq->cache_clean.qs + level;
+		list_for_each_entry(e, head, list) {
+			nr++;
+			total += e->hit_count;
+
+			if (++count >= MAX_TO_AVERAGE)
+				break;
+		}
+
+		head = mq->cache_dirty.qs + level;
 		list_for_each_entry(e, head, list) {
 			nr++;
 			total += e->hit_count;
@@ -633,19 +648,28 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
 *  - set the hit count to a hard coded value other than 1, eg, is it better
 *    if it goes in at level 2?
 */
-static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
+static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t *cblock)
 {
-	dm_cblock_t result;
-	struct entry *demoted = pop(mq, &mq->cache);
+	struct entry *demoted = pop(mq, &mq->cache_clean);
 
-	BUG_ON(!demoted);
-	result = demoted->cblock;
+	if (!demoted)
+		/*
+		 * We could get a block from mq->cache_dirty, but that
+		 * would add extra latency to the triggering bio as it
+		 * waits for the writeback.  Better to not promote this
+		 * time and hope there's a clean block next time this block
+		 * is hit.
+		 */
+		return -ENOSPC;
+
+	*cblock = demoted->cblock;
 	*oblock = demoted->oblock;
 	demoted->in_cache = false;
+	demoted->dirty = false;
 	demoted->hit_count = 1;
 	push(mq, demoted);
 
-	return result;
+	return 0;
 }
 
 /*
@@ -705,11 +729,16 @@ static int cache_entry_found(struct mq_policy *mq,
 static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 			      struct policy_result *result)
 {
+	int r;
 	dm_cblock_t cblock;
 
 	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
 		result->op = POLICY_REPLACE;
-		cblock = demote_cblock(mq, &result->old_oblock);
+		r = demote_cblock(mq, &result->old_oblock, &cblock);
+		if (r) {
+			result->op = POLICY_MISS;
+			return 0;
+		}
 	} else
 		result->op = POLICY_NEW;
 
@@ -717,6 +746,7 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 
 	del(mq, e);
 	e->in_cache = true;
+	e->dirty = false;
 	push(mq, e);
 
 	return 0;
@@ -760,6 +790,7 @@ static void insert_in_pre_cache(struct mq_policy *mq,
 	}
 
 	e->in_cache = false;
+	e->dirty = false;
 	e->oblock = oblock;
 	e->hit_count = 1;
 	e->generation = mq->generation;
@@ -787,6 +818,7 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 	e->oblock = oblock;
 	e->cblock = cblock;
 	e->in_cache = true;
+	e->dirty = false;
 	e->hit_count = 1;
 	e->generation = mq->generation;
 	push(mq, e);
@@ -917,6 +949,40 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
 	return r;
 }
 
+/*
+ * FIXME: __mq_set_clear_dirty can block due to mutex.
+ * Ideally a policy should not block in functions called
+ * from the map() function.  Explore using RCU.
+ */
+static void __mq_set_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock, bool set)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+	struct entry *e;
+
+	mutex_lock(&mq->lock);
+	e = hash_lookup(mq, oblock);
+	if (!e)
+		DMWARN("__mq_set_clear_dirty called for a block that isn't in the cache");
+	else {
+		BUG_ON(!e->in_cache);
+
+		del(mq, e);
+		e->dirty = set;
+		push(mq, e);
+	}
+	mutex_unlock(&mq->lock);
+}
+
+static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	__mq_set_clear_dirty(p, oblock, true);
+}
+
+static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	__mq_set_clear_dirty(p, oblock, false);
+}
+
 static int mq_load_mapping(struct dm_cache_policy *p,
 			   dm_oblock_t oblock, dm_cblock_t cblock,
 			   uint32_t hint, bool hint_valid)
@@ -931,6 +997,7 @@ static int mq_load_mapping(struct dm_cache_policy *p,
 	e->cblock = cblock;
 	e->oblock = oblock;
 	e->in_cache = true;
+	e->dirty = false; /* this gets corrected in a minute */
 	e->hit_count = hint_valid ? hint : 1;
 	e->generation = mq->generation;
 	push(mq, e);
@@ -949,7 +1016,14 @@ static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
 	mutex_lock(&mq->lock);
 
 	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_for_each_entry(e, &mq->cache.qs[level], list) {
+		list_for_each_entry(e, &mq->cache_clean.qs[level], list) {
+			r = fn(context, e->cblock, e->oblock, e->hit_count);
+			if (r)
+				goto out;
+		}
+
+	for (level = 0; level < NR_QUEUE_LEVELS; level++)
+		list_for_each_entry(e, &mq->cache_dirty.qs[level], list) {
 			r = fn(context, e->cblock, e->oblock, e->hit_count);
 			if (r)
 				goto out;
@@ -974,11 +1048,41 @@ static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
 
 	del(mq, e);
 	e->in_cache = false;
+	e->dirty = false;
 	push(mq, e);
 
 	mutex_unlock(&mq->lock);
 }
 
+static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
+			      dm_cblock_t *cblock)
+{
+	struct entry *e = pop(mq, &mq->cache_dirty);
+
+	if (!e)
+		return -ENODATA;
+
+	*oblock = e->oblock;
+	*cblock = e->cblock;
+	e->dirty = false;
+	push(mq, e);
+
+	return 0;
+}
+
+static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
+			     dm_cblock_t *cblock)
+{
+	int r;
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	r = __mq_writeback_work(mq, oblock, cblock);
+	mutex_unlock(&mq->lock);
+
+	return r;
+}
+
 static void force_mapping(struct mq_policy *mq,
 			  dm_oblock_t current_oblock, dm_oblock_t new_oblock)
 {
@@ -988,6 +1092,7 @@ static void force_mapping(struct mq_policy *mq,
 
 	del(mq, e);
 	e->oblock = new_oblock;
+	e->dirty = true;
 	push(mq, e);
 }
 
@@ -1063,10 +1168,12 @@ static void init_policy_functions(struct mq_policy *mq)
 	mq->policy.destroy = mq_destroy;
 	mq->policy.map = mq_map;
 	mq->policy.lookup = mq_lookup;
+	mq->policy.set_dirty = mq_set_dirty;
+	mq->policy.clear_dirty = mq_clear_dirty;
 	mq->policy.load_mapping = mq_load_mapping;
 	mq->policy.walk_mappings = mq_walk_mappings;
 	mq->policy.remove_mapping = mq_remove_mapping;
-	mq->policy.writeback_work = NULL;
+	mq->policy.writeback_work = mq_writeback_work;
 	mq->policy.force_mapping = mq_force_mapping;
 	mq->policy.residency = mq_residency;
 	mq->policy.tick = mq_tick;
@@ -1099,7 +1206,9 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 	mq->find_free_last_word = 0;
 
 	queue_init(&mq->pre_cache);
-	queue_init(&mq->cache);
+	queue_init(&mq->cache_clean);
+	queue_init(&mq->cache_dirty);
+
 	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
 
 	mq->nr_entries = 2 * from_cblock(cache_size);
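For reference, the net effect of the demote_cblock() and
pre_cache_to_cache() changes above can be modelled by the following
stand-alone sketch.  It is not kernel code; the counters and TOY_*
names are invented for illustration, and the point is only the control
flow: victims come exclusively from the clean queue, and when no clean
victim exists the promotion is skipped rather than stalling the
triggering bio behind a writeback.

#include <errno.h>
#include <stdio.h>

enum toy_op { TOY_MISS, TOY_NEW, TOY_REPLACE };

static int free_cblocks;	/* unused cache blocks */
static int clean_blocks;	/* demotable (clean) cache blocks */

/* Victims come only from the clean queue; demoting one is a noop. */
static int demote_clean(void)
{
	if (clean_blocks == 0)
		return -ENOSPC;
	clean_blocks--;
	return 0;
}

static enum toy_op try_promote(void)
{
	if (free_cblocks > 0) {
		free_cblocks--;
		return TOY_NEW;
	}
	/*
	 * No free block: try to demote a clean victim.  If none exists
	 * either, report a miss and hope a clean block is available the
	 * next time this block is hit.
	 */
	if (demote_clean())
		return TOY_MISS;
	return TOY_REPLACE;
}

int main(void)
{
	free_cblocks = 1;
	clean_blocks = 1;
	printf("%d %d %d\n", try_promote(), try_promote(), try_promote());
	/* prints "1 2 0": TOY_NEW, then TOY_REPLACE, then TOY_MISS */
	return 0;
}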