author     Joe Thornber <ejt@redhat.com>       2013-10-24 14:10:28 -0400
committer  Mike Snitzer <snitzer@redhat.com>   2013-11-09 18:20:25 -0500
commit     01911c19bea63b1a958b9d9024504c2e9079f155
tree       613f9334c9d85702ff89168f587d4d90d24378b9 /drivers/md/dm-cache-policy-mq.c
parent     ffcbcb6720ab6a4bb6e0a51b3711e8c60872d281
dm cache policy mq: implement writeback_work() and mq_{set,clear}_dirty()
There are now two multiqueues for in-cache blocks: a clean one and a
dirty one.  writeback_work is taken from the dirty queue, while demotions
come from the clean queue.

There are two benefits:

- Performance improvement, since demoting a clean block is a no-op
  (illustrated in the sketch after this list).
- The cache cleans itself when I/O load is light.
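To make the split concrete, here is a minimal standalone sketch of the idea.
It is not part of the patch: the types and helpers (struct sketch_block,
struct sketch_policy, pick_demotion_victim(), pick_writeback_work()) are
simplified, hypothetical stand-ins for the real dm-cache structures and the
pop()/push() queue helpers changed below.

#include <stdbool.h>

/* Simplified stand-in for a cached block (hypothetical type). */
struct sketch_block {
	unsigned long oblock;
	bool dirty;
	struct sketch_block *next;
};

/* Two singly linked lists standing in for the clean/dirty multiqueues. */
struct sketch_policy {
	struct sketch_block *clean;	/* demotion victims come from here */
	struct sketch_block *dirty;	/* writeback work comes from here */
};

/*
 * Demotion only ever takes a clean block, so dropping the mapping is a
 * no-op on the data path.  Returning NULL when only dirty blocks remain
 * mirrors demote_cblock() returning -ENOSPC rather than forcing a
 * synchronous writeback into the promotion path.
 */
static struct sketch_block *pick_demotion_victim(struct sketch_policy *p)
{
	struct sketch_block *b = p->clean;

	if (b)
		p->clean = b->next;
	return b;
}

/*
 * Background cleaning pulls from the dirty queue.  The block is handed
 * out for copy-back and optimistically requeued as clean, so with light
 * I/O the dirty queue drains over time and the cache cleans itself.
 */
static struct sketch_block *pick_writeback_work(struct sketch_policy *p)
{
	struct sketch_block *b = p->dirty;

	if (b) {
		p->dirty = b->next;
		b->dirty = false;
		b->next = p->clean;
		p->clean = b;
	}
	return b;
}

Dropping a clean victim touches no data, while dirty work is handed out for
writeback and requeued as clean, which is the behaviour demote_cblock() and
__mq_writeback_work() implement in the diff below.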
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md/dm-cache-policy-mq.c')
-rw-r--r--  drivers/md/dm-cache-policy-mq.c | 147
1 file changed, 128 insertions, 19 deletions
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index a9a25de5b011..6710e038c730 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -224,6 +224,7 @@ struct entry {
 	 * FIXME: pack these better
 	 */
 	bool in_cache:1;
+	bool dirty:1;
 	unsigned hit_count;
 	unsigned generation;
 	unsigned tick;
@@ -238,13 +239,15 @@ struct mq_policy {
 	struct io_tracker tracker;
 
 	/*
-	 * We maintain two queues of entries. The cache proper contains
-	 * the currently active mappings. Whereas the pre_cache tracks
-	 * blocks that are being hit frequently and potential candidates
-	 * for promotion to the cache.
+	 * We maintain three queues of entries. The cache proper,
+	 * consisting of a clean and dirty queue, contains the currently
+	 * active mappings. Whereas the pre_cache tracks blocks that
+	 * are being hit frequently and potential candidates for promotion
+	 * to the cache.
 	 */
 	struct queue pre_cache;
-	struct queue cache;
+	struct queue cache_clean;
+	struct queue cache_dirty;
 
 	/*
 	 * Keeps track of time, incremented by the core. We use this to
@@ -324,7 +327,8 @@ static void free_entries(struct mq_policy *mq)
 	struct entry *e, *tmp;
 
 	concat_queue(&mq->free, &mq->pre_cache);
-	concat_queue(&mq->free, &mq->cache);
+	concat_queue(&mq->free, &mq->cache_clean);
+	concat_queue(&mq->free, &mq->cache_dirty);
 
 	list_for_each_entry_safe(e, tmp, &mq->free, list)
 		kmem_cache_free(mq_entry_cache, e);
@@ -508,7 +512,8 @@ static void push(struct mq_policy *mq, struct entry *e)
 
 	if (e->in_cache) {
 		alloc_cblock(mq, e->cblock);
-		queue_push(&mq->cache, queue_level(e), &e->list);
+		queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
+			   queue_level(e), &e->list);
 	} else
 		queue_push(&mq->pre_cache, queue_level(e), &e->list);
 }
@@ -558,7 +563,8 @@ static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
  * of the entries.
  *
  * At the moment the threshold is taken by averaging the hit counts of some
- * of the entries in the cache (the first 20 entries of the first level).
+ * of the entries in the cache (the first 20 entries across all levels in
+ * ascending order, giving preference to the clean entries at each level).
  *
  * We can be much cleverer than this though. For example, each promotion
  * could bump up the threshold helping to prevent churn. Much more to do
@@ -580,7 +586,16 @@ static void check_generation(struct mq_policy *mq)
 	mq->generation++;
 
 	for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
-		head = mq->cache.qs + level;
+		head = mq->cache_clean.qs + level;
+		list_for_each_entry(e, head, list) {
+			nr++;
+			total += e->hit_count;
+
+			if (++count >= MAX_TO_AVERAGE)
+				break;
+		}
+
+		head = mq->cache_dirty.qs + level;
 		list_for_each_entry(e, head, list) {
 			nr++;
 			total += e->hit_count;
@@ -633,19 +648,28 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
  * - set the hit count to a hard coded value other than 1, eg, is it better
  *   if it goes in at level 2?
  */
-static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
+static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t *cblock)
 {
-	dm_cblock_t result;
-	struct entry *demoted = pop(mq, &mq->cache);
+	struct entry *demoted = pop(mq, &mq->cache_clean);
 
-	BUG_ON(!demoted);
-	result = demoted->cblock;
+	if (!demoted)
+		/*
+		 * We could get a block from mq->cache_dirty, but that
+		 * would add extra latency to the triggering bio as it
+		 * waits for the writeback. Better to not promote this
+		 * time and hope there's a clean block next time this block
+		 * is hit.
+		 */
+		return -ENOSPC;
+
+	*cblock = demoted->cblock;
 	*oblock = demoted->oblock;
 	demoted->in_cache = false;
+	demoted->dirty = false;
 	demoted->hit_count = 1;
 	push(mq, demoted);
 
-	return result;
+	return 0;
 }
 
 /*
@@ -705,11 +729,16 @@ static int cache_entry_found(struct mq_policy *mq,
 static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 			      struct policy_result *result)
 {
+	int r;
 	dm_cblock_t cblock;
 
 	if (find_free_cblock(mq, &cblock) == -ENOSPC) {
 		result->op = POLICY_REPLACE;
-		cblock = demote_cblock(mq, &result->old_oblock);
+		r = demote_cblock(mq, &result->old_oblock, &cblock);
+		if (r) {
+			result->op = POLICY_MISS;
+			return 0;
+		}
 	} else
 		result->op = POLICY_NEW;
 
@@ -717,6 +746,7 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 
 	del(mq, e);
 	e->in_cache = true;
+	e->dirty = false;
 	push(mq, e);
 
 	return 0;
@@ -760,6 +790,7 @@ static void insert_in_pre_cache(struct mq_policy *mq,
 	}
 
 	e->in_cache = false;
+	e->dirty = false;
 	e->oblock = oblock;
 	e->hit_count = 1;
 	e->generation = mq->generation;
@@ -787,6 +818,7 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
 	e->oblock = oblock;
 	e->cblock = cblock;
 	e->in_cache = true;
+	e->dirty = false;
 	e->hit_count = 1;
 	e->generation = mq->generation;
 	push(mq, e);
@@ -917,6 +949,40 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
 	return r;
 }
 
+/*
+ * FIXME: __mq_set_clear_dirty can block due to mutex.
+ * Ideally a policy should not block in functions called
+ * from the map() function. Explore using RCU.
+ */
+static void __mq_set_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock, bool set)
+{
+	struct mq_policy *mq = to_mq_policy(p);
+	struct entry *e;
+
+	mutex_lock(&mq->lock);
+	e = hash_lookup(mq, oblock);
+	if (!e)
+		DMWARN("__mq_set_clear_dirty called for a block that isn't in the cache");
+	else {
+		BUG_ON(!e->in_cache);
+
+		del(mq, e);
+		e->dirty = set;
+		push(mq, e);
+	}
+	mutex_unlock(&mq->lock);
+}
+
+static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	__mq_set_clear_dirty(p, oblock, true);
+}
+
+static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+	__mq_set_clear_dirty(p, oblock, false);
+}
+
 static int mq_load_mapping(struct dm_cache_policy *p,
 			   dm_oblock_t oblock, dm_cblock_t cblock,
 			   uint32_t hint, bool hint_valid)
@@ -931,6 +997,7 @@ static int mq_load_mapping(struct dm_cache_policy *p,
 	e->cblock = cblock;
 	e->oblock = oblock;
 	e->in_cache = true;
+	e->dirty = false; /* this gets corrected in a minute */
 	e->hit_count = hint_valid ? hint : 1;
 	e->generation = mq->generation;
 	push(mq, e);
@@ -949,7 +1016,14 @@ static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
 	mutex_lock(&mq->lock);
 
 	for (level = 0; level < NR_QUEUE_LEVELS; level++)
-		list_for_each_entry(e, &mq->cache.qs[level], list) {
+		list_for_each_entry(e, &mq->cache_clean.qs[level], list) {
+			r = fn(context, e->cblock, e->oblock, e->hit_count);
+			if (r)
+				goto out;
+		}
+
+	for (level = 0; level < NR_QUEUE_LEVELS; level++)
+		list_for_each_entry(e, &mq->cache_dirty.qs[level], list) {
 			r = fn(context, e->cblock, e->oblock, e->hit_count);
 			if (r)
 				goto out;
@@ -974,11 +1048,41 @@ static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
 
 	del(mq, e);
 	e->in_cache = false;
+	e->dirty = false;
 	push(mq, e);
 
 	mutex_unlock(&mq->lock);
 }
 
+static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
+			       dm_cblock_t *cblock)
+{
+	struct entry *e = pop(mq, &mq->cache_dirty);
+
+	if (!e)
+		return -ENODATA;
+
+	*oblock = e->oblock;
+	*cblock = e->cblock;
+	e->dirty = false;
+	push(mq, e);
+
+	return 0;
+}
+
+static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
+			     dm_cblock_t *cblock)
+{
+	int r;
+	struct mq_policy *mq = to_mq_policy(p);
+
+	mutex_lock(&mq->lock);
+	r = __mq_writeback_work(mq, oblock, cblock);
+	mutex_unlock(&mq->lock);
+
+	return r;
+}
+
 static void force_mapping(struct mq_policy *mq,
 			  dm_oblock_t current_oblock, dm_oblock_t new_oblock)
 {
@@ -988,6 +1092,7 @@ static void force_mapping(struct mq_policy *mq,
 
 	del(mq, e);
 	e->oblock = new_oblock;
+	e->dirty = true;
 	push(mq, e);
 }
 
@@ -1063,10 +1168,12 @@ static void init_policy_functions(struct mq_policy *mq)
 	mq->policy.destroy = mq_destroy;
 	mq->policy.map = mq_map;
 	mq->policy.lookup = mq_lookup;
+	mq->policy.set_dirty = mq_set_dirty;
+	mq->policy.clear_dirty = mq_clear_dirty;
 	mq->policy.load_mapping = mq_load_mapping;
 	mq->policy.walk_mappings = mq_walk_mappings;
 	mq->policy.remove_mapping = mq_remove_mapping;
-	mq->policy.writeback_work = NULL;
+	mq->policy.writeback_work = mq_writeback_work;
 	mq->policy.force_mapping = mq_force_mapping;
 	mq->policy.residency = mq_residency;
 	mq->policy.tick = mq_tick;
@@ -1099,7 +1206,9 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
 	mq->find_free_last_word = 0;
 
 	queue_init(&mq->pre_cache);
-	queue_init(&mq->cache);
+	queue_init(&mq->cache_clean);
+	queue_init(&mq->cache_dirty);
+
 	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
 
 	mq->nr_entries = 2 * from_cblock(cache_size);