author    Joe Thornber <ejt@redhat.com>    2013-11-09 06:12:51 -0500
committer Mike Snitzer <snitzer@redhat.com>    2013-11-11 11:37:50 -0500
commit    633618e3353f8953e43d989d08302f5dcd51d8be (patch)
tree      6a9de60cb6e659fd14216e3bd49689441070efbf /drivers/md/dm-cache-policy-mq.c
parent    53d498198d3e8bce4287112beafc30befcba98cc (diff)
dm cache policy mq: reduce memory requirements
Rather than storing the cblock in each cache entry, we allocate all entries
in an array and infer the cblock from the entry position. This saves 4 bytes
of memory per cache block and gives us an easy way of looking up cache
entries by cblock. We no longer need to keep an explicit bitset to track
which cblocks have been allocated, and no searching is needed to find free
cblocks.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
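To make the idea in the commit message concrete, here is a minimal user-space
sketch of the entry-pool scheme: one contiguous array of entries, a free list
threaded through it, and the cblock recovered by pointer arithmetic instead of
being stored per entry. The toy_* names are illustrative only and are not part
of the patch; the real kernel implementation is the entry_pool code added by
this diff (epool_init(), alloc_entry(), alloc_particular_entry(),
infer_cblock()), which uses the kernel's list_head machinery rather than the
plain next pointer shown here.

    /*
     * Illustrative sketch only -- not the kernel code.
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct toy_entry {
            struct toy_entry *next_free;    /* free entries are chained together */
            int in_use;
    };

    struct toy_pool {
            struct toy_entry *entries;      /* one contiguous array */
            struct toy_entry *free_list;
            unsigned nr_entries;
    };

    static int toy_pool_init(struct toy_pool *p, unsigned nr_entries)
    {
            unsigned i;

            p->entries = calloc(nr_entries, sizeof(*p->entries));
            if (!p->entries)
                    return -1;

            p->nr_entries = nr_entries;
            p->free_list = NULL;
            for (i = 0; i < nr_entries; i++) {
                    p->entries[i].next_free = p->free_list;
                    p->free_list = &p->entries[i];
            }
            return 0;
    }

    static struct toy_entry *toy_alloc(struct toy_pool *p)
    {
            struct toy_entry *e = p->free_list;

            if (!e)
                    return NULL;            /* pool exhausted: no searching needed */
            p->free_list = e->next_free;
            e->in_use = 1;
            return e;
    }

    /* The "cblock" is just the entry's index, recovered by pointer arithmetic. */
    static unsigned toy_infer_cblock(struct toy_pool *p, struct toy_entry *e)
    {
            return (unsigned)(e - p->entries);
    }

    int main(void)
    {
            struct toy_pool pool;
            struct toy_entry *e;

            if (toy_pool_init(&pool, 8))
                    return 1;

            e = toy_alloc(&pool);
            printf("allocated cblock %u\n", toy_infer_cblock(&pool, e));

            free(pool.entries);
            return 0;
    }

Because the entry address and its index are interchangeable (entries + i on
one side, e - entries on the other), looking an entry up by cblock, as
alloc_particular_entry() does, requires neither a search nor an allocation
bitset.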
Diffstat (limited to 'drivers/md/dm-cache-policy-mq.c')
-rw-r--r--    drivers/md/dm-cache-policy-mq.c | 543
1 file changed, 231 insertions(+), 312 deletions(-)
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 444f0bf10b21..782bf854666a 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -26,19 +26,6 @@ static unsigned next_power(unsigned n, unsigned min)
26 26
27/*----------------------------------------------------------------*/ 27/*----------------------------------------------------------------*/
28 28
29static unsigned long *alloc_bitset(unsigned nr_entries)
30{
31 size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
32 return vzalloc(s);
33}
34
35static void free_bitset(unsigned long *bits)
36{
37 vfree(bits);
38}
39
40/*----------------------------------------------------------------*/
41
42/* 29/*
43 * Large, sequential ios are probably better left on the origin device since 30 * Large, sequential ios are probably better left on the origin device since
44 * spindles tend to have good bandwidth. 31 * spindles tend to have good bandwidth.
@@ -233,18 +220,107 @@ struct entry {
233 struct hlist_node hlist; 220 struct hlist_node hlist;
234 struct list_head list; 221 struct list_head list;
235 dm_oblock_t oblock; 222 dm_oblock_t oblock;
236 dm_cblock_t cblock; /* valid iff in_cache */
237 223
238 /* 224 /*
239 * FIXME: pack these better 225 * FIXME: pack these better
240 */ 226 */
241 bool in_cache:1;
242 bool dirty:1; 227 bool dirty:1;
243 unsigned hit_count; 228 unsigned hit_count;
244 unsigned generation; 229 unsigned generation;
245 unsigned tick; 230 unsigned tick;
246}; 231};
247 232
233/*
234 * Rather than storing the cblock in an entry, we allocate all entries in
235 * an array, and infer the cblock from the entry position.
236 *
237 * Free entries are linked together into a list.
238 */
239struct entry_pool {
240 struct entry *entries, *entries_end;
241 struct list_head free;
242 unsigned nr_allocated;
243};
244
245static int epool_init(struct entry_pool *ep, unsigned nr_entries)
246{
247 unsigned i;
248
249 ep->entries = vzalloc(sizeof(struct entry) * nr_entries);
250 if (!ep->entries)
251 return -ENOMEM;
252
253 ep->entries_end = ep->entries + nr_entries;
254
255 INIT_LIST_HEAD(&ep->free);
256 for (i = 0; i < nr_entries; i++)
257 list_add(&ep->entries[i].list, &ep->free);
258
259 ep->nr_allocated = 0;
260
261 return 0;
262}
263
264static void epool_exit(struct entry_pool *ep)
265{
266 vfree(ep->entries);
267}
268
269static struct entry *alloc_entry(struct entry_pool *ep)
270{
271 struct entry *e;
272
273 if (list_empty(&ep->free))
274 return NULL;
275
276 e = list_entry(list_pop(&ep->free), struct entry, list);
277 INIT_LIST_HEAD(&e->list);
278 INIT_HLIST_NODE(&e->hlist);
279 ep->nr_allocated++;
280
281 return e;
282}
283
284/*
285 * This assumes the cblock hasn't already been allocated.
286 */
287static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock)
288{
289 struct entry *e = ep->entries + from_cblock(cblock);
290 list_del(&e->list);
291
292 INIT_LIST_HEAD(&e->list);
293 INIT_HLIST_NODE(&e->hlist);
294 ep->nr_allocated++;
295
296 return e;
297}
298
299static void free_entry(struct entry_pool *ep, struct entry *e)
300{
301 BUG_ON(!ep->nr_allocated);
302 ep->nr_allocated--;
303 INIT_HLIST_NODE(&e->hlist);
304 list_add(&e->list, &ep->free);
305}
306
307static bool epool_empty(struct entry_pool *ep)
308{
309 return list_empty(&ep->free);
310}
311
312static bool in_pool(struct entry_pool *ep, struct entry *e)
313{
314 return e >= ep->entries && e < ep->entries_end;
315}
316
317static dm_cblock_t infer_cblock(struct entry_pool *ep, struct entry *e)
318{
319 return to_cblock(e - ep->entries);
320}
321
322/*----------------------------------------------------------------*/
323
248struct mq_policy { 324struct mq_policy {
249 struct dm_cache_policy policy; 325 struct dm_cache_policy policy;
250 326
@@ -254,6 +330,13 @@ struct mq_policy {
254 struct io_tracker tracker; 330 struct io_tracker tracker;
255 331
256 /* 332 /*
333 * Entries come from two pools, one of pre-cache entries, and one
334 * for the cache proper.
335 */
336 struct entry_pool pre_cache_pool;
337 struct entry_pool cache_pool;
338
339 /*
257 * We maintain three queues of entries. The cache proper, 340 * We maintain three queues of entries. The cache proper,
258 * consisting of a clean and dirty queue, contains the currently 341 * consisting of a clean and dirty queue, contains the currently
259 * active mappings. Whereas the pre_cache tracks blocks that 342 * active mappings. Whereas the pre_cache tracks blocks that
@@ -300,25 +383,6 @@ struct mq_policy {
300 unsigned promote_threshold; 383 unsigned promote_threshold;
301 384
302 /* 385 /*
303 * We need cache_size entries for the cache, and choose to have
304 * cache_size entries for the pre_cache too. One motivation for
305 * using the same size is to make the hit counts directly
306 * comparable between pre_cache and cache.
307 */
308 unsigned nr_entries;
309 unsigned nr_entries_allocated;
310 struct list_head free;
311
312 /*
313 * Cache blocks may be unallocated. We store this info in a
314 * bitset.
315 */
316 unsigned long *allocation_bitset;
317 unsigned nr_cblocks_allocated;
318 unsigned find_free_nr_words;
319 unsigned find_free_last_word;
320
321 /*
322 * The hash table allows us to quickly find an entry by origin 386 * The hash table allows us to quickly find an entry by origin
323 * block. Both pre_cache and cache entries are in here. 387 * block. Both pre_cache and cache entries are in here.
324 */ 388 */
@@ -328,50 +392,6 @@ struct mq_policy {
328}; 392};
329 393
330/*----------------------------------------------------------------*/ 394/*----------------------------------------------------------------*/
331/* Free/alloc mq cache entry structures. */
332static void concat_queue(struct list_head *lh, struct queue *q)
333{
334 unsigned level;
335
336 for (level = 0; level < NR_QUEUE_LEVELS; level++)
337 list_splice(q->qs + level, lh);
338}
339
340static void free_entries(struct mq_policy *mq)
341{
342 struct entry *e, *tmp;
343
344 concat_queue(&mq->free, &mq->pre_cache);
345 concat_queue(&mq->free, &mq->cache_clean);
346 concat_queue(&mq->free, &mq->cache_dirty);
347
348 list_for_each_entry_safe(e, tmp, &mq->free, list)
349 kmem_cache_free(mq_entry_cache, e);
350}
351
352static int alloc_entries(struct mq_policy *mq, unsigned elts)
353{
354 unsigned u = mq->nr_entries;
355
356 INIT_LIST_HEAD(&mq->free);
357 mq->nr_entries_allocated = 0;
358
359 while (u--) {
360 struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL);
361
362 if (!e) {
363 free_entries(mq);
364 return -ENOMEM;
365 }
366
367
368 list_add(&e->list, &mq->free);
369 }
370
371 return 0;
372}
373
374/*----------------------------------------------------------------*/
375 395
376/* 396/*
377 * Simple hash table implementation. Should replace with the standard hash 397 * Simple hash table implementation. Should replace with the standard hash
@@ -407,54 +427,9 @@ static void hash_remove(struct entry *e)
407 427
408/*----------------------------------------------------------------*/ 428/*----------------------------------------------------------------*/
409 429
410/*
411 * Allocates a new entry structure. The memory is allocated in one lump,
412 * so we just handing it out here. Returns NULL if all entries have
413 * already been allocated. Cannot fail otherwise.
414 */
415static struct entry *alloc_entry(struct mq_policy *mq)
416{
417 struct entry *e;
418
419 if (mq->nr_entries_allocated >= mq->nr_entries) {
420 BUG_ON(!list_empty(&mq->free));
421 return NULL;
422 }
423
424 e = list_entry(list_pop(&mq->free), struct entry, list);
425 INIT_LIST_HEAD(&e->list);
426 INIT_HLIST_NODE(&e->hlist);
427
428 mq->nr_entries_allocated++;
429 return e;
430}
431
432/*----------------------------------------------------------------*/
433
434/*
435 * Mark cache blocks allocated or not in the bitset.
436 */
437static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock)
438{
439 BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
440 BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset));
441
442 set_bit(from_cblock(cblock), mq->allocation_bitset);
443 mq->nr_cblocks_allocated++;
444}
445
446static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock)
447{
448 BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size));
449 BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset));
450
451 clear_bit(from_cblock(cblock), mq->allocation_bitset);
452 mq->nr_cblocks_allocated--;
453}
454
455static bool any_free_cblocks(struct mq_policy *mq) 430static bool any_free_cblocks(struct mq_policy *mq)
456{ 431{
457 return mq->nr_cblocks_allocated < from_cblock(mq->cache_size); 432 return !epool_empty(&mq->cache_pool);
458} 433}
459 434
460static bool any_clean_cblocks(struct mq_policy *mq) 435static bool any_clean_cblocks(struct mq_policy *mq)
@@ -462,48 +437,6 @@ static bool any_clean_cblocks(struct mq_policy *mq)
462 return !queue_empty(&mq->cache_clean); 437 return !queue_empty(&mq->cache_clean);
463} 438}
464 439
465/*
466 * Fills result out with a cache block that isn't in use, or return
467 * -ENOSPC. This does _not_ mark the cblock as allocated, the caller is
468 * reponsible for that.
469 */
470static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end,
471 dm_cblock_t *result, unsigned *last_word)
472{
473 int r = -ENOSPC;
474 unsigned w;
475
476 for (w = begin; w < end; w++) {
477 /*
478 * ffz is undefined if no zero exists
479 */
480 if (mq->allocation_bitset[w] != ~0UL) {
481 *last_word = w;
482 *result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w]));
483 if (from_cblock(*result) < from_cblock(mq->cache_size))
484 r = 0;
485
486 break;
487 }
488 }
489
490 return r;
491}
492
493static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result)
494{
495 int r;
496
497 if (!any_free_cblocks(mq))
498 return -ENOSPC;
499
500 r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word);
501 if (r == -ENOSPC && mq->find_free_last_word)
502 r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word);
503
504 return r;
505}
506
507/*----------------------------------------------------------------*/ 440/*----------------------------------------------------------------*/
508 441
509/* 442/*
@@ -520,34 +453,35 @@ static unsigned queue_level(struct entry *e)
520 return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u); 453 return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
521} 454}
522 455
456static bool in_cache(struct mq_policy *mq, struct entry *e)
457{
458 return in_pool(&mq->cache_pool, e);
459}
460
523/* 461/*
524 * Inserts the entry into the pre_cache or the cache. Ensures the cache 462 * Inserts the entry into the pre_cache or the cache. Ensures the cache
525 * block is marked as allocated if necc. Inserts into the hash table. Sets the 463 * block is marked as allocated if necc. Inserts into the hash table.
526 * tick which records when the entry was last moved about. 464 * Sets the tick which records when the entry was last moved about.
527 */ 465 */
528static void push(struct mq_policy *mq, struct entry *e) 466static void push(struct mq_policy *mq, struct entry *e)
529{ 467{
530 e->tick = mq->tick; 468 e->tick = mq->tick;
531 hash_insert(mq, e); 469 hash_insert(mq, e);
532 470
533 if (e->in_cache) { 471 if (in_cache(mq, e))
534 alloc_cblock(mq, e->cblock);
535 queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean, 472 queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
536 queue_level(e), &e->list); 473 queue_level(e), &e->list);
537 } else 474 else
538 queue_push(&mq->pre_cache, queue_level(e), &e->list); 475 queue_push(&mq->pre_cache, queue_level(e), &e->list);
539} 476}
540 477
541/* 478/*
542 * Removes an entry from pre_cache or cache. Removes from the hash table. 479 * Removes an entry from pre_cache or cache. Removes from the hash table.
543 * Frees off the cache block if necc.
544 */ 480 */
545static void del(struct mq_policy *mq, struct entry *e) 481static void del(struct mq_policy *mq, struct entry *e)
546{ 482{
547 queue_remove(&e->list); 483 queue_remove(&e->list);
548 hash_remove(e); 484 hash_remove(e);
549 if (e->in_cache)
550 free_cblock(mq, e->cblock);
551} 485}
552 486
553/* 487/*
@@ -564,8 +498,6 @@ static struct entry *pop(struct mq_policy *mq, struct queue *q)
564 498
565 e = container_of(h, struct entry, list); 499 e = container_of(h, struct entry, list);
566 hash_remove(e); 500 hash_remove(e);
567 if (e->in_cache)
568 free_cblock(mq, e->cblock);
569 501
570 return e; 502 return e;
571} 503}
@@ -599,9 +531,7 @@ static void check_generation(struct mq_policy *mq)
599 struct list_head *head; 531 struct list_head *head;
600 struct entry *e; 532 struct entry *e;
601 533
602 if ((mq->hit_count >= mq->generation_period) && 534 if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) {
603 (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) {
604
605 mq->hit_count = 0; 535 mq->hit_count = 0;
606 mq->generation++; 536 mq->generation++;
607 537
@@ -668,7 +598,7 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
668 * - set the hit count to a hard coded value other than 1, eg, is it better 598 * - set the hit count to a hard coded value other than 1, eg, is it better
669 * if it goes in at level 2? 599 * if it goes in at level 2?
670 */ 600 */
671static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t *cblock) 601static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
672{ 602{
673 struct entry *demoted = pop(mq, &mq->cache_clean); 603 struct entry *demoted = pop(mq, &mq->cache_clean);
674 604
@@ -682,12 +612,14 @@ static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t
682 */ 612 */
683 return -ENOSPC; 613 return -ENOSPC;
684 614
685 *cblock = demoted->cblock;
686 *oblock = demoted->oblock; 615 *oblock = demoted->oblock;
687 demoted->in_cache = false; 616 free_entry(&mq->cache_pool, demoted);
688 demoted->dirty = false; 617
689 demoted->hit_count = 1; 618 /*
690 push(mq, demoted); 619 * We used to put the demoted block into the pre-cache, but I think
620 * it's simpler to just let it work it's way up from zero again.
621 * Stops blocks flickering in and out of the cache.
622 */
691 623
692 return 0; 624 return 0;
693} 625}
@@ -735,9 +667,9 @@ static int cache_entry_found(struct mq_policy *mq,
735{ 667{
736 requeue_and_update_tick(mq, e); 668 requeue_and_update_tick(mq, e);
737 669
738 if (e->in_cache) { 670 if (in_cache(mq, e)) {
739 result->op = POLICY_HIT; 671 result->op = POLICY_HIT;
740 result->cblock = e->cblock; 672 result->cblock = infer_cblock(&mq->cache_pool, e);
741 } 673 }
742 674
743 return 0; 675 return 0;
@@ -751,11 +683,12 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
751 struct policy_result *result) 683 struct policy_result *result)
752{ 684{
753 int r; 685 int r;
754 dm_cblock_t cblock; 686 struct entry *new_e;
755 687
756 if (find_free_cblock(mq, &cblock) == -ENOSPC) { 688 /* Ensure there's a free cblock in the cache */
689 if (epool_empty(&mq->cache_pool)) {
757 result->op = POLICY_REPLACE; 690 result->op = POLICY_REPLACE;
758 r = demote_cblock(mq, &result->old_oblock, &cblock); 691 r = demote_cblock(mq, &result->old_oblock);
759 if (r) { 692 if (r) {
760 result->op = POLICY_MISS; 693 result->op = POLICY_MISS;
761 return 0; 694 return 0;
@@ -763,12 +696,20 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
763 } else 696 } else
764 result->op = POLICY_NEW; 697 result->op = POLICY_NEW;
765 698
766 result->cblock = e->cblock = cblock; 699 new_e = alloc_entry(&mq->cache_pool);
700 BUG_ON(!new_e);
701
702 new_e->oblock = e->oblock;
703 new_e->dirty = false;
704 new_e->hit_count = e->hit_count;
705 new_e->generation = e->generation;
706 new_e->tick = e->tick;
767 707
768 del(mq, e); 708 del(mq, e);
769 e->in_cache = true; 709 free_entry(&mq->pre_cache_pool, e);
770 e->dirty = false; 710 push(mq, new_e);
771 push(mq, e); 711
712 result->cblock = infer_cblock(&mq->cache_pool, new_e);
772 713
773 return 0; 714 return 0;
774} 715}
@@ -793,21 +734,10 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
793 return r; 734 return r;
794} 735}
795 736
796static void insert_entry_in_pre_cache(struct mq_policy *mq,
797 struct entry *e, dm_oblock_t oblock)
798{
799 e->in_cache = false;
800 e->dirty = false;
801 e->oblock = oblock;
802 e->hit_count = 1;
803 e->generation = mq->generation;
804 push(mq, e);
805}
806
807static void insert_in_pre_cache(struct mq_policy *mq, 737static void insert_in_pre_cache(struct mq_policy *mq,
808 dm_oblock_t oblock) 738 dm_oblock_t oblock)
809{ 739{
810 struct entry *e = alloc_entry(mq); 740 struct entry *e = alloc_entry(&mq->pre_cache_pool);
811 741
812 if (!e) 742 if (!e)
813 /* 743 /*
@@ -821,7 +751,11 @@ static void insert_in_pre_cache(struct mq_policy *mq,
821 return; 751 return;
822 } 752 }
823 753
824 insert_entry_in_pre_cache(mq, e, oblock); 754 e->dirty = false;
755 e->oblock = oblock;
756 e->hit_count = 1;
757 e->generation = mq->generation;
758 push(mq, e);
825} 759}
826 760
827static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, 761static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
@@ -829,10 +763,10 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
829{ 763{
830 int r; 764 int r;
831 struct entry *e; 765 struct entry *e;
832 dm_cblock_t cblock;
833 766
834 if (find_free_cblock(mq, &cblock) == -ENOSPC) { 767 if (epool_empty(&mq->cache_pool)) {
835 r = demote_cblock(mq, &result->old_oblock, &cblock); 768 result->op = POLICY_REPLACE;
769 r = demote_cblock(mq, &result->old_oblock);
836 if (unlikely(r)) { 770 if (unlikely(r)) {
837 result->op = POLICY_MISS; 771 result->op = POLICY_MISS;
838 insert_in_pre_cache(mq, oblock); 772 insert_in_pre_cache(mq, oblock);
@@ -842,31 +776,21 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
842 /* 776 /*
843 * This will always succeed, since we've just demoted. 777 * This will always succeed, since we've just demoted.
844 */ 778 */
845 e = pop(mq, &mq->pre_cache); 779 e = alloc_entry(&mq->cache_pool);
846 result->op = POLICY_REPLACE; 780 BUG_ON(!e);
847 781
848 } else { 782 } else {
849 e = alloc_entry(mq); 783 e = alloc_entry(&mq->cache_pool);
850 if (unlikely(!e))
851 e = pop(mq, &mq->pre_cache);
852
853 if (unlikely(!e)) {
854 result->op = POLICY_MISS;
855 return;
856 }
857
858 result->op = POLICY_NEW; 784 result->op = POLICY_NEW;
859 } 785 }
860 786
861 e->oblock = oblock; 787 e->oblock = oblock;
862 e->cblock = cblock;
863 e->in_cache = true;
864 e->dirty = false; 788 e->dirty = false;
865 e->hit_count = 1; 789 e->hit_count = 1;
866 e->generation = mq->generation; 790 e->generation = mq->generation;
867 push(mq, e); 791 push(mq, e);
868 792
869 result->cblock = e->cblock; 793 result->cblock = infer_cblock(&mq->cache_pool, e);
870} 794}
871 795
872static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, 796static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
@@ -897,13 +821,16 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock,
897 int r = 0; 821 int r = 0;
898 struct entry *e = hash_lookup(mq, oblock); 822 struct entry *e = hash_lookup(mq, oblock);
899 823
900 if (e && e->in_cache) 824 if (e && in_cache(mq, e))
901 r = cache_entry_found(mq, e, result); 825 r = cache_entry_found(mq, e, result);
826
902 else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL) 827 else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
903 result->op = POLICY_MISS; 828 result->op = POLICY_MISS;
829
904 else if (e) 830 else if (e)
905 r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, 831 r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
906 data_dir, result); 832 data_dir, result);
833
907 else 834 else
908 r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, 835 r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
909 data_dir, result); 836 data_dir, result);
@@ -930,9 +857,9 @@ static void mq_destroy(struct dm_cache_policy *p)
930{ 857{
931 struct mq_policy *mq = to_mq_policy(p); 858 struct mq_policy *mq = to_mq_policy(p);
932 859
933 free_bitset(mq->allocation_bitset);
934 kfree(mq->table); 860 kfree(mq->table);
935 free_entries(mq); 861 epool_exit(&mq->cache_pool);
862 epool_exit(&mq->pre_cache_pool);
936 kfree(mq); 863 kfree(mq);
937} 864}
938 865
@@ -980,8 +907,8 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
980 return -EWOULDBLOCK; 907 return -EWOULDBLOCK;
981 908
982 e = hash_lookup(mq, oblock); 909 e = hash_lookup(mq, oblock);
983 if (e && e->in_cache) { 910 if (e && in_cache(mq, e)) {
984 *cblock = e->cblock; 911 *cblock = infer_cblock(&mq->cache_pool, e);
985 r = 0; 912 r = 0;
986 } else 913 } else
987 r = -ENOENT; 914 r = -ENOENT;
@@ -991,38 +918,34 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
991 return r; 918 return r;
992} 919}
993 920
994/* 921static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set)
995 * FIXME: __mq_set_clear_dirty can block due to mutex.
996 * Ideally a policy should not block in functions called
997 * from the map() function. Explore using RCU.
998 */
999static void __mq_set_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock, bool set)
1000{ 922{
1001 struct mq_policy *mq = to_mq_policy(p);
1002 struct entry *e; 923 struct entry *e;
1003 924
1004 mutex_lock(&mq->lock);
1005 e = hash_lookup(mq, oblock); 925 e = hash_lookup(mq, oblock);
1006 if (!e) 926 BUG_ON(!e || !in_cache(mq, e));
1007 DMWARN("__mq_set_clear_dirty called for a block that isn't in the cache");
1008 else {
1009 BUG_ON(!e->in_cache);
1010 927
1011 del(mq, e); 928 del(mq, e);
1012 e->dirty = set; 929 e->dirty = set;
1013 push(mq, e); 930 push(mq, e);
1014 }
1015 mutex_unlock(&mq->lock);
1016} 931}
1017 932
1018static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) 933static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
1019{ 934{
1020 __mq_set_clear_dirty(p, oblock, true); 935 struct mq_policy *mq = to_mq_policy(p);
936
937 mutex_lock(&mq->lock);
938 __mq_set_clear_dirty(mq, oblock, true);
939 mutex_unlock(&mq->lock);
1021} 940}
1022 941
1023static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) 942static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
1024{ 943{
1025 __mq_set_clear_dirty(p, oblock, false); 944 struct mq_policy *mq = to_mq_policy(p);
945
946 mutex_lock(&mq->lock);
947 __mq_set_clear_dirty(mq, oblock, false);
948 mutex_unlock(&mq->lock);
1026} 949}
1027 950
1028static int mq_load_mapping(struct dm_cache_policy *p, 951static int mq_load_mapping(struct dm_cache_policy *p,
@@ -1032,13 +955,8 @@ static int mq_load_mapping(struct dm_cache_policy *p,
1032 struct mq_policy *mq = to_mq_policy(p); 955 struct mq_policy *mq = to_mq_policy(p);
1033 struct entry *e; 956 struct entry *e;
1034 957
1035 e = alloc_entry(mq); 958 e = alloc_particular_entry(&mq->cache_pool, cblock);
1036 if (!e)
1037 return -ENOMEM;
1038
1039 e->cblock = cblock;
1040 e->oblock = oblock; 959 e->oblock = oblock;
1041 e->in_cache = true;
1042 e->dirty = false; /* this gets corrected in a minute */ 960 e->dirty = false; /* this gets corrected in a minute */
1043 e->hit_count = hint_valid ? hint : 1; 961 e->hit_count = hint_valid ? hint : 1;
1044 e->generation = mq->generation; 962 e->generation = mq->generation;
@@ -1047,52 +965,58 @@ static int mq_load_mapping(struct dm_cache_policy *p,
1047 return 0; 965 return 0;
1048} 966}
1049 967
968static int mq_save_hints(struct mq_policy *mq, struct queue *q,
969 policy_walk_fn fn, void *context)
970{
971 int r;
972 unsigned level;
973 struct entry *e;
974
975 for (level = 0; level < NR_QUEUE_LEVELS; level++)
976 list_for_each_entry(e, q->qs + level, list) {
977 r = fn(context, infer_cblock(&mq->cache_pool, e),
978 e->oblock, e->hit_count);
979 if (r)
980 return r;
981 }
982
983 return 0;
984}
985
1050static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, 986static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
1051 void *context) 987 void *context)
1052{ 988{
1053 struct mq_policy *mq = to_mq_policy(p); 989 struct mq_policy *mq = to_mq_policy(p);
1054 int r = 0; 990 int r = 0;
1055 struct entry *e;
1056 unsigned level;
1057 991
1058 mutex_lock(&mq->lock); 992 mutex_lock(&mq->lock);
1059 993
1060 for (level = 0; level < NR_QUEUE_LEVELS; level++) 994 r = mq_save_hints(mq, &mq->cache_clean, fn, context);
1061 list_for_each_entry(e, &mq->cache_clean.qs[level], list) { 995 if (!r)
1062 r = fn(context, e->cblock, e->oblock, e->hit_count); 996 r = mq_save_hints(mq, &mq->cache_dirty, fn, context);
1063 if (r)
1064 goto out;
1065 }
1066
1067 for (level = 0; level < NR_QUEUE_LEVELS; level++)
1068 list_for_each_entry(e, &mq->cache_dirty.qs[level], list) {
1069 r = fn(context, e->cblock, e->oblock, e->hit_count);
1070 if (r)
1071 goto out;
1072 }
1073 997
1074out:
1075 mutex_unlock(&mq->lock); 998 mutex_unlock(&mq->lock);
1076 999
1077 return r; 1000 return r;
1078} 1001}
1079 1002
1080static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) 1003static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
1081{ 1004{
1082 struct mq_policy *mq = to_mq_policy(p);
1083 struct entry *e; 1005 struct entry *e;
1084 1006
1085 mutex_lock(&mq->lock);
1086
1087 e = hash_lookup(mq, oblock); 1007 e = hash_lookup(mq, oblock);
1088 1008 BUG_ON(!e || !in_cache(mq, e));
1089 BUG_ON(!e || !e->in_cache);
1090 1009
1091 del(mq, e); 1010 del(mq, e);
1092 e->in_cache = false; 1011 free_entry(&mq->cache_pool, e);
1093 e->dirty = false; 1012}
1094 push(mq, e); 1013
1014static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
1015{
1016 struct mq_policy *mq = to_mq_policy(p);
1095 1017
1018 mutex_lock(&mq->lock);
1019 __remove_mapping(mq, oblock);
1096 mutex_unlock(&mq->lock); 1020 mutex_unlock(&mq->lock);
1097} 1021}
1098 1022
@@ -1105,7 +1029,7 @@ static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
1105 return -ENODATA; 1029 return -ENODATA;
1106 1030
1107 *oblock = e->oblock; 1031 *oblock = e->oblock;
1108 *cblock = e->cblock; 1032 *cblock = infer_cblock(&mq->cache_pool, e);
1109 e->dirty = false; 1033 e->dirty = false;
1110 push(mq, e); 1034 push(mq, e);
1111 1035
@@ -1125,17 +1049,17 @@ static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
1125 return r; 1049 return r;
1126} 1050}
1127 1051
1128static void force_mapping(struct mq_policy *mq, 1052static void __force_mapping(struct mq_policy *mq,
1129 dm_oblock_t current_oblock, dm_oblock_t new_oblock) 1053 dm_oblock_t current_oblock, dm_oblock_t new_oblock)
1130{ 1054{
1131 struct entry *e = hash_lookup(mq, current_oblock); 1055 struct entry *e = hash_lookup(mq, current_oblock);
1132 1056
1133 BUG_ON(!e || !e->in_cache); 1057 if (e && in_cache(mq, e)) {
1134 1058 del(mq, e);
1135 del(mq, e); 1059 e->oblock = new_oblock;
1136 e->oblock = new_oblock; 1060 e->dirty = true;
1137 e->dirty = true; 1061 push(mq, e);
1138 push(mq, e); 1062 }
1139} 1063}
1140 1064
1141static void mq_force_mapping(struct dm_cache_policy *p, 1065static void mq_force_mapping(struct dm_cache_policy *p,
@@ -1144,7 +1068,7 @@ static void mq_force_mapping(struct dm_cache_policy *p,
1144 struct mq_policy *mq = to_mq_policy(p); 1068 struct mq_policy *mq = to_mq_policy(p);
1145 1069
1146 mutex_lock(&mq->lock); 1070 mutex_lock(&mq->lock);
1147 force_mapping(mq, current_oblock, new_oblock); 1071 __force_mapping(mq, current_oblock, new_oblock);
1148 mutex_unlock(&mq->lock); 1072 mutex_unlock(&mq->lock);
1149} 1073}
1150 1074
@@ -1154,7 +1078,7 @@ static dm_cblock_t mq_residency(struct dm_cache_policy *p)
1154 struct mq_policy *mq = to_mq_policy(p); 1078 struct mq_policy *mq = to_mq_policy(p);
1155 1079
1156 mutex_lock(&mq->lock); 1080 mutex_lock(&mq->lock);
1157 r = to_cblock(mq->nr_cblocks_allocated); 1081 r = to_cblock(mq->cache_pool.nr_allocated);
1158 mutex_unlock(&mq->lock); 1082 mutex_unlock(&mq->lock);
1159 1083
1160 return r; 1084 return r;
@@ -1227,7 +1151,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
1227 sector_t origin_size, 1151 sector_t origin_size,
1228 sector_t cache_block_size) 1152 sector_t cache_block_size)
1229{ 1153{
1230 int r;
1231 struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL); 1154 struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);
1232 1155
1233 if (!mq) 1156 if (!mq)
@@ -1235,8 +1158,18 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
1235 1158
1236 init_policy_functions(mq); 1159 init_policy_functions(mq);
1237 iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT); 1160 iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
1238
1239 mq->cache_size = cache_size; 1161 mq->cache_size = cache_size;
1162
1163 if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) {
1164 DMERR("couldn't initialize pool of pre-cache entries");
1165 goto bad_pre_cache_init;
1166 }
1167
1168 if (epool_init(&mq->cache_pool, from_cblock(cache_size))) {
1169 DMERR("couldn't initialize pool of cache entries");
1170 goto bad_cache_init;
1171 }
1172
1240 mq->tick_protected = 0; 1173 mq->tick_protected = 0;
1241 mq->tick = 0; 1174 mq->tick = 0;
1242 mq->hit_count = 0; 1175 mq->hit_count = 0;
@@ -1244,8 +1177,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
1244 mq->promote_threshold = 0; 1177 mq->promote_threshold = 0;
1245 mutex_init(&mq->lock); 1178 mutex_init(&mq->lock);
1246 spin_lock_init(&mq->tick_lock); 1179 spin_lock_init(&mq->tick_lock);
1247 mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG);
1248 mq->find_free_last_word = 0;
1249 1180
1250 queue_init(&mq->pre_cache); 1181 queue_init(&mq->pre_cache);
1251 queue_init(&mq->cache_clean); 1182 queue_init(&mq->cache_clean);
@@ -1253,31 +1184,19 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
1253 1184
1254 mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U); 1185 mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
1255 1186
1256 mq->nr_entries = 2 * from_cblock(cache_size);
1257 r = alloc_entries(mq, mq->nr_entries);
1258 if (r)
1259 goto bad_cache_alloc;
1260
1261 mq->nr_entries_allocated = 0;
1262 mq->nr_cblocks_allocated = 0;
1263
1264 mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); 1187 mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
1265 mq->hash_bits = ffs(mq->nr_buckets) - 1; 1188 mq->hash_bits = ffs(mq->nr_buckets) - 1;
1266 mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL); 1189 mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL);
1267 if (!mq->table) 1190 if (!mq->table)
1268 goto bad_alloc_table; 1191 goto bad_alloc_table;
1269 1192
1270 mq->allocation_bitset = alloc_bitset(from_cblock(cache_size));
1271 if (!mq->allocation_bitset)
1272 goto bad_alloc_bitset;
1273
1274 return &mq->policy; 1193 return &mq->policy;
1275 1194
1276bad_alloc_bitset:
1277 kfree(mq->table);
1278bad_alloc_table: 1195bad_alloc_table:
1279 free_entries(mq); 1196 epool_exit(&mq->cache_pool);
1280bad_cache_alloc: 1197bad_cache_init:
1198 epool_exit(&mq->pre_cache_pool);
1199bad_pre_cache_init:
1281 kfree(mq); 1200 kfree(mq);
1282 1201
1283 return NULL; 1202 return NULL;
@@ -1287,7 +1206,7 @@ bad_cache_alloc:
1287 1206
1288static struct dm_cache_policy_type mq_policy_type = { 1207static struct dm_cache_policy_type mq_policy_type = {
1289 .name = "mq", 1208 .name = "mq",
1290 .version = {1, 0, 0}, 1209 .version = {1, 1, 0},
1291 .hint_size = 4, 1210 .hint_size = 4,
1292 .owner = THIS_MODULE, 1211 .owner = THIS_MODULE,
1293 .create = mq_create 1212 .create = mq_create
@@ -1295,7 +1214,7 @@ static struct dm_cache_policy_type mq_policy_type = {
1295 1214
1296static struct dm_cache_policy_type default_policy_type = { 1215static struct dm_cache_policy_type default_policy_type = {
1297 .name = "default", 1216 .name = "default",
1298 .version = {1, 0, 0}, 1217 .version = {1, 1, 0},
1299 .hint_size = 4, 1218 .hint_size = 4,
1300 .owner = THIS_MODULE, 1219 .owner = THIS_MODULE,
1301 .create = mq_create 1220 .create = mq_create