author    Dan Magenheimer <dan.magenheimer@oracle.com>  2011-07-07 10:37:19 -0400
committer Greg Kroah-Hartman <gregkh@suse.de>  2011-07-08 17:18:53 -0400
commit    966b9016a175f0c2a555e937fb918fd845e4b2cc
tree      ce8f7df6269bc77a894c095eb8f9d168d99dcd87 /drivers/staging
parent    94c97e8e0692ee3a58868a013b973fcf7fed348c
staging: zcache: support multiple clients, prep for KVM and RAMster
This is version 3 of an update to zcache, incorporating feedback from the
list. This patch adds support to the in-kernel transcendent memory ("tmem")
code and the zcache driver for multiple clients, which will be needed for
both RAMster and KVM support. It also adds additional tmem callbacks to
support RAMster and corresponding no-op stubs in the zcache driver.

In v2, I've also taken the liberty of adding some additional sysfs variables
to both surface information and allow policy control. Those experimenting
with zcache should find them useful. V3 clarifies some code that walks and
declares arrays.

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
[v3: error27@gmail.com: fix array bounds/walking]
[v2: konrad.wilk@oracle.com: fix bools, add check for NULL, fix a comment]
[v2: sjenning@linux.vnet.ibm.com: add info/tunables for poor compression]
[v2: marcusklemm@googlemail.com: add tunable for max persistent pages]
Acked-by: Dan Carpenter <error27@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: linux-mm@kvack.org
Cc: kvm@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
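The crux of the interface change is the widened tmem_pamops callback table
(see the tmem.h hunk below). As a reading aid, here is a minimal sketch of
what a PAM implementation registers after this patch; the my_* names are
hypothetical, the signatures follow the tmem.h hunk, and a local-only backend
can stub the RAMster-oriented hooks exactly as the zcache driver does further
down:

/* Sketch only: a hypothetical local-only PAM backend against the new ops. */
static void *my_create(char *data, size_t size, bool raw, int eph,
			struct tmem_pool *pool, struct tmem_oid *oid,
			uint32_t index)
{
	/* compress/copy data into backend storage; NULL rejects the put */
	return NULL;
}

static int my_get_data(char *data, size_t *size, bool raw, void *pampd,
			struct tmem_pool *pool, struct tmem_oid *oid,
			uint32_t index)
{
	/* copy stored data back out; the pampd stays in the tree */
	return 0;
}

static int my_get_data_and_free(char *data, size_t *size, bool raw,
			void *pampd, struct tmem_pool *pool,
			struct tmem_oid *oid, uint32_t index)
{
	/* like get_data, but also releases the pampd (exclusive get) */
	return 0;
}

static void my_free(void *pampd, struct tmem_pool *pool,
			struct tmem_oid *oid, uint32_t index) { }
static void my_free_obj(struct tmem_pool *pool, struct tmem_obj *obj) { }
static void my_new_obj(struct tmem_obj *obj) { }
static int my_replace_in_obj(void *pampd, struct tmem_obj *obj) { return -1; }
static bool my_is_remote(void *pampd) { return false; }	/* local-only */

static struct tmem_pamops my_pamops = {
	.create = my_create,
	.get_data = my_get_data,
	.get_data_and_free = my_get_data_and_free,
	.free = my_free,
	.free_obj = my_free_obj,
	.new_obj = my_new_obj,
	.replace_in_obj = my_replace_in_obj,
	.is_remote = my_is_remote,
};
/* a backend would then call tmem_register_pamops(&my_pamops); */

The is_remote() hook matters for RAMster: when it returns true, tmem_get()
drops the hashbucket lock before the (potentially slow) remote data copy, as
the tmem.c hunk below shows.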
Diffstat (limited to 'drivers/staging')
-rw-r--r--  drivers/staging/zcache/tmem.c   | 100
-rw-r--r--  drivers/staging/zcache/tmem.h   |  23
-rw-r--r--  drivers/staging/zcache/zcache.c | 517
3 files changed, 523 insertions(+), 117 deletions(-)
diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c
index e954d405b138..975e34bcd722 100644
--- a/drivers/staging/zcache/tmem.c
+++ b/drivers/staging/zcache/tmem.c
@@ -142,6 +142,7 @@ static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
 	obj->oid = *oidp;
 	obj->objnode_count = 0;
 	obj->pampd_count = 0;
+	(*tmem_pamops.new_obj)(obj);
 	SET_SENTINEL(obj, OBJ);
 	while (*new) {
 		BUG_ON(RB_EMPTY_NODE(*new));
@@ -274,7 +275,7 @@ static void tmem_objnode_free(struct tmem_objnode *objnode)
 /*
  * lookup index in object and return associated pampd (or NULL if not found)
  */
-static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
+static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
 {
 	unsigned int height, shift;
 	struct tmem_objnode **slot = NULL;
@@ -303,9 +304,33 @@ static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
 		height--;
 	}
 out:
+	return slot != NULL ? (void **)slot : NULL;
+}
+
+static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
+{
+	struct tmem_objnode **slot;
+
+	slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
 	return slot != NULL ? *slot : NULL;
 }
 
+static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
+					void *new_pampd)
+{
+	struct tmem_objnode **slot;
+	void *ret = NULL;
+
+	slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
+	if ((slot != NULL) && (*slot != NULL)) {
+		void *old_pampd = *(void **)slot;
+		*(void **)slot = new_pampd;
+		(*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
+		ret = new_pampd;
+	}
+	return ret;
+}
+
 static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
 				void *pampd)
 {
@@ -456,7 +481,7 @@ static void tmem_objnode_node_destroy(struct tmem_obj *obj,
 			if (ht == 1) {
 				obj->pampd_count--;
 				(*tmem_pamops.free)(objnode->slots[i],
-						obj->pool);
+						obj->pool, NULL, 0);
 				objnode->slots[i] = NULL;
 				continue;
 			}
@@ -473,7 +498,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
 		return;
 	if (obj->objnode_tree_height == 0) {
 		obj->pampd_count--;
-		(*tmem_pamops.free)(obj->objnode_tree_root, obj->pool);
+		(*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
 	} else {
 		tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
 					obj->objnode_tree_height);
@@ -481,6 +506,7 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
 		obj->objnode_tree_height = 0;
 	}
 	obj->objnode_tree_root = NULL;
+	(*tmem_pamops.free_obj)(obj->pool, obj);
 }
 
 /*
@@ -503,15 +529,13 @@ static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
  * always flushes for simplicity.
  */
 int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
-		struct page *page)
+		char *data, size_t size, bool raw, bool ephemeral)
 {
 	struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
 	void *pampd = NULL, *pampd_del = NULL;
 	int ret = -ENOMEM;
-	bool ephemeral;
 	struct tmem_hashbucket *hb;
 
-	ephemeral = is_ephemeral(pool);
 	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
 	spin_lock(&hb->lock);
 	obj = objfound = tmem_obj_find(hb, oidp);
@@ -521,7 +545,7 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
 			/* if found, is a dup put, flush the old one */
 			pampd_del = tmem_pampd_delete_from_obj(obj, index);
 			BUG_ON(pampd_del != pampd);
-			(*tmem_pamops.free)(pampd, pool);
+			(*tmem_pamops.free)(pampd, pool, oidp, index);
 			if (obj->pampd_count == 0) {
 				objnew = obj;
 				objfound = NULL;
@@ -538,7 +562,8 @@ int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
 	}
 	BUG_ON(obj == NULL);
 	BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
-	pampd = (*tmem_pamops.create)(obj->pool, &obj->oid, index, page);
+	pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
+					obj->pool, &obj->oid, index);
 	if (unlikely(pampd == NULL))
 		goto free;
 	ret = tmem_pampd_add_to_obj(obj, index, pampd);
@@ -551,7 +576,7 @@ delete_and_free:
 	(void)tmem_pampd_delete_from_obj(obj, index);
 free:
 	if (pampd)
-		(*tmem_pamops.free)(pampd, pool);
+		(*tmem_pamops.free)(pampd, pool, NULL, 0);
 	if (objnew) {
 		tmem_obj_free(objnew, hb);
 		(*tmem_hostops.obj_free)(objnew, pool);
@@ -573,41 +598,52 @@ out:
  * "put" done with the same handle).
 
  */
-int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp,
-		uint32_t index, struct page *page)
+int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
+		char *data, size_t *size, bool raw, int get_and_free)
 {
 	struct tmem_obj *obj;
 	void *pampd;
 	bool ephemeral = is_ephemeral(pool);
 	uint32_t ret = -1;
 	struct tmem_hashbucket *hb;
+	bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
+	bool lock_held = false;
 
 	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
 	spin_lock(&hb->lock);
+	lock_held = true;
 	obj = tmem_obj_find(hb, oidp);
 	if (obj == NULL)
 		goto out;
-	ephemeral = is_ephemeral(pool);
-	if (ephemeral)
+	if (free)
 		pampd = tmem_pampd_delete_from_obj(obj, index);
 	else
 		pampd = tmem_pampd_lookup_in_obj(obj, index);
 	if (pampd == NULL)
 		goto out;
-	ret = (*tmem_pamops.get_data)(page, pampd, pool);
-	if (ret < 0)
-		goto out;
-	if (ephemeral) {
-		(*tmem_pamops.free)(pampd, pool);
+	if (free) {
 		if (obj->pampd_count == 0) {
 			tmem_obj_free(obj, hb);
 			(*tmem_hostops.obj_free)(obj, pool);
 			obj = NULL;
 		}
 	}
+	if (tmem_pamops.is_remote(pampd)) {
+		lock_held = false;
+		spin_unlock(&hb->lock);
+	}
+	if (free)
+		ret = (*tmem_pamops.get_data_and_free)(
+				data, size, raw, pampd, pool, oidp, index);
+	else
+		ret = (*tmem_pamops.get_data)(
+				data, size, raw, pampd, pool, oidp, index);
+	if (ret < 0)
+		goto out;
 	ret = 0;
 out:
-	spin_unlock(&hb->lock);
+	if (lock_held)
+		spin_unlock(&hb->lock);
 	return ret;
 }
 
@@ -632,7 +668,7 @@ int tmem_flush_page(struct tmem_pool *pool,
 	pampd = tmem_pampd_delete_from_obj(obj, index);
 	if (pampd == NULL)
 		goto out;
-	(*tmem_pamops.free)(pampd, pool);
+	(*tmem_pamops.free)(pampd, pool, oidp, index);
 	if (obj->pampd_count == 0) {
 		tmem_obj_free(obj, hb);
 		(*tmem_hostops.obj_free)(obj, pool);
@@ -645,6 +681,30 @@ out:
 }
 
 /*
+ * If a page in tmem matches the handle, replace the page so that any
+ * subsequent "get" gets the new page.  Returns 0 if
+ * there was a page to replace, else returns -1.
+ */
+int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
+			uint32_t index, void *new_pampd)
+{
+	struct tmem_obj *obj;
+	int ret = -1;
+	struct tmem_hashbucket *hb;
+
+	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
+	spin_lock(&hb->lock);
+	obj = tmem_obj_find(hb, oidp);
+	if (obj == NULL)
+		goto out;
+	new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
+	ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
+out:
+	spin_unlock(&hb->lock);
+	return ret;
+}
+
+/*
  * "Flush" all pages in tmem matching this oid.
  */
 int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
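One subtlety in the tmem_get() rewrite above: the new get_and_free argument
folds the old "ephemeral gets are exclusive" rule into a value the caller
controls. A hedged restatement of the convention, lifted directly from the
"bool free = ..." line in the hunk (the helper name is mine, not the patch's):

/*
 * get_and_free == 1:  always remove and free the pampd after the copy
 * get_and_free == 0:  remove and free only if the pool is ephemeral,
 *                     i.e. the pre-patch exclusive-get behavior
 * any other value:    never free (e.g. a peek at persistent data)
 */
static inline bool tmem_get_frees_pampd(int get_and_free, bool ephemeral)
{
	return (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
}

zcache simply passes is_ephemeral(pool) here, so its behavior is unchanged;
a RAMster-style caller could request an exclusive get on a persistent pool.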
diff --git a/drivers/staging/zcache/tmem.h b/drivers/staging/zcache/tmem.h
index 2e07e217d51f..ed147c4b110d 100644
--- a/drivers/staging/zcache/tmem.h
+++ b/drivers/staging/zcache/tmem.h
@@ -147,6 +147,7 @@ struct tmem_obj {
 	unsigned int objnode_tree_height;
 	unsigned long objnode_count;
 	long pampd_count;
+	void *extra; /* for private use by pampd implementation */
 	DECL_SENTINEL
 };
 
@@ -166,10 +167,18 @@ struct tmem_objnode {
 
 /* pampd abstract datatype methods provided by the PAM implementation */
 struct tmem_pamops {
-	void *(*create)(struct tmem_pool *, struct tmem_oid *, uint32_t,
-			struct page *);
-	int (*get_data)(struct page *, void *, struct tmem_pool *);
-	void (*free)(void *, struct tmem_pool *);
+	void *(*create)(char *, size_t, bool, int,
+			struct tmem_pool *, struct tmem_oid *, uint32_t);
+	int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
+			struct tmem_oid *, uint32_t);
+	int (*get_data_and_free)(char *, size_t *, bool, void *,
+			struct tmem_pool *, struct tmem_oid *,
+			uint32_t);
+	void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t);
+	void (*free_obj)(struct tmem_pool *, struct tmem_obj *);
+	bool (*is_remote)(void *);
+	void (*new_obj)(struct tmem_obj *);
+	int (*replace_in_obj)(void *, struct tmem_obj *);
 };
 extern void tmem_register_pamops(struct tmem_pamops *m);
 
@@ -184,9 +193,11 @@ extern void tmem_register_hostops(struct tmem_hostops *m);
 
 /* core tmem accessor functions */
 extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
-			struct page *page);
+			char *, size_t, bool, bool);
 extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
-			struct page *page);
+			char *, size_t *, bool, int);
+extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
+			void *);
 extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
 			uint32_t index);
 extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
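Note that the accessor prototypes above trade struct page for a char buffer
plus length, which is what allows a future RAMster node to move data that has
no local page. A caller-side sketch under that reading (the example_* wrappers
are hypothetical; they mirror what zcache_put_page()/zcache_get_page() do with
page_address() in the zcache.c diff below):

/* sketch: moving one whole page through the new buffer-based interface */
static int example_put(struct tmem_pool *pool, struct tmem_oid *oidp,
			uint32_t index, struct page *page)
{
	/* raw == 0 lets the backend compress; last arg is "ephemeral" */
	return tmem_put(pool, oidp, index, page_address(page),
			PAGE_SIZE, 0, is_ephemeral(pool));
}

static int example_get(struct tmem_pool *pool, struct tmem_oid *oidp,
			uint32_t index, struct page *page)
{
	size_t size = PAGE_SIZE;

	/* get_and_free = is_ephemeral(pool) keeps the legacy semantics */
	return tmem_get(pool, oidp, index, page_address(page),
			&size, 0, is_ephemeral(pool));
}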
diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c
index 77ac2d4d3ef1..65a81a0d7c49 100644
--- a/drivers/staging/zcache/zcache.c
+++ b/drivers/staging/zcache/zcache.c
@@ -49,6 +49,33 @@
 	(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
 #endif
 
+#define MAX_POOLS_PER_CLIENT 16
+
+#define MAX_CLIENTS 16
+#define LOCAL_CLIENT ((uint16_t)-1)
+struct zcache_client {
+	struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT];
+	struct xv_pool *xvpool;
+	bool allocated;
+	atomic_t refcount;
+};
+
+static struct zcache_client zcache_host;
+static struct zcache_client zcache_clients[MAX_CLIENTS];
+
+static inline uint16_t get_client_id_from_client(struct zcache_client *cli)
+{
+	BUG_ON(cli == NULL);
+	if (cli == &zcache_host)
+		return LOCAL_CLIENT;
+	return cli - &zcache_clients[0];
+}
+
+static inline bool is_local_client(struct zcache_client *cli)
+{
+	return cli == &zcache_host;
+}
+
 /**********
  * Compression buddies ("zbud") provides for packing two (or, possibly
  * in the future, more) compressed ephemeral pages into a single "raw"
@@ -72,7 +99,8 @@
 #define ZBUD_MAX_BUDS 2
 
 struct zbud_hdr {
-	uint32_t pool_id;
+	uint16_t client_id;
+	uint16_t pool_id;
 	struct tmem_oid oid;
 	uint32_t index;
 	uint16_t size; /* compressed size in bytes, zero means unused */
@@ -120,6 +148,7 @@ static unsigned long zcache_zbud_curr_zbytes;
 static unsigned long zcache_zbud_cumul_zpages;
 static unsigned long zcache_zbud_cumul_zbytes;
 static unsigned long zcache_compress_poor;
+static unsigned long zcache_mean_compress_poor;
 
 /* forward references */
 static void *zcache_get_free_page(void);
@@ -294,7 +323,8 @@ static void zbud_free_and_delist(struct zbud_hdr *zh)
 	}
 }
 
-static struct zbud_hdr *zbud_create(uint32_t pool_id, struct tmem_oid *oid,
+static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id,
+					struct tmem_oid *oid,
 					uint32_t index, struct page *page,
 					void *cdata, unsigned size)
 {
@@ -353,6 +383,7 @@ init_zh:
 	zh->index = index;
 	zh->oid = *oid;
 	zh->pool_id = pool_id;
+	zh->client_id = client_id;
 	/* can wait to copy the data until the list locks are dropped */
 	spin_unlock(&zbud_budlists_spinlock);
 
@@ -407,7 +438,8 @@ static unsigned long zcache_evicted_raw_pages;
 static unsigned long zcache_evicted_buddied_pages;
 static unsigned long zcache_evicted_unbuddied_pages;
 
-static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid);
+static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id,
+						uint16_t poolid);
 static void zcache_put_pool(struct tmem_pool *pool);
 
 /*
@@ -417,7 +449,8 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)
 {
 	struct zbud_hdr *zh;
 	int i, j;
-	uint32_t pool_id[ZBUD_MAX_BUDS], index[ZBUD_MAX_BUDS];
+	uint32_t pool_id[ZBUD_MAX_BUDS], client_id[ZBUD_MAX_BUDS];
+	uint32_t index[ZBUD_MAX_BUDS];
 	struct tmem_oid oid[ZBUD_MAX_BUDS];
 	struct tmem_pool *pool;
 
@@ -426,6 +459,7 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)
 	for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) {
 		zh = &zbpg->buddy[i];
 		if (zh->size) {
+			client_id[j] = zh->client_id;
 			pool_id[j] = zh->pool_id;
 			oid[j] = zh->oid;
 			index[j] = zh->index;
@@ -435,7 +469,7 @@ static void zbud_evict_zbpg(struct zbud_page *zbpg)
 	}
 	spin_unlock(&zbpg->lock);
 	for (i = 0; i < j; i++) {
-		pool = zcache_get_pool_by_id(pool_id[i]);
+		pool = zcache_get_pool_by_id(client_id[i], pool_id[i]);
 		if (pool != NULL) {
 			tmem_flush_page(pool, &oid[i], index[i]);
 			zcache_put_pool(pool);
@@ -552,9 +586,8 @@ static int zbud_show_unbuddied_list_counts(char *buf)
 	int i;
 	char *p = buf;
 
-	for (i = 0; i < NCHUNKS - 1; i++)
+	for (i = 0; i < NCHUNKS; i++)
 		p += sprintf(p, "%u ", zbud_unbuddied[i].count);
-	p += sprintf(p, "%d\n", zbud_unbuddied[i].count);
 	return p - buf;
 }
 
@@ -602,7 +635,23 @@ struct zv_hdr {
 	DECL_SENTINEL
 };
 
-static const int zv_max_page_size = (PAGE_SIZE / 8) * 7;
+/* rudimentary policy limits */
+/* total number of persistent pages may not exceed this percentage */
+static unsigned int zv_page_count_policy_percent = 75;
+/*
+ * byte count defining poor compression; pages with greater zsize will be
+ * rejected
+ */
+static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7;
+/*
+ * byte count defining poor *mean* compression; pages with greater zsize
+ * will be rejected until sufficient better-compressed pages are accepted
+ * driving the mean below this threshold
+ */
+static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
+
+static unsigned long zv_curr_dist_counts[NCHUNKS];
+static unsigned long zv_cumul_dist_counts[NCHUNKS];
 
 static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
 				struct tmem_oid *oid, uint32_t index,
@@ -611,13 +660,18 @@ static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
 	struct page *page;
 	struct zv_hdr *zv = NULL;
 	uint32_t offset;
+	int alloc_size = clen + sizeof(struct zv_hdr);
+	int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
 	int ret;
 
 	BUG_ON(!irqs_disabled());
-	ret = xv_malloc(xvpool, clen + sizeof(struct zv_hdr),
+	BUG_ON(chunks >= NCHUNKS);
+	ret = xv_malloc(xvpool, alloc_size,
 			&page, &offset, ZCACHE_GFP_MASK);
 	if (unlikely(ret))
 		goto out;
+	zv_curr_dist_counts[chunks]++;
+	zv_cumul_dist_counts[chunks]++;
 	zv = kmap_atomic(page, KM_USER0) + offset;
 	zv->index = index;
 	zv->oid = *oid;
@@ -634,11 +688,14 @@ static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
 	unsigned long flags;
 	struct page *page;
 	uint32_t offset;
-	uint16_t size;
+	uint16_t size = xv_get_object_size(zv);
+	int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
 
 	ASSERT_SENTINEL(zv, ZVH);
-	size = xv_get_object_size(zv) - sizeof(*zv);
-	BUG_ON(size == 0 || size > zv_max_page_size);
+	BUG_ON(chunks >= NCHUNKS);
+	zv_curr_dist_counts[chunks]--;
+	size -= sizeof(*zv);
+	BUG_ON(size == 0);
 	INVERT_SENTINEL(zv, ZVH);
 	page = virt_to_page(zv);
 	offset = (unsigned long)zv & ~PAGE_MASK;
@@ -656,7 +713,7 @@ static void zv_decompress(struct page *page, struct zv_hdr *zv)
 
 	ASSERT_SENTINEL(zv, ZVH);
 	size = xv_get_object_size(zv) - sizeof(*zv);
-	BUG_ON(size == 0 || size > zv_max_page_size);
+	BUG_ON(size == 0);
 	to_va = kmap_atomic(page, KM_USER0);
 	ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv),
 					size, to_va, &clen);
@@ -665,6 +722,159 @@ static void zv_decompress(struct page *page, struct zv_hdr *zv)
 	BUG_ON(clen != PAGE_SIZE);
 }
 
+#ifdef CONFIG_SYSFS
+/*
+ * show a distribution of compression stats for zv pages.
+ */
+
+static int zv_curr_dist_counts_show(char *buf)
+{
+	unsigned long i, n, chunks = 0, sum_total_chunks = 0;
+	char *p = buf;
+
+	for (i = 0; i < NCHUNKS; i++) {
+		n = zv_curr_dist_counts[i];
+		p += sprintf(p, "%lu ", n);
+		chunks += n;
+		sum_total_chunks += i * n;
+	}
+	p += sprintf(p, "mean:%lu\n",
+		chunks == 0 ? 0 : sum_total_chunks / chunks);
+	return p - buf;
+}
+
+static int zv_cumul_dist_counts_show(char *buf)
+{
+	unsigned long i, n, chunks = 0, sum_total_chunks = 0;
+	char *p = buf;
+
+	for (i = 0; i < NCHUNKS; i++) {
+		n = zv_cumul_dist_counts[i];
+		p += sprintf(p, "%lu ", n);
+		chunks += n;
+		sum_total_chunks += i * n;
+	}
+	p += sprintf(p, "mean:%lu\n",
+		chunks == 0 ? 0 : sum_total_chunks / chunks);
+	return p - buf;
+}
+
+/*
+ * setting zv_max_zsize via sysfs causes all persistent (e.g. swap)
+ * pages that don't compress to less than this value (including metadata
+ * overhead) to be rejected.  We don't allow the value to get too close
+ * to PAGE_SIZE.
+ */
+static ssize_t zv_max_zsize_show(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				char *buf)
+{
+	return sprintf(buf, "%u\n", zv_max_zsize);
+}
+
+static ssize_t zv_max_zsize_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned long val;
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
+		return -EINVAL;
+	zv_max_zsize = val;
+	return count;
+}
+
+/*
+ * setting zv_max_mean_zsize via sysfs causes all persistent (e.g. swap)
+ * pages that don't compress to less than this value (including metadata
+ * overhead) to be rejected UNLESS the mean compression is also smaller
+ * than this value.  In other words, we are load-balancing-by-zsize the
+ * accepted pages.  Again, we don't allow the value to get too close
+ * to PAGE_SIZE.
+ */
+static ssize_t zv_max_mean_zsize_show(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				char *buf)
+{
+	return sprintf(buf, "%u\n", zv_max_mean_zsize);
+}
+
+static ssize_t zv_max_mean_zsize_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned long val;
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
+		return -EINVAL;
+	zv_max_mean_zsize = val;
+	return count;
+}
+
+/*
+ * setting zv_page_count_policy_percent via sysfs sets an upper bound of
+ * persistent (e.g. swap) pages that will be retained according to:
+ *     (zv_page_count_policy_percent * totalram_pages) / 100
+ * when that limit is reached, further puts will be rejected (until
+ * some pages have been flushed).  Note that, due to compression,
+ * this number may exceed 100; it defaults to 75 and we set an
+ * arbitrary limit of 150.  A poor choice will almost certainly result
+ * in OOM's, so this value should only be changed prudently.
+ */
+static ssize_t zv_page_count_policy_percent_show(struct kobject *kobj,
+						struct kobj_attribute *attr,
+						char *buf)
+{
+	return sprintf(buf, "%u\n", zv_page_count_policy_percent);
+}
+
+static ssize_t zv_page_count_policy_percent_store(struct kobject *kobj,
+						struct kobj_attribute *attr,
+						const char *buf, size_t count)
+{
+	unsigned long val;
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	err = strict_strtoul(buf, 10, &val);
+	if (err || (val == 0) || (val > 150))
+		return -EINVAL;
+	zv_page_count_policy_percent = val;
+	return count;
+}
+
+static struct kobj_attribute zcache_zv_max_zsize_attr = {
+	.attr = { .name = "zv_max_zsize", .mode = 0644 },
+	.show = zv_max_zsize_show,
+	.store = zv_max_zsize_store,
+};
+
+static struct kobj_attribute zcache_zv_max_mean_zsize_attr = {
+	.attr = { .name = "zv_max_mean_zsize", .mode = 0644 },
+	.show = zv_max_mean_zsize_show,
+	.store = zv_max_mean_zsize_store,
+};
+
+static struct kobj_attribute zcache_zv_page_count_policy_percent_attr = {
+	.attr = { .name = "zv_page_count_policy_percent",
+		  .mode = 0644 },
+	.show = zv_page_count_policy_percent_show,
+	.store = zv_page_count_policy_percent_store,
+};
+#endif
+
 /*
  * zcache core code starts here
  */
@@ -677,36 +887,70 @@ static unsigned long zcache_flobj_found;
 static unsigned long zcache_failed_eph_puts;
 static unsigned long zcache_failed_pers_puts;
 
-#define MAX_POOLS_PER_CLIENT 16
-
-static struct {
-	struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT];
-	struct xv_pool *xvpool;
-} zcache_client;
-
 /*
  * Tmem operations assume the poolid implies the invoking client.
- * Zcache only has one client (the kernel itself), so translate
- * the poolid into the tmem_pool allocated for it. A KVM version
+ * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
+ * RAMster has each client numbered by cluster node, and a KVM version
  * of zcache would have one client per guest and each client might
  * have a poolid==N.
  */
-static struct tmem_pool *zcache_get_pool_by_id(uint32_t poolid)
+static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
 {
 	struct tmem_pool *pool = NULL;
+	struct zcache_client *cli = NULL;
 
-	if (poolid >= 0) {
-		pool = zcache_client.tmem_pools[poolid];
+	if (cli_id == LOCAL_CLIENT)
+		cli = &zcache_host;
+	else {
+		if (cli_id >= MAX_CLIENTS)
+			goto out;
+		cli = &zcache_clients[cli_id];
+		if (cli == NULL)
+			goto out;
+		atomic_inc(&cli->refcount);
+	}
+	if (poolid < MAX_POOLS_PER_CLIENT) {
+		pool = cli->tmem_pools[poolid];
 		if (pool != NULL)
 			atomic_inc(&pool->refcount);
 	}
+out:
 	return pool;
 }
 
 static void zcache_put_pool(struct tmem_pool *pool)
 {
-	if (pool != NULL)
-		atomic_dec(&pool->refcount);
+	struct zcache_client *cli = NULL;
+
+	if (pool == NULL)
+		BUG();
+	cli = pool->client;
+	atomic_dec(&pool->refcount);
+	atomic_dec(&cli->refcount);
+}
+
+int zcache_new_client(uint16_t cli_id)
+{
+	struct zcache_client *cli = NULL;
+	int ret = -1;
+
+	if (cli_id == LOCAL_CLIENT)
+		cli = &zcache_host;
+	else if ((unsigned int)cli_id < MAX_CLIENTS)
+		cli = &zcache_clients[cli_id];
+	if (cli == NULL)
+		goto out;
+	if (cli->allocated)
+		goto out;
+	cli->allocated = 1;
+#ifdef CONFIG_FRONTSWAP
+	cli->xvpool = xv_create_pool();
+	if (cli->xvpool == NULL)
+		goto out;
+#endif
+	ret = 0;
+out:
+	return ret;
 }
 
 /* counters for debugging */
@@ -901,48 +1145,59 @@ static unsigned long zcache_curr_pers_pampd_count_max;
 /* forward reference */
 static int zcache_compress(struct page *from, void **out_va, size_t *out_len);
 
-static void *zcache_pampd_create(struct tmem_pool *pool, struct tmem_oid *oid,
-				uint32_t index, struct page *page)
+static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
+				struct tmem_pool *pool, struct tmem_oid *oid,
+				uint32_t index)
 {
 	void *pampd = NULL, *cdata;
 	size_t clen;
 	int ret;
-	bool ephemeral = is_ephemeral(pool);
 	unsigned long count;
+	struct page *page = virt_to_page(data);
+	struct zcache_client *cli = pool->client;
+	uint16_t client_id = get_client_id_from_client(cli);
+	unsigned long zv_mean_zsize;
+	unsigned long curr_pers_pampd_count;
 
-	if (ephemeral) {
+	if (eph) {
 		ret = zcache_compress(page, &cdata, &clen);
 		if (ret == 0)
-
 			goto out;
 		if (clen == 0 || clen > zbud_max_buddy_size()) {
 			zcache_compress_poor++;
 			goto out;
 		}
-		pampd = (void *)zbud_create(pool->pool_id, oid, index,
-					page, cdata, clen);
+		pampd = (void *)zbud_create(client_id, pool->pool_id, oid,
+					index, page, cdata, clen);
 		if (pampd != NULL) {
 			count = atomic_inc_return(&zcache_curr_eph_pampd_count);
 			if (count > zcache_curr_eph_pampd_count_max)
 				zcache_curr_eph_pampd_count_max = count;
 		}
 	} else {
-		/*
-		 * FIXME: This is all the "policy" there is for now.
-		 * 3/4 totpages should allow ~37% of RAM to be filled with
-		 * compressed frontswap pages
-		 */
-		if (atomic_read(&zcache_curr_pers_pampd_count) >
-				3 * totalram_pages / 4)
+		curr_pers_pampd_count =
+			atomic_read(&zcache_curr_pers_pampd_count);
+		if (curr_pers_pampd_count >
+			(zv_page_count_policy_percent * totalram_pages) / 100)
 			goto out;
 		ret = zcache_compress(page, &cdata, &clen);
 		if (ret == 0)
 			goto out;
-		if (clen > zv_max_page_size) {
+		/* reject if compression is too poor */
+		if (clen > zv_max_zsize) {
 			zcache_compress_poor++;
 			goto out;
 		}
-		pampd = (void *)zv_create(zcache_client.xvpool, pool->pool_id,
+		/* reject if mean compression is too poor */
+		if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {
+			zv_mean_zsize = xv_get_total_size_bytes(cli->xvpool) /
+						curr_pers_pampd_count;
+			if (zv_mean_zsize > zv_max_mean_zsize) {
+				zcache_mean_compress_poor++;
+				goto out;
+			}
+		}
+		pampd = (void *)zv_create(cli->xvpool, pool->pool_id,
 					oid, index, cdata, clen);
 		if (pampd == NULL)
 			goto out;
@@ -958,15 +1213,31 @@ out:
  * fill the pageframe corresponding to the struct page with the data
  * from the passed pampd
  */
-static int zcache_pampd_get_data(struct page *page, void *pampd,
-				struct tmem_pool *pool)
+static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
+				void *pampd, struct tmem_pool *pool,
+				struct tmem_oid *oid, uint32_t index)
 {
 	int ret = 0;
 
-	if (is_ephemeral(pool))
-		ret = zbud_decompress(page, pampd);
-	else
-		zv_decompress(page, pampd);
+	BUG_ON(is_ephemeral(pool));
+	zv_decompress(virt_to_page(data), pampd);
+	return ret;
+}
+
+/*
+ * fill the pageframe corresponding to the struct page with the data
+ * from the passed pampd
+ */
+static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,
+					void *pampd, struct tmem_pool *pool,
+					struct tmem_oid *oid, uint32_t index)
+{
+	int ret = 0;
+
+	BUG_ON(!is_ephemeral(pool));
+	zbud_decompress(virt_to_page(data), pampd);
+	zbud_free_and_delist((struct zbud_hdr *)pampd);
+	atomic_dec(&zcache_curr_eph_pampd_count);
 	return ret;
 }
 
@@ -974,23 +1245,49 @@ static int zcache_pampd_get_data(struct page *page, void *pampd,
  * free the pampd and remove it from any zcache lists
  * pampd must no longer be pointed to from any tmem data structures!
  */
-static void zcache_pampd_free(void *pampd, struct tmem_pool *pool)
+static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
+				struct tmem_oid *oid, uint32_t index)
 {
+	struct zcache_client *cli = pool->client;
+
 	if (is_ephemeral(pool)) {
 		zbud_free_and_delist((struct zbud_hdr *)pampd);
 		atomic_dec(&zcache_curr_eph_pampd_count);
 		BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0);
 	} else {
-		zv_free(zcache_client.xvpool, (struct zv_hdr *)pampd);
+		zv_free(cli->xvpool, (struct zv_hdr *)pampd);
 		atomic_dec(&zcache_curr_pers_pampd_count);
 		BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0);
 	}
 }
 
+static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj)
+{
+}
+
+static void zcache_pampd_new_obj(struct tmem_obj *obj)
+{
+}
+
+static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj)
+{
+	return -1;
+}
+
+static bool zcache_pampd_is_remote(void *pampd)
+{
+	return 0;
+}
+
 static struct tmem_pamops zcache_pamops = {
 	.create = zcache_pampd_create,
 	.get_data = zcache_pampd_get_data,
+	.get_data_and_free = zcache_pampd_get_data_and_free,
 	.free = zcache_pampd_free,
+	.free_obj = zcache_pampd_free_obj,
+	.new_obj = zcache_pampd_new_obj,
+	.replace_in_obj = zcache_pampd_replace_in_obj,
+	.is_remote = zcache_pampd_is_remote,
 };
 
 /*
@@ -1122,6 +1419,7 @@ ZCACHE_SYSFS_RO(put_to_flush);
 ZCACHE_SYSFS_RO(aborted_preload);
 ZCACHE_SYSFS_RO(aborted_shrink);
 ZCACHE_SYSFS_RO(compress_poor);
+ZCACHE_SYSFS_RO(mean_compress_poor);
 ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages);
 ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages);
 ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count);
@@ -1130,6 +1428,10 @@ ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts,
 		zbud_show_unbuddied_list_counts);
 ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts,
 		zbud_show_cumul_chunk_counts);
+ZCACHE_SYSFS_RO_CUSTOM(zv_curr_dist_counts,
+		zv_curr_dist_counts_show);
+ZCACHE_SYSFS_RO_CUSTOM(zv_cumul_dist_counts,
+		zv_cumul_dist_counts_show);
 
 static struct attribute *zcache_attrs[] = {
 	&zcache_curr_obj_count_attr.attr,
@@ -1143,6 +1445,7 @@ static struct attribute *zcache_attrs[] = {
 	&zcache_failed_eph_puts_attr.attr,
 	&zcache_failed_pers_puts_attr.attr,
 	&zcache_compress_poor_attr.attr,
+	&zcache_mean_compress_poor_attr.attr,
 	&zcache_zbud_curr_raw_pages_attr.attr,
 	&zcache_zbud_curr_zpages_attr.attr,
 	&zcache_zbud_curr_zbytes_attr.attr,
@@ -1160,6 +1463,11 @@ static struct attribute *zcache_attrs[] = {
 	&zcache_aborted_shrink_attr.attr,
 	&zcache_zbud_unbuddied_list_counts_attr.attr,
 	&zcache_zbud_cumul_chunk_counts_attr.attr,
+	&zcache_zv_curr_dist_counts_attr.attr,
+	&zcache_zv_cumul_dist_counts_attr.attr,
+	&zcache_zv_max_zsize_attr.attr,
+	&zcache_zv_max_mean_zsize_attr.attr,
+	&zcache_zv_page_count_policy_percent_attr.attr,
 	NULL,
 };
 
@@ -1212,19 +1520,20 @@ static struct shrinker zcache_shrinker = {
  * zcache shims between cleancache/frontswap ops and tmem
  */
 
-static int zcache_put_page(int pool_id, struct tmem_oid *oidp,
+static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
 				uint32_t index, struct page *page)
 {
 	struct tmem_pool *pool;
 	int ret = -1;
 
 	BUG_ON(!irqs_disabled());
-	pool = zcache_get_pool_by_id(pool_id);
+	pool = zcache_get_pool_by_id(cli_id, pool_id);
 	if (unlikely(pool == NULL))
 		goto out;
 	if (!zcache_freeze && zcache_do_preload(pool) == 0) {
 		/* preload does preempt_disable on success */
-		ret = tmem_put(pool, oidp, index, page);
+		ret = tmem_put(pool, oidp, index, page_address(page),
+				PAGE_SIZE, 0, is_ephemeral(pool));
 		if (ret < 0) {
 			if (is_ephemeral(pool))
 				zcache_failed_eph_puts++;
@@ -1244,25 +1553,28 @@ out:
 	return ret;
 }
 
-static int zcache_get_page(int pool_id, struct tmem_oid *oidp,
+static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
 				uint32_t index, struct page *page)
 {
 	struct tmem_pool *pool;
 	int ret = -1;
 	unsigned long flags;
+	size_t size = PAGE_SIZE;
 
 	local_irq_save(flags);
-	pool = zcache_get_pool_by_id(pool_id);
+	pool = zcache_get_pool_by_id(cli_id, pool_id);
 	if (likely(pool != NULL)) {
 		if (atomic_read(&pool->obj_count) > 0)
-			ret = tmem_get(pool, oidp, index, page);
+			ret = tmem_get(pool, oidp, index, page_address(page),
+					&size, 0, is_ephemeral(pool));
 		zcache_put_pool(pool);
 	}
 	local_irq_restore(flags);
 	return ret;
 }
 
-static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, uint32_t index)
+static int zcache_flush_page(int cli_id, int pool_id,
+				struct tmem_oid *oidp, uint32_t index)
 {
 	struct tmem_pool *pool;
 	int ret = -1;
@@ -1270,7 +1582,7 @@ static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, uint32_t index)
 
 	local_irq_save(flags);
 	zcache_flush_total++;
-	pool = zcache_get_pool_by_id(pool_id);
+	pool = zcache_get_pool_by_id(cli_id, pool_id);
 	if (likely(pool != NULL)) {
 		if (atomic_read(&pool->obj_count) > 0)
 			ret = tmem_flush_page(pool, oidp, index);
@@ -1282,7 +1594,8 @@ static int zcache_flush_page(int pool_id, struct tmem_oid *oidp, uint32_t index)
 	return ret;
 }
 
-static int zcache_flush_object(int pool_id, struct tmem_oid *oidp)
+static int zcache_flush_object(int cli_id, int pool_id,
+				struct tmem_oid *oidp)
 {
 	struct tmem_pool *pool;
 	int ret = -1;
@@ -1290,7 +1603,7 @@ static int zcache_flush_object(int pool_id, struct tmem_oid *oidp)
 
 	local_irq_save(flags);
 	zcache_flobj_total++;
-	pool = zcache_get_pool_by_id(pool_id);
+	pool = zcache_get_pool_by_id(cli_id, pool_id);
 	if (likely(pool != NULL)) {
 		if (atomic_read(&pool->obj_count) > 0)
 			ret = tmem_flush_object(pool, oidp);
@@ -1302,34 +1615,52 @@ static int zcache_flush_object(int pool_id, struct tmem_oid *oidp)
 	return ret;
 }
 
-static int zcache_destroy_pool(int pool_id)
+static int zcache_destroy_pool(int cli_id, int pool_id)
 {
 	struct tmem_pool *pool = NULL;
+	struct zcache_client *cli = NULL;
 	int ret = -1;
 
 	if (pool_id < 0)
 		goto out;
-	pool = zcache_client.tmem_pools[pool_id];
+	if (cli_id == LOCAL_CLIENT)
+		cli = &zcache_host;
+	else if ((unsigned int)cli_id < MAX_CLIENTS)
+		cli = &zcache_clients[cli_id];
+	if (cli == NULL)
+		goto out;
+	atomic_inc(&cli->refcount);
+	pool = cli->tmem_pools[pool_id];
 	if (pool == NULL)
 		goto out;
-	zcache_client.tmem_pools[pool_id] = NULL;
+	cli->tmem_pools[pool_id] = NULL;
 	/* wait for pool activity on other cpus to quiesce */
 	while (atomic_read(&pool->refcount) != 0)
 		;
+	atomic_dec(&cli->refcount);
 	local_bh_disable();
 	ret = tmem_destroy_pool(pool);
 	local_bh_enable();
 	kfree(pool);
-	pr_info("zcache: destroyed pool id=%d\n", pool_id);
+	pr_info("zcache: destroyed pool id=%d, cli_id=%d\n",
+			pool_id, cli_id);
out:
 	return ret;
 }
 
-static int zcache_new_pool(uint32_t flags)
+static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
 {
 	int poolid = -1;
 	struct tmem_pool *pool;
+	struct zcache_client *cli = NULL;
 
+	if (cli_id == LOCAL_CLIENT)
+		cli = &zcache_host;
+	else if ((unsigned int)cli_id < MAX_CLIENTS)
+		cli = &zcache_clients[cli_id];
+	if (cli == NULL)
+		goto out;
+	atomic_inc(&cli->refcount);
 	pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
 	if (pool == NULL) {
 		pr_info("zcache: pool creation failed: out of memory\n");
@@ -1337,7 +1668,7 @@ static int zcache_new_pool(uint32_t flags)
 	}
 
 	for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
-		if (zcache_client.tmem_pools[poolid] == NULL)
+		if (cli->tmem_pools[poolid] == NULL)
 			break;
 	if (poolid >= MAX_POOLS_PER_CLIENT) {
 		pr_info("zcache: pool creation failed: max exceeded\n");
@@ -1346,14 +1677,16 @@ static int zcache_new_pool(uint32_t flags)
 		goto out;
 	}
 	atomic_set(&pool->refcount, 0);
-	pool->client = &zcache_client;
+	pool->client = cli;
 	pool->pool_id = poolid;
 	tmem_new_pool(pool, flags);
-	zcache_client.tmem_pools[poolid] = pool;
-	pr_info("zcache: created %s tmem pool, id=%d\n",
+	cli->tmem_pools[poolid] = pool;
+	pr_info("zcache: created %s tmem pool, id=%d, client=%d\n",
 		flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
-		poolid);
+		poolid, cli_id);
 out:
+	if (cli != NULL)
+		atomic_dec(&cli->refcount);
 	return poolid;
 }
 
@@ -1374,7 +1707,7 @@ static void zcache_cleancache_put_page(int pool_id,
 	struct tmem_oid oid = *(struct tmem_oid *)&key;
 
 	if (likely(ind == index))
-		(void)zcache_put_page(pool_id, &oid, index, page);
+		(void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page);
 }
 
 static int zcache_cleancache_get_page(int pool_id,
@@ -1386,7 +1719,7 @@ static int zcache_cleancache_get_page(int pool_id,
 	int ret = -1;
 
 	if (likely(ind == index))
-		ret = zcache_get_page(pool_id, &oid, index, page);
+		ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page);
 	return ret;
 }
 
@@ -1398,7 +1731,7 @@ static void zcache_cleancache_flush_page(int pool_id,
 	struct tmem_oid oid = *(struct tmem_oid *)&key;
 
 	if (likely(ind == index))
-		(void)zcache_flush_page(pool_id, &oid, ind);
+		(void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
 }
 
 static void zcache_cleancache_flush_inode(int pool_id,
@@ -1406,13 +1739,13 @@ static void zcache_cleancache_flush_inode(int pool_id,
 {
 	struct tmem_oid oid = *(struct tmem_oid *)&key;
 
-	(void)zcache_flush_object(pool_id, &oid);
+	(void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
 }
 
 static void zcache_cleancache_flush_fs(int pool_id)
 {
 	if (pool_id >= 0)
-		(void)zcache_destroy_pool(pool_id);
+		(void)zcache_destroy_pool(LOCAL_CLIENT, pool_id);
 }
 
 static int zcache_cleancache_init_fs(size_t pagesize)
@@ -1420,7 +1753,7 @@ static int zcache_cleancache_init_fs(size_t pagesize)
 	BUG_ON(sizeof(struct cleancache_filekey) !=
 			sizeof(struct tmem_oid));
 	BUG_ON(pagesize != PAGE_SIZE);
-	return zcache_new_pool(0);
+	return zcache_new_pool(LOCAL_CLIENT, 0);
 }
 
 static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
@@ -1429,7 +1762,7 @@ static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
 	BUG_ON(sizeof(struct cleancache_filekey) !=
 			sizeof(struct tmem_oid));
 	BUG_ON(pagesize != PAGE_SIZE);
-	return zcache_new_pool(0);
+	return zcache_new_pool(LOCAL_CLIENT, 0);
 }
 
 static struct cleancache_ops zcache_cleancache_ops = {
@@ -1483,8 +1816,8 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
 	BUG_ON(!PageLocked(page));
 	if (likely(ind64 == ind)) {
 		local_irq_save(flags);
-		ret = zcache_put_page(zcache_frontswap_poolid, &oid,
-				iswiz(ind), page);
+		ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
+				&oid, iswiz(ind), page);
 		local_irq_restore(flags);
 	}
 	return ret;
@@ -1502,8 +1835,8 @@ static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
 
 	BUG_ON(!PageLocked(page));
 	if (likely(ind64 == ind))
-		ret = zcache_get_page(zcache_frontswap_poolid, &oid,
-				iswiz(ind), page);
+		ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
+				&oid, iswiz(ind), page);
 	return ret;
 }
 
@@ -1515,8 +1848,8 @@ static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
 	struct tmem_oid oid = oswiz(type, ind);
 
 	if (likely(ind64 == ind))
-		(void)zcache_flush_page(zcache_frontswap_poolid, &oid,
-				iswiz(ind));
+		(void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
+				&oid, iswiz(ind));
 }
 
 /* flush all pages from the passed swaptype */
@@ -1527,7 +1860,8 @@ static void zcache_frontswap_flush_area(unsigned type)
 
 	for (ind = SWIZ_MASK; ind >= 0; ind--) {
 		oid = oswiz(type, ind);
-		(void)zcache_flush_object(zcache_frontswap_poolid, &oid);
+		(void)zcache_flush_object(LOCAL_CLIENT,
+						zcache_frontswap_poolid, &oid);
 	}
 }
 
@@ -1535,7 +1869,8 @@ static void zcache_frontswap_init(unsigned ignored)
 {
 	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
 	if (zcache_frontswap_poolid < 0)
-		zcache_frontswap_poolid = zcache_new_pool(TMEM_POOL_PERSIST);
+		zcache_frontswap_poolid =
+			zcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST);
 }
 
 static struct frontswap_ops zcache_frontswap_ops = {
@@ -1624,6 +1959,11 @@ static int __init zcache_init(void)
 			sizeof(struct tmem_objnode), 0, 0, NULL);
 	zcache_obj_cache = kmem_cache_create("zcache_obj",
 			sizeof(struct tmem_obj), 0, 0, NULL);
+	ret = zcache_new_client(LOCAL_CLIENT);
+	if (ret) {
+		pr_err("zcache: can't create client\n");
+		goto out;
+	}
 #endif
 #ifdef CONFIG_CLEANCACHE
 	if (zcache_enabled && use_cleancache) {
@@ -1642,11 +1982,6 @@ static int __init zcache_init(void)
 	if (zcache_enabled && use_frontswap) {
 		struct frontswap_ops old_ops;
 
-		zcache_client.xvpool = xv_create_pool();
-		if (zcache_client.xvpool == NULL) {
-			pr_err("zcache: can't create xvpool\n");
-			goto out;
-		}
 		old_ops = zcache_frontswap_register_ops();
 		pr_info("zcache: frontswap enabled using kernel "
 			"transcendent memory and xvmalloc\n");