author     Dan Magenheimer <dan.magenheimer@oracle.com>   2012-05-10 15:06:21 -0400
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>   2012-05-14 16:02:49 -0400
commit     49b81a3c7415d9b02169f37b8559d468fd17686a (patch)
tree       a57e7ae0531e2b68f00850d77b578adbbde91f81
parent     c15974ef1f2d858795f7f63280fb53bcab6064f9 (diff)
ramster: switch over to zsmalloc and crypto interface
RAMster does many zcache-like things.  In order to avoid major merge
conflicts at 3.4, ramster used lzo1x directly for compression and retained
a local copy of xvmalloc, while zcache moved to the new zsmalloc allocator
and the crypto API.  This patch moves ramster forward to use zsmalloc and
crypto.

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--  drivers/staging/ramster/Kconfig         |   6
-rw-r--r--  drivers/staging/ramster/Makefile        |   2
-rw-r--r--  drivers/staging/ramster/TODO            |   4
-rw-r--r--  drivers/staging/ramster/xvmalloc.c      | 509
-rw-r--r--  drivers/staging/ramster/xvmalloc.h      |  30
-rw-r--r--  drivers/staging/ramster/xvmalloc_int.h  |  95
-rw-r--r--  drivers/staging/ramster/zcache-main.c   | 288
7 files changed, 199 insertions(+), 735 deletions(-)
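
For context before the diff: the crypto compression interface this patch adopts is driven through per-CPU crypto_comp transforms (see zcache_comp_op() and zcache_comp_cpu_up() in the zcache-main.c hunks below). The fragment here is only an illustrative sketch of that call pattern, not part of the patch: the function name is made up, and it allocates a transform on the spot instead of keeping one per CPU as the driver does.

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/mm.h>

/* Sketch only: compress one page with the crypto compression API. */
static int example_compress_page(const u8 *src, u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret;

	/* "lzo" is the default algorithm name zcache_comp_init() falls back to */
	tfm = crypto_alloc_comp("lzo", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	*dlen = PAGE_SIZE << 1;	/* dst buffer is two pages, like zcache_dstmem */
	ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, dlen);
	crypto_free_comp(tfm);
	return ret;
}
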
diff --git a/drivers/staging/ramster/Kconfig b/drivers/staging/ramster/Kconfig
index 8349887827dc..98df39c05994 100644
--- a/drivers/staging/ramster/Kconfig
+++ b/drivers/staging/ramster/Kconfig
@@ -1,8 +1,8 @@
 config RAMSTER
 	bool "Cross-machine RAM capacity sharing, aka peer-to-peer tmem"
-	depends on (CLEANCACHE || FRONTSWAP) && CONFIGFS_FS=y && !ZCACHE && !XVMALLOC && !HIGHMEM && NET
-	select LZO_COMPRESS
-	select LZO_DECOMPRESS
+	depends on (CLEANCACHE || FRONTSWAP) && CONFIGFS_FS=y && !ZCACHE && CRYPTO=y && !HIGHMEM && NET
+	select ZSMALLOC
+	select CRYPTO_LZO
 	default n
 	help
 	  RAMster allows RAM on other machines in a cluster to be utilized
diff --git a/drivers/staging/ramster/Makefile b/drivers/staging/ramster/Makefile
index bcc13c87f996..07ffd7584ea9 100644
--- a/drivers/staging/ramster/Makefile
+++ b/drivers/staging/ramster/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_RAMSTER) += zcache-main.o tmem.o r2net.o xvmalloc.o cluster/
+obj-$(CONFIG_RAMSTER) += zcache-main.o tmem.o r2net.o cluster/
diff --git a/drivers/staging/ramster/TODO b/drivers/staging/ramster/TODO
index 46fcf0c58acf..46882337e1b6 100644
--- a/drivers/staging/ramster/TODO
+++ b/drivers/staging/ramster/TODO
@@ -1,7 +1,5 @@
 For this staging driver, RAMster duplicates code from drivers/staging/zcache
-then incorporates changes to the local copy of the code. For V5, it also
-directly incorporates the soon-to-be-removed drivers/staging/zram/xvmalloc.[ch]
-as all testing has been done with xvmalloc rather than the new zsmalloc.
+then incorporates changes to the local copy of the code.
 Before RAMster can be promoted from staging, the zcache and RAMster drivers
 should be either merged or reorganized to separate out common code.
 
diff --git a/drivers/staging/ramster/xvmalloc.c b/drivers/staging/ramster/xvmalloc.c
deleted file mode 100644
index 44ceb0b823a9..000000000000
--- a/drivers/staging/ramster/xvmalloc.c
+++ /dev/null
@@ -1,509 +0,0 @@
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#ifdef CONFIG_ZRAM_DEBUG
14#define DEBUG
15#endif
16
17#include <linux/module.h>
18#include <linux/kernel.h>
19#include <linux/bitops.h>
20#include <linux/errno.h>
21#include <linux/highmem.h>
22#include <linux/init.h>
23#include <linux/string.h>
24#include <linux/slab.h>
25
26#include "xvmalloc.h"
27#include "xvmalloc_int.h"
28
29static void stat_inc(u64 *value)
30{
31 *value = *value + 1;
32}
33
34static void stat_dec(u64 *value)
35{
36 *value = *value - 1;
37}
38
39static int test_flag(struct block_header *block, enum blockflags flag)
40{
41 return block->prev & BIT(flag);
42}
43
44static void set_flag(struct block_header *block, enum blockflags flag)
45{
46 block->prev |= BIT(flag);
47}
48
49static void clear_flag(struct block_header *block, enum blockflags flag)
50{
51 block->prev &= ~BIT(flag);
52}
53
54/*
55 * Given <page, offset> pair, provide a dereferencable pointer.
56 * This is called from xv_malloc/xv_free path, so it
57 * needs to be fast.
58 */
59static void *get_ptr_atomic(struct page *page, u16 offset)
60{
61 unsigned char *base;
62
63 base = kmap_atomic(page);
64 return base + offset;
65}
66
67static void put_ptr_atomic(void *ptr)
68{
69 kunmap_atomic(ptr);
70}
71
72static u32 get_blockprev(struct block_header *block)
73{
74 return block->prev & PREV_MASK;
75}
76
77static void set_blockprev(struct block_header *block, u16 new_offset)
78{
79 block->prev = new_offset | (block->prev & FLAGS_MASK);
80}
81
82static struct block_header *BLOCK_NEXT(struct block_header *block)
83{
84 return (struct block_header *)
85 ((char *)block + block->size + XV_ALIGN);
86}
87
88/*
89 * Get index of free list containing blocks of maximum size
90 * which is less than or equal to given size.
91 */
92static u32 get_index_for_insert(u32 size)
93{
94 if (unlikely(size > XV_MAX_ALLOC_SIZE))
95 size = XV_MAX_ALLOC_SIZE;
96 size &= ~FL_DELTA_MASK;
97 return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
98}
99
100/*
101 * Get index of free list having blocks of size greater than
102 * or equal to requested size.
103 */
104static u32 get_index(u32 size)
105{
106 if (unlikely(size < XV_MIN_ALLOC_SIZE))
107 size = XV_MIN_ALLOC_SIZE;
108 size = ALIGN(size, FL_DELTA);
109 return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
110}
111
112/**
113 * find_block - find block of at least given size
114 * @pool: memory pool to search from
115 * @size: size of block required
116 * @page: page containing required block
117 * @offset: offset within the page where block is located.
118 *
119 * Searches two level bitmap to locate block of at least
120 * the given size. If such a block is found, it provides
121 * <page, offset> to identify this block and returns index
122 * in freelist where we found this block.
123 * Otherwise, returns 0 and <page, offset> params are not touched.
124 */
125static u32 find_block(struct xv_pool *pool, u32 size,
126 struct page **page, u32 *offset)
127{
128 ulong flbitmap, slbitmap;
129 u32 flindex, slindex, slbitstart;
130
131 /* There are no free blocks in this pool */
132 if (!pool->flbitmap)
133 return 0;
134
135 /* Get freelist index corresponding to this size */
136 slindex = get_index(size);
137 slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
138 slbitstart = slindex % BITS_PER_LONG;
139
140 /*
141 * If freelist is not empty at this index, we found the
142 * block - head of this list. This is approximate best-fit match.
143 */
144 if (test_bit(slbitstart, &slbitmap)) {
145 *page = pool->freelist[slindex].page;
146 *offset = pool->freelist[slindex].offset;
147 return slindex;
148 }
149
150 /*
151 * No best-fit found. Search a bit further in bitmap for a free block.
152 * Second level bitmap consists of series of 32-bit chunks. Search
153 * further in the chunk where we expected a best-fit, starting from
154 * index location found above.
155 */
156 slbitstart++;
157 slbitmap >>= slbitstart;
158
159 /* Skip this search if we were already at end of this bitmap chunk */
160 if ((slbitstart != BITS_PER_LONG) && slbitmap) {
161 slindex += __ffs(slbitmap) + 1;
162 *page = pool->freelist[slindex].page;
163 *offset = pool->freelist[slindex].offset;
164 return slindex;
165 }
166
167 /* Now do a full two-level bitmap search to find next nearest fit */
168 flindex = slindex / BITS_PER_LONG;
169
170 flbitmap = (pool->flbitmap) >> (flindex + 1);
171 if (!flbitmap)
172 return 0;
173
174 flindex += __ffs(flbitmap) + 1;
175 slbitmap = pool->slbitmap[flindex];
176 slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
177 *page = pool->freelist[slindex].page;
178 *offset = pool->freelist[slindex].offset;
179
180 return slindex;
181}
182
183/*
184 * Insert block at <page, offset> in freelist of given pool.
185 * freelist used depends on block size.
186 */
187static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
188 struct block_header *block)
189{
190 u32 flindex, slindex;
191 struct block_header *nextblock;
192
193 slindex = get_index_for_insert(block->size);
194 flindex = slindex / BITS_PER_LONG;
195
196 block->link.prev_page = NULL;
197 block->link.prev_offset = 0;
198 block->link.next_page = pool->freelist[slindex].page;
199 block->link.next_offset = pool->freelist[slindex].offset;
200 pool->freelist[slindex].page = page;
201 pool->freelist[slindex].offset = offset;
202
203 if (block->link.next_page) {
204 nextblock = get_ptr_atomic(block->link.next_page,
205 block->link.next_offset);
206 nextblock->link.prev_page = page;
207 nextblock->link.prev_offset = offset;
208 put_ptr_atomic(nextblock);
209 /* If there was a next page then the free bits are set. */
210 return;
211 }
212
213 __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
214 __set_bit(flindex, &pool->flbitmap);
215}
216
217/*
218 * Remove block from freelist. Index 'slindex' identifies the freelist.
219 */
220static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
221 struct block_header *block, u32 slindex)
222{
223 u32 flindex = slindex / BITS_PER_LONG;
224 struct block_header *tmpblock;
225
226 if (block->link.prev_page) {
227 tmpblock = get_ptr_atomic(block->link.prev_page,
228 block->link.prev_offset);
229 tmpblock->link.next_page = block->link.next_page;
230 tmpblock->link.next_offset = block->link.next_offset;
231 put_ptr_atomic(tmpblock);
232 }
233
234 if (block->link.next_page) {
235 tmpblock = get_ptr_atomic(block->link.next_page,
236 block->link.next_offset);
237 tmpblock->link.prev_page = block->link.prev_page;
238 tmpblock->link.prev_offset = block->link.prev_offset;
239 put_ptr_atomic(tmpblock);
240 }
241
242 /* Is this block is at the head of the freelist? */
243 if (pool->freelist[slindex].page == page
244 && pool->freelist[slindex].offset == offset) {
245
246 pool->freelist[slindex].page = block->link.next_page;
247 pool->freelist[slindex].offset = block->link.next_offset;
248
249 if (pool->freelist[slindex].page) {
250 struct block_header *tmpblock;
251 tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
252 pool->freelist[slindex].offset);
253 tmpblock->link.prev_page = NULL;
254 tmpblock->link.prev_offset = 0;
255 put_ptr_atomic(tmpblock);
256 } else {
257 /* This freelist bucket is empty */
258 __clear_bit(slindex % BITS_PER_LONG,
259 &pool->slbitmap[flindex]);
260 if (!pool->slbitmap[flindex])
261 __clear_bit(flindex, &pool->flbitmap);
262 }
263 }
264
265 block->link.prev_page = NULL;
266 block->link.prev_offset = 0;
267 block->link.next_page = NULL;
268 block->link.next_offset = 0;
269}
270
271/*
272 * Allocate a page and add it to freelist of given pool.
273 */
274static int grow_pool(struct xv_pool *pool, gfp_t flags)
275{
276 struct page *page;
277 struct block_header *block;
278
279 page = alloc_page(flags);
280 if (unlikely(!page))
281 return -ENOMEM;
282
283 stat_inc(&pool->total_pages);
284
285 spin_lock(&pool->lock);
286 block = get_ptr_atomic(page, 0);
287
288 block->size = PAGE_SIZE - XV_ALIGN;
289 set_flag(block, BLOCK_FREE);
290 clear_flag(block, PREV_FREE);
291 set_blockprev(block, 0);
292
293 insert_block(pool, page, 0, block);
294
295 put_ptr_atomic(block);
296 spin_unlock(&pool->lock);
297
298 return 0;
299}
300
301/*
302 * Create a memory pool. Allocates freelist, bitmaps and other
303 * per-pool metadata.
304 */
305struct xv_pool *xv_create_pool(void)
306{
307 u32 ovhd_size;
308 struct xv_pool *pool;
309
310 ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
311 pool = kzalloc(ovhd_size, GFP_KERNEL);
312 if (!pool)
313 return NULL;
314
315 spin_lock_init(&pool->lock);
316
317 return pool;
318}
319EXPORT_SYMBOL_GPL(xv_create_pool);
320
321void xv_destroy_pool(struct xv_pool *pool)
322{
323 kfree(pool);
324}
325EXPORT_SYMBOL_GPL(xv_destroy_pool);
326
327/**
328 * xv_malloc - Allocate block of given size from pool.
329 * @pool: pool to allocate from
330 * @size: size of block to allocate
331 * @page: page no. that holds the object
332 * @offset: location of object within page
333 *
334 * On success, <page, offset> identifies block allocated
335 * and 0 is returned. On failure, <page, offset> is set to
336 * 0 and -ENOMEM is returned.
337 *
338 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
339 */
340int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
341 u32 *offset, gfp_t flags)
342{
343 int error;
344 u32 index, tmpsize, origsize, tmpoffset;
345 struct block_header *block, *tmpblock;
346
347 *page = NULL;
348 *offset = 0;
349 origsize = size;
350
351 if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
352 return -ENOMEM;
353
354 size = ALIGN(size, XV_ALIGN);
355
356 spin_lock(&pool->lock);
357
358 index = find_block(pool, size, page, offset);
359
360 if (!*page) {
361 spin_unlock(&pool->lock);
362 if (flags & GFP_NOWAIT)
363 return -ENOMEM;
364 error = grow_pool(pool, flags);
365 if (unlikely(error))
366 return error;
367
368 spin_lock(&pool->lock);
369 index = find_block(pool, size, page, offset);
370 }
371
372 if (!*page) {
373 spin_unlock(&pool->lock);
374 return -ENOMEM;
375 }
376
377 block = get_ptr_atomic(*page, *offset);
378
379 remove_block(pool, *page, *offset, block, index);
380
381 /* Split the block if required */
382 tmpoffset = *offset + size + XV_ALIGN;
383 tmpsize = block->size - size;
384 tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
385 if (tmpsize) {
386 tmpblock->size = tmpsize - XV_ALIGN;
387 set_flag(tmpblock, BLOCK_FREE);
388 clear_flag(tmpblock, PREV_FREE);
389
390 set_blockprev(tmpblock, *offset);
391 if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
392 insert_block(pool, *page, tmpoffset, tmpblock);
393
394 if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
395 tmpblock = BLOCK_NEXT(tmpblock);
396 set_blockprev(tmpblock, tmpoffset);
397 }
398 } else {
399 /* This block is exact fit */
400 if (tmpoffset != PAGE_SIZE)
401 clear_flag(tmpblock, PREV_FREE);
402 }
403
404 block->size = origsize;
405 clear_flag(block, BLOCK_FREE);
406
407 put_ptr_atomic(block);
408 spin_unlock(&pool->lock);
409
410 *offset += XV_ALIGN;
411
412 return 0;
413}
414EXPORT_SYMBOL_GPL(xv_malloc);
415
416/*
417 * Free block identified with <page, offset>
418 */
419void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
420{
421 void *page_start;
422 struct block_header *block, *tmpblock;
423
424 offset -= XV_ALIGN;
425
426 spin_lock(&pool->lock);
427
428 page_start = get_ptr_atomic(page, 0);
429 block = (struct block_header *)((char *)page_start + offset);
430
431 /* Catch double free bugs */
432 BUG_ON(test_flag(block, BLOCK_FREE));
433
434 block->size = ALIGN(block->size, XV_ALIGN);
435
436 tmpblock = BLOCK_NEXT(block);
437 if (offset + block->size + XV_ALIGN == PAGE_SIZE)
438 tmpblock = NULL;
439
440 /* Merge next block if its free */
441 if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
442 /*
443 * Blocks smaller than XV_MIN_ALLOC_SIZE
444 * are not inserted in any free list.
445 */
446 if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
447 remove_block(pool, page,
448 offset + block->size + XV_ALIGN, tmpblock,
449 get_index_for_insert(tmpblock->size));
450 }
451 block->size += tmpblock->size + XV_ALIGN;
452 }
453
454 /* Merge previous block if its free */
455 if (test_flag(block, PREV_FREE)) {
456 tmpblock = (struct block_header *)((char *)(page_start) +
457 get_blockprev(block));
458 offset = offset - tmpblock->size - XV_ALIGN;
459
460 if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
461 remove_block(pool, page, offset, tmpblock,
462 get_index_for_insert(tmpblock->size));
463
464 tmpblock->size += block->size + XV_ALIGN;
465 block = tmpblock;
466 }
467
468 /* No used objects in this page. Free it. */
469 if (block->size == PAGE_SIZE - XV_ALIGN) {
470 put_ptr_atomic(page_start);
471 spin_unlock(&pool->lock);
472
473 __free_page(page);
474 stat_dec(&pool->total_pages);
475 return;
476 }
477
478 set_flag(block, BLOCK_FREE);
479 if (block->size >= XV_MIN_ALLOC_SIZE)
480 insert_block(pool, page, offset, block);
481
482 if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
483 tmpblock = BLOCK_NEXT(block);
484 set_flag(tmpblock, PREV_FREE);
485 set_blockprev(tmpblock, offset);
486 }
487
488 put_ptr_atomic(page_start);
489 spin_unlock(&pool->lock);
490}
491EXPORT_SYMBOL_GPL(xv_free);
492
493u32 xv_get_object_size(void *obj)
494{
495 struct block_header *blk;
496
497 blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
498 return blk->size;
499}
500EXPORT_SYMBOL_GPL(xv_get_object_size);
501
502/*
503 * Returns total memory used by allocator (userdata + metadata)
504 */
505u64 xv_get_total_size_bytes(struct xv_pool *pool)
506{
507 return pool->total_pages << PAGE_SHIFT;
508}
509EXPORT_SYMBOL_GPL(xv_get_total_size_bytes);
diff --git a/drivers/staging/ramster/xvmalloc.h b/drivers/staging/ramster/xvmalloc.h
deleted file mode 100644
index 5b1a81aa5faf..000000000000
--- a/drivers/staging/ramster/xvmalloc.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#ifndef _XV_MALLOC_H_
14#define _XV_MALLOC_H_
15
16#include <linux/types.h>
17
18struct xv_pool;
19
20struct xv_pool *xv_create_pool(void);
21void xv_destroy_pool(struct xv_pool *pool);
22
23int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
24 u32 *offset, gfp_t flags);
25void xv_free(struct xv_pool *pool, struct page *page, u32 offset);
26
27u32 xv_get_object_size(void *obj);
28u64 xv_get_total_size_bytes(struct xv_pool *pool);
29
30#endif
diff --git a/drivers/staging/ramster/xvmalloc_int.h b/drivers/staging/ramster/xvmalloc_int.h
deleted file mode 100644
index b5f1f7febcf6..000000000000
--- a/drivers/staging/ramster/xvmalloc_int.h
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#ifndef _XV_MALLOC_INT_H_
14#define _XV_MALLOC_INT_H_
15
16#include <linux/kernel.h>
17#include <linux/types.h>
18
19/* User configurable params */
20
21/* Must be power of two */
22#ifdef CONFIG_64BIT
23#define XV_ALIGN_SHIFT 3
24#else
25#define XV_ALIGN_SHIFT 2
26#endif
27#define XV_ALIGN (1 << XV_ALIGN_SHIFT)
28#define XV_ALIGN_MASK (XV_ALIGN - 1)
29
30/* This must be greater than sizeof(link_free) */
31#define XV_MIN_ALLOC_SIZE 32
32#define XV_MAX_ALLOC_SIZE (PAGE_SIZE - XV_ALIGN)
33
34/*
35 * Free lists are separated by FL_DELTA bytes
36 * This value is 3 for 4k pages and 4 for 64k pages, for any
37 * other page size, a conservative (PAGE_SHIFT - 9) is used.
38 */
39#if PAGE_SHIFT == 16
40#define FL_DELTA_SHIFT 4
41#else
42#define FL_DELTA_SHIFT (PAGE_SHIFT - 9)
43#endif
44#define FL_DELTA (1 << FL_DELTA_SHIFT)
45#define FL_DELTA_MASK (FL_DELTA - 1)
46#define NUM_FREE_LISTS ((XV_MAX_ALLOC_SIZE - XV_MIN_ALLOC_SIZE) \
47 / FL_DELTA + 1)
48
49#define MAX_FLI DIV_ROUND_UP(NUM_FREE_LISTS, BITS_PER_LONG)
50
51/* End of user params */
52
53enum blockflags {
54 BLOCK_FREE,
55 PREV_FREE,
56 __NR_BLOCKFLAGS,
57};
58
59#define FLAGS_MASK XV_ALIGN_MASK
60#define PREV_MASK (~FLAGS_MASK)
61
62struct freelist_entry {
63 struct page *page;
64 u16 offset;
65 u16 pad;
66};
67
68struct link_free {
69 struct page *prev_page;
70 struct page *next_page;
71 u16 prev_offset;
72 u16 next_offset;
73};
74
75struct block_header {
76 union {
77 /* This common header must be XV_ALIGN bytes */
78 u8 common[XV_ALIGN];
79 struct {
80 u16 size;
81 u16 prev;
82 };
83 };
84 struct link_free link;
85};
86
87struct xv_pool {
88 ulong flbitmap;
89 ulong slbitmap[MAX_FLI];
90 u64 total_pages; /* stats */
91 struct freelist_entry freelist[NUM_FREE_LISTS];
92 spinlock_t lock;
93};
94
95#endif
diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c
index 4e7ef0e6b79c..225e3b32de33 100644
--- a/drivers/staging/ramster/zcache-main.c
+++ b/drivers/staging/ramster/zcache-main.c
@@ -6,9 +6,10 @@
6 * 6 *
7 * Zcache provides an in-kernel "host implementation" for transcendent memory 7 * Zcache provides an in-kernel "host implementation" for transcendent memory
8 * and, thus indirectly, for cleancache and frontswap. Zcache includes two 8 * and, thus indirectly, for cleancache and frontswap. Zcache includes two
9 * page-accessible memory [1] interfaces, both utilizing lzo1x compression: 9 * page-accessible memory [1] interfaces, both utilizing the crypto compression
10 * API:
10 * 1) "compression buddies" ("zbud") is used for ephemeral pages 11 * 1) "compression buddies" ("zbud") is used for ephemeral pages
11 * 2) xvmalloc is used for persistent pages. 12 * 2) zsmalloc is used for persistent pages.
12 * Xvmalloc (based on the TLSF allocator) has very low fragmentation 13 * Xvmalloc (based on the TLSF allocator) has very low fragmentation
13 * so maximizes space efficiency, while zbud allows pairs (and potentially, 14 * so maximizes space efficiency, while zbud allows pairs (and potentially,
14 * in the future, more than a pair of) compressed pages to be closely linked 15 * in the future, more than a pair of) compressed pages to be closely linked
@@ -26,18 +27,19 @@
26#include <linux/cpu.h> 27#include <linux/cpu.h>
27#include <linux/highmem.h> 28#include <linux/highmem.h>
28#include <linux/list.h> 29#include <linux/list.h>
29#include <linux/lzo.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/spinlock.h> 31#include <linux/spinlock.h>
32#include <linux/types.h> 32#include <linux/types.h>
33#include <linux/atomic.h> 33#include <linux/atomic.h>
34#include <linux/math64.h> 34#include <linux/math64.h>
35#include <linux/crypto.h>
36#include <linux/string.h>
35#include "tmem.h" 37#include "tmem.h"
36#include "zcache.h" 38#include "zcache.h"
37#include "ramster.h" 39#include "ramster.h"
38#include "cluster/tcp.h" 40#include "cluster/tcp.h"
39 41
40#include "xvmalloc.h" /* temporary until change to zsmalloc */ 42#include "../zsmalloc/zsmalloc.h"
41 43
42#define RAMSTER_TESTING 44#define RAMSTER_TESTING
43 45
@@ -88,6 +90,7 @@ struct zv_hdr {
88 uint16_t pool_id; 90 uint16_t pool_id;
89 struct tmem_oid oid; 91 struct tmem_oid oid;
90 uint32_t index; 92 uint32_t index;
93 size_t size;
91 DECL_SENTINEL 94 DECL_SENTINEL
92}; 95};
93 96
@@ -123,7 +126,7 @@ MODULE_LICENSE("GPL");
123 126
124struct zcache_client { 127struct zcache_client {
125 struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT]; 128 struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT];
126 struct xv_pool *xvpool; 129 struct zs_pool *zspool;
127 bool allocated; 130 bool allocated;
128 atomic_t refcount; 131 atomic_t refcount;
129}; 132};
@@ -144,6 +147,38 @@ static inline bool is_local_client(struct zcache_client *cli)
144 return cli == &zcache_host; 147 return cli == &zcache_host;
145} 148}
146 149
150/* crypto API for zcache */
151#define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME
152static char zcache_comp_name[ZCACHE_COMP_NAME_SZ];
153static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;
154
155enum comp_op {
156 ZCACHE_COMPOP_COMPRESS,
157 ZCACHE_COMPOP_DECOMPRESS
158};
159
160static inline int zcache_comp_op(enum comp_op op,
161 const u8 *src, unsigned int slen,
162 u8 *dst, unsigned int *dlen)
163{
164 struct crypto_comp *tfm;
165 int ret;
166
167 BUG_ON(!zcache_comp_pcpu_tfms);
168 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
169 BUG_ON(!tfm);
170 switch (op) {
171 case ZCACHE_COMPOP_COMPRESS:
172 ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
173 break;
174 case ZCACHE_COMPOP_DECOMPRESS:
175 ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
176 break;
177 }
178 put_cpu();
179 return ret;
180}
181
147/********** 182/**********
148 * Compression buddies ("zbud") provides for packing two (or, possibly 183 * Compression buddies ("zbud") provides for packing two (or, possibly
149 * in the future, more) compressed ephemeral pages into a single "raw" 184 * in the future, more) compressed ephemeral pages into a single "raw"
@@ -374,11 +409,13 @@ static void zbud_free_and_delist(struct zbud_hdr *zh)
374 /* FIXME, should be BUG_ON, pool destruction path doesn't disable 409 /* FIXME, should be BUG_ON, pool destruction path doesn't disable
375 * interrupts tmem_destroy_pool()->tmem_pampd_destroy_all_in_obj()-> 410 * interrupts tmem_destroy_pool()->tmem_pampd_destroy_all_in_obj()->
376 * tmem_objnode_node_destroy()-> zcache_pampd_free() */ 411 * tmem_objnode_node_destroy()-> zcache_pampd_free() */
377 WARN_ON(!irqs_disabled()); 412 /* WARN_ON(!irqs_disabled()); FIXME for now, just avoid spew */
413 spin_lock(&zbud_budlists_spinlock);
378 spin_lock(&zbpg->lock); 414 spin_lock(&zbpg->lock);
379 if (list_empty(&zbpg->bud_list)) { 415 if (list_empty(&zbpg->bud_list)) {
380 /* ignore zombie page... see zbud_evict_pages() */ 416 /* ignore zombie page... see zbud_evict_pages() */
381 spin_unlock(&zbpg->lock); 417 spin_unlock(&zbpg->lock);
418 spin_unlock(&zbud_budlists_spinlock);
382 return; 419 return;
383 } 420 }
384 size = zbud_free(zh); 421 size = zbud_free(zh);
@@ -386,7 +423,6 @@ static void zbud_free_and_delist(struct zbud_hdr *zh)
386 zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0]; 423 zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0];
387 if (zh_other->size == 0) { /* was unbuddied: unlist and free */ 424 if (zh_other->size == 0) { /* was unbuddied: unlist and free */
388 chunks = zbud_size_to_chunks(size) ; 425 chunks = zbud_size_to_chunks(size) ;
389 spin_lock(&zbud_budlists_spinlock);
390 BUG_ON(list_empty(&zbud_unbuddied[chunks].list)); 426 BUG_ON(list_empty(&zbud_unbuddied[chunks].list));
391 list_del_init(&zbpg->bud_list); 427 list_del_init(&zbpg->bud_list);
392 zbud_unbuddied[chunks].count--; 428 zbud_unbuddied[chunks].count--;
@@ -394,13 +430,12 @@ static void zbud_free_and_delist(struct zbud_hdr *zh)
394 zbud_free_raw_page(zbpg); 430 zbud_free_raw_page(zbpg);
395 } else { /* was buddied: move remaining buddy to unbuddied list */ 431 } else { /* was buddied: move remaining buddy to unbuddied list */
396 chunks = zbud_size_to_chunks(zh_other->size) ; 432 chunks = zbud_size_to_chunks(zh_other->size) ;
397 spin_lock(&zbud_budlists_spinlock);
398 list_del_init(&zbpg->bud_list); 433 list_del_init(&zbpg->bud_list);
399 zcache_zbud_buddied_count--; 434 zcache_zbud_buddied_count--;
400 list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list); 435 list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list);
401 zbud_unbuddied[chunks].count++; 436 zbud_unbuddied[chunks].count++;
402 spin_unlock(&zbud_budlists_spinlock);
403 spin_unlock(&zbpg->lock); 437 spin_unlock(&zbpg->lock);
438 spin_unlock(&zbud_budlists_spinlock);
404 } 439 }
405} 440}
406 441
@@ -469,6 +504,7 @@ init_zh:
469 memcpy(to, cdata, size); 504 memcpy(to, cdata, size);
470 spin_unlock(&zbpg->lock); 505 spin_unlock(&zbpg->lock);
471 spin_unlock(&zbud_budlists_spinlock); 506 spin_unlock(&zbud_budlists_spinlock);
507
472 zbud_cumul_chunk_counts[nchunks]++; 508 zbud_cumul_chunk_counts[nchunks]++;
473 atomic_inc(&zcache_zbud_curr_zpages); 509 atomic_inc(&zcache_zbud_curr_zpages);
474 zcache_zbud_cumul_zpages++; 510 zcache_zbud_cumul_zpages++;
@@ -482,7 +518,7 @@ static int zbud_decompress(struct page *page, struct zbud_hdr *zh)
482{ 518{
483 struct zbud_page *zbpg; 519 struct zbud_page *zbpg;
484 unsigned budnum = zbud_budnum(zh); 520 unsigned budnum = zbud_budnum(zh);
485 size_t out_len = PAGE_SIZE; 521 unsigned int out_len = PAGE_SIZE;
486 char *to_va, *from_va; 522 char *to_va, *from_va;
487 unsigned size; 523 unsigned size;
488 int ret = 0; 524 int ret = 0;
@@ -499,8 +535,9 @@ static int zbud_decompress(struct page *page, struct zbud_hdr *zh)
499 to_va = kmap_atomic(page); 535 to_va = kmap_atomic(page);
500 size = zh->size; 536 size = zh->size;
501 from_va = zbud_data(zh, size); 537 from_va = zbud_data(zh, size);
502 ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len); 538 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size,
503 BUG_ON(ret != LZO_E_OK); 539 to_va, &out_len);
540 BUG_ON(ret);
504 BUG_ON(out_len != PAGE_SIZE); 541 BUG_ON(out_len != PAGE_SIZE);
505 kunmap_atomic(to_va); 542 kunmap_atomic(to_va);
506out: 543out:
@@ -861,7 +898,7 @@ static void zcache_remote_pers_put(struct zv_hdr *zv)
861 xh.pool_id = zv->pool_id; 898 xh.pool_id = zv->pool_id;
862 xh.oid = zv->oid; 899 xh.oid = zv->oid;
863 xh.index = zv->index; 900 xh.index = zv->index;
864 size = xv_get_object_size(zv) - sizeof(*zv); 901 size = zv->size;
865 BUG_ON(size == 0 || size > zv_max_page_size); 902 BUG_ON(size == 0 || size > zv_max_page_size);
866 data = (char *)zv + sizeof(*zv); 903 data = (char *)zv + sizeof(*zv);
867 for (p = data, cksum = 0, i = 0; i < size; i++) 904 for (p = data, cksum = 0, i = 0; i < size; i++)
@@ -1063,8 +1100,8 @@ static int zbud_show_cumul_chunk_counts(char *buf)
1063#endif 1100#endif
1064 1101
1065/********** 1102/**********
1066 * This "zv" PAM implementation combines the TLSF-based xvMalloc 1103 * This "zv" PAM implementation combines the slab-based zsmalloc
1067 * with lzo1x compression to maximize the amount of data that can 1104 * with the crypto compression API to maximize the amount of data that can
1068 * be packed into a physical page. 1105 * be packed into a physical page.
1069 * 1106 *
1070 * Zv represents a PAM page with the index and object (plus a "size" value 1107 * Zv represents a PAM page with the index and object (plus a "size" value
@@ -1094,26 +1131,23 @@ static struct zv_hdr *zv_create(struct zcache_client *cli, uint32_t pool_id,
1094 struct tmem_oid *oid, uint32_t index, 1131 struct tmem_oid *oid, uint32_t index,
1095 void *cdata, unsigned clen) 1132 void *cdata, unsigned clen)
1096{ 1133{
1097 struct page *page; 1134 struct zv_hdr *zv;
1098 struct zv_hdr *zv = NULL; 1135 int size = clen + sizeof(struct zv_hdr);
1099 uint32_t offset; 1136 int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
1100 int alloc_size = clen + sizeof(struct zv_hdr); 1137 void *handle = NULL;
1101 int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
1102 int ret;
1103 1138
1104 BUG_ON(!irqs_disabled()); 1139 BUG_ON(!irqs_disabled());
1105 BUG_ON(chunks >= NCHUNKS); 1140 BUG_ON(chunks >= NCHUNKS);
1106 ret = xv_malloc(cli->xvpool, clen + sizeof(struct zv_hdr), 1141 handle = zs_malloc(cli->zspool, size);
1107 &page, &offset, ZCACHE_GFP_MASK); 1142 if (!handle)
1108 if (unlikely(ret))
1109 goto out; 1143 goto out;
1110 atomic_inc(&zv_curr_dist_counts[chunks]); 1144 atomic_inc(&zv_curr_dist_counts[chunks]);
1111 atomic_inc(&zv_cumul_dist_counts[chunks]); 1145 atomic_inc(&zv_cumul_dist_counts[chunks]);
1112 zv = kmap_atomic(page) + offset; 1146 zv = zs_map_object(cli->zspool, handle);
1113 zv->index = index; 1147 zv->index = index;
1114 zv->oid = *oid; 1148 zv->oid = *oid;
1115 zv->pool_id = pool_id; 1149 zv->pool_id = pool_id;
1116 SET_SENTINEL(zv, ZVH); 1150 zv->size = clen;
1117 INIT_LIST_HEAD(&zv->rem_op.list); 1151 INIT_LIST_HEAD(&zv->rem_op.list);
1118 zv->client_id = get_client_id_from_client(cli); 1152 zv->client_id = get_client_id_from_client(cli);
1119 zv->rem_op.op = RAMSTER_REMOTIFY_PERS_PUT; 1153 zv->rem_op.op = RAMSTER_REMOTIFY_PERS_PUT;
@@ -1122,10 +1156,11 @@ static struct zv_hdr *zv_create(struct zcache_client *cli, uint32_t pool_id,
1122 list_add_tail(&zv->rem_op.list, &zcache_rem_op_list); 1156 list_add_tail(&zv->rem_op.list, &zcache_rem_op_list);
1123 spin_unlock(&zcache_rem_op_list_lock); 1157 spin_unlock(&zcache_rem_op_list_lock);
1124 } 1158 }
1159 SET_SENTINEL(zv, ZVH);
1125 memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen); 1160 memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen);
1126 kunmap_atomic(zv); 1161 zs_unmap_object(cli->zspool, handle);
1127out: 1162out:
1128 return zv; 1163 return handle;
1129} 1164}
1130 1165
1131/* similar to zv_create, but just reserve space, no data yet */ 1166/* similar to zv_create, but just reserve space, no data yet */
@@ -1134,71 +1169,74 @@ static struct zv_hdr *zv_alloc(struct tmem_pool *pool,
1134 unsigned clen) 1169 unsigned clen)
1135{ 1170{
1136 struct zcache_client *cli = pool->client; 1171 struct zcache_client *cli = pool->client;
1137 struct page *page; 1172 struct zv_hdr *zv;
1138 struct zv_hdr *zv = NULL; 1173 int size = clen + sizeof(struct zv_hdr);
1139 uint32_t offset; 1174 void *handle = NULL;
1140 int ret;
1141 1175
1142 BUG_ON(!irqs_disabled()); 1176 BUG_ON(!irqs_disabled());
1143 BUG_ON(!is_local_client(pool->client)); 1177 BUG_ON(!is_local_client(pool->client));
1144 ret = xv_malloc(cli->xvpool, clen + sizeof(struct zv_hdr), 1178 handle = zs_malloc(cli->zspool, size);
1145 &page, &offset, ZCACHE_GFP_MASK); 1179 if (!handle)
1146 if (unlikely(ret))
1147 goto out; 1180 goto out;
1148 zv = kmap_atomic(page) + offset; 1181 zv = zs_map_object(cli->zspool, handle);
1149 SET_SENTINEL(zv, ZVH);
1150 INIT_LIST_HEAD(&zv->rem_op.list); 1182 INIT_LIST_HEAD(&zv->rem_op.list);
1151 zv->client_id = LOCAL_CLIENT; 1183 zv->client_id = LOCAL_CLIENT;
1152 zv->rem_op.op = RAMSTER_INTRANSIT_PERS; 1184 zv->rem_op.op = RAMSTER_INTRANSIT_PERS;
1153 zv->index = index; 1185 zv->index = index;
1154 zv->oid = *oid; 1186 zv->oid = *oid;
1155 zv->pool_id = pool->pool_id; 1187 zv->pool_id = pool->pool_id;
1156 kunmap_atomic(zv); 1188 zv->size = clen;
1189 SET_SENTINEL(zv, ZVH);
1190 zs_unmap_object(cli->zspool, handle);
1157out: 1191out:
1158 return zv; 1192 return handle;
1159} 1193}
1160 1194
1161static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv) 1195static void zv_free(struct zs_pool *pool, void *handle)
1162{ 1196{
1163 unsigned long flags; 1197 unsigned long flags;
1164 struct page *page; 1198 struct zv_hdr *zv;
1165 uint32_t offset; 1199 uint16_t size;
1166 uint16_t size = xv_get_object_size(zv); 1200 int chunks;
1167 int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
1168 1201
1202 zv = zs_map_object(pool, handle);
1169 ASSERT_SENTINEL(zv, ZVH); 1203 ASSERT_SENTINEL(zv, ZVH);
1204 size = zv->size + sizeof(struct zv_hdr);
1205 INVERT_SENTINEL(zv, ZVH);
1206
1207 chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
1170 BUG_ON(chunks >= NCHUNKS); 1208 BUG_ON(chunks >= NCHUNKS);
1171 atomic_dec(&zv_curr_dist_counts[chunks]); 1209 atomic_dec(&zv_curr_dist_counts[chunks]);
1172 size -= sizeof(*zv);
1173 spin_lock(&zcache_rem_op_list_lock); 1210 spin_lock(&zcache_rem_op_list_lock);
1174 size = xv_get_object_size(zv) - sizeof(*zv);
1175 BUG_ON(size == 0); 1211 BUG_ON(size == 0);
1176 INVERT_SENTINEL(zv, ZVH);
1177 if (!list_empty(&zv->rem_op.list)) 1212 if (!list_empty(&zv->rem_op.list))
1178 list_del_init(&zv->rem_op.list); 1213 list_del_init(&zv->rem_op.list);
1179 spin_unlock(&zcache_rem_op_list_lock); 1214 spin_unlock(&zcache_rem_op_list_lock);
1180 page = virt_to_page(zv); 1215 zs_unmap_object(pool, handle);
1181 offset = (unsigned long)zv & ~PAGE_MASK; 1216
1182 local_irq_save(flags); 1217 local_irq_save(flags);
1183 xv_free(xvpool, page, offset); 1218 zs_free(pool, handle);
1184 local_irq_restore(flags); 1219 local_irq_restore(flags);
1185} 1220}
1186 1221
1187static void zv_decompress(struct page *page, struct zv_hdr *zv) 1222static void zv_decompress(struct tmem_pool *pool,
1223 struct page *page, void *handle)
1188{ 1224{
1189 size_t clen = PAGE_SIZE; 1225 unsigned int clen = PAGE_SIZE;
1190 char *to_va; 1226 char *to_va;
1191 unsigned size;
1192 int ret; 1227 int ret;
1228 struct zv_hdr *zv;
1229 struct zcache_client *cli = pool->client;
1193 1230
1231 zv = zs_map_object(cli->zspool, handle);
1232 BUG_ON(zv->size == 0);
1194 ASSERT_SENTINEL(zv, ZVH); 1233 ASSERT_SENTINEL(zv, ZVH);
1195 size = xv_get_object_size(zv) - sizeof(*zv);
1196 BUG_ON(size == 0);
1197 to_va = kmap_atomic(page); 1234 to_va = kmap_atomic(page);
1198 ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv), 1235 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, (char *)zv + sizeof(*zv),
1199 size, to_va, &clen); 1236 zv->size, to_va, &clen);
1200 kunmap_atomic(to_va); 1237 kunmap_atomic(to_va);
1201 BUG_ON(ret != LZO_E_OK); 1238 zs_unmap_object(cli->zspool, handle);
1239 BUG_ON(ret);
1202 BUG_ON(clen != PAGE_SIZE); 1240 BUG_ON(clen != PAGE_SIZE);
1203} 1241}
1204 1242
@@ -1207,7 +1245,7 @@ static void zv_copy_from_pampd(char *data, size_t *bufsize, struct zv_hdr *zv)
1207 unsigned size; 1245 unsigned size;
1208 1246
1209 ASSERT_SENTINEL(zv, ZVH); 1247 ASSERT_SENTINEL(zv, ZVH);
1210 size = xv_get_object_size(zv) - sizeof(*zv); 1248 size = zv->size;
1211 BUG_ON(size == 0 || size > zv_max_page_size); 1249 BUG_ON(size == 0 || size > zv_max_page_size);
1212 BUG_ON(size > *bufsize); 1250 BUG_ON(size > *bufsize);
1213 memcpy(data, (char *)zv + sizeof(*zv), size); 1251 memcpy(data, (char *)zv + sizeof(*zv), size);
@@ -1219,7 +1257,7 @@ static void zv_copy_to_pampd(struct zv_hdr *zv, char *data, size_t size)
1219 unsigned zv_size; 1257 unsigned zv_size;
1220 1258
1221 ASSERT_SENTINEL(zv, ZVH); 1259 ASSERT_SENTINEL(zv, ZVH);
1222 zv_size = xv_get_object_size(zv) - sizeof(*zv); 1260 zv_size = zv->size;
1223 BUG_ON(zv_size != size); 1261 BUG_ON(zv_size != size);
1224 BUG_ON(zv_size == 0 || zv_size > zv_max_page_size); 1262 BUG_ON(zv_size == 0 || zv_size > zv_max_page_size);
1225 memcpy((char *)zv + sizeof(*zv), data, size); 1263 memcpy((char *)zv + sizeof(*zv), data, size);
@@ -1448,8 +1486,8 @@ int zcache_new_client(uint16_t cli_id)
1448 goto out; 1486 goto out;
1449 cli->allocated = 1; 1487 cli->allocated = 1;
1450#ifdef CONFIG_FRONTSWAP 1488#ifdef CONFIG_FRONTSWAP
1451 cli->xvpool = xv_create_pool(); 1489 cli->zspool = zs_create_pool("zcache", ZCACHE_GFP_MASK);
1452 if (cli->xvpool == NULL) 1490 if (cli->zspool == NULL)
1453 goto out; 1491 goto out;
1454#endif 1492#endif
1455 ret = 0; 1493 ret = 0;
@@ -1701,7 +1739,7 @@ static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0);
1701static unsigned long zcache_curr_pers_pampd_count_max; 1739static unsigned long zcache_curr_pers_pampd_count_max;
1702 1740
1703/* forward reference */ 1741/* forward reference */
1704static int zcache_compress(struct page *from, void **out_va, size_t *out_len); 1742static int zcache_compress(struct page *from, void **out_va, unsigned *out_len);
1705 1743
1706static int zcache_pampd_eph_create(char *data, size_t size, bool raw, 1744static int zcache_pampd_eph_create(char *data, size_t size, bool raw,
1707 struct tmem_pool *pool, struct tmem_oid *oid, 1745 struct tmem_pool *pool, struct tmem_oid *oid,
@@ -1709,7 +1747,7 @@ static int zcache_pampd_eph_create(char *data, size_t size, bool raw,
1709{ 1747{
1710 int ret = -1; 1748 int ret = -1;
1711 void *cdata = data; 1749 void *cdata = data;
1712 size_t clen = size; 1750 unsigned int clen = size;
1713 struct zcache_client *cli = pool->client; 1751 struct zcache_client *cli = pool->client;
1714 uint16_t client_id = get_client_id_from_client(cli); 1752 uint16_t client_id = get_client_id_from_client(cli);
1715 struct page *page = NULL; 1753 struct page *page = NULL;
@@ -1750,7 +1788,7 @@ static int zcache_pampd_pers_create(char *data, size_t size, bool raw,
1750{ 1788{
1751 int ret = -1; 1789 int ret = -1;
1752 void *cdata = data; 1790 void *cdata = data;
1753 size_t clen = size; 1791 unsigned int clen = size;
1754 struct zcache_client *cli = pool->client; 1792 struct zcache_client *cli = pool->client;
1755 struct page *page; 1793 struct page *page;
1756 unsigned long count; 1794 unsigned long count;
@@ -1788,7 +1826,7 @@ static int zcache_pampd_pers_create(char *data, size_t size, bool raw,
1788 } 1826 }
1789 /* reject if mean compression is too poor */ 1827 /* reject if mean compression is too poor */
1790 if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) { 1828 if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {
1791 total_zsize = xv_get_total_size_bytes(cli->xvpool); 1829 total_zsize = zs_get_total_size_bytes(cli->zspool);
1792 zv_mean_zsize = div_u64(total_zsize, curr_pers_pampd_count); 1830 zv_mean_zsize = div_u64(total_zsize, curr_pers_pampd_count);
1793 if (zv_mean_zsize > zv_max_mean_zsize) { 1831 if (zv_mean_zsize > zv_max_mean_zsize) {
1794 zcache_mean_compress_poor++; 1832 zcache_mean_compress_poor++;
@@ -1851,7 +1889,7 @@ static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
1851 if (raw) 1889 if (raw)
1852 zv_copy_from_pampd(data, bufsize, pampd); 1890 zv_copy_from_pampd(data, bufsize, pampd);
1853 else 1891 else
1854 zv_decompress(virt_to_page(data), pampd); 1892 zv_decompress(pool, virt_to_page(data), pampd);
1855 return ret; 1893 return ret;
1856} 1894}
1857 1895
@@ -1882,8 +1920,8 @@ static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,
1882 if (raw) 1920 if (raw)
1883 zv_copy_from_pampd(data, bufsize, pampd); 1921 zv_copy_from_pampd(data, bufsize, pampd);
1884 else 1922 else
1885 zv_decompress(virt_to_page(data), pampd); 1923 zv_decompress(pool, virt_to_page(data), pampd);
1886 zv_free(cli->xvpool, pampd); 1924 zv_free(cli->zspool, pampd);
1887 if (!is_local_client(cli)) 1925 if (!is_local_client(cli))
1888 dec_and_check(&ramster_foreign_pers_pampd_count); 1926 dec_and_check(&ramster_foreign_pers_pampd_count);
1889 dec_and_check(&zcache_curr_pers_pampd_count); 1927 dec_and_check(&zcache_curr_pers_pampd_count);
@@ -1951,7 +1989,7 @@ local_pers:
1951 zv = (struct zv_hdr *)pampd; 1989 zv = (struct zv_hdr *)pampd;
1952 if (!is_local_client(pool->client)) 1990 if (!is_local_client(pool->client))
1953 dec_and_check(&ramster_foreign_pers_pampd_count); 1991 dec_and_check(&ramster_foreign_pers_pampd_count);
1954 zv_free(cli->xvpool, zv); 1992 zv_free(cli->zspool, zv);
1955 if (acct) 1993 if (acct)
1956 /* FIXME get these working properly again */ 1994 /* FIXME get these working properly again */
1957 dec_and_check(&zcache_curr_pers_pampd_count); 1995 dec_and_check(&zcache_curr_pers_pampd_count);
@@ -2019,7 +2057,7 @@ int zcache_localify(int pool_id, struct tmem_oid *oidp,
2019 unsigned long flags; 2057 unsigned long flags;
2020 struct tmem_pool *pool; 2058 struct tmem_pool *pool;
2021 bool ephemeral, delete = false; 2059 bool ephemeral, delete = false;
2022 size_t clen = PAGE_SIZE; 2060 unsigned int clen = PAGE_SIZE;
2023 void *pampd, *saved_hb; 2061 void *pampd, *saved_hb;
2024 struct tmem_obj *obj; 2062 struct tmem_obj *obj;
2025 2063
@@ -2074,9 +2112,9 @@ int zcache_localify(int pool_id, struct tmem_oid *oidp,
2074 } 2112 }
2075 if (extra != NULL) { 2113 if (extra != NULL) {
2076 /* decompress direct-to-memory to complete remotify */ 2114 /* decompress direct-to-memory to complete remotify */
2077 ret = lzo1x_decompress_safe((char *)data, size, 2115 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, (char *)data,
2078 (char *)extra, &clen); 2116 size, (char *)extra, &clen);
2079 BUG_ON(ret != LZO_E_OK); 2117 BUG_ON(ret);
2080 BUG_ON(clen != PAGE_SIZE); 2118 BUG_ON(clen != PAGE_SIZE);
2081 } 2119 }
2082 if (ephemeral) 2120 if (ephemeral)
@@ -2188,25 +2226,24 @@ static struct tmem_pamops zcache_pamops = {
2188 * zcache compression/decompression and related per-cpu stuff 2226 * zcache compression/decompression and related per-cpu stuff
2189 */ 2227 */
2190 2228
2191#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS
2192#define LZO_DSTMEM_PAGE_ORDER 1
2193static DEFINE_PER_CPU(unsigned char *, zcache_workmem);
2194static DEFINE_PER_CPU(unsigned char *, zcache_dstmem); 2229static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);
2230#define ZCACHE_DSTMEM_ORDER 1
2195 2231
2196static int zcache_compress(struct page *from, void **out_va, size_t *out_len) 2232static int zcache_compress(struct page *from, void **out_va, unsigned *out_len)
2197{ 2233{
2198 int ret = 0; 2234 int ret = 0;
2199 unsigned char *dmem = __get_cpu_var(zcache_dstmem); 2235 unsigned char *dmem = __get_cpu_var(zcache_dstmem);
2200 unsigned char *wmem = __get_cpu_var(zcache_workmem);
2201 char *from_va; 2236 char *from_va;
2202 2237
2203 BUG_ON(!irqs_disabled()); 2238 BUG_ON(!irqs_disabled());
2204 if (unlikely(dmem == NULL || wmem == NULL)) 2239 if (unlikely(dmem == NULL))
2205 goto out; /* no buffer, so can't compress */ 2240 goto out; /* no buffer or no compressor so can't compress */
2241 *out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER;
2206 from_va = kmap_atomic(from); 2242 from_va = kmap_atomic(from);
2207 mb(); 2243 mb();
2208 ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem); 2244 ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem,
2209 BUG_ON(ret != LZO_E_OK); 2245 out_len);
2246 BUG_ON(ret);
2210 *out_va = dmem; 2247 *out_va = dmem;
2211 kunmap_atomic(from_va); 2248 kunmap_atomic(from_va);
2212 ret = 1; 2249 ret = 1;
@@ -2214,33 +2251,52 @@ out:
2214 return ret; 2251 return ret;
2215} 2252}
2216 2253
2254static int zcache_comp_cpu_up(int cpu)
2255{
2256 struct crypto_comp *tfm;
2257
2258 tfm = crypto_alloc_comp(zcache_comp_name, 0, 0);
2259 if (IS_ERR(tfm))
2260 return NOTIFY_BAD;
2261 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
2262 return NOTIFY_OK;
2263}
2264
2265static void zcache_comp_cpu_down(int cpu)
2266{
2267 struct crypto_comp *tfm;
2268
2269 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
2270 crypto_free_comp(tfm);
2271 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
2272}
2217 2273
2218static int zcache_cpu_notifier(struct notifier_block *nb, 2274static int zcache_cpu_notifier(struct notifier_block *nb,
2219 unsigned long action, void *pcpu) 2275 unsigned long action, void *pcpu)
2220{ 2276{
2221 int cpu = (long)pcpu; 2277 int ret, cpu = (long)pcpu;
2222 struct zcache_preload *kp; 2278 struct zcache_preload *kp;
2223 2279
2224 switch (action) { 2280 switch (action) {
2225 case CPU_UP_PREPARE: 2281 case CPU_UP_PREPARE:
2282 ret = zcache_comp_cpu_up(cpu);
2283 if (ret != NOTIFY_OK) {
2284 pr_err("zcache: can't allocate compressor transform\n");
2285 return ret;
2286 }
2226 per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages( 2287 per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
2227 GFP_KERNEL | __GFP_REPEAT, 2288 GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER),
2228 LZO_DSTMEM_PAGE_ORDER),
2229 per_cpu(zcache_workmem, cpu) =
2230 kzalloc(LZO1X_MEM_COMPRESS,
2231 GFP_KERNEL | __GFP_REPEAT);
2232 per_cpu(zcache_remoteputmem, cpu) = 2289 per_cpu(zcache_remoteputmem, cpu) =
2233 kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT); 2290 kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT);
2234 break; 2291 break;
2235 case CPU_DEAD: 2292 case CPU_DEAD:
2236 case CPU_UP_CANCELED: 2293 case CPU_UP_CANCELED:
2294 zcache_comp_cpu_down(cpu);
2237 kfree(per_cpu(zcache_remoteputmem, cpu)); 2295 kfree(per_cpu(zcache_remoteputmem, cpu));
2238 per_cpu(zcache_remoteputmem, cpu) = NULL; 2296 per_cpu(zcache_remoteputmem, cpu) = NULL;
2239 free_pages((unsigned long)per_cpu(zcache_dstmem, cpu), 2297 free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
2240 LZO_DSTMEM_PAGE_ORDER); 2298 ZCACHE_DSTMEM_ORDER);
2241 per_cpu(zcache_dstmem, cpu) = NULL; 2299 per_cpu(zcache_dstmem, cpu) = NULL;
2242 kfree(per_cpu(zcache_workmem, cpu));
2243 per_cpu(zcache_workmem, cpu) = NULL;
2244 kp = &per_cpu(zcache_preloads, cpu); 2300 kp = &per_cpu(zcache_preloads, cpu);
2245 while (kp->nr) { 2301 while (kp->nr) {
2246 kmem_cache_free(zcache_objnode_cache, 2302 kmem_cache_free(zcache_objnode_cache,
@@ -2752,7 +2808,8 @@ int zcache_client_destroy_pool(int cli_id, int pool_id)
2752 ret = tmem_destroy_pool(pool); 2808 ret = tmem_destroy_pool(pool);
2753 local_bh_enable(); 2809 local_bh_enable();
2754 kfree(pool); 2810 kfree(pool);
2755 pr_info("ramster: destroyed pool id=%d cli_id=%d\n", pool_id, cli_id); 2811 pr_info("ramster: destroyed pool id=%d cli_id=%d\n",
2812 pool_id, cli_id);
2756out: 2813out:
2757 return ret; 2814 return ret;
2758} 2815}
@@ -3245,6 +3302,44 @@ static int __init no_frontswap(char *s)
3245 3302
3246__setup("nofrontswap", no_frontswap); 3303__setup("nofrontswap", no_frontswap);
3247 3304
3305static int __init enable_zcache_compressor(char *s)
3306{
3307 strncpy(zcache_comp_name, s, ZCACHE_COMP_NAME_SZ);
3308 ramster_enabled = 1;
3309 return 1;
3310}
3311__setup("zcache=", enable_zcache_compressor);
3312
3313
3314static int zcache_comp_init(void)
3315{
3316 int ret = 0;
3317
3318 /* check crypto algorithm */
3319 if (*zcache_comp_name != '\0') {
3320 ret = crypto_has_comp(zcache_comp_name, 0, 0);
3321 if (!ret)
3322 pr_info("zcache: %s not supported\n",
3323 zcache_comp_name);
3324 }
3325 if (!ret)
3326 strcpy(zcache_comp_name, "lzo");
3327 ret = crypto_has_comp(zcache_comp_name, 0, 0);
3328 if (!ret) {
3329 ret = 1;
3330 goto out;
3331 }
3332 pr_info("zcache: using %s compressor\n", zcache_comp_name);
3333
3334 /* alloc percpu transforms */
3335 ret = 0;
3336 zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
3337 if (!zcache_comp_pcpu_tfms)
3338 ret = 1;
3339out:
3340 return ret;
3341}
3342
3248static int __init zcache_init(void) 3343static int __init zcache_init(void)
3249{ 3344{
3250 int ret = 0; 3345 int ret = 0;
@@ -3269,6 +3364,11 @@ static int __init zcache_init(void)
3269 pr_err("ramster: can't register cpu notifier\n"); 3364 pr_err("ramster: can't register cpu notifier\n");
3270 goto out; 3365 goto out;
3271 } 3366 }
3367 ret = zcache_comp_init();
3368 if (ret) {
3369 pr_err("zcache: compressor initialization failed\n");
3370 goto out;
3371 }
3272 for_each_online_cpu(cpu) { 3372 for_each_online_cpu(cpu) {
3273 void *pcpu = (void *)(long)cpu; 3373 void *pcpu = (void *)(long)cpu;
3274 zcache_cpu_notifier(&zcache_cpu_notifier_block, 3374 zcache_cpu_notifier(&zcache_cpu_notifier_block,
@@ -3306,7 +3406,7 @@ static int __init zcache_init(void)
3306 zcache_new_client(LOCAL_CLIENT); 3406 zcache_new_client(LOCAL_CLIENT);
3307 old_ops = zcache_frontswap_register_ops(); 3407 old_ops = zcache_frontswap_register_ops();
3308 pr_info("ramster: frontswap enabled using kernel " 3408 pr_info("ramster: frontswap enabled using kernel "
3309 "transcendent memory and xvmalloc\n"); 3409 "transcendent memory and zsmalloc\n");
3310 if (old_ops.init != NULL) 3410 if (old_ops.init != NULL)
3311 pr_warning("ramster: frontswap_ops overridden"); 3411 pr_warning("ramster: frontswap_ops overridden");
3312 } 3412 }
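
For context on the allocator side of the change: zsmalloc hands back opaque handles that must be mapped before the object can be touched, which is why zv_create()/zv_alloc()/zv_free() above bracket their accesses with zs_map_object()/zs_unmap_object(). The fragment below is only an illustrative sketch of that pattern using the 3.4-era staging zsmalloc calls seen in the diff; the function name is made up and it is not part of the patch.

#include <linux/string.h>
#include "../zsmalloc/zsmalloc.h"

/* Sketch only: store a compressed blob in a zsmalloc pool and return its handle. */
static void *example_zs_store(struct zs_pool *pool, const void *data, size_t len)
{
	void *handle, *obj;

	handle = zs_malloc(pool, len);		/* opaque handle, not a usable pointer */
	if (!handle)
		return NULL;
	obj = zs_map_object(pool, handle);	/* map temporarily to fill the object */
	memcpy(obj, data, len);
	zs_unmap_object(pool, handle);
	return handle;				/* later released with zs_free(pool, handle) */
}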