path: root/drivers/block/zram
author    Minchan Kim <minchan@kernel.org>    2014-01-30 18:45:52 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-01-30 19:56:55 -0500
commit    cd67e10ac6997c6d1e1504e3c111b693bfdbc148 (patch)
tree      a37521eff62f7e7e86ab94be44f02aca4c6a0abb /drivers/block/zram
parent    bcf1647d0899666f0fb90d176abf63bae22abb7c (diff)
zram: promote zram from staging
Zram has lived in staging for a long, long time and has been fixed and improved by many contributors, so the code is clean and stable now. Of course, there are lots of products using zram in real practice.

The major TV companies have used zram as swap for two years now, and our production team recently released an Android smartphone that also uses zram as swap; Android KitKat has started using zram on low-memory smartphones as well. There was a report that Google shipped Chrome OS with zram, too, CyanogenMod has used zram for a long time, and I have heard that some distros use a zram block device for tmpfs. In addition, I have seen reports from many other people; for example, Lubuntu has started to use it.

The benefit of zram is very clear. In my experience, one benefit was removing jitter from a video application under background memory pressure. Part of that comes from the more efficient memory use that compression gives, but the bigger issue is whether swap exists in the system at all. Recent mobile platforms use Java, so there are many anonymous pages, but embedded systems are normally reluctant to use eMMC or an SD card as swap because of wear-leveling and latency issues. If we do not use swap, we cannot reclaim anonymous pages and may eventually hit the OOM killer. :(

Even when we have real storage to use as swap, it is still a problem, because slow swap storage performance sometimes makes the system very unresponsive.

Quote from Luigi at Google:

"Since Chrome OS was mentioned: the main reason why we don't use swap to a disk (rotating or SSD) is because it doesn't degrade gracefully and leads to a bad interactive experience. Generally we prefer to manage RAM at a higher level, by transparently killing and restarting processes. But we noticed that zram is fast enough to be competitive with the latter, and it lets us make more efficient use of the available RAM."

and he announced it here:
http://www.spinics.net/lists/linux-mm/msg57717.html

Another use case is zram as a plain block device. Since zram is a block device, anyone can format and mount it, so some people on the internet have started using zram for /var/tmp.
http://forums.gentoo.org/viewtopic-t-838198-start-0.html

Let's promote zram and enhance/maintain it instead of removing it.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Nitin Gupta <ngupta@vflare.org>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Seth Jennings <sjenning@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
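[Editor's illustration, not part of the patch: a minimal userspace sketch of how the disksize sysfs attribute exported by this driver (disksize_store() in zram_drv.c below) might be driven before the device is used as swap or formatted and mounted. It assumes the module is already loaded with the default num_devices=1, so zram0 exists; the size string here is a plain byte count, though memparse() also accepts suffixes such as 256M.]

    /*
     * Hypothetical userspace helper (illustration only): set the zram0
     * disksize through sysfs and read it back. Error handling is minimal.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        const char *attr = "/sys/block/zram0/disksize";
        const char *size = "268435456";   /* 256 MB; parsed by memparse() */
        char buf[64];
        ssize_t n;
        int fd;

        fd = open(attr, O_WRONLY);
        if (fd < 0 || write(fd, size, strlen(size)) < 0) {
            perror(attr);
            return 1;
        }
        close(fd);

        fd = open(attr, O_RDONLY);        /* disksize_show() reports bytes */
        if (fd < 0) {
            perror(attr);
            return 1;
        }
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
            buf[n] = '\0';
            printf("zram0 disksize: %s", buf);
        }
        close(fd);

        /*
         * /dev/zram0 can now be used as a swap device (mkswap + swapon)
         * or formatted with a filesystem and mounted, e.g. for /var/tmp.
         */
        return 0;
    }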
Diffstat (limited to 'drivers/block/zram')
-rw-r--r--  drivers/block/zram/Kconfig       25
-rw-r--r--  drivers/block/zram/Makefile       3
-rw-r--r--  drivers/block/zram/zram_drv.c   994
-rw-r--r--  drivers/block/zram/zram_drv.h   124
4 files changed, 1146 insertions, 0 deletions
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
new file mode 100644
index 000000000000..983314c41349
--- /dev/null
+++ b/drivers/block/zram/Kconfig
@@ -0,0 +1,25 @@
1config ZRAM
2 tristate "Compressed RAM block device support"
3 depends on BLOCK && SYSFS && ZSMALLOC
4 select LZO_COMPRESS
5 select LZO_DECOMPRESS
6 default n
7 help
8 Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
9 Pages written to these disks are compressed and stored in memory
10 itself. These disks allow very fast I/O and compression provides
11 good amounts of memory savings.
12
13 It has several use cases, for example: /tmp storage, use as swap
14 disks and maybe many more.
15
16 See zram.txt for more information.
17 Project home: <https://compcache.googlecode.com/>
18
19config ZRAM_DEBUG
20 bool "Compressed RAM block device debug support"
21 depends on ZRAM
22 default n
23 help
24 This option adds additional debugging code to the compressed
25 RAM block device driver.
diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile
new file mode 100644
index 000000000000..cb0f9ced6a93
--- /dev/null
+++ b/drivers/block/zram/Makefile
@@ -0,0 +1,3 @@
1zram-y := zram_drv.o
2
3obj-$(CONFIG_ZRAM) += zram.o
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
new file mode 100644
index 000000000000..108f2733106d
--- /dev/null
+++ b/drivers/block/zram/zram_drv.c
@@ -0,0 +1,994 @@
1/*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#define KMSG_COMPONENT "zram"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#ifdef CONFIG_ZRAM_DEBUG
19#define DEBUG
20#endif
21
22#include <linux/module.h>
23#include <linux/kernel.h>
24#include <linux/bio.h>
25#include <linux/bitops.h>
26#include <linux/blkdev.h>
27#include <linux/buffer_head.h>
28#include <linux/device.h>
29#include <linux/genhd.h>
30#include <linux/highmem.h>
31#include <linux/slab.h>
32#include <linux/lzo.h>
33#include <linux/string.h>
34#include <linux/vmalloc.h>
35
36#include "zram_drv.h"
37
38/* Globals */
39static int zram_major;
40static struct zram *zram_devices;
41
42/* Module params (documentation at end) */
43static unsigned int num_devices = 1;
44
45static inline struct zram *dev_to_zram(struct device *dev)
46{
47 return (struct zram *)dev_to_disk(dev)->private_data;
48}
49
50static ssize_t disksize_show(struct device *dev,
51 struct device_attribute *attr, char *buf)
52{
53 struct zram *zram = dev_to_zram(dev);
54
55 return sprintf(buf, "%llu\n", zram->disksize);
56}
57
58static ssize_t initstate_show(struct device *dev,
59 struct device_attribute *attr, char *buf)
60{
61 struct zram *zram = dev_to_zram(dev);
62
63 return sprintf(buf, "%u\n", zram->init_done);
64}
65
66static ssize_t num_reads_show(struct device *dev,
67 struct device_attribute *attr, char *buf)
68{
69 struct zram *zram = dev_to_zram(dev);
70
71 return sprintf(buf, "%llu\n",
72 (u64)atomic64_read(&zram->stats.num_reads));
73}
74
75static ssize_t num_writes_show(struct device *dev,
76 struct device_attribute *attr, char *buf)
77{
78 struct zram *zram = dev_to_zram(dev);
79
80 return sprintf(buf, "%llu\n",
81 (u64)atomic64_read(&zram->stats.num_writes));
82}
83
84static ssize_t invalid_io_show(struct device *dev,
85 struct device_attribute *attr, char *buf)
86{
87 struct zram *zram = dev_to_zram(dev);
88
89 return sprintf(buf, "%llu\n",
90 (u64)atomic64_read(&zram->stats.invalid_io));
91}
92
93static ssize_t notify_free_show(struct device *dev,
94 struct device_attribute *attr, char *buf)
95{
96 struct zram *zram = dev_to_zram(dev);
97
98 return sprintf(buf, "%llu\n",
99 (u64)atomic64_read(&zram->stats.notify_free));
100}
101
102static ssize_t zero_pages_show(struct device *dev,
103 struct device_attribute *attr, char *buf)
104{
105 struct zram *zram = dev_to_zram(dev);
106
107 return sprintf(buf, "%u\n", zram->stats.pages_zero);
108}
109
110static ssize_t orig_data_size_show(struct device *dev,
111 struct device_attribute *attr, char *buf)
112{
113 struct zram *zram = dev_to_zram(dev);
114
115 return sprintf(buf, "%llu\n",
116 (u64)(zram->stats.pages_stored) << PAGE_SHIFT);
117}
118
119static ssize_t compr_data_size_show(struct device *dev,
120 struct device_attribute *attr, char *buf)
121{
122 struct zram *zram = dev_to_zram(dev);
123
124 return sprintf(buf, "%llu\n",
125 (u64)atomic64_read(&zram->stats.compr_size));
126}
127
128static ssize_t mem_used_total_show(struct device *dev,
129 struct device_attribute *attr, char *buf)
130{
131 u64 val = 0;
132 struct zram *zram = dev_to_zram(dev);
133 struct zram_meta *meta = zram->meta;
134
135 down_read(&zram->init_lock);
136 if (zram->init_done)
137 val = zs_get_total_size_bytes(meta->mem_pool);
138 up_read(&zram->init_lock);
139
140 return sprintf(buf, "%llu\n", val);
141}
142
143static int zram_test_flag(struct zram_meta *meta, u32 index,
144 enum zram_pageflags flag)
145{
146 return meta->table[index].flags & BIT(flag);
147}
148
149static void zram_set_flag(struct zram_meta *meta, u32 index,
150 enum zram_pageflags flag)
151{
152 meta->table[index].flags |= BIT(flag);
153}
154
155static void zram_clear_flag(struct zram_meta *meta, u32 index,
156 enum zram_pageflags flag)
157{
158 meta->table[index].flags &= ~BIT(flag);
159}
160
161static inline int is_partial_io(struct bio_vec *bvec)
162{
163 return bvec->bv_len != PAGE_SIZE;
164}
165
166/*
167 * Check if request is within bounds and aligned on zram logical blocks.
168 */
169static inline int valid_io_request(struct zram *zram, struct bio *bio)
170{
171 u64 start, end, bound;
172
173 /* unaligned request */
174 if (unlikely(bio->bi_iter.bi_sector &
175 (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
176 return 0;
177 if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
178 return 0;
179
180 start = bio->bi_iter.bi_sector;
181 end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
182 bound = zram->disksize >> SECTOR_SHIFT;
183 /* out of range */
184 if (unlikely(start >= bound || end > bound || start > end))
185 return 0;
186
187 /* I/O request is valid */
188 return 1;
189}
190
191static void zram_meta_free(struct zram_meta *meta)
192{
193 zs_destroy_pool(meta->mem_pool);
194 kfree(meta->compress_workmem);
195 free_pages((unsigned long)meta->compress_buffer, 1);
196 vfree(meta->table);
197 kfree(meta);
198}
199
200static struct zram_meta *zram_meta_alloc(u64 disksize)
201{
202 size_t num_pages;
203 struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
204 if (!meta)
205 goto out;
206
207 meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
208 if (!meta->compress_workmem)
209 goto free_meta;
210
211 meta->compress_buffer =
212 (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
213 if (!meta->compress_buffer) {
214 pr_err("Error allocating compressor buffer space\n");
215 goto free_workmem;
216 }
217
218 num_pages = disksize >> PAGE_SHIFT;
219 meta->table = vzalloc(num_pages * sizeof(*meta->table));
220 if (!meta->table) {
221 pr_err("Error allocating zram address table\n");
222 goto free_buffer;
223 }
224
225 meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
226 if (!meta->mem_pool) {
227 pr_err("Error creating memory pool\n");
228 goto free_table;
229 }
230
231 return meta;
232
233free_table:
234 vfree(meta->table);
235free_buffer:
236 free_pages((unsigned long)meta->compress_buffer, 1);
237free_workmem:
238 kfree(meta->compress_workmem);
239free_meta:
240 kfree(meta);
241 meta = NULL;
242out:
243 return meta;
244}
245
246static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
247{
248 if (*offset + bvec->bv_len >= PAGE_SIZE)
249 (*index)++;
250 *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
251}
252
253static int page_zero_filled(void *ptr)
254{
255 unsigned int pos;
256 unsigned long *page;
257
258 page = (unsigned long *)ptr;
259
260 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
261 if (page[pos])
262 return 0;
263 }
264
265 return 1;
266}
267
268static void handle_zero_page(struct bio_vec *bvec)
269{
270 struct page *page = bvec->bv_page;
271 void *user_mem;
272
273 user_mem = kmap_atomic(page);
274 if (is_partial_io(bvec))
275 memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
276 else
277 clear_page(user_mem);
278 kunmap_atomic(user_mem);
279
280 flush_dcache_page(page);
281}
282
283static void zram_free_page(struct zram *zram, size_t index)
284{
285 struct zram_meta *meta = zram->meta;
286 unsigned long handle = meta->table[index].handle;
287 u16 size = meta->table[index].size;
288
289 if (unlikely(!handle)) {
290 /*
291 * No memory is allocated for zero filled pages.
292 * Simply clear zero page flag.
293 */
294 if (zram_test_flag(meta, index, ZRAM_ZERO)) {
295 zram_clear_flag(meta, index, ZRAM_ZERO);
296 zram->stats.pages_zero--;
297 }
298 return;
299 }
300
301 if (unlikely(size > max_zpage_size))
302 zram->stats.bad_compress--;
303
304 zs_free(meta->mem_pool, handle);
305
306 if (size <= PAGE_SIZE / 2)
307 zram->stats.good_compress--;
308
309 atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
310 zram->stats.pages_stored--;
311
312 meta->table[index].handle = 0;
313 meta->table[index].size = 0;
314}
315
316static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
317{
318 int ret = LZO_E_OK;
319 size_t clen = PAGE_SIZE;
320 unsigned char *cmem;
321 struct zram_meta *meta = zram->meta;
322 unsigned long handle = meta->table[index].handle;
323
324 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
325 clear_page(mem);
326 return 0;
327 }
328
329 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
330 if (meta->table[index].size == PAGE_SIZE)
331 copy_page(mem, cmem);
332 else
333 ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
334 mem, &clen);
335 zs_unmap_object(meta->mem_pool, handle);
336
337 /* Should NEVER happen. Return bio error if it does. */
338 if (unlikely(ret != LZO_E_OK)) {
339 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
340 atomic64_inc(&zram->stats.failed_reads);
341 return ret;
342 }
343
344 return 0;
345}
346
347static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
348 u32 index, int offset, struct bio *bio)
349{
350 int ret;
351 struct page *page;
352 unsigned char *user_mem, *uncmem = NULL;
353 struct zram_meta *meta = zram->meta;
354 page = bvec->bv_page;
355
356 if (unlikely(!meta->table[index].handle) ||
357 zram_test_flag(meta, index, ZRAM_ZERO)) {
358 handle_zero_page(bvec);
359 return 0;
360 }
361
362 if (is_partial_io(bvec))
363 /* Use a temporary buffer to decompress the page */
364 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
365
366 user_mem = kmap_atomic(page);
367 if (!is_partial_io(bvec))
368 uncmem = user_mem;
369
370 if (!uncmem) {
371 pr_info("Unable to allocate temp memory\n");
372 ret = -ENOMEM;
373 goto out_cleanup;
374 }
375
376 ret = zram_decompress_page(zram, uncmem, index);
377 /* Should NEVER happen. Return bio error if it does. */
378 if (unlikely(ret != LZO_E_OK))
379 goto out_cleanup;
380
381 if (is_partial_io(bvec))
382 memcpy(user_mem + bvec->bv_offset, uncmem + offset,
383 bvec->bv_len);
384
385 flush_dcache_page(page);
386 ret = 0;
387out_cleanup:
388 kunmap_atomic(user_mem);
389 if (is_partial_io(bvec))
390 kfree(uncmem);
391 return ret;
392}
393
394static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
395 int offset)
396{
397 int ret = 0;
398 size_t clen;
399 unsigned long handle;
400 struct page *page;
401 unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
402 struct zram_meta *meta = zram->meta;
403
404 page = bvec->bv_page;
405 src = meta->compress_buffer;
406
407 if (is_partial_io(bvec)) {
408 /*
409 * This is a partial IO. We need to read the full page
410 * before writing the changes.
411 */
412 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
413 if (!uncmem) {
414 ret = -ENOMEM;
415 goto out;
416 }
417 ret = zram_decompress_page(zram, uncmem, index);
418 if (ret)
419 goto out;
420 }
421
422 user_mem = kmap_atomic(page);
423
424 if (is_partial_io(bvec)) {
425 memcpy(uncmem + offset, user_mem + bvec->bv_offset,
426 bvec->bv_len);
427 kunmap_atomic(user_mem);
428 user_mem = NULL;
429 } else {
430 uncmem = user_mem;
431 }
432
433 if (page_zero_filled(uncmem)) {
434 kunmap_atomic(user_mem);
435 /* Free memory associated with this sector now. */
436 zram_free_page(zram, index);
437
438 zram->stats.pages_zero++;
439 zram_set_flag(meta, index, ZRAM_ZERO);
440 ret = 0;
441 goto out;
442 }
443
444 /*
445 * zram_slot_free_notify could miss a free, so let's
446 * double check.
447 */
448 if (unlikely(meta->table[index].handle ||
449 zram_test_flag(meta, index, ZRAM_ZERO)))
450 zram_free_page(zram, index);
451
452 ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
453 meta->compress_workmem);
454
455 if (!is_partial_io(bvec)) {
456 kunmap_atomic(user_mem);
457 user_mem = NULL;
458 uncmem = NULL;
459 }
460
461 if (unlikely(ret != LZO_E_OK)) {
462 pr_err("Compression failed! err=%d\n", ret);
463 goto out;
464 }
465
466 if (unlikely(clen > max_zpage_size)) {
467 zram->stats.bad_compress++;
468 clen = PAGE_SIZE;
469 src = NULL;
470 if (is_partial_io(bvec))
471 src = uncmem;
472 }
473
474 handle = zs_malloc(meta->mem_pool, clen);
475 if (!handle) {
476 pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
477 index, clen);
478 ret = -ENOMEM;
479 goto out;
480 }
481 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
482
483 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
484 src = kmap_atomic(page);
485 copy_page(cmem, src);
486 kunmap_atomic(src);
487 } else {
488 memcpy(cmem, src, clen);
489 }
490
491 zs_unmap_object(meta->mem_pool, handle);
492
493 /*
494 * Free memory associated with this sector
495 * before overwriting unused sectors.
496 */
497 zram_free_page(zram, index);
498
499 meta->table[index].handle = handle;
500 meta->table[index].size = clen;
501
502 /* Update stats */
503 atomic64_add(clen, &zram->stats.compr_size);
504 zram->stats.pages_stored++;
505 if (clen <= PAGE_SIZE / 2)
506 zram->stats.good_compress++;
507
508out:
509 if (is_partial_io(bvec))
510 kfree(uncmem);
511
512 if (ret)
513 atomic64_inc(&zram->stats.failed_writes);
514 return ret;
515}
516
517static void handle_pending_slot_free(struct zram *zram)
518{
519 struct zram_slot_free *free_rq;
520
521 spin_lock(&zram->slot_free_lock);
522 while (zram->slot_free_rq) {
523 free_rq = zram->slot_free_rq;
524 zram->slot_free_rq = free_rq->next;
525 zram_free_page(zram, free_rq->index);
526 kfree(free_rq);
527 }
528 spin_unlock(&zram->slot_free_lock);
529}
530
531static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
532 int offset, struct bio *bio, int rw)
533{
534 int ret;
535
536 if (rw == READ) {
537 down_read(&zram->lock);
538 handle_pending_slot_free(zram);
539 ret = zram_bvec_read(zram, bvec, index, offset, bio);
540 up_read(&zram->lock);
541 } else {
542 down_write(&zram->lock);
543 handle_pending_slot_free(zram);
544 ret = zram_bvec_write(zram, bvec, index, offset);
545 up_write(&zram->lock);
546 }
547
548 return ret;
549}
550
551static void zram_reset_device(struct zram *zram, bool reset_capacity)
552{
553 size_t index;
554 struct zram_meta *meta;
555
556 flush_work(&zram->free_work);
557
558 down_write(&zram->init_lock);
559 if (!zram->init_done) {
560 up_write(&zram->init_lock);
561 return;
562 }
563
564 meta = zram->meta;
565 zram->init_done = 0;
566
567 /* Free all pages that are still in this zram device */
568 for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
569 unsigned long handle = meta->table[index].handle;
570 if (!handle)
571 continue;
572
573 zs_free(meta->mem_pool, handle);
574 }
575
576 zram_meta_free(zram->meta);
577 zram->meta = NULL;
578 /* Reset stats */
579 memset(&zram->stats, 0, sizeof(zram->stats));
580
581 zram->disksize = 0;
582 if (reset_capacity)
583 set_capacity(zram->disk, 0);
584 up_write(&zram->init_lock);
585}
586
587static void zram_init_device(struct zram *zram, struct zram_meta *meta)
588{
589 if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) {
590 pr_info(
591 "There is little point creating a zram of greater than "
592 "twice the size of memory since we expect a 2:1 compression "
593 "ratio. Note that zram uses about 0.1%% of the size of "
594 "the disk when not in use so a huge zram is "
595 "wasteful.\n"
596 "\tMemory Size: %lu kB\n"
597 "\tSize you selected: %llu kB\n"
598 "Continuing anyway ...\n",
599 (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10
600 );
601 }
602
603 /* zram devices sort of resemble non-rotational disks */
604 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
605
606 zram->meta = meta;
607 zram->init_done = 1;
608
609 pr_debug("Initialization done!\n");
610}
611
612static ssize_t disksize_store(struct device *dev,
613 struct device_attribute *attr, const char *buf, size_t len)
614{
615 u64 disksize;
616 struct zram_meta *meta;
617 struct zram *zram = dev_to_zram(dev);
618
619 disksize = memparse(buf, NULL);
620 if (!disksize)
621 return -EINVAL;
622
623 disksize = PAGE_ALIGN(disksize);
624 meta = zram_meta_alloc(disksize);
625 down_write(&zram->init_lock);
626 if (zram->init_done) {
627 up_write(&zram->init_lock);
628 zram_meta_free(meta);
629 pr_info("Cannot change disksize for initialized device\n");
630 return -EBUSY;
631 }
632
633 zram->disksize = disksize;
634 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
635 zram_init_device(zram, meta);
636 up_write(&zram->init_lock);
637
638 return len;
639}
640
641static ssize_t reset_store(struct device *dev,
642 struct device_attribute *attr, const char *buf, size_t len)
643{
644 int ret;
645 unsigned short do_reset;
646 struct zram *zram;
647 struct block_device *bdev;
648
649 zram = dev_to_zram(dev);
650 bdev = bdget_disk(zram->disk, 0);
651
652 if (!bdev)
653 return -ENOMEM;
654
655 /* Do not reset an active device! */
656 if (bdev->bd_holders) {
657 ret = -EBUSY;
658 goto out;
659 }
660
661 ret = kstrtou16(buf, 10, &do_reset);
662 if (ret)
663 goto out;
664
665 if (!do_reset) {
666 ret = -EINVAL;
667 goto out;
668 }
669
670 /* Make sure all pending I/O is finished */
671 fsync_bdev(bdev);
672 bdput(bdev);
673
674 zram_reset_device(zram, true);
675 return len;
676
677out:
678 bdput(bdev);
679 return ret;
680}
681
682static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
683{
684 int offset;
685 u32 index;
686 struct bio_vec bvec;
687 struct bvec_iter iter;
688
689 switch (rw) {
690 case READ:
691 atomic64_inc(&zram->stats.num_reads);
692 break;
693 case WRITE:
694 atomic64_inc(&zram->stats.num_writes);
695 break;
696 }
697
698 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
699 offset = (bio->bi_iter.bi_sector &
700 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
701
702 bio_for_each_segment(bvec, bio, iter) {
703 int max_transfer_size = PAGE_SIZE - offset;
704
705 if (bvec.bv_len > max_transfer_size) {
706 /*
707 * zram_bvec_rw() can only make operation on a single
708 * zram page. Split the bio vector.
709 */
710 struct bio_vec bv;
711
712 bv.bv_page = bvec.bv_page;
713 bv.bv_len = max_transfer_size;
714 bv.bv_offset = bvec.bv_offset;
715
716 if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
717 goto out;
718
719 bv.bv_len = bvec.bv_len - max_transfer_size;
720 bv.bv_offset += max_transfer_size;
721 if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
722 goto out;
723 } else
724 if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw)
725 < 0)
726 goto out;
727
728 update_position(&index, &offset, &bvec);
729 }
730
731 set_bit(BIO_UPTODATE, &bio->bi_flags);
732 bio_endio(bio, 0);
733 return;
734
735out:
736 bio_io_error(bio);
737}
738
739/*
740 * Handler function for all zram I/O requests.
741 */
742static void zram_make_request(struct request_queue *queue, struct bio *bio)
743{
744 struct zram *zram = queue->queuedata;
745
746 down_read(&zram->init_lock);
747 if (unlikely(!zram->init_done))
748 goto error;
749
750 if (!valid_io_request(zram, bio)) {
751 atomic64_inc(&zram->stats.invalid_io);
752 goto error;
753 }
754
755 __zram_make_request(zram, bio, bio_data_dir(bio));
756 up_read(&zram->init_lock);
757
758 return;
759
760error:
761 up_read(&zram->init_lock);
762 bio_io_error(bio);
763}
764
765static void zram_slot_free(struct work_struct *work)
766{
767 struct zram *zram;
768
769 zram = container_of(work, struct zram, free_work);
770 down_write(&zram->lock);
771 handle_pending_slot_free(zram);
772 up_write(&zram->lock);
773}
774
775static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq)
776{
777 spin_lock(&zram->slot_free_lock);
778 free_rq->next = zram->slot_free_rq;
779 zram->slot_free_rq = free_rq;
780 spin_unlock(&zram->slot_free_lock);
781}
782
783static void zram_slot_free_notify(struct block_device *bdev,
784 unsigned long index)
785{
786 struct zram *zram;
787 struct zram_slot_free *free_rq;
788
789 zram = bdev->bd_disk->private_data;
790 atomic64_inc(&zram->stats.notify_free);
791
792 free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC);
793 if (!free_rq)
794 return;
795
796 free_rq->index = index;
797 add_slot_free(zram, free_rq);
798 schedule_work(&zram->free_work);
799}
800
801static const struct block_device_operations zram_devops = {
802 .swap_slot_free_notify = zram_slot_free_notify,
803 .owner = THIS_MODULE
804};
805
806static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
807 disksize_show, disksize_store);
808static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
809static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
810static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
811static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
812static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
813static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
814static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
815static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
816static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
817static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
818
819static struct attribute *zram_disk_attrs[] = {
820 &dev_attr_disksize.attr,
821 &dev_attr_initstate.attr,
822 &dev_attr_reset.attr,
823 &dev_attr_num_reads.attr,
824 &dev_attr_num_writes.attr,
825 &dev_attr_invalid_io.attr,
826 &dev_attr_notify_free.attr,
827 &dev_attr_zero_pages.attr,
828 &dev_attr_orig_data_size.attr,
829 &dev_attr_compr_data_size.attr,
830 &dev_attr_mem_used_total.attr,
831 NULL,
832};
833
834static struct attribute_group zram_disk_attr_group = {
835 .attrs = zram_disk_attrs,
836};
837
838static int create_device(struct zram *zram, int device_id)
839{
840 int ret = -ENOMEM;
841
842 init_rwsem(&zram->lock);
843 init_rwsem(&zram->init_lock);
844
845 INIT_WORK(&zram->free_work, zram_slot_free);
846 spin_lock_init(&zram->slot_free_lock);
847 zram->slot_free_rq = NULL;
848
849 zram->queue = blk_alloc_queue(GFP_KERNEL);
850 if (!zram->queue) {
851 pr_err("Error allocating disk queue for device %d\n",
852 device_id);
853 goto out;
854 }
855
856 blk_queue_make_request(zram->queue, zram_make_request);
857 zram->queue->queuedata = zram;
858
859 /* gendisk structure */
860 zram->disk = alloc_disk(1);
861 if (!zram->disk) {
862 pr_warn("Error allocating disk structure for device %d\n",
863 device_id);
864 goto out_free_queue;
865 }
866
867 zram->disk->major = zram_major;
868 zram->disk->first_minor = device_id;
869 zram->disk->fops = &zram_devops;
870 zram->disk->queue = zram->queue;
871 zram->disk->private_data = zram;
872 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
873
874 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
875 set_capacity(zram->disk, 0);
876
877 /*
878 * To ensure that we always get PAGE_SIZE aligned
879 * and n*PAGE_SIZE sized I/O requests.
880 */
881 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
882 blk_queue_logical_block_size(zram->disk->queue,
883 ZRAM_LOGICAL_BLOCK_SIZE);
884 blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
885 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
886
887 add_disk(zram->disk);
888
889 ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
890 &zram_disk_attr_group);
891 if (ret < 0) {
892 pr_warn("Error creating sysfs group\n");
893 goto out_free_disk;
894 }
895
896 zram->init_done = 0;
897 return 0;
898
899out_free_disk:
900 del_gendisk(zram->disk);
901 put_disk(zram->disk);
902out_free_queue:
903 blk_cleanup_queue(zram->queue);
904out:
905 return ret;
906}
907
908static void destroy_device(struct zram *zram)
909{
910 sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
911 &zram_disk_attr_group);
912
913 del_gendisk(zram->disk);
914 put_disk(zram->disk);
915
916 blk_cleanup_queue(zram->queue);
917}
918
919static int __init zram_init(void)
920{
921 int ret, dev_id;
922
923 if (num_devices > max_num_devices) {
924 pr_warn("Invalid value for num_devices: %u\n",
925 num_devices);
926 ret = -EINVAL;
927 goto out;
928 }
929
930 zram_major = register_blkdev(0, "zram");
931 if (zram_major <= 0) {
932 pr_warn("Unable to get major number\n");
933 ret = -EBUSY;
934 goto out;
935 }
936
937 /* Allocate the device array and initialize each one */
938 zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
939 if (!zram_devices) {
940 ret = -ENOMEM;
941 goto unregister;
942 }
943
944 for (dev_id = 0; dev_id < num_devices; dev_id++) {
945 ret = create_device(&zram_devices[dev_id], dev_id);
946 if (ret)
947 goto free_devices;
948 }
949
950 pr_info("Created %u device(s) ...\n", num_devices);
951
952 return 0;
953
954free_devices:
955 while (dev_id)
956 destroy_device(&zram_devices[--dev_id]);
957 kfree(zram_devices);
958unregister:
959 unregister_blkdev(zram_major, "zram");
960out:
961 return ret;
962}
963
964static void __exit zram_exit(void)
965{
966 int i;
967 struct zram *zram;
968
969 for (i = 0; i < num_devices; i++) {
970 zram = &zram_devices[i];
971
972 destroy_device(zram);
973 /*
974 * Shouldn't access zram->disk after destroy_device
975 * because destroy_device already released zram->disk.
976 */
977 zram_reset_device(zram, false);
978 }
979
980 unregister_blkdev(zram_major, "zram");
981
982 kfree(zram_devices);
983 pr_debug("Cleanup done!\n");
984}
985
986module_init(zram_init);
987module_exit(zram_exit);
988
989module_param(num_devices, uint, 0);
990MODULE_PARM_DESC(num_devices, "Number of zram devices");
991
992MODULE_LICENSE("Dual BSD/GPL");
993MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
994MODULE_DESCRIPTION("Compressed RAM Block Device");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
new file mode 100644
index 000000000000..d8f6596513c3
--- /dev/null
+++ b/drivers/block/zram/zram_drv.h
@@ -0,0 +1,124 @@
1/*
2 * Compressed RAM block device
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#ifndef _ZRAM_DRV_H_
16#define _ZRAM_DRV_H_
17
18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20#include <linux/zsmalloc.h>
21
22/*
23 * Some arbitrary value. This is just to catch
24 * invalid value for num_devices module parameter.
25 */
26static const unsigned max_num_devices = 32;
27
28/*-- Configurable parameters */
29
30/*
31 * Pages that compress to size greater than this are stored
32 * uncompressed in memory.
33 */
34static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
35
36/*
37 * NOTE: max_zpage_size must be less than or equal to:
38 * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would
39 * always return failure.
40 */
41
42/*-- End of configurable params */
43
44#define SECTOR_SHIFT 9
45#define SECTOR_SIZE (1 << SECTOR_SHIFT)
46#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
47#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
48#define ZRAM_LOGICAL_BLOCK_SHIFT 12
49#define ZRAM_LOGICAL_BLOCK_SIZE (1 << ZRAM_LOGICAL_BLOCK_SHIFT)
50#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
51 (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))
52
53/* Flags for zram pages (table[page_no].flags) */
54enum zram_pageflags {
55 /* Page consists entirely of zeros */
56 ZRAM_ZERO,
57
58 __NR_ZRAM_PAGEFLAGS,
59};
60
61/*-- Data structures */
62
63/* Allocated for each disk page */
64struct table {
65 unsigned long handle;
66 u16 size; /* object size (excluding header) */
67 u8 count; /* object ref count (not yet used) */
68 u8 flags;
69} __aligned(4);
70
71/*
72 * All 64bit fields should only be manipulated by 64bit atomic accessors.
73 * All modifications to 32bit counter should be protected by zram->lock.
74 */
75struct zram_stats {
76 atomic64_t compr_size; /* compressed size of pages stored */
77 atomic64_t num_reads; /* failed + successful */
78 atomic64_t num_writes; /* --do-- */
79 atomic64_t failed_reads; /* should NEVER! happen */
80 atomic64_t failed_writes; /* can happen when memory is too low */
81 atomic64_t invalid_io; /* non-page-aligned I/O requests */
82 atomic64_t notify_free; /* no. of swap slot free notifications */
83 u32 pages_zero; /* no. of zero filled pages */
84 u32 pages_stored; /* no. of pages currently stored */
85 u32 good_compress; /* no. of pages with compression ratio <= 50% */
86 u32 bad_compress; /* no. of pages with compression ratio >= 75% */
87};
88
89struct zram_meta {
90 void *compress_workmem;
91 void *compress_buffer;
92 struct table *table;
93 struct zs_pool *mem_pool;
94};
95
96struct zram_slot_free {
97 unsigned long index;
98 struct zram_slot_free *next;
99};
100
101struct zram {
102 struct zram_meta *meta;
103 struct rw_semaphore lock; /* protect compression buffers, table,
104 * 32bit stat counters against concurrent
105 * notifications, reads and writes */
106
107 struct work_struct free_work; /* handle pending free request */
108 struct zram_slot_free *slot_free_rq; /* list head of free request */
109
110 struct request_queue *queue;
111 struct gendisk *disk;
112 int init_done;
113 /* Prevent concurrent execution of device init, reset and R/W request */
114 struct rw_semaphore init_lock;
115 /*
116 * This is the limit on amount of *uncompressed* worth of data
117 * we can store in a disk.
118 */
119 u64 disksize; /* bytes */
120 spinlock_t slot_free_lock;
121
122 struct zram_stats stats;
123};
124#endif