summary | refs | log | tree | commit | diff | stats
path: root/drivers/block/zram
diff options
context:
space:
mode:
author: Minchan Kim <minchan@kernel.org> 2018-06-07 20:05:49 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-06-07 20:34:34 -0400
commit: c0265342bff4fcaa2cdf13f4596244c18d4a7ae5 (patch)
tree: 51ce5d5c93d5931d7f2ce28e2da4509bba234b25 /drivers/block/zram
parent: d7eac6b6e1838ef1a1400df4ec55daa34bbc855e (diff)
zram: introduce zram memory tracking
zRAM as swap is useful for small-memory devices. However, swap means the pages stored on zram are mostly cold pages, due to the VM's LRU algorithm. In particular, once an application's init data has been touched during launch, it tends not to be accessed any more and is eventually swapped out. zRAM can store such cold pages in compressed form, but it is pointless to keep them in memory at all. A better idea is for app developers to free them directly rather than leaving them on the heap.

This patch tells us the last access time of each zram block via "cat /sys/kernel/debug/zram/zram0/block_state". The output is as follows:

    300 75.033841 .wh
    301 63.806904 s..
    302 63.806919 ..h

The first column is the zram block index and the third column shows the state of the block as symbols (s: same page, w: page written to backing store, h: huge page). The second column is the time at which the block was last accessed, in seconds since boot with microsecond resolution. So the example above means the 300th block was last accessed at 75.033841 seconds, and it was huge, so it was written to the backing store.

An admin can leverage this information, together with *pagemap*, to catch cold and/or incompressible pages of a process once parts of its heap are swapped out.

I used this feature a few years ago to find memory hoggers in userspace and to notify them how much memory they had wasted by leaving it untouched for a long time. With it, they could reduce unnecessary memory usage. At that time I hacked up zram for the feature, but now that I need it again I decided it would be better to upstream it rather than keep it to myself. I hope to submit the userspace tool that uses this feature soon.
[akpm@linux-foundation.org: fix i386 printk warning]
[minchan@kernel.org: use ktime_get_boottime() instead of sched_clock()]
Link: http://lkml.kernel.org/r/20180420063525.GA253739@rodete-desktop-imager.corp.google.com
[akpm@linux-foundation.org: documentation tweak]
[akpm@linux-foundation.org: fix i386 printk warning]
[minchan@kernel.org: fix compile warning]
Link: http://lkml.kernel.org/r/20180508104849.GA8209@rodete-desktop-imager.corp.google.com
[rdunlap@infradead.org: fix printk formats]
Link: http://lkml.kernel.org/r/3652ccb1-96ef-0b0b-05d1-f661d7733dcc@infradead.org
Link: http://lkml.kernel.org/r/20180416090946.63057-5-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/zram')
-rw-r--r-- drivers/block/zram/Kconfig 14
-rw-r--r-- drivers/block/zram/zram_drv.c 132
-rw-r--r-- drivers/block/zram/zram_drv.h 7
3 files changed, 139 insertions, 14 deletions
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index ac3a31d433b2..635235759a0a 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -13,7 +13,7 @@ config ZRAM
13 It has several use cases, for example: /tmp storage, use as swap 13 It has several use cases, for example: /tmp storage, use as swap
14 disks and maybe many more. 14 disks and maybe many more.
15 15
16 See zram.txt for more information. 16 See Documentation/blockdev/zram.txt for more information.
17 17
18config ZRAM_WRITEBACK 18config ZRAM_WRITEBACK
19 bool "Write back incompressible page to backing device" 19 bool "Write back incompressible page to backing device"
@@ -25,4 +25,14 @@ config ZRAM_WRITEBACK
25 For this feature, admin should set up backing device via 25 For this feature, admin should set up backing device via
26 /sys/block/zramX/backing_dev. 26 /sys/block/zramX/backing_dev.
27 27
28 See zram.txt for more infomration. 28 See Documentation/blockdev/zram.txt for more information.
29
30config ZRAM_MEMORY_TRACKING
31 bool "Track zRam block status"
32 depends on ZRAM && DEBUG_FS
33 help
34 With this feature, admin can track the state of allocated blocks
35 of zRAM. Admin could see the information via
36 /sys/kernel/debug/zram/zramX/block_state.
37
38 See Documentation/blockdev/zram.txt for more information.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 7fc10e2ad734..da51293e7c03 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -31,6 +31,7 @@
31#include <linux/err.h> 31#include <linux/err.h>
32#include <linux/idr.h> 32#include <linux/idr.h>
33#include <linux/sysfs.h> 33#include <linux/sysfs.h>
34#include <linux/debugfs.h>
34#include <linux/cpuhotplug.h> 35#include <linux/cpuhotplug.h>
35 36
36#include "zram_drv.h" 37#include "zram_drv.h"
@@ -67,6 +68,13 @@ static inline bool init_done(struct zram *zram)
67 return zram->disksize; 68 return zram->disksize;
68} 69}
69 70
71static inline bool zram_allocated(struct zram *zram, u32 index)
72{
73
74 return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
75 zram->table[index].handle;
76}
77
70static inline struct zram *dev_to_zram(struct device *dev) 78static inline struct zram *dev_to_zram(struct device *dev)
71{ 79{
72 return (struct zram *)dev_to_disk(dev)->private_data; 80 return (struct zram *)dev_to_disk(dev)->private_data;
@@ -83,7 +91,7 @@ static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
83} 91}
84 92
85/* flag operations require table entry bit_spin_lock() being held */ 93/* flag operations require table entry bit_spin_lock() being held */
86static int zram_test_flag(struct zram *zram, u32 index, 94static bool zram_test_flag(struct zram *zram, u32 index,
87 enum zram_pageflags flag) 95 enum zram_pageflags flag)
88{ 96{
89 return zram->table[index].value & BIT(flag); 97 return zram->table[index].value & BIT(flag);
@@ -107,16 +115,6 @@ static inline void zram_set_element(struct zram *zram, u32 index,
107 zram->table[index].element = element; 115 zram->table[index].element = element;
108} 116}
109 117
110static void zram_accessed(struct zram *zram, u32 index)
111{
112 zram->table[index].ac_time = sched_clock();
113}
114
115static void zram_reset_access(struct zram *zram, u32 index)
116{
117 zram->table[index].ac_time = 0;
118}
119
120static unsigned long zram_get_element(struct zram *zram, u32 index) 118static unsigned long zram_get_element(struct zram *zram, u32 index)
121{ 119{
122 return zram->table[index].element; 120 return zram->table[index].element;
@@ -620,6 +618,114 @@ static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
620static void zram_wb_clear(struct zram *zram, u32 index) {} 618static void zram_wb_clear(struct zram *zram, u32 index) {}
621#endif 619#endif
622 620
621#ifdef CONFIG_ZRAM_MEMORY_TRACKING
622
623static struct dentry *zram_debugfs_root;
624
625static void zram_debugfs_create(void)
626{
627 zram_debugfs_root = debugfs_create_dir("zram", NULL);
628}
629
630static void zram_debugfs_destroy(void)
631{
632 debugfs_remove_recursive(zram_debugfs_root);
633}
634
635static void zram_accessed(struct zram *zram, u32 index)
636{
637 zram->table[index].ac_time = ktime_get_boottime();
638}
639
640static void zram_reset_access(struct zram *zram, u32 index)
641{
642 zram->table[index].ac_time = 0;
643}
644
645static ssize_t read_block_state(struct file *file, char __user *buf,
646 size_t count, loff_t *ppos)
647{
648 char *kbuf;
649 ssize_t index, written = 0;
650 struct zram *zram = file->private_data;
651 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
652 struct timespec64 ts;
653
654 kbuf = kvmalloc(count, GFP_KERNEL);
655 if (!kbuf)
656 return -ENOMEM;
657
658 down_read(&zram->init_lock);
659 if (!init_done(zram)) {
660 up_read(&zram->init_lock);
661 kvfree(kbuf);
662 return -EINVAL;
663 }
664
665 for (index = *ppos; index < nr_pages; index++) {
666 int copied;
667
668 zram_slot_lock(zram, index);
669 if (!zram_allocated(zram, index))
670 goto next;
671
672 ts = ktime_to_timespec64(zram->table[index].ac_time);
673 copied = snprintf(kbuf + written, count,
674 "%12zd %12lld.%06lu %c%c%c\n",
675 index, (s64)ts.tv_sec,
676 ts.tv_nsec / NSEC_PER_USEC,
677 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
678 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
679 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');
680
681 if (count < copied) {
682 zram_slot_unlock(zram, index);
683 break;
684 }
685 written += copied;
686 count -= copied;
687next:
688 zram_slot_unlock(zram, index);
689 *ppos += 1;
690 }
691
692 up_read(&zram->init_lock);
693 if (copy_to_user(buf, kbuf, written))
694 written = -EFAULT;
695 kvfree(kbuf);
696
697 return written;
698}
699
700static const struct file_operations proc_zram_block_state_op = {
701 .open = simple_open,
702 .read = read_block_state,
703 .llseek = default_llseek,
704};
705
706static void zram_debugfs_register(struct zram *zram)
707{
708 if (!zram_debugfs_root)
709 return;
710
711 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
712 zram_debugfs_root);
713 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
714 zram, &proc_zram_block_state_op);
715}
716
717static void zram_debugfs_unregister(struct zram *zram)
718{
719 debugfs_remove_recursive(zram->debugfs_dir);
720}
721#else
722static void zram_debugfs_create(void) {};
723static void zram_debugfs_destroy(void) {};
724static void zram_accessed(struct zram *zram, u32 index) {};
725static void zram_reset_access(struct zram *zram, u32 index) {};
726static void zram_debugfs_register(struct zram *zram) {};
727static void zram_debugfs_unregister(struct zram *zram) {};
728#endif
623 729
624/* 730/*
625 * We switched to per-cpu streams and this attr is not needed anymore. 731 * We switched to per-cpu streams and this attr is not needed anymore.
@@ -1604,6 +1710,7 @@ static int zram_add(void)
1604 } 1710 }
1605 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); 1711 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1606 1712
1713 zram_debugfs_register(zram);
1607 pr_info("Added device: %s\n", zram->disk->disk_name); 1714 pr_info("Added device: %s\n", zram->disk->disk_name);
1608 return device_id; 1715 return device_id;
1609 1716
@@ -1637,6 +1744,7 @@ static int zram_remove(struct zram *zram)
1637 zram->claim = true; 1744 zram->claim = true;
1638 mutex_unlock(&bdev->bd_mutex); 1745 mutex_unlock(&bdev->bd_mutex);
1639 1746
1747 zram_debugfs_unregister(zram);
1640 /* 1748 /*
1641 * Remove sysfs first, so no one will perform a disksize 1749 * Remove sysfs first, so no one will perform a disksize
1642 * store while we destroy the devices. This also helps during 1750 * store while we destroy the devices. This also helps during
@@ -1739,6 +1847,7 @@ static void destroy_devices(void)
1739{ 1847{
1740 class_unregister(&zram_control_class); 1848 class_unregister(&zram_control_class);
1741 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 1849 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
1850 zram_debugfs_destroy();
1742 idr_destroy(&zram_index_idr); 1851 idr_destroy(&zram_index_idr);
1743 unregister_blkdev(zram_major, "zram"); 1852 unregister_blkdev(zram_major, "zram");
1744 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 1853 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
@@ -1760,6 +1869,7 @@ static int __init zram_init(void)
1760 return ret; 1869 return ret;
1761 } 1870 }
1762 1871
1872 zram_debugfs_create();
1763 zram_major = register_blkdev(0, "zram"); 1873 zram_major = register_blkdev(0, "zram");
1764 if (zram_major <= 0) { 1874 if (zram_major <= 0) {
1765 pr_err("Unable to get major number\n"); 1875 pr_err("Unable to get major number\n");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 1075218e88b2..72c8584b6dff 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -61,7 +61,9 @@ struct zram_table_entry {
61 unsigned long element; 61 unsigned long element;
62 }; 62 };
63 unsigned long value; 63 unsigned long value;
64 u64 ac_time; 64#ifdef CONFIG_ZRAM_MEMORY_TRACKING
65 ktime_t ac_time;
66#endif
65}; 67};
66 68
67struct zram_stats { 69struct zram_stats {
@@ -110,5 +112,8 @@ struct zram {
110 unsigned long nr_pages; 112 unsigned long nr_pages;
111 spinlock_t bitmap_lock; 113 spinlock_t bitmap_lock;
112#endif 114#endif
115#ifdef CONFIG_ZRAM_MEMORY_TRACKING
116 struct dentry *debugfs_dir;
117#endif
113}; 118};
114#endif 119#endif