aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSergey Senozhatsky <sergey.senozhatsky@gmail.com>2014-04-07 18:38:14 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-07 19:36:01 -0400
commitbeca3ec71fe5490ee9237dc42400f50402baf83e (patch)
treecac925d78139fe53a14d9c66939d07c472ca726b
parent9cc97529a180b369fcb7e5265771b6ba7e01f05b (diff)
zram: add multi stream functionality
Existing zram (zcomp) implementation has only one compression stream (buffer and algorithm private part), so in order to prevent data corruption only one write (compress operation) can use this compression stream, forcing all concurrent write operations to wait for stream lock to be released. This patch changes zcomp to keep a compression streams list of user-defined size (via sysfs device attr). Each write operation still exclusively holds compression stream, the difference is that we can have N write operations (depending on size of streams list) executing in parallel. See TEST section later in commit message for performance data. Introduce struct zcomp_strm_multi and a set of functions to manage zcomp_strm stream access. zcomp_strm_multi has a list of idle zcomp_strm structs, spinlock to protect idle list and wait queue, making it possible to perform parallel compressions. The following set of functions is added: - zcomp_strm_multi_find()/zcomp_strm_multi_release() find and release a compression stream, implement required locking - zcomp_strm_multi_create()/zcomp_strm_multi_destroy() create and destroy zcomp_strm_multi zcomp ->strm_find() and ->strm_release() callbacks are set during initialisation to zcomp_strm_multi_find()/zcomp_strm_multi_release() respectively. Each time zcomp issues a zcomp_strm_multi_find() call, the following set of operations is performed: - spin lock strm_lock - if idle list is not empty, remove zcomp_strm from idle list, spin unlock and return zcomp stream pointer to caller - if idle list is empty, current adds itself to wait queue. It will be woken up by zcomp_strm_multi_release() caller. 
zcomp_strm_multi_release(): - spin lock strm_lock - add zcomp stream to idle list - spin unlock, wake up sleeper Minchan Kim reported that spinlock-based locking scheme has demonstrated a severe performance regression for single compression stream case, compared to mutex-based (see https://lkml.org/lkml/2014/2/18/16) base spinlock mutex ==Initial write ==Initial write ==Initial write records: 5 records: 5 records: 5 avg: 1642424.35 avg: 699610.40 avg: 1655583.71 std: 39890.95(2.43%) std: 232014.19(33.16%) std: 52293.96 max: 1690170.94 max: 1163473.45 max: 1697164.75 min: 1568669.52 min: 573429.88 min: 1553410.23 ==Rewrite ==Rewrite ==Rewrite records: 5 records: 5 records: 5 avg: 1611775.39 avg: 501406.64 avg: 1684419.11 std: 17144.58(1.06%) std: 15354.41(3.06%) std: 18367.42 max: 1641800.95 max: 531356.78 max: 1706445.84 min: 1593515.27 min: 488817.78 min: 1655335.73 When only one compression stream is available, mutex with spin on owner tends to perform much better than frequent wait_event()/wake_up(). This is why single stream is implemented as a special case with mutex locking. Introduce and document zram device attribute max_comp_streams. This attr shows and stores current zcomp's max number of zcomp streams (max_strm). Extend zcomp's zcomp_create() with `max_strm' parameter. `max_strm' limits the number of zcomp_strm structs in compression backend's idle list (max_comp_streams). max_comp_streams used during initialisation as follows: -- passing to zcomp_create() max_strm equals to 1 will initialise zcomp using single compression stream zcomp_strm_single (mutex-based locking). -- passing to zcomp_create() max_strm greater than 1 will initialise zcomp using multi compression stream zcomp_strm_multi (spinlock-based locking). default max_comp_streams value is 1, meaning that zram with single stream will be initialised. Later patch will introduce configuration knob to change max_comp_streams on already initialised and used zcomp. 
TEST iozone -t 3 -R -r 16K -s 60M -I +Z test base 1 strm (mutex) 3 strm (spinlock) ----------------------------------------------------------------------- Initial write 589286.78 583518.39 718011.05 Rewrite 604837.97 596776.38 1515125.72 Random write 584120.11 595714.58 1388850.25 Pwrite 535731.17 541117.38 739295.27 Fwrite 1418083.88 1478612.72 1484927.06 Usage example: set max_comp_streams to 4 echo 4 > /sys/block/zram0/max_comp_streams show current max_comp_streams (default value is 1). cat /sys/block/zram0/max_comp_streams Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Acked-by: Minchan Kim <minchan@kernel.org> Cc: Jerome Marchand <jmarchan@redhat.com> Cc: Nitin Gupta <ngupta@vflare.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/ABI/testing/sysfs-block-zram9
-rw-r--r--Documentation/blockdev/zram.txt31
-rw-r--r--drivers/block/zram/zcomp.c124
-rw-r--r--drivers/block/zram/zcomp.h4
-rw-r--r--drivers/block/zram/zram_drv.c42
-rw-r--r--drivers/block/zram/zram_drv.h2
6 files changed, 201 insertions, 11 deletions
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 8aa046841625..0da9ed6b82ea 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -50,7 +50,6 @@ Description:
50 The failed_reads file is read-only and specifies the number of 50 The failed_reads file is read-only and specifies the number of
51 failed reads happened on this device. 51 failed reads happened on this device.
52 52
53
54What: /sys/block/zram<id>/failed_writes 53What: /sys/block/zram<id>/failed_writes
55Date: February 2014 54Date: February 2014
56Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> 55Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
@@ -58,6 +57,14 @@ Description:
58 The failed_writes file is read-only and specifies the number of 57 The failed_writes file is read-only and specifies the number of
59 failed writes happened on this device. 58 failed writes happened on this device.
60 59
60What: /sys/block/zram<id>/max_comp_streams
61Date: February 2014
62Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
63Description:
64 The max_comp_streams file is read-write and specifies the
65 number of backend's zcomp_strm compression streams (number of
66 concurrent compress operations).
67
61What: /sys/block/zram<id>/notify_free 68What: /sys/block/zram<id>/notify_free
62Date: August 2010 69Date: August 2010
63Contact: Nitin Gupta <ngupta@vflare.org> 70Contact: Nitin Gupta <ngupta@vflare.org>
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index b31ac5e5d4b9..aadfe60391b7 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -21,7 +21,28 @@ Following shows a typical sequence of steps for using zram.
21 This creates 4 devices: /dev/zram{0,1,2,3} 21 This creates 4 devices: /dev/zram{0,1,2,3}
22 (num_devices parameter is optional. Default: 1) 22 (num_devices parameter is optional. Default: 1)
23 23
242) Set Disksize 242) Set max number of compression streams
25 Compression backend may use up to max_comp_streams compression streams,
26 thus allowing up to max_comp_streams concurrent compression operations.
27 By default, compression backend uses single compression stream.
28
29 Examples:
30 #show max compression streams number
31 cat /sys/block/zram0/max_comp_streams
32
33 #set max compression streams number to 3
34 echo 3 > /sys/block/zram0/max_comp_streams
35
36Note:
37In order to enable compression backend's multi stream support max_comp_streams
38must be initially set to desired concurrency level before ZRAM device
39initialisation. Once the device initialised as a single stream compression
40backend (max_comp_streams equals to 1) changing the value of max_comp_streams
41will not take any effect, because single stream compression backend implemented
42as a special case and does not support dynamic max_comp_streams. Only multi
43stream backend supports dynamic max_comp_streams adjustment.
44
453) Set Disksize
25 Set disk size by writing the value to sysfs node 'disksize'. 46 Set disk size by writing the value to sysfs node 'disksize'.
26 The value can be either in bytes or you can use mem suffixes. 47 The value can be either in bytes or you can use mem suffixes.
27 Examples: 48 Examples:
@@ -38,14 +59,14 @@ There is little point creating a zram of greater than twice the size of memory
38since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the 59since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
39size of the disk when not in use so a huge zram is wasteful. 60size of the disk when not in use so a huge zram is wasteful.
40 61
413) Activate: 624) Activate:
42 mkswap /dev/zram0 63 mkswap /dev/zram0
43 swapon /dev/zram0 64 swapon /dev/zram0
44 65
45 mkfs.ext4 /dev/zram1 66 mkfs.ext4 /dev/zram1
46 mount /dev/zram1 /tmp 67 mount /dev/zram1 /tmp
47 68
484) Stats: 695) Stats:
49 Per-device statistics are exported as various nodes under 70 Per-device statistics are exported as various nodes under
50 /sys/block/zram<id>/ 71 /sys/block/zram<id>/
51 disksize 72 disksize
@@ -60,11 +81,11 @@ size of the disk when not in use so a huge zram is wasteful.
60 compr_data_size 81 compr_data_size
61 mem_used_total 82 mem_used_total
62 83
635) Deactivate: 846) Deactivate:
64 swapoff /dev/zram0 85 swapoff /dev/zram0
65 umount /dev/zram1 86 umount /dev/zram1
66 87
676) Reset: 887) Reset:
68 Write any positive value to 'reset' sysfs node 89 Write any positive value to 'reset' sysfs node
69 echo 1 > /sys/block/zram0/reset 90 echo 1 > /sys/block/zram0/reset
70 echo 1 > /sys/block/zram1/reset 91 echo 1 > /sys/block/zram1/reset
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 72e8071f9d73..c06f75f54718 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -24,6 +24,21 @@ struct zcomp_strm_single {
24 struct zcomp_strm *zstrm; 24 struct zcomp_strm *zstrm;
25}; 25};
26 26
27/*
28 * multi zcomp_strm backend
29 */
30struct zcomp_strm_multi {
31 /* protect strm list */
32 spinlock_t strm_lock;
33 /* max possible number of zstrm streams */
34 int max_strm;
35 /* number of available zstrm streams */
36 int avail_strm;
37 /* list of available strms */
38 struct list_head idle_strm;
39 wait_queue_head_t strm_wait;
40};
41
27static struct zcomp_backend *find_backend(const char *compress) 42static struct zcomp_backend *find_backend(const char *compress)
28{ 43{
29 if (strncmp(compress, "lzo", 3) == 0) 44 if (strncmp(compress, "lzo", 3) == 0)
@@ -62,6 +77,107 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
62 return zstrm; 77 return zstrm;
63} 78}
64 79
80/*
81 * get idle zcomp_strm or wait until other process release
82 * (zcomp_strm_release()) one for us
83 */
84static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp)
85{
86 struct zcomp_strm_multi *zs = comp->stream;
87 struct zcomp_strm *zstrm;
88
89 while (1) {
90 spin_lock(&zs->strm_lock);
91 if (!list_empty(&zs->idle_strm)) {
92 zstrm = list_entry(zs->idle_strm.next,
93 struct zcomp_strm, list);
94 list_del(&zstrm->list);
95 spin_unlock(&zs->strm_lock);
96 return zstrm;
97 }
98 /* zstrm streams limit reached, wait for idle stream */
99 if (zs->avail_strm >= zs->max_strm) {
100 spin_unlock(&zs->strm_lock);
101 wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
102 continue;
103 }
104 /* allocate new zstrm stream */
105 zs->avail_strm++;
106 spin_unlock(&zs->strm_lock);
107
108 zstrm = zcomp_strm_alloc(comp);
109 if (!zstrm) {
110 spin_lock(&zs->strm_lock);
111 zs->avail_strm--;
112 spin_unlock(&zs->strm_lock);
113 wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
114 continue;
115 }
116 break;
117 }
118 return zstrm;
119}
120
121/* add stream back to idle list and wake up waiter or free the stream */
122static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm)
123{
124 struct zcomp_strm_multi *zs = comp->stream;
125
126 spin_lock(&zs->strm_lock);
127 if (zs->avail_strm <= zs->max_strm) {
128 list_add(&zstrm->list, &zs->idle_strm);
129 spin_unlock(&zs->strm_lock);
130 wake_up(&zs->strm_wait);
131 return;
132 }
133
134 zs->avail_strm--;
135 spin_unlock(&zs->strm_lock);
136 zcomp_strm_free(comp, zstrm);
137}
138
139static void zcomp_strm_multi_destroy(struct zcomp *comp)
140{
141 struct zcomp_strm_multi *zs = comp->stream;
142 struct zcomp_strm *zstrm;
143
144 while (!list_empty(&zs->idle_strm)) {
145 zstrm = list_entry(zs->idle_strm.next,
146 struct zcomp_strm, list);
147 list_del(&zstrm->list);
148 zcomp_strm_free(comp, zstrm);
149 }
150 kfree(zs);
151}
152
153static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm)
154{
155 struct zcomp_strm *zstrm;
156 struct zcomp_strm_multi *zs;
157
158 comp->destroy = zcomp_strm_multi_destroy;
159 comp->strm_find = zcomp_strm_multi_find;
160 comp->strm_release = zcomp_strm_multi_release;
161 zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL);
162 if (!zs)
163 return -ENOMEM;
164
165 comp->stream = zs;
166 spin_lock_init(&zs->strm_lock);
167 INIT_LIST_HEAD(&zs->idle_strm);
168 init_waitqueue_head(&zs->strm_wait);
169 zs->max_strm = max_strm;
170 zs->avail_strm = 1;
171
172 zstrm = zcomp_strm_alloc(comp);
173 if (!zstrm) {
174 kfree(zs);
175 return -ENOMEM;
176 }
177 list_add(&zstrm->list, &zs->idle_strm);
178 return 0;
179}
180
65static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp) 181static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp)
66{ 182{
67 struct zcomp_strm_single *zs = comp->stream; 183 struct zcomp_strm_single *zs = comp->stream;
@@ -139,7 +255,7 @@ void zcomp_destroy(struct zcomp *comp)
139 * if requested algorithm is not supported or in case 255 * if requested algorithm is not supported or in case
140 * of init error 256 * of init error
141 */ 257 */
142struct zcomp *zcomp_create(const char *compress) 258struct zcomp *zcomp_create(const char *compress, int max_strm)
143{ 259{
144 struct zcomp *comp; 260 struct zcomp *comp;
145 struct zcomp_backend *backend; 261 struct zcomp_backend *backend;
@@ -153,7 +269,11 @@ struct zcomp *zcomp_create(const char *compress)
153 return NULL; 269 return NULL;
154 270
155 comp->backend = backend; 271 comp->backend = backend;
156 if (zcomp_strm_single_create(comp) != 0) { 272 if (max_strm > 1)
273 zcomp_strm_multi_create(comp, max_strm);
274 else
275 zcomp_strm_single_create(comp);
276 if (!comp->stream) {
157 kfree(comp); 277 kfree(comp);
158 return NULL; 278 return NULL;
159 } 279 }
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index dc3500d842a3..2a3684446160 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -21,6 +21,8 @@ struct zcomp_strm {
21 * working memory) 21 * working memory)
22 */ 22 */
23 void *private; 23 void *private;
24 /* used in multi stream backend, protected by backend strm_lock */
25 struct list_head list;
24}; 26};
25 27
26/* static compression backend */ 28/* static compression backend */
@@ -47,7 +49,7 @@ struct zcomp {
47 void (*destroy)(struct zcomp *comp); 49 void (*destroy)(struct zcomp *comp);
48}; 50};
49 51
50struct zcomp *zcomp_create(const char *comp); 52struct zcomp *zcomp_create(const char *comp, int max_strm);
51void zcomp_destroy(struct zcomp *comp); 53void zcomp_destroy(struct zcomp *comp);
52 54
53struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); 55struct zcomp_strm *zcomp_strm_find(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 98823f9ca8b1..bdc7eb8c6df7 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -108,6 +108,40 @@ static ssize_t mem_used_total_show(struct device *dev,
108 return sprintf(buf, "%llu\n", val); 108 return sprintf(buf, "%llu\n", val);
109} 109}
110 110
111static ssize_t max_comp_streams_show(struct device *dev,
112 struct device_attribute *attr, char *buf)
113{
114 int val;
115 struct zram *zram = dev_to_zram(dev);
116
117 down_read(&zram->init_lock);
118 val = zram->max_comp_streams;
119 up_read(&zram->init_lock);
120
121 return sprintf(buf, "%d\n", val);
122}
123
124static ssize_t max_comp_streams_store(struct device *dev,
125 struct device_attribute *attr, const char *buf, size_t len)
126{
127 int num;
128 struct zram *zram = dev_to_zram(dev);
129
130 if (kstrtoint(buf, 0, &num))
131 return -EINVAL;
132 if (num < 1)
133 return -EINVAL;
134 down_write(&zram->init_lock);
135 if (init_done(zram)) {
136 up_write(&zram->init_lock);
137 pr_info("Can't set max_comp_streams for initialized device\n");
138 return -EBUSY;
139 }
140 zram->max_comp_streams = num;
141 up_write(&zram->init_lock);
142 return len;
143}
144
111/* flag operations needs meta->tb_lock */ 145/* flag operations needs meta->tb_lock */
112static int zram_test_flag(struct zram_meta *meta, u32 index, 146static int zram_test_flag(struct zram_meta *meta, u32 index,
113 enum zram_pageflags flag) 147 enum zram_pageflags flag)
@@ -502,6 +536,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
502 } 536 }
503 537
504 zcomp_destroy(zram->comp); 538 zcomp_destroy(zram->comp);
539 zram->max_comp_streams = 1;
540
505 zram_meta_free(zram->meta); 541 zram_meta_free(zram->meta);
506 zram->meta = NULL; 542 zram->meta = NULL;
507 /* Reset stats */ 543 /* Reset stats */
@@ -537,7 +573,7 @@ static ssize_t disksize_store(struct device *dev,
537 goto out_free_meta; 573 goto out_free_meta;
538 } 574 }
539 575
540 zram->comp = zcomp_create(default_compressor); 576 zram->comp = zcomp_create(default_compressor, zram->max_comp_streams);
541 if (!zram->comp) { 577 if (!zram->comp) {
542 pr_info("Cannot initialise %s compressing backend\n", 578 pr_info("Cannot initialise %s compressing backend\n",
543 default_compressor); 579 default_compressor);
@@ -698,6 +734,8 @@ static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
698static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); 734static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
699static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); 735static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
700static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); 736static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
737static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR,
738 max_comp_streams_show, max_comp_streams_store);
701 739
702ZRAM_ATTR_RO(num_reads); 740ZRAM_ATTR_RO(num_reads);
703ZRAM_ATTR_RO(num_writes); 741ZRAM_ATTR_RO(num_writes);
@@ -722,6 +760,7 @@ static struct attribute *zram_disk_attrs[] = {
722 &dev_attr_orig_data_size.attr, 760 &dev_attr_orig_data_size.attr,
723 &dev_attr_compr_data_size.attr, 761 &dev_attr_compr_data_size.attr,
724 &dev_attr_mem_used_total.attr, 762 &dev_attr_mem_used_total.attr,
763 &dev_attr_max_comp_streams.attr,
725 NULL, 764 NULL,
726}; 765};
727 766
@@ -784,6 +823,7 @@ static int create_device(struct zram *zram, int device_id)
784 } 823 }
785 824
786 zram->meta = NULL; 825 zram->meta = NULL;
826 zram->max_comp_streams = 1;
787 return 0; 827 return 0;
788 828
789out_free_disk: 829out_free_disk:
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 45e04f7b713f..ccf36d11755a 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -99,7 +99,7 @@ struct zram {
99 * we can store in a disk. 99 * we can store in a disk.
100 */ 100 */
101 u64 disksize; /* bytes */ 101 u64 disksize; /* bytes */
102 102 int max_comp_streams;
103 struct zram_stats stats; 103 struct zram_stats stats;
104}; 104};
105#endif 105#endif