aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/zram
diff options
context:
space:
mode:
authorSergey Senozhatsky <sergey.senozhatsky@gmail.com>2016-05-20 19:59:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-20 20:58:30 -0400
commitda9556a2367cf2261ab4d3e100693c82fb1ddb26 (patch)
tree69fc1603bb3a01df93275d0f164111d2f3f129af /drivers/block/zram
parentd0d8da2dc49dfdfe1d788eaf4d55eb5d4964d926 (diff)
zram: use per-cpu compression streams
Remove idle streams list and keep compression streams in per-cpu data. This removes two contended spin_lock()/spin_unlock() calls from write path and also prevents write OP from being preempted while holding the compression stream, which can cause slowdowns. For instance, let's assume that we have N cpus and N-2 max_comp_streams. TASK1 owns the last idle stream, TASK2-TASK3 come in with the write requests: TASK1 TASK2 TASK3 zram_bvec_write() spin_lock find stream spin_unlock compress <<preempted>> zram_bvec_write() spin_lock find stream spin_unlock no_stream schedule zram_bvec_write() spin_lock find_stream spin_unlock no_stream schedule spin_lock release stream spin_unlock wake up TASK2 not only TASK2 and TASK3 will not get the stream, TASK1 will be preempted in the middle of its operation; while we would prefer it to finish compression and release the stream. Test environment: x86_64, 4 CPU box, 3G zram, lzo The following fio tests were executed: read, randread, write, randwrite, rw, randrw with the increasing number of jobs from 1 to 10. 
4 streams 8 streams per-cpu =========================================================== jobs1 READ: 2520.1MB/s 2566.5MB/s 2491.5MB/s READ: 2102.7MB/s 2104.2MB/s 2091.3MB/s WRITE: 1355.1MB/s 1320.2MB/s 1378.9MB/s WRITE: 1103.5MB/s 1097.2MB/s 1122.5MB/s READ: 434013KB/s 435153KB/s 439961KB/s WRITE: 433969KB/s 435109KB/s 439917KB/s READ: 403166KB/s 405139KB/s 403373KB/s WRITE: 403223KB/s 405197KB/s 403430KB/s jobs2 READ: 7958.6MB/s 8105.6MB/s 8073.7MB/s READ: 6864.9MB/s 6989.8MB/s 7021.8MB/s WRITE: 2438.1MB/s 2346.9MB/s 3400.2MB/s WRITE: 1994.2MB/s 1990.3MB/s 2941.2MB/s READ: 981504KB/s 973906KB/s 1018.8MB/s WRITE: 981659KB/s 974060KB/s 1018.1MB/s READ: 937021KB/s 938976KB/s 987250KB/s WRITE: 934878KB/s 936830KB/s 984993KB/s jobs3 READ: 13280MB/s 13553MB/s 13553MB/s READ: 11534MB/s 11785MB/s 11755MB/s WRITE: 3456.9MB/s 3469.9MB/s 4810.3MB/s WRITE: 3029.6MB/s 3031.6MB/s 4264.8MB/s READ: 1363.8MB/s 1362.6MB/s 1448.9MB/s WRITE: 1361.9MB/s 1360.7MB/s 1446.9MB/s READ: 1309.4MB/s 1310.6MB/s 1397.5MB/s WRITE: 1307.4MB/s 1308.5MB/s 1395.3MB/s jobs4 READ: 20244MB/s 20177MB/s 20344MB/s READ: 17886MB/s 17913MB/s 17835MB/s WRITE: 4071.6MB/s 4046.1MB/s 6370.2MB/s WRITE: 3608.9MB/s 3576.3MB/s 5785.4MB/s READ: 1824.3MB/s 1821.6MB/s 1997.5MB/s WRITE: 1819.8MB/s 1817.4MB/s 1992.5MB/s READ: 1765.7MB/s 1768.3MB/s 1937.3MB/s WRITE: 1767.5MB/s 1769.1MB/s 1939.2MB/s jobs5 READ: 18663MB/s 18986MB/s 18823MB/s READ: 16659MB/s 16605MB/s 16954MB/s WRITE: 3912.4MB/s 3888.7MB/s 6126.9MB/s WRITE: 3506.4MB/s 3442.5MB/s 5519.3MB/s READ: 1798.2MB/s 1746.5MB/s 1935.8MB/s WRITE: 1792.7MB/s 1740.7MB/s 1929.1MB/s READ: 1727.6MB/s 1658.2MB/s 1917.3MB/s WRITE: 1726.5MB/s 1657.2MB/s 1916.6MB/s jobs6 READ: 21017MB/s 20922MB/s 21162MB/s READ: 19022MB/s 19140MB/s 18770MB/s WRITE: 3968.2MB/s 4037.7MB/s 6620.8MB/s WRITE: 3643.5MB/s 3590.2MB/s 6027.5MB/s READ: 1871.8MB/s 1880.5MB/s 2049.9MB/s WRITE: 1867.8MB/s 1877.2MB/s 2046.2MB/s READ: 1755.8MB/s 1710.3MB/s 1964.7MB/s WRITE: 1750.5MB/s 1705.9MB/s 1958.8MB/s 
jobs7 READ: 21103MB/s 20677MB/s 21482MB/s READ: 18522MB/s 18379MB/s 19443MB/s WRITE: 4022.5MB/s 4067.4MB/s 6755.9MB/s WRITE: 3691.7MB/s 3695.5MB/s 5925.6MB/s READ: 1841.5MB/s 1933.9MB/s 2090.5MB/s WRITE: 1842.7MB/s 1935.3MB/s 2091.9MB/s READ: 1832.4MB/s 1856.4MB/s 1971.5MB/s WRITE: 1822.3MB/s 1846.2MB/s 1960.6MB/s jobs8 READ: 20463MB/s 20194MB/s 20862MB/s READ: 18178MB/s 17978MB/s 18299MB/s WRITE: 4085.9MB/s 4060.2MB/s 7023.8MB/s WRITE: 3776.3MB/s 3737.9MB/s 6278.2MB/s READ: 1957.6MB/s 1944.4MB/s 2109.5MB/s WRITE: 1959.2MB/s 1946.2MB/s 2111.4MB/s READ: 1900.6MB/s 1885.7MB/s 2082.1MB/s WRITE: 1896.2MB/s 1881.4MB/s 2078.3MB/s jobs9 READ: 19692MB/s 19734MB/s 19334MB/s READ: 17678MB/s 18249MB/s 17666MB/s WRITE: 4004.7MB/s 4064.8MB/s 6990.7MB/s WRITE: 3724.7MB/s 3772.1MB/s 6193.6MB/s READ: 1953.7MB/s 1967.3MB/s 2105.6MB/s WRITE: 1953.4MB/s 1966.7MB/s 2104.1MB/s READ: 1860.4MB/s 1897.4MB/s 2068.5MB/s WRITE: 1858.9MB/s 1895.9MB/s 2066.8MB/s jobs10 READ: 19730MB/s 19579MB/s 19492MB/s READ: 18028MB/s 18018MB/s 18221MB/s WRITE: 4027.3MB/s 4090.6MB/s 7020.1MB/s WRITE: 3810.5MB/s 3846.8MB/s 6426.8MB/s READ: 1956.1MB/s 1994.6MB/s 2145.2MB/s WRITE: 1955.9MB/s 1993.5MB/s 2144.8MB/s READ: 1852.8MB/s 1911.6MB/s 2075.8MB/s WRITE: 1855.7MB/s 1914.6MB/s 2078.1MB/s perf stat 4 streams 8 streams per-cpu ==================================================================================================================== jobs1 stalled-cycles-frontend 23,174,811,209 ( 38.21%) 23,220,254,188 ( 38.25%) 23,061,406,918 ( 38.34%) stalled-cycles-backend 11,514,174,638 ( 18.98%) 11,696,722,657 ( 19.27%) 11,370,852,810 ( 18.90%) instructions 73,925,005,782 ( 1.22) 73,903,177,632 ( 1.22) 73,507,201,037 ( 1.22) branches 14,455,124,835 ( 756.063) 14,455,184,779 ( 755.281) 14,378,599,509 ( 758.546) branch-misses 69,801,336 ( 0.48%) 80,225,529 ( 0.55%) 72,044,726 ( 0.50%) jobs2 stalled-cycles-frontend 49,912,741,782 ( 46.11%) 50,101,189,290 ( 45.95%) 32,874,195,633 ( 35.11%) stalled-cycles-backend 
27,080,366,230 ( 25.02%) 27,949,970,232 ( 25.63%) 16,461,222,706 ( 17.58%) instructions 122,831,629,690 ( 1.13) 122,919,846,419 ( 1.13) 121,924,786,775 ( 1.30) branches 23,725,889,239 ( 692.663) 23,733,547,140 ( 688.062) 23,553,950,311 ( 794.794) branch-misses 90,733,041 ( 0.38%) 96,320,895 ( 0.41%) 84,561,092 ( 0.36%) jobs3 stalled-cycles-frontend 66,437,834,608 ( 45.58%) 63,534,923,344 ( 43.69%) 42,101,478,505 ( 33.19%) stalled-cycles-backend 34,940,799,661 ( 23.97%) 34,774,043,148 ( 23.91%) 21,163,324,388 ( 16.68%) instructions 171,692,121,862 ( 1.18) 171,775,373,044 ( 1.18) 170,353,542,261 ( 1.34) branches 32,968,962,622 ( 628.723) 32,987,739,894 ( 630.512) 32,729,463,918 ( 717.027) branch-misses 111,522,732 ( 0.34%) 110,472,894 ( 0.33%) 99,791,291 ( 0.30%) jobs4 stalled-cycles-frontend 98,741,701,675 ( 49.72%) 94,797,349,965 ( 47.59%) 54,535,655,381 ( 33.53%) stalled-cycles-backend 54,642,609,615 ( 27.51%) 55,233,554,408 ( 27.73%) 27,882,323,541 ( 17.14%) instructions 220,884,807,851 ( 1.11) 220,930,887,273 ( 1.11) 218,926,845,851 ( 1.35) branches 42,354,518,180 ( 592.105) 42,362,770,587 ( 590.452) 41,955,552,870 ( 716.154) branch-misses 138,093,449 ( 0.33%) 131,295,286 ( 0.31%) 121,794,771 ( 0.29%) jobs5 stalled-cycles-frontend 116,219,747,212 ( 48.14%) 110,310,397,012 ( 46.29%) 66,373,082,723 ( 33.70%) stalled-cycles-backend 66,325,434,776 ( 27.48%) 64,157,087,914 ( 26.92%) 32,999,097,299 ( 16.76%) instructions 270,615,008,466 ( 1.12) 270,546,409,525 ( 1.14) 268,439,910,948 ( 1.36) branches 51,834,046,557 ( 599.108) 51,811,867,722 ( 608.883) 51,412,576,077 ( 729.213) branch-misses 158,197,086 ( 0.31%) 142,639,805 ( 0.28%) 133,425,455 ( 0.26%) jobs6 stalled-cycles-frontend 138,009,414,492 ( 48.23%) 139,063,571,254 ( 48.80%) 75,278,568,278 ( 32.80%) stalled-cycles-backend 79,211,949,650 ( 27.68%) 79,077,241,028 ( 27.75%) 37,735,797,899 ( 16.44%) instructions 319,763,993,731 ( 1.12) 319,937,782,834 ( 1.12) 316,663,600,784 ( 1.38) branches 61,219,433,294 ( 
595.056) 61,250,355,540 ( 598.215) 60,523,446,617 ( 733.706) branch-misses 169,257,123 ( 0.28%) 154,898,028 ( 0.25%) 141,180,587 ( 0.23%) jobs7 stalled-cycles-frontend 162,974,812,119 ( 49.20%) 159,290,061,987 ( 48.43%) 88,046,641,169 ( 33.21%) stalled-cycles-backend 92,223,151,661 ( 27.84%) 91,667,904,406 ( 27.87%) 44,068,454,971 ( 16.62%) instructions 369,516,432,430 ( 1.12) 369,361,799,063 ( 1.12) 365,290,380,661 ( 1.38) branches 70,795,673,950 ( 594.220) 70,743,136,124 ( 597.876) 69,803,996,038 ( 732.822) branch-misses 181,708,327 ( 0.26%) 165,767,821 ( 0.23%) 150,109,797 ( 0.22%) jobs8 stalled-cycles-frontend 185,000,017,027 ( 49.30%) 182,334,345,473 ( 48.37%) 99,980,147,041 ( 33.26%) stalled-cycles-backend 105,753,516,186 ( 28.18%) 107,937,830,322 ( 28.63%) 51,404,177,181 ( 17.10%) instructions 418,153,161,055 ( 1.11) 418,308,565,828 ( 1.11) 413,653,475,581 ( 1.38) branches 80,035,882,398 ( 592.296) 80,063,204,510 ( 589.843) 79,024,105,589 ( 730.530) branch-misses 199,764,528 ( 0.25%) 177,936,926 ( 0.22%) 160,525,449 ( 0.20%) jobs9 stalled-cycles-frontend 210,941,799,094 ( 49.63%) 204,714,679,254 ( 48.55%) 114,251,113,756 ( 33.96%) stalled-cycles-backend 122,640,849,067 ( 28.85%) 122,188,553,256 ( 28.98%) 58,360,041,127 ( 17.35%) instructions 468,151,025,415 ( 1.10) 467,354,869,323 ( 1.11) 462,665,165,216 ( 1.38) branches 89,657,067,510 ( 585.628) 89,411,550,407 ( 588.990) 88,360,523,943 ( 730.151) branch-misses 218,292,301 ( 0.24%) 191,701,247 ( 0.21%) 178,535,678 ( 0.20%) jobs10 stalled-cycles-frontend 233,595,958,008 ( 49.81%) 227,540,615,689 ( 49.11%) 160,341,979,938 ( 43.07%) stalled-cycles-backend 136,153,676,021 ( 29.03%) 133,635,240,742 ( 28.84%) 65,909,135,465 ( 17.70%) instructions 517,001,168,497 ( 1.10) 516,210,976,158 ( 1.11) 511,374,038,613 ( 1.37) branches 98,911,641,329 ( 585.796) 98,700,069,712 ( 591.583) 97,646,761,028 ( 728.712) branch-misses 232,341,823 ( 0.23%) 199,256,308 ( 0.20%) 183,135,268 ( 0.19%) per-cpu streams tend to cause 
significantly less stalled cycles; execute less branches and hit less branch-misses. perf stat reported execution time 4 streams 8 streams per-cpu ==================================================================== jobs1 seconds elapsed 20.909073870 20.875670495 20.817838540 jobs2 seconds elapsed 18.529488399 18.720566469 16.356103108 jobs3 seconds elapsed 18.991159531 18.991340812 16.766216066 jobs4 seconds elapsed 19.560643828 19.551323547 16.246621715 jobs5 seconds elapsed 24.746498464 25.221646740 20.696112444 jobs6 seconds elapsed 28.258181828 28.289765505 22.885688857 jobs7 seconds elapsed 32.632490241 31.909125381 26.272753738 jobs8 seconds elapsed 35.651403851 36.027596308 29.108024711 jobs9 seconds elapsed 40.569362365 40.024227989 32.898204012 jobs10 seconds elapsed 44.673112304 43.874898137 35.632952191 Please see Link: http://marc.info/?l=linux-kernel&m=146166970727530 Link: http://marc.info/?l=linux-kernel&m=146174716719650 for more test results (under low memory conditions). Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Suggested-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/zram')
-rw-r--r--drivers/block/zram/zcomp.c297
-rw-r--r--drivers/block/zram/zcomp.h14
-rw-r--r--drivers/block/zram/zram_drv.c36
3 files changed, 116 insertions, 231 deletions
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 3ef42e563bb5..bc98d5ed5477 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/wait.h> 14#include <linux/wait.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/cpu.h>
16 17
17#include "zcomp.h" 18#include "zcomp.h"
18#include "zcomp_lzo.h" 19#include "zcomp_lzo.h"
@@ -20,29 +21,6 @@
20#include "zcomp_lz4.h" 21#include "zcomp_lz4.h"
21#endif 22#endif
22 23
23/*
24 * single zcomp_strm backend
25 */
26struct zcomp_strm_single {
27 struct mutex strm_lock;
28 struct zcomp_strm *zstrm;
29};
30
31/*
32 * multi zcomp_strm backend
33 */
34struct zcomp_strm_multi {
35 /* protect strm list */
36 spinlock_t strm_lock;
37 /* max possible number of zstrm streams */
38 int max_strm;
39 /* number of available zstrm streams */
40 int avail_strm;
41 /* list of available strms */
42 struct list_head idle_strm;
43 wait_queue_head_t strm_wait;
44};
45
46static struct zcomp_backend *backends[] = { 24static struct zcomp_backend *backends[] = {
47 &zcomp_lzo, 25 &zcomp_lzo,
48#ifdef CONFIG_ZRAM_LZ4_COMPRESS 26#ifdef CONFIG_ZRAM_LZ4_COMPRESS
@@ -93,188 +71,6 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags)
93 return zstrm; 71 return zstrm;
94} 72}
95 73
96/*
97 * get idle zcomp_strm or wait until other process release
98 * (zcomp_strm_release()) one for us
99 */
100static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp)
101{
102 struct zcomp_strm_multi *zs = comp->stream;
103 struct zcomp_strm *zstrm;
104
105 while (1) {
106 spin_lock(&zs->strm_lock);
107 if (!list_empty(&zs->idle_strm)) {
108 zstrm = list_entry(zs->idle_strm.next,
109 struct zcomp_strm, list);
110 list_del(&zstrm->list);
111 spin_unlock(&zs->strm_lock);
112 return zstrm;
113 }
114 /* zstrm streams limit reached, wait for idle stream */
115 if (zs->avail_strm >= zs->max_strm) {
116 spin_unlock(&zs->strm_lock);
117 wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
118 continue;
119 }
120 /* allocate new zstrm stream */
121 zs->avail_strm++;
122 spin_unlock(&zs->strm_lock);
123 /*
124 * This function can be called in swapout/fs write path
125 * so we can't use GFP_FS|IO. And it assumes we already
126 * have at least one stream in zram initialization so we
127 * don't do best effort to allocate more stream in here.
128 * A default stream will work well without further multiple
129 * streams. That's why we use NORETRY | NOWARN.
130 */
131 zstrm = zcomp_strm_alloc(comp, GFP_NOIO | __GFP_NORETRY |
132 __GFP_NOWARN);
133 if (!zstrm) {
134 spin_lock(&zs->strm_lock);
135 zs->avail_strm--;
136 spin_unlock(&zs->strm_lock);
137 wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
138 continue;
139 }
140 break;
141 }
142 return zstrm;
143}
144
145/* add stream back to idle list and wake up waiter or free the stream */
146static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm)
147{
148 struct zcomp_strm_multi *zs = comp->stream;
149
150 spin_lock(&zs->strm_lock);
151 if (zs->avail_strm <= zs->max_strm) {
152 list_add(&zstrm->list, &zs->idle_strm);
153 spin_unlock(&zs->strm_lock);
154 wake_up(&zs->strm_wait);
155 return;
156 }
157
158 zs->avail_strm--;
159 spin_unlock(&zs->strm_lock);
160 zcomp_strm_free(comp, zstrm);
161}
162
163/* change max_strm limit */
164static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm)
165{
166 struct zcomp_strm_multi *zs = comp->stream;
167 struct zcomp_strm *zstrm;
168
169 spin_lock(&zs->strm_lock);
170 zs->max_strm = num_strm;
171 /*
172 * if user has lowered the limit and there are idle streams,
173 * immediately free as much streams (and memory) as we can.
174 */
175 while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) {
176 zstrm = list_entry(zs->idle_strm.next,
177 struct zcomp_strm, list);
178 list_del(&zstrm->list);
179 zcomp_strm_free(comp, zstrm);
180 zs->avail_strm--;
181 }
182 spin_unlock(&zs->strm_lock);
183 return true;
184}
185
186static void zcomp_strm_multi_destroy(struct zcomp *comp)
187{
188 struct zcomp_strm_multi *zs = comp->stream;
189 struct zcomp_strm *zstrm;
190
191 while (!list_empty(&zs->idle_strm)) {
192 zstrm = list_entry(zs->idle_strm.next,
193 struct zcomp_strm, list);
194 list_del(&zstrm->list);
195 zcomp_strm_free(comp, zstrm);
196 }
197 kfree(zs);
198}
199
200static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm)
201{
202 struct zcomp_strm *zstrm;
203 struct zcomp_strm_multi *zs;
204
205 comp->destroy = zcomp_strm_multi_destroy;
206 comp->strm_find = zcomp_strm_multi_find;
207 comp->strm_release = zcomp_strm_multi_release;
208 comp->set_max_streams = zcomp_strm_multi_set_max_streams;
209 zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL);
210 if (!zs)
211 return -ENOMEM;
212
213 comp->stream = zs;
214 spin_lock_init(&zs->strm_lock);
215 INIT_LIST_HEAD(&zs->idle_strm);
216 init_waitqueue_head(&zs->strm_wait);
217 zs->max_strm = max_strm;
218 zs->avail_strm = 1;
219
220 zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
221 if (!zstrm) {
222 kfree(zs);
223 return -ENOMEM;
224 }
225 list_add(&zstrm->list, &zs->idle_strm);
226 return 0;
227}
228
229static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp)
230{
231 struct zcomp_strm_single *zs = comp->stream;
232 mutex_lock(&zs->strm_lock);
233 return zs->zstrm;
234}
235
236static void zcomp_strm_single_release(struct zcomp *comp,
237 struct zcomp_strm *zstrm)
238{
239 struct zcomp_strm_single *zs = comp->stream;
240 mutex_unlock(&zs->strm_lock);
241}
242
243static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm)
244{
245 /* zcomp_strm_single support only max_comp_streams == 1 */
246 return false;
247}
248
249static void zcomp_strm_single_destroy(struct zcomp *comp)
250{
251 struct zcomp_strm_single *zs = comp->stream;
252 zcomp_strm_free(comp, zs->zstrm);
253 kfree(zs);
254}
255
256static int zcomp_strm_single_create(struct zcomp *comp)
257{
258 struct zcomp_strm_single *zs;
259
260 comp->destroy = zcomp_strm_single_destroy;
261 comp->strm_find = zcomp_strm_single_find;
262 comp->strm_release = zcomp_strm_single_release;
263 comp->set_max_streams = zcomp_strm_single_set_max_streams;
264 zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL);
265 if (!zs)
266 return -ENOMEM;
267
268 comp->stream = zs;
269 mutex_init(&zs->strm_lock);
270 zs->zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
271 if (!zs->zstrm) {
272 kfree(zs);
273 return -ENOMEM;
274 }
275 return 0;
276}
277
278/* show available compressors */ 74/* show available compressors */
279ssize_t zcomp_available_show(const char *comp, char *buf) 75ssize_t zcomp_available_show(const char *comp, char *buf)
280{ 76{
@@ -301,17 +97,17 @@ bool zcomp_available_algorithm(const char *comp)
301 97
302bool zcomp_set_max_streams(struct zcomp *comp, int num_strm) 98bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
303{ 99{
304 return comp->set_max_streams(comp, num_strm); 100 return true;
305} 101}
306 102
307struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) 103struct zcomp_strm *zcomp_strm_find(struct zcomp *comp)
308{ 104{
309 return comp->strm_find(comp); 105 return *get_cpu_ptr(comp->stream);
310} 106}
311 107
312void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm) 108void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm)
313{ 109{
314 comp->strm_release(comp, zstrm); 110 put_cpu_ptr(comp->stream);
315} 111}
316 112
317int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, 113int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
@@ -327,9 +123,83 @@ int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
327 return comp->backend->decompress(src, src_len, dst); 123 return comp->backend->decompress(src, src_len, dst);
328} 124}
329 125
126static int __zcomp_cpu_notifier(struct zcomp *comp,
127 unsigned long action, unsigned long cpu)
128{
129 struct zcomp_strm *zstrm;
130
131 switch (action) {
132 case CPU_UP_PREPARE:
133 if (WARN_ON(*per_cpu_ptr(comp->stream, cpu)))
134 break;
135 zstrm = zcomp_strm_alloc(comp, GFP_KERNEL);
136 if (IS_ERR_OR_NULL(zstrm)) {
137 pr_err("Can't allocate a compression stream\n");
138 return NOTIFY_BAD;
139 }
140 *per_cpu_ptr(comp->stream, cpu) = zstrm;
141 break;
142 case CPU_DEAD:
143 case CPU_UP_CANCELED:
144 zstrm = *per_cpu_ptr(comp->stream, cpu);
145 if (!IS_ERR_OR_NULL(zstrm))
146 zcomp_strm_free(comp, zstrm);
147 *per_cpu_ptr(comp->stream, cpu) = NULL;
148 break;
149 default:
150 break;
151 }
152 return NOTIFY_OK;
153}
154
155static int zcomp_cpu_notifier(struct notifier_block *nb,
156 unsigned long action, void *pcpu)
157{
158 unsigned long cpu = (unsigned long)pcpu;
159 struct zcomp *comp = container_of(nb, typeof(*comp), notifier);
160
161 return __zcomp_cpu_notifier(comp, action, cpu);
162}
163
164static int zcomp_init(struct zcomp *comp)
165{
166 unsigned long cpu;
167 int ret;
168
169 comp->notifier.notifier_call = zcomp_cpu_notifier;
170
171 comp->stream = alloc_percpu(struct zcomp_strm *);
172 if (!comp->stream)
173 return -ENOMEM;
174
175 cpu_notifier_register_begin();
176 for_each_online_cpu(cpu) {
177 ret = __zcomp_cpu_notifier(comp, CPU_UP_PREPARE, cpu);
178 if (ret == NOTIFY_BAD)
179 goto cleanup;
180 }
181 __register_cpu_notifier(&comp->notifier);
182 cpu_notifier_register_done();
183 return 0;
184
185cleanup:
186 for_each_online_cpu(cpu)
187 __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
188 cpu_notifier_register_done();
189 return -ENOMEM;
190}
191
330void zcomp_destroy(struct zcomp *comp) 192void zcomp_destroy(struct zcomp *comp)
331{ 193{
332 comp->destroy(comp); 194 unsigned long cpu;
195
196 cpu_notifier_register_begin();
197 for_each_online_cpu(cpu)
198 __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu);
199 __unregister_cpu_notifier(&comp->notifier);
200 cpu_notifier_register_done();
201
202 free_percpu(comp->stream);
333 kfree(comp); 203 kfree(comp);
334} 204}
335 205
@@ -339,9 +209,9 @@ void zcomp_destroy(struct zcomp *comp)
339 * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) 209 * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL)
340 * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in 210 * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in
341 * case of allocation error, or any other error potentially 211 * case of allocation error, or any other error potentially
342 * returned by functions zcomp_strm_{multi,single}_create. 212 * returned by zcomp_init().
343 */ 213 */
344struct zcomp *zcomp_create(const char *compress, int max_strm) 214struct zcomp *zcomp_create(const char *compress)
345{ 215{
346 struct zcomp *comp; 216 struct zcomp *comp;
347 struct zcomp_backend *backend; 217 struct zcomp_backend *backend;
@@ -356,10 +226,7 @@ struct zcomp *zcomp_create(const char *compress, int max_strm)
356 return ERR_PTR(-ENOMEM); 226 return ERR_PTR(-ENOMEM);
357 227
358 comp->backend = backend; 228 comp->backend = backend;
359 if (max_strm > 1) 229 error = zcomp_init(comp);
360 error = zcomp_strm_multi_create(comp, max_strm);
361 else
362 error = zcomp_strm_single_create(comp);
363 if (error) { 230 if (error) {
364 kfree(comp); 231 kfree(comp);
365 return ERR_PTR(error); 232 return ERR_PTR(error);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index b7d2a4bcae54..ffd88cb747fe 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -10,8 +10,6 @@
10#ifndef _ZCOMP_H_ 10#ifndef _ZCOMP_H_
11#define _ZCOMP_H_ 11#define _ZCOMP_H_
12 12
13#include <linux/mutex.h>
14
15struct zcomp_strm { 13struct zcomp_strm {
16 /* compression/decompression buffer */ 14 /* compression/decompression buffer */
17 void *buffer; 15 void *buffer;
@@ -21,8 +19,6 @@ struct zcomp_strm {
21 * working memory) 19 * working memory)
22 */ 20 */
23 void *private; 21 void *private;
24 /* used in multi stream backend, protected by backend strm_lock */
25 struct list_head list;
26}; 22};
27 23
28/* static compression backend */ 24/* static compression backend */
@@ -41,19 +37,15 @@ struct zcomp_backend {
41 37
42/* dynamic per-device compression frontend */ 38/* dynamic per-device compression frontend */
43struct zcomp { 39struct zcomp {
44 void *stream; 40 struct zcomp_strm * __percpu *stream;
45 struct zcomp_backend *backend; 41 struct zcomp_backend *backend;
46 42 struct notifier_block notifier;
47 struct zcomp_strm *(*strm_find)(struct zcomp *comp);
48 void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm);
49 bool (*set_max_streams)(struct zcomp *comp, int num_strm);
50 void (*destroy)(struct zcomp *comp);
51}; 43};
52 44
53ssize_t zcomp_available_show(const char *comp, char *buf); 45ssize_t zcomp_available_show(const char *comp, char *buf);
54bool zcomp_available_algorithm(const char *comp); 46bool zcomp_available_algorithm(const char *comp);
55 47
56struct zcomp *zcomp_create(const char *comp, int max_strm); 48struct zcomp *zcomp_create(const char *comp);
57void zcomp_destroy(struct zcomp *comp); 49void zcomp_destroy(struct zcomp *comp);
58 50
59struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); 51struct zcomp_strm *zcomp_strm_find(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index b09acdb753ee..f92965c4229b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -650,7 +650,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
650{ 650{
651 int ret = 0; 651 int ret = 0;
652 size_t clen; 652 size_t clen;
653 unsigned long handle; 653 unsigned long handle = 0;
654 struct page *page; 654 struct page *page;
655 unsigned char *user_mem, *cmem, *src, *uncmem = NULL; 655 unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
656 struct zram_meta *meta = zram->meta; 656 struct zram_meta *meta = zram->meta;
@@ -673,9 +673,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
673 goto out; 673 goto out;
674 } 674 }
675 675
676 zstrm = zcomp_strm_find(zram->comp); 676compress_again:
677 user_mem = kmap_atomic(page); 677 user_mem = kmap_atomic(page);
678
679 if (is_partial_io(bvec)) { 678 if (is_partial_io(bvec)) {
680 memcpy(uncmem + offset, user_mem + bvec->bv_offset, 679 memcpy(uncmem + offset, user_mem + bvec->bv_offset,
681 bvec->bv_len); 680 bvec->bv_len);
@@ -699,6 +698,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
699 goto out; 698 goto out;
700 } 699 }
701 700
701 zstrm = zcomp_strm_find(zram->comp);
702 ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); 702 ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
703 if (!is_partial_io(bvec)) { 703 if (!is_partial_io(bvec)) {
704 kunmap_atomic(user_mem); 704 kunmap_atomic(user_mem);
@@ -710,6 +710,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
710 pr_err("Compression failed! err=%d\n", ret); 710 pr_err("Compression failed! err=%d\n", ret);
711 goto out; 711 goto out;
712 } 712 }
713
713 src = zstrm->buffer; 714 src = zstrm->buffer;
714 if (unlikely(clen > max_zpage_size)) { 715 if (unlikely(clen > max_zpage_size)) {
715 clen = PAGE_SIZE; 716 clen = PAGE_SIZE;
@@ -717,8 +718,33 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
717 src = uncmem; 718 src = uncmem;
718 } 719 }
719 720
720 handle = zs_malloc(meta->mem_pool, clen, GFP_NOIO | __GFP_HIGHMEM); 721 /*
722 * handle allocation has 2 paths:
723 * a) fast path is executed with preemption disabled (for
724 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
725 * since we can't sleep;
726 * b) slow path enables preemption and attempts to allocate
727 * the page with __GFP_DIRECT_RECLAIM bit set. we have to
728 * put per-cpu compression stream and, thus, to re-do
729 * the compression once handle is allocated.
730 *
731 * if we have a 'non-null' handle here then we are coming
732 * from the slow path and handle has already been allocated.
733 */
734 if (!handle)
735 handle = zs_malloc(meta->mem_pool, clen,
736 __GFP_KSWAPD_RECLAIM |
737 __GFP_NOWARN |
738 __GFP_HIGHMEM);
721 if (!handle) { 739 if (!handle) {
740 zcomp_strm_release(zram->comp, zstrm);
741 zstrm = NULL;
742
743 handle = zs_malloc(meta->mem_pool, clen,
744 GFP_NOIO | __GFP_HIGHMEM);
745 if (handle)
746 goto compress_again;
747
722 pr_err("Error allocating memory for compressed page: %u, size=%zu\n", 748 pr_err("Error allocating memory for compressed page: %u, size=%zu\n",
723 index, clen); 749 index, clen);
724 ret = -ENOMEM; 750 ret = -ENOMEM;
@@ -1038,7 +1064,7 @@ static ssize_t disksize_store(struct device *dev,
1038 if (!meta) 1064 if (!meta)
1039 return -ENOMEM; 1065 return -ENOMEM;
1040 1066
1041 comp = zcomp_create(zram->compressor, zram->max_comp_streams); 1067 comp = zcomp_create(zram->compressor);
1042 if (IS_ERR(comp)) { 1068 if (IS_ERR(comp)) {
1043 pr_err("Cannot initialise %s compressing backend\n", 1069 pr_err("Cannot initialise %s compressing backend\n",
1044 zram->compressor); 1070 zram->compressor);