aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGao Xiang <gaoxiang25@huawei.com>2018-07-26 08:22:06 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-07-27 11:24:10 -0400
commit3883a79abd02272222a214a5f84395d41eecdc84 (patch)
tree20b11dc7d7ddf0cb76f66523acc27d91c5329459
parente7e9a307be9d75ecc3bf20b362af88140dfb4304 (diff)
staging: erofs: introduce VLE decompression support
This patch introduces the basic in-place VLE decompression implementation for the erofs file system.

Compared with fixed-sized input compression, it implements what we call 'the variable-length extent compression', which specifies the same output size for each compression block to make full use of IO bandwidth (which means almost all data from the block device can be directly used for decompression), improve the real (rather than just via data caching, which costs more memory) random read, and keep relatively lower compression ratios (it saves more storage space than fixed-sized input compression which is also configured with the same input block size), as illustrated below:

    |--- variable-length extent ---|------ VLE ------|--- VLE ---|
     /> clusterofs     /> clusterofs    /> clusterofs    /> clusterofs
  ++---|-------++-----------++---------|-++-----------++-|---------++-|
...||   |      ||           ||         | ||           || |         || | ...   original data
  ++---|-------++-----------++---------|-++-----------++-|---------++-|
   ++->cluster<-++->cluster<-++->cluster<-++->cluster<-++->cluster<-++
        size         size         size         size         size
          \               /             /             /
           \             /             /             /
            \           /             /             /
        ++-----------++-----------++-----------++
 ...    ||           ||           ||           ||  ...   compressed clusters
        ++-----------++-----------++-----------++
         ++->cluster<-++->cluster<-++->cluster<-++
              size         size         size

The main point of 'in-place' refers to the decompression mode: instead of allocating independent compressed pages and data structures, it reuses the allocated file cache pages as much as possible to store its compressed data and the corresponding pagevec in a time-sharing approach by default, which will be useful for low-memory scenarios.
In the end, unlike other filesystems with (de)compression support that use a relatively large compression block size, which read and decompress >= 128KB at once and thus gain better-looking random-read numbers (in fact they collect small random reads into large sequential reads and cache all decompressed data in memory, but that is unacceptable especially for embedded devices with limited memory, and it is not real random read), we select a universal small-sized 4KB compressed cluster, which is the smallest page size for most architectures, and all compressed clusters can be read and decompressed independently, which ensures random-read performance for all use cases.

Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--drivers/staging/erofs/inode.c5
-rw-r--r--drivers/staging/erofs/internal.h6
-rw-r--r--drivers/staging/erofs/super.c25
-rw-r--r--drivers/staging/erofs/unzip_vle.c1119
-rw-r--r--drivers/staging/erofs/unzip_vle.h204
-rw-r--r--drivers/staging/erofs/utils.c61
6 files changed, 1418 insertions, 2 deletions
diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
index 613c9771bd14..fbf6ff25cd1b 100644
--- a/drivers/staging/erofs/inode.c
+++ b/drivers/staging/erofs/inode.c
@@ -210,7 +210,12 @@ static int fill_inode(struct inode *inode, int isdir)
210 } 210 }
211 211
212 if (is_inode_layout_compression(inode)) { 212 if (is_inode_layout_compression(inode)) {
213#ifdef CONFIG_EROFS_FS_ZIP
214 inode->i_mapping->a_ops =
215 &z_erofs_vle_normalaccess_aops;
216#else
213 err = -ENOTSUPP; 217 err = -ENOTSUPP;
218#endif
214 goto out_unlock; 219 goto out_unlock;
215 } 220 }
216 221
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index b07cd7aa0a09..3adec7d95d3e 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -262,6 +262,9 @@ static inline void erofs_workstation_cleanup_all(struct super_block *sb)
262#ifdef CONFIG_EROFS_FS_ZIP 262#ifdef CONFIG_EROFS_FS_ZIP
263/* hard limit of pages per compressed cluster */ 263/* hard limit of pages per compressed cluster */
264#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT) 264#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
265
266/* page count of a compressed cluster */
267#define erofs_clusterpages(sbi) ((1 << (sbi)->clusterbits) / PAGE_SIZE)
265#endif 268#endif
266 269
267typedef u64 erofs_off_t; 270typedef u64 erofs_off_t;
@@ -340,6 +343,9 @@ extern const struct inode_operations erofs_dir_iops;
340extern const struct file_operations erofs_dir_fops; 343extern const struct file_operations erofs_dir_fops;
341 344
342extern const struct address_space_operations erofs_raw_access_aops; 345extern const struct address_space_operations erofs_raw_access_aops;
346#ifdef CONFIG_EROFS_FS_ZIP
347extern const struct address_space_operations z_erofs_vle_normalaccess_aops;
348#endif
343 349
344/* 350/*
345 * Logical to physical block mapping, used by erofs_map_blocks() 351 * Logical to physical block mapping, used by erofs_map_blocks()
diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c
index e155a2b0d43e..2bd433ab4c49 100644
--- a/drivers/staging/erofs/super.c
+++ b/drivers/staging/erofs/super.c
@@ -115,6 +115,13 @@ static int superblock_read(struct super_block *sb)
115 sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr); 115 sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr);
116#endif 116#endif
117 sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1; 117 sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
118#ifdef CONFIG_EROFS_FS_ZIP
119 sbi->clusterbits = 12;
120
121 if (1 << (sbi->clusterbits - 12) > Z_EROFS_CLUSTER_MAX_PAGES)
122 errln("clusterbits %u is not supported on this kernel",
123 sbi->clusterbits);
124#endif
118 125
119 sbi->root_nid = le16_to_cpu(layout->root_nid); 126 sbi->root_nid = le16_to_cpu(layout->root_nid);
120 sbi->inos = le64_to_cpu(layout->inos); 127 sbi->inos = le64_to_cpu(layout->inos);
@@ -441,6 +448,11 @@ static struct file_system_type erofs_fs_type = {
441}; 448};
442MODULE_ALIAS_FS("erofs"); 449MODULE_ALIAS_FS("erofs");
443 450
451#ifdef CONFIG_EROFS_FS_ZIP
452extern int z_erofs_init_zip_subsystem(void);
453extern void z_erofs_exit_zip_subsystem(void);
454#endif
455
444static int __init erofs_module_init(void) 456static int __init erofs_module_init(void)
445{ 457{
446 int err; 458 int err;
@@ -456,6 +468,12 @@ static int __init erofs_module_init(void)
456 if (err) 468 if (err)
457 goto shrinker_err; 469 goto shrinker_err;
458 470
471#ifdef CONFIG_EROFS_FS_ZIP
472 err = z_erofs_init_zip_subsystem();
473 if (err)
474 goto zip_err;
475#endif
476
459 err = register_filesystem(&erofs_fs_type); 477 err = register_filesystem(&erofs_fs_type);
460 if (err) 478 if (err)
461 goto fs_err; 479 goto fs_err;
@@ -464,6 +482,10 @@ static int __init erofs_module_init(void)
464 return 0; 482 return 0;
465 483
466fs_err: 484fs_err:
485#ifdef CONFIG_EROFS_FS_ZIP
486 z_erofs_exit_zip_subsystem();
487zip_err:
488#endif
467 unregister_shrinker(&erofs_shrinker_info); 489 unregister_shrinker(&erofs_shrinker_info);
468shrinker_err: 490shrinker_err:
469 erofs_exit_inode_cache(); 491 erofs_exit_inode_cache();
@@ -474,6 +496,9 @@ icache_err:
474static void __exit erofs_module_exit(void) 496static void __exit erofs_module_exit(void)
475{ 497{
476 unregister_filesystem(&erofs_fs_type); 498 unregister_filesystem(&erofs_fs_type);
499#ifdef CONFIG_EROFS_FS_ZIP
500 z_erofs_exit_zip_subsystem();
501#endif
477 unregister_shrinker(&erofs_shrinker_info); 502 unregister_shrinker(&erofs_shrinker_info);
478 erofs_exit_inode_cache(); 503 erofs_exit_inode_cache();
479 infoln("successfully finalize erofs"); 504 infoln("successfully finalize erofs");
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index 329cbe47f599..f0ead60a8fee 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -10,7 +10,1124 @@
10 * License. See the file COPYING in the main directory of the Linux 10 * License. See the file COPYING in the main directory of the Linux
11 * distribution for more details. 11 * distribution for more details.
12 */ 12 */
13#include "internal.h" 13#include "unzip_vle.h"
14#include <linux/prefetch.h>
15
16static struct workqueue_struct *z_erofs_workqueue __read_mostly;
17static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;
18
19void z_erofs_exit_zip_subsystem(void)
20{
21 BUG_ON(z_erofs_workqueue == NULL);
22 BUG_ON(z_erofs_workgroup_cachep == NULL);
23
24 destroy_workqueue(z_erofs_workqueue);
25 kmem_cache_destroy(z_erofs_workgroup_cachep);
26}
27
28static inline int init_unzip_workqueue(void)
29{
30 const unsigned onlinecpus = num_possible_cpus();
31
32 /*
33 * we don't need too many threads, limiting threads
34 * could improve scheduling performance.
35 */
36 z_erofs_workqueue = alloc_workqueue("erofs_unzipd",
37 WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
38 onlinecpus + onlinecpus / 4);
39
40 return z_erofs_workqueue != NULL ? 0 : -ENOMEM;
41}
42
43int z_erofs_init_zip_subsystem(void)
44{
45 z_erofs_workgroup_cachep =
46 kmem_cache_create("erofs_compress",
47 Z_EROFS_WORKGROUP_SIZE, 0,
48 SLAB_RECLAIM_ACCOUNT, NULL);
49
50 if (z_erofs_workgroup_cachep != NULL) {
51 if (!init_unzip_workqueue())
52 return 0;
53
54 kmem_cache_destroy(z_erofs_workgroup_cachep);
55 }
56 return -ENOMEM;
57}
58
59enum z_erofs_vle_work_role {
60 Z_EROFS_VLE_WORK_SECONDARY,
61 Z_EROFS_VLE_WORK_PRIMARY,
62 /*
63 * The current work has at least been linked with the following
64 * processed chained works, which means if the processing page
65 * is the tail partial page of the work, the current work can
66 * safely use the whole page, as illustrated below:
67 * +--------------+-------------------------------------------+
68 * | tail page | head page (of the previous work) |
69 * +--------------+-------------------------------------------+
70 * /\ which belongs to the current work
71 * [ (*) this page can be used for the current work itself. ]
72 */
73 Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
74 Z_EROFS_VLE_WORK_MAX
75};
76
77struct z_erofs_vle_work_builder {
78 enum z_erofs_vle_work_role role;
79 /*
80 * 'hosted = false' means that the current workgroup doesn't belong to
81 * the owned chained workgroups. In the other words, it is none of our
82 * business to submit this workgroup.
83 */
84 bool hosted;
85
86 struct z_erofs_vle_workgroup *grp;
87 struct z_erofs_vle_work *work;
88 struct z_erofs_pagevec_ctor vector;
89
90 /* pages used for reading the compressed data */
91 struct page **compressed_pages;
92 unsigned compressed_deficit;
93};
94
95#define VLE_WORK_BUILDER_INIT() \
96 { .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED }
97
98/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
99static inline bool try_to_reuse_as_compressed_page(
100 struct z_erofs_vle_work_builder *b,
101 struct page *page)
102{
103 while (b->compressed_deficit) {
104 --b->compressed_deficit;
105 if (NULL == cmpxchg(b->compressed_pages++, NULL, page))
106 return true;
107 }
108
109 return false;
110}
111
112/* callers must be with work->lock held */
113static int z_erofs_vle_work_add_page(
114 struct z_erofs_vle_work_builder *builder,
115 struct page *page,
116 enum z_erofs_page_type type)
117{
118 int ret;
119 bool occupied;
120
121 /* give priority for the compressed data storage */
122 if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY &&
123 type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
124 try_to_reuse_as_compressed_page(builder, page))
125 return 0;
126
127 ret = z_erofs_pagevec_ctor_enqueue(&builder->vector,
128 page, type, &occupied);
129 builder->work->vcnt += (unsigned)ret;
130
131 return ret ? 0 : -EAGAIN;
132}
133
134static inline bool try_to_claim_workgroup(
135 struct z_erofs_vle_workgroup *grp,
136 z_erofs_vle_owned_workgrp_t *owned_head,
137 bool *hosted)
138{
139 DBG_BUGON(*hosted == true);
140
141 /* let's claim these following types of workgroup */
142retry:
143 if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) {
144 /* type 1, nil workgroup */
145 if (Z_EROFS_VLE_WORKGRP_NIL != cmpxchg(&grp->next,
146 Z_EROFS_VLE_WORKGRP_NIL, *owned_head))
147 goto retry;
148
149 *owned_head = grp;
150 *hosted = true;
151 } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
152 /*
153 * type 2, link to the end of a existing open chain,
154 * be careful that its submission itself is governed
155 * by the original owned chain.
156 */
157 if (Z_EROFS_VLE_WORKGRP_TAIL != cmpxchg(&grp->next,
158 Z_EROFS_VLE_WORKGRP_TAIL, *owned_head))
159 goto retry;
160
161 *owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
162 } else
163 return false; /* :( better luck next time */
164
165 return true; /* lucky, I am the followee :) */
166}
167
168static struct z_erofs_vle_work *
169z_erofs_vle_work_lookup(struct super_block *sb,
170 pgoff_t idx, unsigned pageofs,
171 struct z_erofs_vle_workgroup **grp_ret,
172 enum z_erofs_vle_work_role *role,
173 z_erofs_vle_owned_workgrp_t *owned_head,
174 bool *hosted)
175{
176 bool tag, primary;
177 struct erofs_workgroup *egrp;
178 struct z_erofs_vle_workgroup *grp;
179 struct z_erofs_vle_work *work;
180
181 egrp = erofs_find_workgroup(sb, idx, &tag);
182 if (egrp == NULL) {
183 *grp_ret = NULL;
184 return NULL;
185 }
186
187 *grp_ret = grp = container_of(egrp,
188 struct z_erofs_vle_workgroup, obj);
189
190#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
191 work = z_erofs_vle_grab_work(grp, pageofs);
192 primary = true;
193#else
194 BUG();
195#endif
196
197 DBG_BUGON(work->pageofs != pageofs);
198
199 /*
200 * lock must be taken first to avoid grp->next == NIL between
201 * claiming workgroup and adding pages:
202 * grp->next != NIL
203 * grp->next = NIL
204 * mutex_unlock_all
205 * mutex_lock(&work->lock)
206 * add all pages to pagevec
207 *
208 * [correct locking case 1]:
209 * mutex_lock(grp->work[a])
210 * ...
211 * mutex_lock(grp->work[b]) mutex_lock(grp->work[c])
212 * ... *role = SECONDARY
213 * add all pages to pagevec
214 * ...
215 * mutex_unlock(grp->work[c])
216 * mutex_lock(grp->work[c])
217 * ...
218 * grp->next = NIL
219 * mutex_unlock_all
220 *
221 * [correct locking case 2]:
222 * mutex_lock(grp->work[b])
223 * ...
224 * mutex_lock(grp->work[a])
225 * ...
226 * mutex_lock(grp->work[c])
227 * ...
228 * grp->next = NIL
229 * mutex_unlock_all
230 * mutex_lock(grp->work[a])
231 * *role = PRIMARY_OWNER
232 * add all pages to pagevec
233 * ...
234 */
235 mutex_lock(&work->lock);
236
237 *hosted = false;
238 if (!primary)
239 *role = Z_EROFS_VLE_WORK_SECONDARY;
240 /* claim the workgroup if possible */
241 else if (try_to_claim_workgroup(grp, owned_head, hosted))
242 *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
243 else
244 *role = Z_EROFS_VLE_WORK_PRIMARY;
245
246 return work;
247}
248
249static struct z_erofs_vle_work *
250z_erofs_vle_work_register(struct super_block *sb,
251 struct z_erofs_vle_workgroup **grp_ret,
252 struct erofs_map_blocks *map,
253 pgoff_t index, unsigned pageofs,
254 enum z_erofs_vle_work_role *role,
255 z_erofs_vle_owned_workgrp_t *owned_head,
256 bool *hosted)
257{
258 bool newgrp = false;
259 struct z_erofs_vle_workgroup *grp = *grp_ret;
260 struct z_erofs_vle_work *work;
261
262#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
263 BUG_ON(grp != NULL);
264#else
265 if (grp != NULL)
266 goto skip;
267#endif
268 /* no available workgroup, let's allocate one */
269 grp = kmem_cache_zalloc(z_erofs_workgroup_cachep, GFP_NOFS);
270 if (unlikely(grp == NULL))
271 return ERR_PTR(-ENOMEM);
272
273 grp->obj.index = index;
274 grp->llen = map->m_llen;
275
276 z_erofs_vle_set_workgrp_fmt(grp,
277 (map->m_flags & EROFS_MAP_ZIPPED) ?
278 Z_EROFS_VLE_WORKGRP_FMT_LZ4 :
279 Z_EROFS_VLE_WORKGRP_FMT_PLAIN);
280 atomic_set(&grp->obj.refcount, 1);
281
282 /* new workgrps have been claimed as type 1 */
283 WRITE_ONCE(grp->next, *owned_head);
284 /* primary and followed work for all new workgrps */
285 *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
286 /* it should be submitted by ourselves */
287 *hosted = true;
288
289 newgrp = true;
290#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF
291skip:
292 /* currently unimplemented */
293 BUG();
294#else
295 work = z_erofs_vle_grab_primary_work(grp);
296#endif
297 work->pageofs = pageofs;
298
299 mutex_init(&work->lock);
300
301 if (newgrp) {
302 int err = erofs_register_workgroup(sb, &grp->obj, 0);
303
304 if (err) {
305 kmem_cache_free(z_erofs_workgroup_cachep, grp);
306 return ERR_PTR(-EAGAIN);
307 }
308 }
309
310 *owned_head = *grp_ret = grp;
311
312 mutex_lock(&work->lock);
313 return work;
314}
315
316static inline void __update_workgrp_llen(struct z_erofs_vle_workgroup *grp,
317 unsigned int llen)
318{
319 while (1) {
320 unsigned int orig_llen = grp->llen;
321
322 if (orig_llen >= llen || orig_llen ==
323 cmpxchg(&grp->llen, orig_llen, llen))
324 break;
325 }
326}
327
328#define builder_is_followed(builder) \
329 ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
330
331static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder,
332 struct super_block *sb,
333 struct erofs_map_blocks *map,
334 z_erofs_vle_owned_workgrp_t *owned_head)
335{
336 const unsigned clusterpages = erofs_clusterpages(EROFS_SB(sb));
337 const erofs_blk_t index = erofs_blknr(map->m_pa);
338 const unsigned pageofs = map->m_la & ~PAGE_MASK;
339 struct z_erofs_vle_workgroup *grp;
340 struct z_erofs_vle_work *work;
341
342 DBG_BUGON(builder->work != NULL);
343
344 /* must be Z_EROFS_WORK_TAIL or the next chained work */
345 DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL);
346 DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
347
348 DBG_BUGON(erofs_blkoff(map->m_pa));
349
350repeat:
351 work = z_erofs_vle_work_lookup(sb, index,
352 pageofs, &grp, &builder->role, owned_head, &builder->hosted);
353 if (work != NULL) {
354 __update_workgrp_llen(grp, map->m_llen);
355 goto got_it;
356 }
357
358 work = z_erofs_vle_work_register(sb, &grp, map, index, pageofs,
359 &builder->role, owned_head, &builder->hosted);
360
361 if (unlikely(work == ERR_PTR(-EAGAIN)))
362 goto repeat;
363
364 if (unlikely(IS_ERR(work)))
365 return PTR_ERR(work);
366got_it:
367 z_erofs_pagevec_ctor_init(&builder->vector,
368 Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, work->vcnt);
369
370 if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) {
371 /* enable possibly in-place decompression */
372 builder->compressed_pages = grp->compressed_pages;
373 builder->compressed_deficit = clusterpages;
374 } else {
375 builder->compressed_pages = NULL;
376 builder->compressed_deficit = 0;
377 }
378
379 builder->grp = grp;
380 builder->work = work;
381 return 0;
382}
383
384/*
385 * keep in mind that no referenced workgroups will be freed
386 * only after a RCU grace period, so rcu_read_lock() could
387 * prevent a workgroup from being freed.
388 */
389static void z_erofs_rcu_callback(struct rcu_head *head)
390{
391 struct z_erofs_vle_work *work = container_of(head,
392 struct z_erofs_vle_work, rcu);
393 struct z_erofs_vle_workgroup *grp =
394 z_erofs_vle_work_workgroup(work, true);
395
396 kmem_cache_free(z_erofs_workgroup_cachep, grp);
397}
398
399void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
400{
401 struct z_erofs_vle_workgroup *const vgrp = container_of(grp,
402 struct z_erofs_vle_workgroup, obj);
403 struct z_erofs_vle_work *const work = &vgrp->work;
404
405 call_rcu(&work->rcu, z_erofs_rcu_callback);
406}
407
408static void __z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp,
409 struct z_erofs_vle_work *work __maybe_unused)
410{
411 erofs_workgroup_put(&grp->obj);
412}
413
414void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
415{
416 struct z_erofs_vle_workgroup *grp =
417 z_erofs_vle_work_workgroup(work, true);
418
419 __z_erofs_vle_work_release(grp, work);
420}
421
422static inline bool
423z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder)
424{
425 struct z_erofs_vle_work *work = builder->work;
426
427 if (work == NULL)
428 return false;
429
430 z_erofs_pagevec_ctor_exit(&builder->vector, false);
431 mutex_unlock(&work->lock);
432
433 /*
434 * if all pending pages are added, don't hold work reference
435 * any longer if the current work isn't hosted by ourselves.
436 */
437 if (!builder->hosted)
438 __z_erofs_vle_work_release(builder->grp, work);
439
440 builder->work = NULL;
441 builder->grp = NULL;
442 return true;
443}
444
445static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
446 gfp_t gfp)
447{
448 struct page *page = erofs_allocpage(pagepool, gfp);
449
450 if (unlikely(page == NULL))
451 return NULL;
452
453 page->mapping = Z_EROFS_MAPPING_STAGING;
454 return page;
455}
456
457struct z_erofs_vle_frontend {
458 struct inode *const inode;
459
460 struct z_erofs_vle_work_builder builder;
461 struct erofs_map_blocks_iter m_iter;
462
463 z_erofs_vle_owned_workgrp_t owned_head;
464
465 bool initial;
466};
467
468#define VLE_FRONTEND_INIT(__i) { \
469 .inode = __i, \
470 .m_iter = { \
471 { .m_llen = 0, .m_plen = 0 }, \
472 .mpage = NULL \
473 }, \
474 .builder = VLE_WORK_BUILDER_INIT(), \
475 .owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \
476 .initial = true, }
477
478static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
479 struct page *page,
480 struct list_head *page_pool)
481{
482 struct super_block *const sb = fe->inode->i_sb;
483 struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
484 struct erofs_map_blocks_iter *const m = &fe->m_iter;
485 struct erofs_map_blocks *const map = &m->map;
486 struct z_erofs_vle_work_builder *const builder = &fe->builder;
487 const loff_t offset = page_offset(page);
488
489 bool tight = builder_is_followed(builder);
490 struct z_erofs_vle_work *work = builder->work;
491
492 enum z_erofs_page_type page_type;
493 unsigned cur, end, spiltted, index;
494 int err;
495
496 /* register locked file pages as online pages in pack */
497 z_erofs_onlinepage_init(page);
498
499 spiltted = 0;
500 end = PAGE_SIZE;
501repeat:
502 cur = end - 1;
503
504 /* lucky, within the range of the current map_blocks */
505 if (offset + cur >= map->m_la &&
506 offset + cur < map->m_la + map->m_llen)
507 goto hitted;
508
509 /* go ahead the next map_blocks */
510 debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);
511
512 if (!z_erofs_vle_work_iter_end(builder))
513 fe->initial = false;
514
515 map->m_la = offset + cur;
516 map->m_llen = 0;
517 err = erofs_map_blocks_iter(fe->inode, map, &m->mpage, 0);
518 if (unlikely(err))
519 goto err_out;
520
521 /* deal with hole (FIXME! broken now) */
522 if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
523 goto hitted;
524
525 DBG_BUGON(map->m_plen != 1 << sbi->clusterbits);
526 BUG_ON(erofs_blkoff(map->m_pa));
527
528 err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head);
529 if (unlikely(err))
530 goto err_out;
531
532 tight &= builder_is_followed(builder);
533 work = builder->work;
534hitted:
535 cur = end - min_t(unsigned, offset + end - map->m_la, end);
536 if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
537 zero_user_segment(page, cur, end);
538 goto next_part;
539 }
540
541 /* let's derive page type */
542 page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
543 (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
544 (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
545 Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
546
547retry:
548 err = z_erofs_vle_work_add_page(builder, page, page_type);
549 /* should allocate an additional staging page for pagevec */
550 if (err == -EAGAIN) {
551 struct page *const newpage =
552 __stagingpage_alloc(page_pool, GFP_NOFS);
553
554 err = z_erofs_vle_work_add_page(builder,
555 newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE);
556 if (!err)
557 goto retry;
558 }
559
560 if (unlikely(err))
561 goto err_out;
562
563 index = page->index - map->m_la / PAGE_SIZE;
564
565 /* FIXME! avoid the last relundant fixup & endio */
566 z_erofs_onlinepage_fixup(page, index, true);
567 ++spiltted;
568
569 /* also update nr_pages and increase queued_pages */
570 work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1);
571next_part:
572 /* can be used for verification */
573 map->m_llen = offset + cur - map->m_la;
574
575 if ((end = cur) > 0)
576 goto repeat;
577
578 /* FIXME! avoid the last relundant fixup & endio */
579 z_erofs_onlinepage_endio(page);
580
581 debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
582 __func__, page, spiltted, map->m_llen);
583 return 0;
584
585err_out:
586 /* TODO: the missing error handing cases */
587 return err;
588}
589
590static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
591{
592 tagptr1_t t = tagptr_init(tagptr1_t, ptr);
593 struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t);
594 bool background = tagptr_unfold_tags(t);
595
596 if (atomic_add_return(bios, &io->pending_bios))
597 return;
598
599 if (background)
600 queue_work(z_erofs_workqueue, &io->u.work);
601 else
602 wake_up(&io->u.wait);
603}
604
605static inline void z_erofs_vle_read_endio(struct bio *bio)
606{
607 const blk_status_t err = bio->bi_status;
608 unsigned i;
609 struct bio_vec *bvec;
610
611 bio_for_each_segment_all(bvec, bio, i) {
612 struct page *page = bvec->bv_page;
613
614 DBG_BUGON(PageUptodate(page));
615 BUG_ON(page->mapping == NULL);
616
617 if (unlikely(err))
618 SetPageError(page);
619 }
620
621 z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
622 bio_put(bio);
623}
624
625static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES];
626static DEFINE_MUTEX(z_pagemap_global_lock);
627
628static int z_erofs_vle_unzip(struct super_block *sb,
629 struct z_erofs_vle_workgroup *grp,
630 struct list_head *page_pool)
631{
632 struct erofs_sb_info *const sbi = EROFS_SB(sb);
633 const unsigned clusterpages = erofs_clusterpages(sbi);
634
635 struct z_erofs_pagevec_ctor ctor;
636 unsigned nr_pages;
637#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
638 unsigned sparsemem_pages = 0;
639#endif
640 struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES];
641 struct page **pages, **compressed_pages, *page;
642 unsigned i, llen;
643
644 enum z_erofs_page_type page_type;
645 bool overlapped;
646 struct z_erofs_vle_work *work;
647 void *vout;
648 int err;
649
650 might_sleep();
651#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
652 work = z_erofs_vle_grab_primary_work(grp);
653#else
654 BUG();
655#endif
656 BUG_ON(!READ_ONCE(work->nr_pages));
657
658 mutex_lock(&work->lock);
659 nr_pages = work->nr_pages;
660
661 if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES))
662 pages = pages_onstack;
663 else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES &&
664 mutex_trylock(&z_pagemap_global_lock))
665 pages = z_pagemap_global;
666 else {
667repeat:
668 pages = kvmalloc_array(nr_pages,
669 sizeof(struct page *), GFP_KERNEL);
670
671 /* fallback to global pagemap for the lowmem scenario */
672 if (unlikely(pages == NULL)) {
673 if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES)
674 goto repeat;
675 else {
676 mutex_lock(&z_pagemap_global_lock);
677 pages = z_pagemap_global;
678 }
679 }
680 }
681
682 for (i = 0; i < nr_pages; ++i)
683 pages[i] = NULL;
684
685 z_erofs_pagevec_ctor_init(&ctor,
686 Z_EROFS_VLE_INLINE_PAGEVECS, work->pagevec, 0);
687
688 for (i = 0; i < work->vcnt; ++i) {
689 unsigned pagenr;
690
691 page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type);
692
693 /* all pages in pagevec ought to be valid */
694 DBG_BUGON(page == NULL);
695 DBG_BUGON(page->mapping == NULL);
696
697 if (z_erofs_gather_if_stagingpage(page_pool, page))
698 continue;
699
700 if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
701 pagenr = 0;
702 else
703 pagenr = z_erofs_onlinepage_index(page);
704
705 BUG_ON(pagenr >= nr_pages);
706
707#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
708 BUG_ON(pages[pagenr] != NULL);
709 ++sparsemem_pages;
710#endif
711 pages[pagenr] = page;
712 }
713
714 z_erofs_pagevec_ctor_exit(&ctor, true);
715
716 overlapped = false;
717 compressed_pages = grp->compressed_pages;
718
719 for (i = 0; i < clusterpages; ++i) {
720 unsigned pagenr;
721
722 page = compressed_pages[i];
723
724 /* all compressed pages ought to be valid */
725 DBG_BUGON(page == NULL);
726 DBG_BUGON(page->mapping == NULL);
727
728 if (z_erofs_is_stagingpage(page))
729 continue;
730
731 /* only non-head page could be reused as a compressed page */
732 pagenr = z_erofs_onlinepage_index(page);
733
734 BUG_ON(pagenr >= nr_pages);
735#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
736 BUG_ON(pages[pagenr] != NULL);
737 ++sparsemem_pages;
738#endif
739 pages[pagenr] = page;
740
741 overlapped = true;
742 }
743
744 llen = (nr_pages << PAGE_SHIFT) - work->pageofs;
745
746 if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) {
747 /* FIXME! this should be fixed in the future */
748 BUG_ON(grp->llen != llen);
749
750 err = z_erofs_vle_plain_copy(compressed_pages, clusterpages,
751 pages, nr_pages, work->pageofs);
752 goto out;
753 }
754
755 if (llen > grp->llen)
756 llen = grp->llen;
757
758 err = z_erofs_vle_unzip_fast_percpu(compressed_pages,
759 clusterpages, pages, llen, work->pageofs,
760 z_erofs_onlinepage_endio);
761 if (err != -ENOTSUPP)
762 goto out_percpu;
763
764#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
765 if (sparsemem_pages >= nr_pages) {
766 BUG_ON(sparsemem_pages > nr_pages);
767 goto skip_allocpage;
768 }
769#endif
770
771 for (i = 0; i < nr_pages; ++i) {
772 if (pages[i] != NULL)
773 continue;
774
775 pages[i] = __stagingpage_alloc(page_pool, GFP_NOFS);
776 }
777
778#ifndef CONFIG_EROFS_FS_ZIP_MULTIREF
779skip_allocpage:
780#endif
781 vout = erofs_vmap(pages, nr_pages);
782
783 err = z_erofs_vle_unzip_vmap(compressed_pages,
784 clusterpages, vout, llen, work->pageofs, overlapped);
785
786 erofs_vunmap(vout, nr_pages);
787
788out:
789 for (i = 0; i < nr_pages; ++i) {
790 page = pages[i];
791 DBG_BUGON(page->mapping == NULL);
792
793 /* recycle all individual staging pages */
794 if (z_erofs_gather_if_stagingpage(page_pool, page))
795 continue;
796
797 if (unlikely(err < 0))
798 SetPageError(page);
799
800 z_erofs_onlinepage_endio(page);
801 }
802
803out_percpu:
804 for (i = 0; i < clusterpages; ++i) {
805 page = compressed_pages[i];
806
807 /* recycle all individual staging pages */
808 (void)z_erofs_gather_if_stagingpage(page_pool, page);
809
810 WRITE_ONCE(compressed_pages[i], NULL);
811 }
812
813 if (pages == z_pagemap_global)
814 mutex_unlock(&z_pagemap_global_lock);
815 else if (unlikely(pages != pages_onstack))
816 kvfree(pages);
817
818 work->nr_pages = 0;
819 work->vcnt = 0;
820
821 /* all work locks MUST be taken before the following line */
822
823 WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL);
824
825 /* all work locks SHOULD be released right now */
826 mutex_unlock(&work->lock);
827
828 z_erofs_vle_work_release(work);
829 return err;
830}
831
832static void z_erofs_vle_unzip_all(struct super_block *sb,
833 struct z_erofs_vle_unzip_io *io,
834 struct list_head *page_pool)
835{
836 z_erofs_vle_owned_workgrp_t owned = io->head;
837
838 while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) {
839 struct z_erofs_vle_workgroup *grp;
840
841 /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
842 DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL);
843
844 /* no possible that 'owned' equals NULL */
845 DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL);
846
847 grp = owned;
848 owned = READ_ONCE(grp->next);
849
850 z_erofs_vle_unzip(sb, grp, page_pool);
851 };
852}
853
854static void z_erofs_vle_unzip_wq(struct work_struct *work)
855{
856 struct z_erofs_vle_unzip_io_sb *iosb = container_of(work,
857 struct z_erofs_vle_unzip_io_sb, io.u.work);
858 LIST_HEAD(page_pool);
859
860 BUG_ON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
861 z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool);
862
863 put_pages_list(&page_pool);
864 kvfree(iosb);
865}
866
867static inline struct z_erofs_vle_unzip_io *
868prepare_io_handler(struct super_block *sb,
869 struct z_erofs_vle_unzip_io *io,
870 bool background)
871{
872 struct z_erofs_vle_unzip_io_sb *iosb;
873
874 if (!background) {
875 /* waitqueue available for foreground io */
876 BUG_ON(io == NULL);
877
878 init_waitqueue_head(&io->u.wait);
879 atomic_set(&io->pending_bios, 0);
880 goto out;
881 }
882
883 if (io != NULL)
884 BUG();
885 else {
886 /* allocate extra io descriptor for background io */
887 iosb = kvzalloc(sizeof(struct z_erofs_vle_unzip_io_sb),
888 GFP_KERNEL | __GFP_NOFAIL);
889 BUG_ON(iosb == NULL);
890
891 io = &iosb->io;
892 }
893
894 iosb->sb = sb;
895 INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
896out:
897 io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
898 return io;
899}
900
901#define __FSIO_1 0
902
/*
 * Submit read bios for every compressed cluster on the owned workgroup
 * chain, closing the chain (TAIL -> TAIL_CLOSED) as it walks.
 *
 * Returns false when there is nothing to submit (empty chain), true after
 * all bios have been issued and the unzip kickoff has been accounted.
 */
static bool z_erofs_vle_submit_all(struct super_block *sb,
                                   z_erofs_vle_owned_workgrp_t owned_head,
                                   struct list_head *pagepool,
                                   struct z_erofs_vle_unzip_io *fg_io,
                                   bool force_fg)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	const unsigned clusterpages = erofs_clusterpages(sbi);
	const gfp_t gfp = GFP_NOFS;
	struct z_erofs_vle_unzip_io *ios[1 + __FSIO_1];
	struct bio *bio;
	tagptr1_t bi_private;
	/* since bio will be NULL, no need to initialize last_index */
	pgoff_t uninitialized_var(last_index);
	bool force_submit = false;
	unsigned nr_bios;

	/* an untouched TAIL link means an empty chain: nothing to submit */
	if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL))
		return false;

	/*
	 * force_fg == 1, (io, fg_io[0]) no io, (io, fg_io[1]) need submit io
	 * force_fg == 0, (io, fg_io[0]) no io; (io[1], bg_io) need submit io
	 */
	if (force_fg) {
		ios[__FSIO_1] = prepare_io_handler(sb, fg_io + __FSIO_1, false);
		bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 0);
	} else {
		ios[__FSIO_1] = prepare_io_handler(sb, NULL, true);
		/* tag bit 1 marks a background io for the endio path */
		bi_private = tagptr_fold(tagptr1_t, ios[__FSIO_1], 1);
	}

	nr_bios = 0;
	force_submit = false;
	bio = NULL;

	/* by default, all need io submission */
	ios[__FSIO_1]->head = owned_head;

	do {
		struct z_erofs_vle_workgroup *grp;
		struct page **compressed_pages, *oldpage, *page;
		pgoff_t first_index;
		unsigned i = 0;
		int err;

		/* no possible 'owned_head' equals the following */
		DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
		DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL);

		grp = owned_head;

		/* close the main owned chain at first */
		owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
			Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);

		first_index = grp->obj.index;
		compressed_pages = grp->compressed_pages;

		/* a non-contiguous cluster forces starting a new bio */
		force_submit |= (first_index != last_index + 1);
repeat:
		/* fulfill all compressed pages */
		oldpage = page = READ_ONCE(compressed_pages[i]);

		if (page != NULL)
			BUG_ON(PageUptodate(page));
		else {
			/* empty slot: back it with a staging page */
			page = __stagingpage_alloc(pagepool, gfp);

			/* lost the race to fill slot i: recycle and retry */
			if (oldpage != cmpxchg(compressed_pages + i,
				oldpage, page)) {
				list_add(&page->lru, pagepool);
				goto repeat;
			}
		}

		if (bio != NULL && force_submit) {
submit_bio_retry:
			__submit_bio(bio, REQ_OP_READ, 0);
			bio = NULL;
		}

		if (bio == NULL) {
			bio = prepare_bio(sb, first_index + i,
				BIO_MAX_PAGES, z_erofs_vle_read_endio);
			bio->bi_private = tagptr_cast_ptr(bi_private);

			++nr_bios;
		}

		/* a short add means the bio is full: submit and retry */
		err = bio_add_page(bio, page, PAGE_SIZE, 0);
		if (err < PAGE_SIZE)
			goto submit_bio_retry;

		force_submit = false;
		last_index = first_index + i;
		if (++i < clusterpages)
			goto repeat;
	} while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL);

	if (bio != NULL)
		__submit_bio(bio, REQ_OP_READ, 0);

	BUG_ON(!nr_bios);

	z_erofs_vle_unzip_kickoff(tagptr_cast_ptr(bi_private), nr_bios);
	return true;
}
1011
1012static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f,
1013 struct list_head *pagepool,
1014 bool force_fg)
1015{
1016 struct super_block *sb = f->inode->i_sb;
1017 struct z_erofs_vle_unzip_io io[1 + __FSIO_1];
1018
1019 if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg))
1020 return;
1021
1022 if (!force_fg)
1023 return;
1024
1025 /* wait until all bios are completed */
1026 wait_event(io[__FSIO_1].u.wait,
1027 !atomic_read(&io[__FSIO_1].pending_bios));
1028
1029 /* let's synchronous decompression */
1030 z_erofs_vle_unzip_all(sb, &io[__FSIO_1], pagepool);
1031}
1032
1033static int z_erofs_vle_normalaccess_readpage(struct file *file,
1034 struct page *page)
1035{
1036 struct inode *const inode = page->mapping->host;
1037 struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
1038 int err;
1039 LIST_HEAD(pagepool);
1040
1041 err = z_erofs_do_read_page(&f, page, &pagepool);
1042 (void)z_erofs_vle_work_iter_end(&f.builder);
1043
1044 if (err) {
1045 errln("%s, failed to read, err [%d]", __func__, err);
1046 goto out;
1047 }
1048
1049 z_erofs_submit_and_unzip(&f, &pagepool, true);
1050out:
1051 if (f.m_iter.mpage != NULL)
1052 put_page(f.m_iter.mpage);
1053
1054 /* clean up the remaining free pages */
1055 put_pages_list(&pagepool);
1056 return 0;
1057}
1058
/*
 * Readahead implementation shared by ->readpages().
 *
 * Pass 1 inserts the pages into the page cache and threads them into a
 * singly-linked list through page_private (head = most recently added).
 * Pass 2 walks that list -- hence in reverse order of insertion -- feeding
 * each page to the VLE frontend, then submits and (optionally) waits.
 */
static inline int __z_erofs_vle_normalaccess_readpages(
	struct file *filp,
	struct address_space *mapping,
	struct list_head *pages, unsigned nr_pages, bool sync)
{
	struct inode *const inode = mapping->host;

	struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
	struct page *head = NULL;
	LIST_HEAD(pagepool);

	for (; nr_pages; --nr_pages) {
		struct page *page = lru_to_page(pages);

		prefetchw(&page->flags);
		list_del(&page->lru);

		/* pages that cannot be added are recycled via pagepool */
		if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
			list_add(&page->lru, &pagepool);
			continue;
		}

		/* page_private must be free to carry the chain link */
		BUG_ON(PagePrivate(page));
		set_page_private(page, (unsigned long)head);
		head = page;
	}

	while (head != NULL) {
		struct page *page = head;
		int err;

		/* traversal in reverse order */
		head = (void *)page_private(page);

		err = z_erofs_do_read_page(&f, page, &pagepool);
		if (err) {
			struct erofs_vnode *vi = EROFS_V(inode);

			errln("%s, readahead error at page %lu of nid %llu",
				__func__, page->index, vi->nid);
		}

		/* drop the page-cache reference taken above */
		put_page(page);
	}

	(void)z_erofs_vle_work_iter_end(&f.builder);

	z_erofs_submit_and_unzip(&f, &pagepool, sync);

	if (f.m_iter.mpage != NULL)
		put_page(f.m_iter.mpage);

	/* clean up the remaining free pages */
	put_pages_list(&pagepool);
	return 0;
}
1116
1117static int z_erofs_vle_normalaccess_readpages(
1118 struct file *filp,
1119 struct address_space *mapping,
1120 struct list_head *pages, unsigned nr_pages)
1121{
1122 return __z_erofs_vle_normalaccess_readpages(filp,
1123 mapping, pages, nr_pages,
1124 nr_pages < 4 /* sync */);
1125}
1126
/* address_space operations for VLE-compressed regular file data */
const struct address_space_operations z_erofs_vle_normalaccess_aops = {
	.readpage = z_erofs_vle_normalaccess_readpage,
	.readpages = z_erofs_vle_normalaccess_readpages,
};
14 1131
15#define __vle_cluster_advise(x, bit, bits) \ 1132#define __vle_cluster_advise(x, bit, bits) \
16 ((le16_to_cpu(x) >> (bit)) & ((1 << (bits)) - 1)) 1133 ((le16_to_cpu(x) >> (bit)) & ((1 << (bits)) - 1))
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
index b34f5bc28d29..3521dfb31906 100644
--- a/drivers/staging/erofs/unzip_vle.h
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -14,9 +14,213 @@
14#define __EROFS_FS_UNZIP_VLE_H 14#define __EROFS_FS_UNZIP_VLE_H
15 15
16#include "internal.h" 16#include "internal.h"
17#include "unzip_pagevec.h"
18
19/*
20 * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
21 * used for temporary allocated pages (via erofs_allocpage),
22 * in order to seperate those from NULL mapping (eg. truncated pages)
23 */
24#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
25
26#define z_erofs_is_stagingpage(page) \
27 ((page)->mapping == Z_EROFS_MAPPING_STAGING)
28
29static inline bool z_erofs_gather_if_stagingpage(struct list_head *page_pool,
30 struct page *page)
31{
32 if (z_erofs_is_stagingpage(page)) {
33 list_add(&page->lru, page_pool);
34 return true;
35 }
36 return false;
37}
38
39/*
40 * Structure fields follow one of the following exclusion rules.
41 *
42 * I: Modifiable by initialization/destruction paths and read-only
43 * for everyone else.
44 *
45 */
17 46
18#define Z_EROFS_VLE_INLINE_PAGEVECS 3 47#define Z_EROFS_VLE_INLINE_PAGEVECS 3
19 48
49struct z_erofs_vle_work {
50 /* struct z_erofs_vle_work *left, *right; */
51
52#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF
53 struct list_head list;
54
55 atomic_t refcount;
56#endif
57 struct mutex lock;
58
59 /* I: decompression offset in page */
60 unsigned short pageofs;
61 unsigned short nr_pages;
62
63 /* L: queued pages in pagevec[] */
64 unsigned vcnt;
65
66 union {
67 /* L: pagevec */
68 erofs_vtptr_t pagevec[Z_EROFS_VLE_INLINE_PAGEVECS];
69 struct rcu_head rcu;
70 };
71};
72
73#define Z_EROFS_VLE_WORKGRP_FMT_PLAIN 0
74#define Z_EROFS_VLE_WORKGRP_FMT_LZ4 1
75#define Z_EROFS_VLE_WORKGRP_FMT_MASK 1
76
77typedef struct z_erofs_vle_workgroup *z_erofs_vle_owned_workgrp_t;
78
79struct z_erofs_vle_workgroup {
80 struct erofs_workgroup obj;
81 struct z_erofs_vle_work work;
82
83 /* next owned workgroup */
84 z_erofs_vle_owned_workgrp_t next;
85
86 /* compressed pages (including multi-usage pages) */
87 struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
88 unsigned int llen, flags;
89};
90
91/* let's avoid the valid 32-bit kernel addresses */
92
93/* the chained workgroup has't submitted io (still open) */
94#define Z_EROFS_VLE_WORKGRP_TAIL ((void *)0x5F0ECAFE)
95/* the chained workgroup has already submitted io */
96#define Z_EROFS_VLE_WORKGRP_TAIL_CLOSED ((void *)0x5F0EDEAD)
97
98#define Z_EROFS_VLE_WORKGRP_NIL (NULL)
99
100#define z_erofs_vle_workgrp_fmt(grp) \
101 ((grp)->flags & Z_EROFS_VLE_WORKGRP_FMT_MASK)
102
103static inline void z_erofs_vle_set_workgrp_fmt(
104 struct z_erofs_vle_workgroup *grp,
105 unsigned int fmt)
106{
107 grp->flags = fmt | (grp->flags & ~Z_EROFS_VLE_WORKGRP_FMT_MASK);
108}
109
110#ifdef CONFIG_EROFS_FS_ZIP_MULTIREF
111#error multiref decompression is unimplemented yet
112#else
113
114#define z_erofs_vle_grab_primary_work(grp) (&(grp)->work)
115#define z_erofs_vle_grab_work(grp, pageofs) (&(grp)->work)
116#define z_erofs_vle_work_workgroup(wrk, primary) \
117 ((primary) ? container_of(wrk, \
118 struct z_erofs_vle_workgroup, work) : \
119 ({ BUG(); (void *)NULL; }))
120
121#endif
122
123#define Z_EROFS_WORKGROUP_SIZE sizeof(struct z_erofs_vle_workgroup)
124
125struct z_erofs_vle_unzip_io {
126 atomic_t pending_bios;
127 z_erofs_vle_owned_workgrp_t head;
128
129 union {
130 wait_queue_head_t wait;
131 struct work_struct work;
132 } u;
133};
134
135struct z_erofs_vle_unzip_io_sb {
136 struct z_erofs_vle_unzip_io io;
137 struct super_block *sb;
138};
139
140#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
141#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
142#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
143
144/*
145 * waiters (aka. ongoing_packs): # to unlock the page
146 * sub-index: 0 - for partial page, >= 1 full page sub-index
147 */
148typedef atomic_t z_erofs_onlinepage_t;
149
150/* type punning */
151union z_erofs_onlinepage_converter {
152 z_erofs_onlinepage_t *o;
153 unsigned long *v;
154};
155
156static inline unsigned z_erofs_onlinepage_index(struct page *page)
157{
158 union z_erofs_onlinepage_converter u;
159
160 BUG_ON(!PagePrivate(page));
161 u.v = &page_private(page);
162
163 return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
164}
165
166static inline void z_erofs_onlinepage_init(struct page *page)
167{
168 union {
169 z_erofs_onlinepage_t o;
170 unsigned long v;
171 /* keep from being unlocked in advance */
172 } u = { .o = ATOMIC_INIT(1) };
173
174 set_page_private(page, u.v);
175 smp_wmb();
176 SetPagePrivate(page);
177}
178
/*
 * Lock-free update of a page's online-page word: optionally record the
 * sub-index 'index' and bump the waiter count when 'down' is set.
 * Retries via cmpxchg until the word is swapped without interference.
 */
static inline void z_erofs_onlinepage_fixup(struct page *page,
	uintptr_t index, bool down)
{
	unsigned long *p, o, v, id;
repeat:
	p = &page_private(page);
	o = READ_ONCE(*p);

	id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
	if (id) {
		/* an index is already recorded; nothing to add */
		if (!index)
			return;

		/* a conflicting sub-index would indicate corruption */
		BUG_ON(id != index);
	}

	/* pack the (possibly new) index with the adjusted waiter count */
	v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
		((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned)down);
	if (cmpxchg(p, o, v) != o)
		goto repeat;
}
200
201static inline void z_erofs_onlinepage_endio(struct page *page)
202{
203 union z_erofs_onlinepage_converter u;
204 unsigned v;
205
206 BUG_ON(!PagePrivate(page));
207 u.v = &page_private(page);
208
209 v = atomic_dec_return(u.o);
210 if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
211 ClearPagePrivate(page);
212 if (!PageError(page))
213 SetPageUptodate(page);
214 unlock_page(page);
215 }
216
217 debugln("%s, page %p value %x", __func__, page, atomic_read(u.o));
218}
219
220#define Z_EROFS_VLE_VMAP_ONSTACK_PAGES \
221 min(THREAD_SIZE / 8 / sizeof(struct page *), 96UL)
222#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES 2048
223
20/* unzip_vle_lz4.c */ 224/* unzip_vle_lz4.c */
21extern int z_erofs_vle_plain_copy(struct page **compressed_pages, 225extern int z_erofs_vle_plain_copy(struct page **compressed_pages,
22 unsigned clusterpages, struct page **pages, 226 unsigned clusterpages, struct page **pages,
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index 0d4eae2f79a8..6530035f8a61 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -12,6 +12,7 @@
12 */ 12 */
13 13
14#include "internal.h" 14#include "internal.h"
15#include <linux/pagevec.h>
15 16
16struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) 17struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
17{ 18{
@@ -98,11 +99,69 @@ int erofs_register_workgroup(struct super_block *sb,
98 return err; 99 return err;
99} 100}
100 101
102extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
103
104int erofs_workgroup_put(struct erofs_workgroup *grp)
105{
106 int count = atomic_dec_return(&grp->refcount);
107
108 if (count == 1)
109 atomic_long_inc(&erofs_global_shrink_cnt);
110 else if (!count) {
111 atomic_long_dec(&erofs_global_shrink_cnt);
112 erofs_workgroup_free_rcu(grp);
113 }
114 return count;
115}
116
/*
 * Scan the per-sb workgroup radix tree and free up to 'nr_shrink' idle
 * workgroups (refcount == 1, i.e. held only by the tree). With 'cleanup'
 * set (umount path), every workgroup is expected to be idle.
 * Returns the number of workgroups freed.
 */
unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
				       unsigned long nr_shrink,
				       bool cleanup)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned freed = 0;

	int i, found;
repeat:
	erofs_workstn_lock(sbi);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
		batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		int cnt;
		/* strip the exceptional-entry tag to recover the pointer */
		struct erofs_workgroup *grp = (void *)
			((unsigned long)batch[i] &
				~RADIX_TREE_EXCEPTIONAL_ENTRY);

		/* resume the next lookup after this workgroup */
		first_index = grp->index + 1;

		cnt = atomic_read(&grp->refcount);
		BUG_ON(cnt <= 0);

		if (cleanup)
			BUG_ON(cnt != 1);

		/* still referenced by users: skip */
		else if (cnt > 1)
			continue;

		/* someone else removed or replaced it concurrently */
		if (radix_tree_delete(&sbi->workstn_tree,
			grp->index) != grp)
			continue;

		/* (rarely) grabbed again when freeing */
		erofs_workgroup_put(grp);

		++freed;
		if (unlikely(!--nr_shrink))
			break;
	}
	erofs_workstn_unlock(sbi);

	/* keep scanning while progress was made and budget remains */
	if (i && nr_shrink)
		goto repeat;
	return freed;
}
107 166
108#endif 167#endif