author     Gao Xiang <hsiangkao@aol.com>                    2019-08-22 17:36:59 -0400
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2019-08-24 08:20:10 -0400
commit     47e4937a4a7ca4184fd282791dfee76c6799966a (patch)
tree       fc68338c13a00ac74ac9f1a838491bd3f7649c28 /fs/erofs/utils.c
parent     f401441deda68326852560bf70d59e95f585bbb3 (diff)
erofs: move erofs out of staging
The EROFS filesystem has been merged into linux-staging for a year. EROFS is designed to be a better solution for saving extra storage space with guaranteed end-to-end performance for read-only files, with the help of reduced metadata, fixed-sized output compression, and in-place decompression technologies.

In the past year, EROFS was greatly improved by many people as a staging driver: self-tested, beta-tested by a large number of our internal users, successfully applied to almost all in-service HUAWEI smartphones as part of EMUI 9.1, and proven to be stable enough to be moved out of staging.

EROFS is a self-contained filesystem driver. Although there are still some TODOs left to make it more generic, we have a dedicated team actively working on EROFS so that it keeps improving along with the evolution of the Linux kernel, as the other in-kernel filesystems do.

As Pavel suggested, it's better to do this as one commit, since git can track moves and all history will be preserved this way.

Let's promote it from staging and enhance it more actively as a "real" part of the kernel for wider scenarios!

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Pavel Machek <pavel@denx.de>
Cc: David Sterba <dsterba@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Richard Weinberger <richard@nod.at>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Chao Yu <yuchao0@huawei.com>
Cc: Miao Xie <miaoxie@huawei.com>
Cc: Li Guifu <bluce.liguifu@huawei.com>
Cc: Fang Wei <fangwei1@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Link: https://lore.kernel.org/r/20190822213659.5501-1-hsiangkao@aol.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/erofs/utils.c')
-rw-r--r--  fs/erofs/utils.c  |  333
1 file changed, 333 insertions(+), 0 deletions(-)
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
new file mode 100644
index 000000000000..1dd041aa0f5a
--- /dev/null
+++ b/fs/erofs/utils.c
@@ -0,0 +1,333 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

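/*
 * Grab one page: reuse a page from the caller-supplied pool of
 * already-allocated (refcount == 1) pages when possible, and fall back
 * to the page allocator otherwise. With nofail, __GFP_NOFAIL is ORed
 * in, so the fallback allocation may block but will not return NULL.
 */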
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail)
{
        struct page *page;

        if (!list_empty(pool)) {
                page = lru_to_page(pool);
                DBG_BUGON(page_ref_count(page) != 1);
                list_del(&page->lru);
        } else {
                page = alloc_pages(gfp | (nofail ? __GFP_NOFAIL : 0), 0);
        }
        return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
        u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

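/*
 * Return the address of the pagenr'th page of this CPU's buffer with
 * preemption disabled, so the caller cannot migrate to another CPU
 * while using it and must not sleep. Preemption is presumably
 * re-enabled by a paired put helper elsewhere (assumed to be
 * erofs_put_pcpubuf() in internal.h; the name is an assumption here).
 */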
void *erofs_get_pcpubuf(unsigned int pagenr)
{
        preempt_disable();
        return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#define __erofs_workgroup_get(grp)     atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)     atomic_dec(&(grp)->refcount)

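/*
 * Workgroup refcount states, as used below:
 *   <= 0: frozen by a concurrent freezer; lookups must wait and retry
 *      1: only the workstation (radix tree) holds the workgroup, so it
 *         is reclaim-only and counted in erofs_global_shrink_cnt
 *    > 1: actively in use; taking a reference at 1 therefore
 *         decreases the global shrink count
 */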
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
        int o;

repeat:
        o = erofs_wait_on_workgroup_freezed(grp);
        if (unlikely(o <= 0))
                return -1;

        if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
                goto repeat;

        /* decrease refcount paired by erofs_workgroup_put */
        if (unlikely(o == 1))
                atomic_long_dec(&erofs_global_shrink_cnt);
        return 0;
}

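/*
 * Lockless lookup: the radix tree is walked under RCU, and a reference
 * is taken before leaving the read-side critical section. If the
 * workgroup is frozen (being reclaimed), drop out of RCU and retry so
 * that the freezer can make progress.
 */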
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                             pgoff_t index, bool *tag)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;

repeat:
        rcu_read_lock();
        grp = radix_tree_lookup(&sbi->workstn_tree, index);
        if (grp) {
                *tag = xa_pointer_tag(grp);
                grp = xa_untag_pointer(grp);

                if (erofs_workgroup_get(grp)) {
                        /* prefer to relax rcu read side */
                        rcu_read_unlock();
                        goto repeat;
                }

                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

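/*
 * Insert a new workgroup into the workstation. The extra reference
 * taken before insertion keeps the refcount at 2, so a concurrent
 * shrinker (which can only freeze at refcount 1) cannot free the
 * workgroup the moment it becomes visible in the tree.
 */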
int erofs_register_workgroup(struct super_block *sb,
                             struct erofs_workgroup *grp,
                             bool tag)
{
        struct erofs_sb_info *sbi;
        int err;

        /* grp shouldn't be broken or used before */
        if (unlikely(atomic_read(&grp->refcount) != 1)) {
                DBG_BUGON(1);
                return -EINVAL;
        }

        err = radix_tree_preload(GFP_NOFS);
        if (err)
                return err;

        sbi = EROFS_SB(sb);
        xa_lock(&sbi->workstn_tree);

        grp = xa_tag_pointer(grp, tag);

        /*
         * Bump up the reference count before making this workgroup
         * visible to other users in order to avoid potential UAF
         * when not serialized by workstn_lock.
         */
        __erofs_workgroup_get(grp);

        err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
        if (unlikely(err))
                /*
                 * it's safe to decrease since the workgroup isn't visible
                 * and refcount >= 2 (cannot be frozen).
                 */
                __erofs_workgroup_put(grp);

        xa_unlock(&sbi->workstn_tree);
        radix_tree_preload_end();
        return err;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

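/*
 * Drop one reference. Hitting 1 means only the workstation still owns
 * the workgroup, so it becomes reclaim-only and the global shrink
 * count goes up; hitting 0 means the last reference is gone and the
 * workgroup can be freed through RCU.
 */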
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
        int count = atomic_dec_return(&grp->refcount);

        if (count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        else if (!count)
                __erofs_workgroup_free(grp);
        return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
        erofs_workgroup_unfreeze(grp, 0);
        __erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                           struct erofs_workgroup *grp,
                                           bool cleanup)
{
        /*
         * If managed cache is on, the refcount of workgroups
         * themselves could be < 0 (frozen). In other words,
         * there is no guarantee that all refcounts are > 0.
         */
        if (!erofs_workgroup_try_to_freeze(grp, 1))
                return false;

        /*
         * Note that all cached pages should be unattached
         * before being deleted from the radix tree. Otherwise some
         * cached pages could still be attached to the orphan
         * old workgroup when the new one is available in the tree.
         */
        if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
                erofs_workgroup_unfreeze(grp, 1);
                return false;
        }

        /*
         * It's impossible to fail after the workgroup is frozen;
         * however, in order to avoid some race conditions, add a
         * DBG_BUGON to observe this in advance.
         */
        DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
                                                     grp->index)) != grp);

        /*
         * If managed cache is on, the last refcount should indicate
         * the related workstation.
         */
        erofs_workgroup_unfreeze_final(grp);
        return true;
}

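/*
 * Scan up to nr_shrink workgroups in gang-lookup batches of
 * PAGEVEC_SIZE under the workstation lock, releasing each one that can
 * be frozen and whose cached pages can be freed. first_index advances
 * past the last visited workgroup, so each repeat pass resumes where
 * the previous batch ended.
 */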
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                              unsigned long nr_shrink,
                                              bool cleanup)
{
        pgoff_t first_index = 0;
        void *batch[PAGEVEC_SIZE];
        unsigned int freed = 0;

        int i, found;
repeat:
        xa_lock(&sbi->workstn_tree);

        found = radix_tree_gang_lookup(&sbi->workstn_tree,
                                       batch, first_index, PAGEVEC_SIZE);

        for (i = 0; i < found; ++i) {
                struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

                first_index = grp->index + 1;

                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
                        continue;

                ++freed;
                if (unlikely(!--nr_shrink))
                        break;
        }
        xa_unlock(&sbi->workstn_tree);

        if (i && nr_shrink)
                goto repeat;
        return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

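/*
 * Each mounted instance registers itself on erofs_sb_list so that the
 * single global shrinker can reach every superblock; umount_mutex
 * keeps shrinking and unmounting from racing on the same instance.
 */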
void erofs_shrinker_register(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);

        mutex_lock(&sbi->umount_mutex);
        erofs_shrink_workstation(sbi, ~0UL, true);

        spin_lock(&erofs_sb_list_lock);
        list_del(&sbi->list);
        spin_unlock(&erofs_sb_list_lock);
        mutex_unlock(&sbi->umount_mutex);
}

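/*
 * Memory-shrinker hooks: ->count_objects reports how many workgroups
 * are currently reclaim-only (the global shrink count), and
 * ->scan_objects walks the mounted superblocks round-robin, shrinking
 * each workstation until sc->nr_to_scan objects have been freed.
 */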
static unsigned long erofs_shrink_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                       struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;

        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we stop
                 * when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

                freed += erofs_shrink_workstation(sbi, nr, false);

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}

static struct shrinker erofs_shrinker_info = {
        .scan_objects = erofs_shrink_scan,
        .count_objects = erofs_shrink_count,
        .seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
        return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
        unregister_shrinker(&erofs_shrinker_info);
}
#endif  /* !CONFIG_EROFS_FS_ZIP */