summaryrefslogtreecommitdiffstats
path: root/fs/erofs/zmap.c
diff options
context:
space:
mode:
authorGao Xiang <hsiangkao@aol.com>2019-08-22 17:36:59 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-08-24 08:20:10 -0400
commit47e4937a4a7ca4184fd282791dfee76c6799966a (patch)
treefc68338c13a00ac74ac9f1a838491bd3f7649c28 /fs/erofs/zmap.c
parentf401441deda68326852560bf70d59e95f585bbb3 (diff)
erofs: move erofs out of staging
The EROFS filesystem has been in linux-staging for a year. EROFS is designed to be a better solution for saving extra storage space with guaranteed end-to-end performance for read-only files, with the help of reduced metadata, fixed-sized output compression and in-place decompression technologies. In the past year, EROFS was greatly improved by many people as a staging driver, self-tested, beta-tested by a large number of our internal users, successfully applied to almost all in-service HUAWEI smartphones as part of EMUI 9.1 and proven to be stable enough to be moved out of staging. EROFS is a self-contained filesystem driver. Although there are still some TODOs to make it more generic, we have a dedicated team that keeps actively working on EROFS in order to make it better along with the evolution of the Linux kernel, like the other in-kernel filesystems. As Pavel suggested, it's better to do this as one commit since git can track moves and all history will be preserved this way. Let's promote it from staging and enhance it more actively as a "real" part of the kernel for wider scenarios!
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Pavel Machek <pavel@denx.de> Cc: David Sterba <dsterba@suse.cz> Cc: Amir Goldstein <amir73il@gmail.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Darrick J. Wong <darrick.wong@oracle.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Jaegeuk Kim <jaegeuk@kernel.org> Cc: Jan Kara <jack@suse.cz> Cc: Richard Weinberger <richard@nod.at> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Chao Yu <yuchao0@huawei.com> Cc: Miao Xie <miaoxie@huawei.com> Cc: Li Guifu <bluce.liguifu@huawei.com> Cc: Fang Wei <fangwei1@huawei.com> Signed-off-by: Gao Xiang <gaoxiang25@huawei.com> Link: https://lore.kernel.org/r/20190822213659.5501-1-hsiangkao@aol.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/erofs/zmap.c')
-rw-r--r--fs/erofs/zmap.c466
1 files changed, 466 insertions, 0 deletions
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
new file mode 100644
index 000000000000..4dc9cec01297
--- /dev/null
+++ b/fs/erofs/zmap.c
@@ -0,0 +1,466 @@
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2018-2019 HUAWEI, Inc.
4 * http://www.huawei.com/
5 * Created by Gao Xiang <gaoxiang25@huawei.com>
6 */
7#include "internal.h"
8#include <asm/unaligned.h>
9#include <trace/events/erofs.h>
10
11int z_erofs_fill_inode(struct inode *inode)
12{
13 struct erofs_vnode *const vi = EROFS_V(inode);
14
15 if (vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
16 vi->z_advise = 0;
17 vi->z_algorithmtype[0] = 0;
18 vi->z_algorithmtype[1] = 0;
19 vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
20 vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
21 vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
22 set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
23 }
24
25 inode->i_mapping->a_ops = &z_erofs_vle_normalaccess_aops;
26 return 0;
27}
28
29static int fill_inode_lazy(struct inode *inode)
30{
31 struct erofs_vnode *const vi = EROFS_V(inode);
32 struct super_block *const sb = inode->i_sb;
33 int err;
34 erofs_off_t pos;
35 struct page *page;
36 void *kaddr;
37 struct z_erofs_map_header *h;
38
39 if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
40 return 0;
41
42 if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_Z_BIT, TASK_KILLABLE))
43 return -ERESTARTSYS;
44
45 err = 0;
46 if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
47 goto out_unlock;
48
49 DBG_BUGON(vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
50
51 pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
52 vi->xattr_isize, 8);
53 page = erofs_get_meta_page(sb, erofs_blknr(pos), false);
54 if (IS_ERR(page)) {
55 err = PTR_ERR(page);
56 goto out_unlock;
57 }
58
59 kaddr = kmap_atomic(page);
60
61 h = kaddr + erofs_blkoff(pos);
62 vi->z_advise = le16_to_cpu(h->h_advise);
63 vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
64 vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
65
66 if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
67 errln("unknown compression format %u for nid %llu, please upgrade kernel",
68 vi->z_algorithmtype[0], vi->nid);
69 err = -EOPNOTSUPP;
70 goto unmap_done;
71 }
72
73 vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
74 vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits +
75 ((h->h_clusterbits >> 3) & 3);
76
77 if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) {
78 errln("unsupported physical clusterbits %u for nid %llu, please upgrade kernel",
79 vi->z_physical_clusterbits[0], vi->nid);
80 err = -EOPNOTSUPP;
81 goto unmap_done;
82 }
83
84 vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
85 ((h->h_clusterbits >> 5) & 7);
86 set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
87unmap_done:
88 kunmap_atomic(kaddr);
89 unlock_page(page);
90 put_page(page);
91out_unlock:
92 clear_and_wake_up_bit(EROFS_V_BL_Z_BIT, &vi->flags);
93 return err;
94}
95
96struct z_erofs_maprecorder {
97 struct inode *inode;
98 struct erofs_map_blocks *map;
99 void *kaddr;
100
101 unsigned long lcn;
102 /* compression extent information gathered */
103 u8 type;
104 u16 clusterofs;
105 u16 delta[2];
106 erofs_blk_t pblk;
107};
108
109static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
110 erofs_blk_t eblk)
111{
112 struct super_block *const sb = m->inode->i_sb;
113 struct erofs_map_blocks *const map = m->map;
114 struct page *mpage = map->mpage;
115
116 if (mpage) {
117 if (mpage->index == eblk) {
118 if (!m->kaddr)
119 m->kaddr = kmap_atomic(mpage);
120 return 0;
121 }
122
123 if (m->kaddr) {
124 kunmap_atomic(m->kaddr);
125 m->kaddr = NULL;
126 }
127 put_page(mpage);
128 }
129
130 mpage = erofs_get_meta_page(sb, eblk, false);
131 if (IS_ERR(mpage)) {
132 map->mpage = NULL;
133 return PTR_ERR(mpage);
134 }
135 m->kaddr = kmap_atomic(mpage);
136 unlock_page(mpage);
137 map->mpage = mpage;
138 return 0;
139}
140
141static int vle_legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
142 unsigned long lcn)
143{
144 struct inode *const inode = m->inode;
145 struct erofs_vnode *const vi = EROFS_V(inode);
146 const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
147 const erofs_off_t pos =
148 Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
149 vi->xattr_isize) +
150 lcn * sizeof(struct z_erofs_vle_decompressed_index);
151 struct z_erofs_vle_decompressed_index *di;
152 unsigned int advise, type;
153 int err;
154
155 err = z_erofs_reload_indexes(m, erofs_blknr(pos));
156 if (err)
157 return err;
158
159 m->lcn = lcn;
160 di = m->kaddr + erofs_blkoff(pos);
161
162 advise = le16_to_cpu(di->di_advise);
163 type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
164 ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
165 switch (type) {
166 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
167 m->clusterofs = 1 << vi->z_logical_clusterbits;
168 m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
169 m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
170 break;
171 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
172 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
173 m->clusterofs = le16_to_cpu(di->di_clusterofs);
174 m->pblk = le32_to_cpu(di->di_u.blkaddr);
175 break;
176 default:
177 DBG_BUGON(1);
178 return -EOPNOTSUPP;
179 }
180 m->type = type;
181 return 0;
182}
183
184static unsigned int decode_compactedbits(unsigned int lobits,
185 unsigned int lomask,
186 u8 *in, unsigned int pos, u8 *type)
187{
188 const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
189 const unsigned int lo = v & lomask;
190
191 *type = (v >> lobits) & 3;
192 return lo;
193}
194
195static int unpack_compacted_index(struct z_erofs_maprecorder *m,
196 unsigned int amortizedshift,
197 unsigned int eofs)
198{
199 struct erofs_vnode *const vi = EROFS_V(m->inode);
200 const unsigned int lclusterbits = vi->z_logical_clusterbits;
201 const unsigned int lomask = (1 << lclusterbits) - 1;
202 unsigned int vcnt, base, lo, encodebits, nblk;
203 int i;
204 u8 *in, type;
205
206 if (1 << amortizedshift == 4)
207 vcnt = 2;
208 else if (1 << amortizedshift == 2 && lclusterbits == 12)
209 vcnt = 16;
210 else
211 return -EOPNOTSUPP;
212
213 encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
214 base = round_down(eofs, vcnt << amortizedshift);
215 in = m->kaddr + base;
216
217 i = (eofs - base) >> amortizedshift;
218
219 lo = decode_compactedbits(lclusterbits, lomask,
220 in, encodebits * i, &type);
221 m->type = type;
222 if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
223 m->clusterofs = 1 << lclusterbits;
224 if (i + 1 != vcnt) {
225 m->delta[0] = lo;
226 return 0;
227 }
228 /*
229 * since the last lcluster in the pack is special,
230 * of which lo saves delta[1] rather than delta[0].
231 * Hence, get delta[0] by the previous lcluster indirectly.
232 */
233 lo = decode_compactedbits(lclusterbits, lomask,
234 in, encodebits * (i - 1), &type);
235 if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
236 lo = 0;
237 m->delta[0] = lo + 1;
238 return 0;
239 }
240 m->clusterofs = lo;
241 m->delta[0] = 0;
242 /* figout out blkaddr (pblk) for HEAD lclusters */
243 nblk = 1;
244 while (i > 0) {
245 --i;
246 lo = decode_compactedbits(lclusterbits, lomask,
247 in, encodebits * i, &type);
248 if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
249 i -= lo;
250
251 if (i >= 0)
252 ++nblk;
253 }
254 in += (vcnt << amortizedshift) - sizeof(__le32);
255 m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
256 return 0;
257}
258
259static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
260 unsigned long lcn)
261{
262 struct inode *const inode = m->inode;
263 struct erofs_vnode *const vi = EROFS_V(inode);
264 const unsigned int lclusterbits = vi->z_logical_clusterbits;
265 const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
266 vi->inode_isize + vi->xattr_isize, 8) +
267 sizeof(struct z_erofs_map_header);
268 const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
269 unsigned int compacted_4b_initial, compacted_2b;
270 unsigned int amortizedshift;
271 erofs_off_t pos;
272 int err;
273
274 if (lclusterbits != 12)
275 return -EOPNOTSUPP;
276
277 if (lcn >= totalidx)
278 return -EINVAL;
279
280 m->lcn = lcn;
281 /* used to align to 32-byte (compacted_2b) alignment */
282 compacted_4b_initial = (32 - ebase % 32) / 4;
283 if (compacted_4b_initial == 32 / 4)
284 compacted_4b_initial = 0;
285
286 if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B)
287 compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
288 else
289 compacted_2b = 0;
290
291 pos = ebase;
292 if (lcn < compacted_4b_initial) {
293 amortizedshift = 2;
294 goto out;
295 }
296 pos += compacted_4b_initial * 4;
297 lcn -= compacted_4b_initial;
298
299 if (lcn < compacted_2b) {
300 amortizedshift = 1;
301 goto out;
302 }
303 pos += compacted_2b * 2;
304 lcn -= compacted_2b;
305 amortizedshift = 2;
306out:
307 pos += lcn * (1 << amortizedshift);
308 err = z_erofs_reload_indexes(m, erofs_blknr(pos));
309 if (err)
310 return err;
311 return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos));
312}
313
314static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m,
315 unsigned int lcn)
316{
317 const unsigned int datamode = EROFS_V(m->inode)->datamode;
318
319 if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
320 return vle_legacy_load_cluster_from_disk(m, lcn);
321
322 if (datamode == EROFS_INODE_FLAT_COMPRESSION)
323 return compacted_load_cluster_from_disk(m, lcn);
324
325 return -EINVAL;
326}
327
328static int vle_extent_lookback(struct z_erofs_maprecorder *m,
329 unsigned int lookback_distance)
330{
331 struct erofs_vnode *const vi = EROFS_V(m->inode);
332 struct erofs_map_blocks *const map = m->map;
333 const unsigned int lclusterbits = vi->z_logical_clusterbits;
334 unsigned long lcn = m->lcn;
335 int err;
336
337 if (lcn < lookback_distance) {
338 errln("bogus lookback distance @ nid %llu", vi->nid);
339 DBG_BUGON(1);
340 return -EFSCORRUPTED;
341 }
342
343 /* load extent head logical cluster if needed */
344 lcn -= lookback_distance;
345 err = vle_load_cluster_from_disk(m, lcn);
346 if (err)
347 return err;
348
349 switch (m->type) {
350 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
351 if (unlikely(!m->delta[0])) {
352 errln("invalid lookback distance 0 at nid %llu",
353 vi->nid);
354 DBG_BUGON(1);
355 return -EFSCORRUPTED;
356 }
357 return vle_extent_lookback(m, m->delta[0]);
358 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
359 map->m_flags &= ~EROFS_MAP_ZIPPED;
360 /* fallthrough */
361 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
362 map->m_la = (lcn << lclusterbits) | m->clusterofs;
363 break;
364 default:
365 errln("unknown type %u at lcn %lu of nid %llu",
366 m->type, lcn, vi->nid);
367 DBG_BUGON(1);
368 return -EOPNOTSUPP;
369 }
370 return 0;
371}
372
373int z_erofs_map_blocks_iter(struct inode *inode,
374 struct erofs_map_blocks *map,
375 int flags)
376{
377 struct erofs_vnode *const vi = EROFS_V(inode);
378 struct z_erofs_maprecorder m = {
379 .inode = inode,
380 .map = map,
381 };
382 int err = 0;
383 unsigned int lclusterbits, endoff;
384 unsigned long long ofs, end;
385
386 trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
387
388 /* when trying to read beyond EOF, leave it unmapped */
389 if (unlikely(map->m_la >= inode->i_size)) {
390 map->m_llen = map->m_la + 1 - inode->i_size;
391 map->m_la = inode->i_size;
392 map->m_flags = 0;
393 goto out;
394 }
395
396 err = fill_inode_lazy(inode);
397 if (err)
398 goto out;
399
400 lclusterbits = vi->z_logical_clusterbits;
401 ofs = map->m_la;
402 m.lcn = ofs >> lclusterbits;
403 endoff = ofs & ((1 << lclusterbits) - 1);
404
405 err = vle_load_cluster_from_disk(&m, m.lcn);
406 if (err)
407 goto unmap_out;
408
409 map->m_flags = EROFS_MAP_ZIPPED; /* by default, compressed */
410 end = (m.lcn + 1ULL) << lclusterbits;
411
412 switch (m.type) {
413 case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
414 if (endoff >= m.clusterofs)
415 map->m_flags &= ~EROFS_MAP_ZIPPED;
416 /* fallthrough */
417 case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
418 if (endoff >= m.clusterofs) {
419 map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
420 break;
421 }
422 /* m.lcn should be >= 1 if endoff < m.clusterofs */
423 if (unlikely(!m.lcn)) {
424 errln("invalid logical cluster 0 at nid %llu",
425 vi->nid);
426 err = -EFSCORRUPTED;
427 goto unmap_out;
428 }
429 end = (m.lcn << lclusterbits) | m.clusterofs;
430 map->m_flags |= EROFS_MAP_FULL_MAPPED;
431 m.delta[0] = 1;
432 /* fallthrough */
433 case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
434 /* get the correspoinding first chunk */
435 err = vle_extent_lookback(&m, m.delta[0]);
436 if (unlikely(err))
437 goto unmap_out;
438 break;
439 default:
440 errln("unknown type %u at offset %llu of nid %llu",
441 m.type, ofs, vi->nid);
442 err = -EOPNOTSUPP;
443 goto unmap_out;
444 }
445
446 map->m_llen = end - map->m_la;
447 map->m_plen = 1 << lclusterbits;
448 map->m_pa = blknr_to_addr(m.pblk);
449 map->m_flags |= EROFS_MAP_MAPPED;
450
451unmap_out:
452 if (m.kaddr)
453 kunmap_atomic(m.kaddr);
454
455out:
456 debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
457 __func__, map->m_la, map->m_pa,
458 map->m_llen, map->m_plen, map->m_flags);
459
460 trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
461
462 /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
463 DBG_BUGON(err < 0 && err != -ENOMEM);
464 return err;
465}
466