path: root/fs/erofs/data.c
author    Gao Xiang <hsiangkao@aol.com>    2019-08-22 17:36:59 -0400
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>    2019-08-24 08:20:10 -0400
commit    47e4937a4a7ca4184fd282791dfee76c6799966a (patch)
tree      fc68338c13a00ac74ac9f1a838491bd3f7649c28 /fs/erofs/data.c
parent    f401441deda68326852560bf70d59e95f585bbb3 (diff)
erofs: move erofs out of staging
The EROFS filesystem has been in linux-staging for a year. EROFS is designed to be a better solution for saving extra storage space, with guaranteed end-to-end performance for read-only files, with the help of reduced metadata, fixed-sized output compression, and in-place decompression technologies.

In the past year, EROFS was greatly improved by many people as a staging driver: it was self-tested, beta-tested by a large number of our internal users, successfully applied to almost all in-service HUAWEI smartphones as part of EMUI 9.1, and proven stable enough to be moved out of staging.

EROFS is a self-contained filesystem driver. Although there are still some TODOs to make it more generic, we have a dedicated team actively working on EROFS to keep improving it along with the evolution of the Linux kernel, as the other in-kernel filesystems do.

As Pavel suggested, it is better to do this as a single commit, since git can track the moves and all history is preserved this way.

Let's promote it from staging and enhance it more actively as a "real" part of the kernel for wider scenarios!

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Pavel Machek <pavel@denx.de>
Cc: David Sterba <dsterba@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Richard Weinberger <richard@nod.at>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Chao Yu <yuchao0@huawei.com>
Cc: Miao Xie <miaoxie@huawei.com>
Cc: Li Guifu <bluce.liguifu@huawei.com>
Cc: Fang Wei <fangwei1@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Link: https://lore.kernel.org/r/20190822213659.5501-1-hsiangkao@aol.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/erofs/data.c')
-rw-r--r--    fs/erofs/data.c    423
1 file changed, 423 insertions(+), 0 deletions(-)
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
new file mode 100644
index 000000000000..fda16ec8863e
--- /dev/null
+++ b/fs/erofs/data.c
@@ -0,0 +1,423 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 * http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/prefetch.h>

#include <trace/events/erofs.h>

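/*
 * bio completion callback for raw (uncompressed) reads: every page in the
 * bio was locked by the submitter, so mark it Uptodate (or Error if the
 * I/O failed) and unlock it here so waiting readers can proceed.
 */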
static inline void read_endio(struct bio *bio)
{
        struct super_block *const sb = bio->bi_private;
        struct bio_vec *bvec;
        blk_status_t err = bio->bi_status;
        struct bvec_iter_all iter_all;

        if (time_to_inject(EROFS_SB(sb), FAULT_READ_IO)) {
                erofs_show_injection_info(FAULT_READ_IO);
                err = BLK_STS_IOERR;
        }

        bio_for_each_segment_all(bvec, bio, iter_all) {
                struct page *page = bvec->bv_page;

                /* page is already locked */
                DBG_BUGON(PageUptodate(page));

                if (unlikely(err))
                        SetPageError(page);
                else
                        SetPageUptodate(page);

                unlock_page(page);
                /* page could be reclaimed now */
        }
        bio_put(bio);
}

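/*
 * read one metadata block at blkaddr through the block device's page
 * cache; with nofail set, allocation failures use __GFP_NOFAIL and read
 * errors are retried instead of being returned to the caller.
 */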
/* prio -- true is used for dir */
struct page *__erofs_get_meta_page(struct super_block *sb,
                                   erofs_blk_t blkaddr, bool prio, bool nofail)
{
        struct inode *const bd_inode = sb->s_bdev->bd_inode;
        struct address_space *const mapping = bd_inode->i_mapping;
        /* prefer retrying in the allocator to blindly looping below */
        const gfp_t gfp = mapping_gfp_constraint(mapping, ~__GFP_FS) |
                (nofail ? __GFP_NOFAIL : 0);
        unsigned int io_retries = nofail ? EROFS_IO_MAX_RETRIES_NOFAIL : 0;
        struct page *page;
        int err;

repeat:
        page = find_or_create_page(mapping, blkaddr, gfp);
        if (unlikely(!page)) {
                DBG_BUGON(nofail);
                return ERR_PTR(-ENOMEM);
        }
        DBG_BUGON(!PageLocked(page));

        if (!PageUptodate(page)) {
                struct bio *bio;

                bio = erofs_grab_bio(sb, blkaddr, 1, sb, read_endio, nofail);
                if (IS_ERR(bio)) {
                        DBG_BUGON(nofail);
                        err = PTR_ERR(bio);
                        goto err_out;
                }

                err = bio_add_page(bio, page, PAGE_SIZE, 0);
                if (unlikely(err != PAGE_SIZE)) {
                        err = -EFAULT;
                        goto err_out;
                }

                __submit_bio(bio, REQ_OP_READ,
                             REQ_META | (prio ? REQ_PRIO : 0));

                lock_page(page);

                /* this page has been truncated by others */
                if (unlikely(page->mapping != mapping)) {
unlock_repeat:
                        unlock_page(page);
                        put_page(page);
                        goto repeat;
                }

                /* more likely a read error */
                if (unlikely(!PageUptodate(page))) {
                        if (io_retries) {
                                --io_retries;
                                goto unlock_repeat;
                        }
                        err = -EIO;
                        goto err_out;
                }
        }
        return page;

err_out:
        unlock_page(page);
        put_page(page);
        return ERR_PTR(err);
}

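/*
 * Map a logical offset of an uncompressed (flat) inode to its physical
 * position on the block device.  Two layouts exist: plain, where all
 * blocks are stored contiguously starting at raw_blkaddr, and inline
 * tail-packing, where the last block lives right after the on-disk
 * inode and its xattrs.
 *
 * Worked example (assuming the default 4KiB block size): a 9000-byte
 * tail-packed inode has nblocks = 3 and lastblk = 2, so offsets below
 * 8192 map into the raw blocks and the remaining 808 bytes map into the
 * inode's metadata block.
 */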
static int erofs_map_blocks_flatmode(struct inode *inode,
                                     struct erofs_map_blocks *map,
                                     int flags)
{
        int err = 0;
        erofs_blk_t nblocks, lastblk;
        u64 offset = map->m_la;
        struct erofs_vnode *vi = EROFS_V(inode);

        trace_erofs_map_blocks_flatmode_enter(inode, map, flags);

        nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
        lastblk = nblocks - is_inode_flat_inline(inode);

        if (unlikely(offset >= inode->i_size)) {
                /* leave out-of-bound access unmapped */
                map->m_flags = 0;
                map->m_plen = 0;
                goto out;
        }

        /* there is no hole in flatmode */
        map->m_flags = EROFS_MAP_MAPPED;

        if (offset < blknr_to_addr(lastblk)) {
                map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
                map->m_plen = blknr_to_addr(lastblk) - offset;
        } else if (is_inode_flat_inline(inode)) {
                /* 2 - inode inline B: inode, [xattrs], inline last blk... */
                struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);

                map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
                        vi->xattr_isize + erofs_blkoff(map->m_la);
                map->m_plen = inode->i_size - offset;

                /* inline data should be located in one meta block */
                if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
                        errln("inline data cross block boundary @ nid %llu",
                              vi->nid);
                        DBG_BUGON(1);
                        err = -EFSCORRUPTED;
                        goto err_out;
                }

                map->m_flags |= EROFS_MAP_META;
        } else {
                errln("internal error @ nid: %llu (size %llu), m_la 0x%llx",
                      vi->nid, inode->i_size, map->m_la);
                DBG_BUGON(1);
                err = -EIO;
                goto err_out;
        }

out:
        map->m_llen = map->m_plen;

err_out:
        trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
        return err;
}

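/*
 * dispatch a mapping request: compressed inodes go through the z_erofs
 * iterator (releasing any meta page it pinned), everything else uses
 * the flat-mode mapping above.
 */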
int erofs_map_blocks(struct inode *inode,
                     struct erofs_map_blocks *map, int flags)
{
        if (unlikely(is_inode_layout_compression(inode))) {
                int err = z_erofs_map_blocks_iter(inode, map, flags);

                if (map->mpage) {
                        put_page(map->mpage);
                        map->mpage = NULL;
                }
                return err;
        }
        return erofs_map_blocks_flatmode(inode, map, flags);
}

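/*
 * read one page of an uncompressed inode, merging physically contiguous
 * pages into the bio passed in by the caller; returns the (possibly new)
 * bio so ->readpages can keep appending, NULL once the bio has been
 * submitted or the page handled inline, or an ERR_PTR on failure.
 */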
static inline struct bio *erofs_read_raw_page(struct bio *bio,
                                              struct address_space *mapping,
                                              struct page *page,
                                              erofs_off_t *last_block,
                                              unsigned int nblocks,
                                              bool ra)
{
        struct inode *const inode = mapping->host;
        struct super_block *const sb = inode->i_sb;
        erofs_off_t current_block = (erofs_off_t)page->index;
        int err;

        DBG_BUGON(!nblocks);

        if (PageUptodate(page)) {
                err = 0;
                goto has_updated;
        }

        /* note that for readpage case, bio also equals to NULL */
        if (bio &&
            /* not continuous */
            *last_block + 1 != current_block) {
submit_bio_retry:
                __submit_bio(bio, REQ_OP_READ, 0);
                bio = NULL;
        }

        if (!bio) {
                struct erofs_map_blocks map = {
                        .m_la = blknr_to_addr(current_block),
                };
                erofs_blk_t blknr;
                unsigned int blkoff;

                err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
                if (unlikely(err))
                        goto err_out;

                /* zero out the holed page */
                if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) {
                        zero_user_segment(page, 0, PAGE_SIZE);
                        SetPageUptodate(page);

                        /* imply err = 0, see erofs_map_blocks */
                        goto has_updated;
                }

                /* for RAW access mode, m_plen must be equal to m_llen */
                DBG_BUGON(map.m_plen != map.m_llen);

                blknr = erofs_blknr(map.m_pa);
                blkoff = erofs_blkoff(map.m_pa);

                /* deal with inline page */
                if (map.m_flags & EROFS_MAP_META) {
                        void *vsrc, *vto;
                        struct page *ipage;

                        DBG_BUGON(map.m_plen > PAGE_SIZE);

                        ipage = erofs_get_meta_page(inode->i_sb, blknr, 0);

                        if (IS_ERR(ipage)) {
                                err = PTR_ERR(ipage);
                                goto err_out;
                        }

                        vsrc = kmap_atomic(ipage);
                        vto = kmap_atomic(page);
                        memcpy(vto, vsrc + blkoff, map.m_plen);
                        memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
                        kunmap_atomic(vto);
                        kunmap_atomic(vsrc);
                        flush_dcache_page(page);

                        SetPageUptodate(page);
                        /* TODO: could we unlock the page earlier? */
                        unlock_page(ipage);
                        put_page(ipage);

                        /* imply err = 0, see erofs_map_blocks */
                        goto has_updated;
                }

                /* pa must be block-aligned for raw reading */
                DBG_BUGON(erofs_blkoff(map.m_pa));

                /* max # of continuous pages */
                if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
                        nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
                if (nblocks > BIO_MAX_PAGES)
                        nblocks = BIO_MAX_PAGES;

                bio = erofs_grab_bio(sb, blknr, nblocks, sb,
                                     read_endio, false);
                if (IS_ERR(bio)) {
                        err = PTR_ERR(bio);
                        bio = NULL;
                        goto err_out;
                }
        }

        err = bio_add_page(bio, page, PAGE_SIZE, 0);
        /* out of the extent or bio is full */
        if (err < PAGE_SIZE)
                goto submit_bio_retry;

        *last_block = current_block;

        /* shift in advance in case of it followed by too many gaps */
        if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
                /* err should reassign to 0 after submitting */
                err = 0;
                goto submit_bio_out;
        }

        return bio;

err_out:
        /* for sync reading, set page error immediately */
        if (!ra) {
                SetPageError(page);
                ClearPageUptodate(page);
        }
has_updated:
        unlock_page(page);

        /* if updated manually, continuous pages has a gap */
        if (bio)
submit_bio_out:
                __submit_bio(bio, REQ_OP_READ, 0);

        return unlikely(err) ? ERR_PTR(err) : NULL;
}

/*
 * since we don't have write or truncate flows, no inode
 * locking needs to be held at the moment.
 */
static int erofs_raw_access_readpage(struct file *file, struct page *page)
{
        erofs_off_t last_block;
        struct bio *bio;

        trace_erofs_readpage(page, true);

        bio = erofs_read_raw_page(NULL, page->mapping,
                                  page, &last_block, 1, false);

        if (IS_ERR(bio))
                return PTR_ERR(bio);

        DBG_BUGON(bio); /* since we have only one bio -- must be NULL */
        return 0;
}

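/*
 * ->readpages: pull pages off the readahead list one by one, add each to
 * the page cache and feed it to erofs_read_raw_page() so physically
 * contiguous blocks share a single bio; errors are only logged since
 * readahead is best-effort.
 */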
static int erofs_raw_access_readpages(struct file *filp,
                                      struct address_space *mapping,
                                      struct list_head *pages,
                                      unsigned int nr_pages)
{
        erofs_off_t last_block;
        struct bio *bio = NULL;
        gfp_t gfp = readahead_gfp_mask(mapping);
        struct page *page = list_last_entry(pages, struct page, lru);

        trace_erofs_readpages(mapping->host, page, nr_pages, true);

        for (; nr_pages; --nr_pages) {
                page = list_entry(pages->prev, struct page, lru);

                prefetchw(&page->flags);
                list_del(&page->lru);

                if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
                        bio = erofs_read_raw_page(bio, mapping, page,
                                                  &last_block, nr_pages, true);

                        /* all the page errors are ignored when readahead */
                        if (IS_ERR(bio)) {
                                pr_err("%s, readahead error at page %lu of nid %llu\n",
                                       __func__, page->index,
                                       EROFS_V(mapping->host)->nid);

                                bio = NULL;
                        }
                }

                /* pages could still be locked */
                put_page(page);
        }
        DBG_BUGON(!list_empty(pages));

        /* the rare case (end in gaps) */
        if (unlikely(bio))
                __submit_bio(bio, REQ_OP_READ, 0);
        return 0;
}

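/*
 * get_block_t helper used only by erofs_bmap() below: the requested
 * block index is converted to a byte offset in 512-byte units, mapped,
 * and the physical block number is reported for mapped extents; create
 * is ignored since the filesystem is read-only.
 */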
static int erofs_get_block(struct inode *inode, sector_t iblock,
                           struct buffer_head *bh, int create)
{
        struct erofs_map_blocks map = {
                .m_la = iblock << 9,
        };
        int err;

        err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
        if (err)
                return err;

        if (map.m_flags & EROFS_MAP_MAPPED)
                bh->b_blocknr = erofs_blknr(map.m_pa);

        return err;
}

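/*
 * ->bmap (FIBMAP) entry point: refuse to map the tail block of an
 * inline (tail-packed) inode, since its data lives inside the inode's
 * metadata block rather than in an addressable raw block.
 */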
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
        struct inode *inode = mapping->host;

        if (is_inode_flat_inline(inode)) {
                erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;

                if (block >> LOG_SECTORS_PER_BLOCK >= blks)
                        return 0;
        }

        return generic_block_bmap(mapping, block, erofs_get_block);
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = {
        .readpage = erofs_raw_access_readpage,
        .readpages = erofs_raw_access_readpages,
        .bmap = erofs_bmap,
};