erofs: move erofs out of staging

EROFS filesystem has been merged into linux-staging for a year.

EROFS is designed to be a better solution for saving extra storage space with guaranteed end-to-end performance for read-only files, with the help of reduced metadata, fixed-sized output compression and in-place decompression technologies.

In the past year, EROFS was greatly improved by many people as a staging driver, self-tested, beta-tested by a large number of our internal users, successfully applied to almost all in-service HUAWEI smartphones as part of EMUI 9.1, and proven to be stable enough to be moved out of staging.

EROFS is a self-contained filesystem driver. Although there are still some TODOs to make it more generic, we have a dedicated team actively working on EROFS in order to make it better with the evolution of the Linux kernel, like the other in-kernel filesystems.

As Pavel suggested, it's better to do this as one commit since git can do moves and all histories will be saved in this way.

Let's promote it from staging and enhance it more actively as a "real" part of the kernel for wider scenarios!

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Pavel Machek <pavel@denx.de>
Cc: David Sterba <dsterba@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Richard Weinberger <richard@nod.at>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Chao Yu <yuchao0@huawei.com>
Cc: Miao Xie <miaoxie@huawei.com>
Cc: Li Guifu <bluce.liguifu@huawei.com>
Cc: Fang Wei <fangwei1@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Link: https://lore.kernel.org/r/20190822213659.5501-1-hsiangkao@aol.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Gao Xiang <hsiangkao@aol.com> 2019-08-22 17:36:59 -0400
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-08-24 08:20:10 -0400
commit: 47e4937a4a7ca4184fd282791dfee76c6799966a (patch)
tree: fc68338c13a00ac74ac9f1a838491bd3f7649c28 /fs/erofs/data.c
parent: f401441deda68326852560bf70d59e95f585bbb3 (diff)
1 files changed, 423 insertions, 0 deletions
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
new file mode 100644
index 000000000000..fda16ec8863e
--- /dev/null
+++ b/fs/erofs/data.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "internal.h"
+#include <linux/prefetch.h>
+#include <trace/events/erofs.h>
+/*
+ * Completion handler for the read bios issued from this file: every page
+ * carried by the bio is flagged up to date or in error, then unlocked, and
+ * finally the bio reference is dropped.  bio->bi_private holds the super
+ * block, which is only consulted here for fault injection.
+ */
+static inline void read_endio(struct bio *bio)
+{
+        struct super_block *const sb = bio->bi_private;
+        blk_status_t status = bio->bi_status;
+        struct bvec_iter_all iter_all;
+        struct bio_vec *bvec;
+
+        /* fault injection may turn a successful read into an I/O error */
+        if (time_to_inject(EROFS_SB(sb), FAULT_READ_IO)) {
+                erofs_show_injection_info(FAULT_READ_IO);
+                status = BLK_STS_IOERR;
+        }
+
+        bio_for_each_segment_all(bvec, bio, iter_all) {
+                struct page *const pg = bvec->bv_page;
+
+                /* the page is still locked and must not be up to date yet */
+                DBG_BUGON(PageUptodate(pg));
+
+                if (likely(!status))
+                        SetPageUptodate(pg);
+                else
+                        SetPageError(pg);
+
+                /* once unlocked, the page could be reclaimed at any time */
+                unlock_page(pg);
+        }
+        bio_put(bio);
+}
+/*
+ * Get the page for block @blkaddr from the page cache of the backing block
+ * device, reading it from disk when it is not up to date yet.
+ *
+ * @sb:      super block whose block device is read
+ * @blkaddr: block address, also used as the page cache index
+ * @prio:    true is used for dir; adds REQ_PRIO to the read request
+ * @nofail:  allocate with __GFP_NOFAIL and retry a failed read up to
+ *           EROFS_IO_MAX_RETRIES_NOFAIL times
+ *
+ * Returns the page, locked and with a reference held, or an ERR_PTR():
+ * -ENOMEM when no page could be obtained, the error of erofs_grab_bio(),
+ * -EFAULT when the page could not be attached to the bio, or -EIO when the
+ * read did not leave the page up to date.
+ */
+struct page *__erofs_get_meta_page(struct super_block *sb,
+                                   erofs_blk_t blkaddr, bool prio, bool nofail)
+{
+        struct inode *const bd_inode = sb->s_bdev->bd_inode;
+        struct address_space *const mapping = bd_inode->i_mapping;
+        /* prefer retrying in the allocator to blindly looping below */
+        const gfp_t gfp = mapping_gfp_constraint(mapping, ~__GFP_FS) |
+                (nofail ? __GFP_NOFAIL : 0);
+        unsigned int io_retries = nofail ? EROFS_IO_MAX_RETRIES_NOFAIL : 0;
+        struct page *page;
+        int err;
+
+repeat:
+        page = find_or_create_page(mapping, blkaddr, gfp);
+        if (unlikely(!page)) {
+                DBG_BUGON(nofail);
+                return ERR_PTR(-ENOMEM);
+        }
+        DBG_BUGON(!PageLocked(page));
+
+        if (!PageUptodate(page)) {
+                struct bio *bio;
+
+                bio = erofs_grab_bio(sb, blkaddr, 1, sb, read_endio, nofail);
+                if (IS_ERR(bio)) {
+                        DBG_BUGON(nofail);
+                        err = PTR_ERR(bio);
+                        goto err_out;
+                }
+
+                err = bio_add_page(bio, page, PAGE_SIZE, 0);
+                if (unlikely(err != PAGE_SIZE)) {
+                        /* the bio was never submitted, so release it here */
+                        bio_put(bio);
+                        err = -EFAULT;
+                        goto err_out;
+                }
+
+                /* read_endio() unlocks the page when the read completes */
+                __submit_bio(bio, REQ_OP_READ,
+                             REQ_META | (prio ? REQ_PRIO : 0));
+
+                /* wait for the read to finish by retaking the page lock */
+                lock_page(page);
+
+                /* this page has been truncated by others */
+                if (unlikely(page->mapping != mapping)) {
+unlock_repeat:
+                        unlock_page(page);
+                        put_page(page);
+                        goto repeat;
+                }
+
+                /* more likely a read error */
+                if (unlikely(!PageUptodate(page))) {
+                        if (io_retries) {
+                                --io_retries;
+                                goto unlock_repeat;
+                        }
+                        err = -EIO;
+                        goto err_out;
+                }
+        }
+        return page;
+
+err_out:
+        unlock_page(page);
+        put_page(page);
+        return ERR_PTR(err);
+}
+/*
+ * Map the logical offset map->m_la of an uncompressed (flat) inode to a
+ * physical address.
+ *
+ * On success map->m_flags, map->m_pa, map->m_plen and map->m_llen are
+ * filled in:
+ *  - an offset at or beyond i_size is left unmapped with zero length;
+ *  - an offset below the start of the last (possibly inlined) block maps
+ *    linearly from vi->raw_blkaddr;
+ *  - otherwise, for inline layouts, the tail data is mapped right behind
+ *    the on-disk inode and its xattrs and flagged EROFS_MAP_META.
+ *
+ * Returns 0 on success, -EFSCORRUPTED if the inline data would cross a
+ * block boundary, or -EIO on an inconsistent layout.  On failure
+ * map->m_llen is not updated.
+ */
+static int erofs_map_blocks_flatmode(struct inode *inode,
+                                     struct erofs_map_blocks *map,
+                                     int flags)
+{
+        int err = 0;
+        erofs_blk_t nblocks, lastblk;
+        u64 offset = map->m_la;
+        struct erofs_vnode *vi = EROFS_V(inode);
+
+        trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
+
+        nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+        /* for inline layouts the last block is not part of the raw extent */
+        lastblk = nblocks - is_inode_flat_inline(inode);
+
+        if (unlikely(offset >= inode->i_size)) {
+                /* leave out-of-bound access unmapped */
+                map->m_flags = 0;
+                map->m_plen = 0;
+                goto out;
+        }
+
+        /* there is no hole in flatmode */
+        map->m_flags = EROFS_MAP_MAPPED;
+
+        if (offset < blknr_to_addr(lastblk)) {
+                map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
+                map->m_plen = blknr_to_addr(lastblk) - offset;
+        } else if (is_inode_flat_inline(inode)) {
+                /* 2 - inode inline B: inode, [xattrs], inline last blk... */
+                struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+
+                map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
+                        vi->xattr_isize + erofs_blkoff(map->m_la);
+                map->m_plen = inode->i_size - offset;
+
+                /* inline data should be located in one meta block */
+                if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
+                        errln("inline data cross block boundary @ nid %llu",
+                              vi->nid);
+                        DBG_BUGON(1);
+                        err = -EFSCORRUPTED;
+                        goto err_out;
+                }
+
+                map->m_flags |= EROFS_MAP_META;
+        } else {
+                errln("internal error @ nid: %llu (size %llu), m_la 0x%llx",
+                      vi->nid, inode->i_size, map->m_la);
+                DBG_BUGON(1);
+                err = -EIO;
+                goto err_out;
+        }
+
+out:
+        /* flat mode is uncompressed: logical and physical lengths match */
+        map->m_llen = map->m_plen;
+
+err_out:
+        /* hand the real status to the tracepoint instead of a constant 0 */
+        trace_erofs_map_blocks_flatmode_exit(inode, map, flags, err);
+        return err;
+}
+/*
+ * Map map->m_la of @inode, dispatching on the inode layout: compressed
+ * inodes go through z_erofs_map_blocks_iter(), everything else through the
+ * flat-mode mapper.  For the compressed case, any page left in map->mpage
+ * is released before returning.
+ */
+int erofs_map_blocks(struct inode *inode,
+                     struct erofs_map_blocks *map, int flags)
+{
+        int ret;
+
+        if (likely(!is_inode_layout_compression(inode)))
+                return erofs_map_blocks_flatmode(inode, map, flags);
+
+        ret = z_erofs_map_blocks_iter(inode, map, flags);
+
+        /* drop the page reference kept in the map, if there is one */
+        if (map->mpage) {
+                put_page(map->mpage);
+                map->mpage = NULL;
+        }
+        return ret;
+}
+/*
+ * Queue @page for a raw (uncompressed) read, merging it into @bio when it
+ * directly follows the page recorded in *@last_block.
+ *
+ * @bio:        bio currently being built, or NULL (always NULL for readpage)
+ * @mapping:    address space the page belongs to
+ * @page:       page to fill; expected to be locked by the caller
+ * @last_block: index of the last page added to @bio; only read while @bio
+ *              is non-NULL, updated whenever @page is added to a bio
+ * @nblocks:    upper bound on the number of pages of a newly grabbed bio
+ * @ra:         true for readahead, in which case a failure does not mark
+ *              the page in error
+ *
+ * Returns the bio that is still collecting pages, NULL when nothing is
+ * pending any more (the page was completed here or the bio was submitted),
+ * or an ERR_PTR() on failure, in which case @page has been unlocked.
+ */
+static inline struct bio *erofs_read_raw_page(struct bio *bio,
+                                              struct address_space *mapping,
+                                              struct page *page,
+                                              erofs_off_t *last_block,
+                                              unsigned int nblocks,
+                                              bool ra)
+{
+        struct inode *const inode = mapping->host;
+        struct super_block *const sb = inode->i_sb;
+        erofs_off_t current_block = (erofs_off_t)page->index;
+        int err;
+
+        DBG_BUGON(!nblocks);
+
+        if (PageUptodate(page)) {
+                err = 0;
+                goto has_updated;
+        }
+
+        /* note that for readpage case, bio also equals to NULL */
+        if (bio &&
+            /* not continuous */
+            *last_block + 1 != current_block) {
+submit_bio_retry:
+                __submit_bio(bio, REQ_OP_READ, 0);
+                bio = NULL;
+        }
+
+        if (!bio) {
+                struct erofs_map_blocks map = {
+                        .m_la = blknr_to_addr(current_block),
+                };
+                erofs_blk_t blknr;
+                unsigned int blkoff;
+
+                err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+                if (unlikely(err))
+                        goto err_out;
+
+                /* zero out the holed page */
+                if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) {
+                        zero_user_segment(page, 0, PAGE_SIZE);
+                        SetPageUptodate(page);
+
+                        /* imply err = 0, see erofs_map_blocks */
+                        goto has_updated;
+                }
+
+                /* for RAW access mode, m_plen must be equal to m_llen */
+                DBG_BUGON(map.m_plen != map.m_llen);
+
+                blknr = erofs_blknr(map.m_pa);
+                blkoff = erofs_blkoff(map.m_pa);
+
+                /* deal with inline page */
+                if (map.m_flags & EROFS_MAP_META) {
+                        void *vsrc, *vto;
+                        struct page *ipage;
+
+                        DBG_BUGON(map.m_plen > PAGE_SIZE);
+
+                        ipage = erofs_get_meta_page(sb, blknr, false);
+
+                        if (IS_ERR(ipage)) {
+                                err = PTR_ERR(ipage);
+                                goto err_out;
+                        }
+
+                        /* copy the inline data and zero the rest of the page */
+                        vsrc = kmap_atomic(ipage);
+                        vto = kmap_atomic(page);
+                        memcpy(vto, vsrc + blkoff, map.m_plen);
+                        memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
+                        kunmap_atomic(vto);
+                        kunmap_atomic(vsrc);
+                        flush_dcache_page(page);
+
+                        SetPageUptodate(page);
+                        /* TODO: could we unlock the page earlier? */
+                        unlock_page(ipage);
+                        put_page(ipage);
+
+                        /* imply err = 0, see erofs_map_blocks */
+                        goto has_updated;
+                }
+
+                /* pa must be block-aligned for raw reading */
+                DBG_BUGON(erofs_blkoff(map.m_pa));
+
+                /* max # of continuous pages */
+                if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
+                        nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
+                if (nblocks > BIO_MAX_PAGES)
+                        nblocks = BIO_MAX_PAGES;
+
+                bio = erofs_grab_bio(sb, blknr, nblocks, sb,
+                                     read_endio, false);
+                if (IS_ERR(bio)) {
+                        err = PTR_ERR(bio);
+                        bio = NULL;
+                        goto err_out;
+                }
+        }
+
+        err = bio_add_page(bio, page, PAGE_SIZE, 0);
+        /* out of the extent or bio is full */
+        if (err < PAGE_SIZE)
+                goto submit_bio_retry;
+
+        *last_block = current_block;
+
+        /* shift in advance in case of it followed by too many gaps */
+        if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
+                /* err should reassign to 0 after submitting */
+                err = 0;
+                goto submit_bio_out;
+        }
+
+        /* the page stays locked; read_endio() unlocks it on completion */
+        return bio;
+
+err_out:
+        /* for sync reading, set page error immediately */
+        if (!ra) {
+                SetPageError(page);
+                ClearPageUptodate(page);
+        }
+has_updated:
+        unlock_page(page);
+
+        /* if updated manually, continuous pages has a gap */
+        if (bio)
+submit_bio_out:
+                __submit_bio(bio, REQ_OP_READ, 0);
+
+        return unlikely(err) ? ERR_PTR(err) : NULL;
+}
+/*
+ * ->readpage for raw access.
+ *
+ * There are no write or truncate flows, so no inode locking needs to be
+ * held at the moment.
+ */
+static int erofs_raw_access_readpage(struct file *file, struct page *page)
+{
+        erofs_off_t last_block;
+        struct bio *pending;
+
+        trace_erofs_readpage(page, true);
+
+        pending = erofs_read_raw_page(NULL, page->mapping,
+                                      page, &last_block, 1, false);
+        if (!IS_ERR(pending)) {
+                /* a single-page request never leaves a bio behind */
+                DBG_BUGON(pending);
+                return 0;
+        }
+        return PTR_ERR(pending);
+}
+/*
+ * ->readpages for raw access: takes the pages off the tail of @pages one by
+ * one, inserts each into the page cache and queues it for reading, merging
+ * contiguous pages into a shared bio.  Per-page failures are only logged;
+ * the function always returns 0.
+ */
+static int erofs_raw_access_readpages(struct file *filp,
+                                      struct address_space *mapping,
+                                      struct list_head *pages,
+                                      unsigned int nr_pages)
+{
+        const gfp_t gfp = readahead_gfp_mask(mapping);
+        struct page *page = list_last_entry(pages, struct page, lru);
+        struct bio *bio = NULL;
+        erofs_off_t last_block;
+
+        trace_erofs_readpages(mapping->host, page, nr_pages, true);
+
+        while (nr_pages) {
+                page = list_last_entry(pages, struct page, lru);
+
+                prefetchw(&page->flags);
+                list_del(&page->lru);
+
+                if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
+                        bio = erofs_read_raw_page(bio, mapping, page,
+                                                  &last_block, nr_pages, true);
+
+                        /* all the page errors are ignored when readahead */
+                        if (IS_ERR(bio)) {
+                                pr_err("%s, readahead error at page %lu of nid %llu\n",
+                                       __func__, page->index,
+                                       EROFS_V(mapping->host)->nid);
+
+                                bio = NULL;
+                        }
+                }
+
+                /* drop our reference; the page could still be locked */
+                put_page(page);
+                --nr_pages;
+        }
+        DBG_BUGON(!list_empty(pages));
+
+        /* the rare case (end in gaps): flush the bio still being built */
+        if (unlikely(bio))
+                __submit_bio(bio, REQ_OP_READ, 0);
+        return 0;
+}
+/*
+ * get_block callback handed to generic_block_bmap(): looks up @iblock via
+ * erofs_map_blocks() and, when it is mapped, stores the physical block
+ * number in @bh.  @create is not used.
+ *
+ * Returns 0 or the error of erofs_map_blocks().
+ */
+static int erofs_get_block(struct inode *inode, sector_t iblock,
+                           struct buffer_head *bh, int create)
+{
+        struct erofs_map_blocks map = {
+                /* NOTE(review): treats iblock as a 512-byte unit -- confirm */
+                .m_la = iblock << 9,
+        };
+        const int ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+
+        if (!ret && (map.m_flags & EROFS_MAP_MAPPED))
+                bh->b_blocknr = erofs_blknr(map.m_pa);
+
+        return ret;
+}
+/*
+ * ->bmap: for inline layouts, blocks at or beyond the last full block of
+ * the file report 0; everything else is resolved by generic_block_bmap()
+ * with erofs_get_block().
+ */
+static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
+{
+        struct inode *const host = mapping->host;
+        erofs_blk_t full_blks;
+
+        if (!is_inode_flat_inline(host))
+                goto lookup;
+
+        /* number of complete blocks covered by the file size */
+        full_blks = i_size_read(host) >> LOG_BLOCK_SIZE;
+        if (block >> LOG_SECTORS_PER_BLOCK >= full_blks)
+                return 0;
+
+lookup:
+        return generic_block_bmap(mapping, block, erofs_get_block);
+}
+/*
+ * for uncompressed (aligned) files and raw access for other files
+ *
+ * Address space operations table: wires the readpage, readpages and bmap
+ * hooks to the raw-access implementations defined in this file.
+ */
+const struct address_space_operations erofs_raw_access_aops = {
+        .readpage = erofs_raw_access_readpage,
+        .readpages = erofs_raw_access_readpages,
+        .bmap = erofs_bmap,
+};
author	Gao Xiang <hsiangkao@aol.com>	2019-08-22 17:36:59 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2019-08-24 08:20:10 -0400
commit	47e4937a4a7ca4184fd282791dfee76c6799966a (patch)
tree	fc68338c13a00ac74ac9f1a838491bd3f7649c28 /fs/erofs/data.c
parent	f401441deda68326852560bf70d59e95f585bbb3 (diff)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c new file mode 100644 index 000000000000..fda16ec8863e --- /dev/null +++ b/fs/erofs/data.c
@@ -0,0 +1,423 @@
	1	// SPDX-License-Identifier: GPL-2.0-only
	2	/*
	3	* Copyright (C) 2017-2018 HUAWEI, Inc.
	4	* http://www.huawei.com/
	5	* Created by Gao Xiang <gaoxiang25@huawei.com>
	6	*/
	7	#include "internal.h"
	8	#include <linux/prefetch.h>
	9
	10	#include <trace/events/erofs.h>
	11
	12	static inline void read_endio(struct bio *bio)
	13	{
	14	struct super_block *const sb = bio->bi_private;
	15	struct bio_vec *bvec;
	16	blk_status_t err = bio->bi_status;
	17	struct bvec_iter_all iter_all;
	18
	19	if (time_to_inject(EROFS_SB(sb), FAULT_READ_IO)) {
	20	erofs_show_injection_info(FAULT_READ_IO);
	21	err = BLK_STS_IOERR;
	22	}
	23
	24	bio_for_each_segment_all(bvec, bio, iter_all) {
	25	struct page *page = bvec->bv_page;
	26
	27	/* page is already locked */
	28	DBG_BUGON(PageUptodate(page));
	29
	30	if (unlikely(err))
	31	SetPageError(page);
	32	else
	33	SetPageUptodate(page);
	34
	35	unlock_page(page);
	36	/* page could be reclaimed now */
	37	}
	38	bio_put(bio);
	39	}
	40
	41	/* prio -- true is used for dir */
	42	struct page *__erofs_get_meta_page(struct super_block *sb,
	43	erofs_blk_t blkaddr, bool prio, bool nofail)
	44	{
	45	struct inode *const bd_inode = sb->s_bdev->bd_inode;
	46	struct address_space *const mapping = bd_inode->i_mapping;
	47	/* prefer retrying in the allocator to blindly looping below */
	48	const gfp_t gfp = mapping_gfp_constraint(mapping, ~__GFP_FS) |
	49	(nofail ? __GFP_NOFAIL : 0);
	50	unsigned int io_retries = nofail ? EROFS_IO_MAX_RETRIES_NOFAIL : 0;
	51	struct page *page;
	52	int err;
	53
	54	repeat:
	55	page = find_or_create_page(mapping, blkaddr, gfp);
	56	if (unlikely(!page)) {
	57	DBG_BUGON(nofail);
	58	return ERR_PTR(-ENOMEM);
	59	}
	60	DBG_BUGON(!PageLocked(page));
	61
	62	if (!PageUptodate(page)) {
	63	struct bio *bio;
	64
	65	bio = erofs_grab_bio(sb, blkaddr, 1, sb, read_endio, nofail);
	66	if (IS_ERR(bio)) {
	67	DBG_BUGON(nofail);
	68	err = PTR_ERR(bio);
	69	goto err_out;
	70	}
	71
	72	err = bio_add_page(bio, page, PAGE_SIZE, 0);
	73	if (unlikely(err != PAGE_SIZE)) {
	74	err = -EFAULT;
	75	goto err_out;
	76	}
	77
	78	__submit_bio(bio, REQ_OP_READ,
	79	REQ_META | (prio ? REQ_PRIO : 0));
	80
	81	lock_page(page);
	82
	83	/* this page has been truncated by others */
	84	if (unlikely(page->mapping != mapping)) {
	85	unlock_repeat:
	86	unlock_page(page);
	87	put_page(page);
	88	goto repeat;
	89	}
	90
	91	/* more likely a read error */
	92	if (unlikely(!PageUptodate(page))) {
	93	if (io_retries) {
	94	--io_retries;
	95	goto unlock_repeat;
	96	}
	97	err = -EIO;
	98	goto err_out;
	99	}
	100	}
	101	return page;
	102
	103	err_out:
	104	unlock_page(page);
	105	put_page(page);
	106	return ERR_PTR(err);
	107	}
	108
	109	static int erofs_map_blocks_flatmode(struct inode *inode,
	110	struct erofs_map_blocks *map,
	111	int flags)
	112	{
	113	int err = 0;
	114	erofs_blk_t nblocks, lastblk;
	115	u64 offset = map->m_la;
	116	struct erofs_vnode *vi = EROFS_V(inode);
	117
	118	trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
	119
	120	nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
	121	lastblk = nblocks - is_inode_flat_inline(inode);
	122
	123	if (unlikely(offset >= inode->i_size)) {
	124	/* leave out-of-bound access unmapped */
	125	map->m_flags = 0;
	126	map->m_plen = 0;
	127	goto out;
	128	}
	129
	130	/* there is no hole in flatmode */
	131	map->m_flags = EROFS_MAP_MAPPED;
	132
	133	if (offset < blknr_to_addr(lastblk)) {
	134	map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
	135	map->m_plen = blknr_to_addr(lastblk) - offset;
	136	} else if (is_inode_flat_inline(inode)) {
	137	/* 2 - inode inline B: inode, [xattrs], inline last blk... */
	138	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
	139
	140	map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
	141	vi->xattr_isize + erofs_blkoff(map->m_la);
	142	map->m_plen = inode->i_size - offset;
	143
	144	/* inline data should be located in one meta block */
	145	if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
	146	errln("inline data cross block boundary @ nid %llu",
	147	vi->nid);
	148	DBG_BUGON(1);
	149	err = -EFSCORRUPTED;
	150	goto err_out;
	151	}
	152
	153	map->m_flags |= EROFS_MAP_META;
	154	} else {
	155	errln("internal error @ nid: %llu (size %llu), m_la 0x%llx",
	156	vi->nid, inode->i_size, map->m_la);
	157	DBG_BUGON(1);
	158	err = -EIO;
	159	goto err_out;
	160	}
	161
	162	out:
	163	map->m_llen = map->m_plen;
	164
	165	err_out:
	166	trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
	167	return err;
	168	}
	169
	170	int erofs_map_blocks(struct inode *inode,
	171	struct erofs_map_blocks *map, int flags)
	172	{
	173	if (unlikely(is_inode_layout_compression(inode))) {
	174	int err = z_erofs_map_blocks_iter(inode, map, flags);
	175
	176	if (map->mpage) {
	177	put_page(map->mpage);
	178	map->mpage = NULL;
	179	}
	180	return err;
	181	}
	182	return erofs_map_blocks_flatmode(inode, map, flags);
	183	}
	184
	185	static inline struct bio *erofs_read_raw_page(struct bio *bio,
	186	struct address_space *mapping,
	187	struct page *page,
	188	erofs_off_t *last_block,
	189	unsigned int nblocks,
	190	bool ra)
	191	{
	192	struct inode *const inode = mapping->host;
	193	struct super_block *const sb = inode->i_sb;
	194	erofs_off_t current_block = (erofs_off_t)page->index;
	195	int err;
	196
	197	DBG_BUGON(!nblocks);
	198
	199	if (PageUptodate(page)) {
	200	err = 0;
	201	goto has_updated;
	202	}
	203
	204	/* note that for readpage case, bio also equals to NULL */
	205	if (bio &&
	206	/* not continuous */
	207	*last_block + 1 != current_block) {
	208	submit_bio_retry:
	209	__submit_bio(bio, REQ_OP_READ, 0);
	210	bio = NULL;
	211	}
	212
	213	if (!bio) {
	214	struct erofs_map_blocks map = {
	215	.m_la = blknr_to_addr(current_block),
	216	};
	217	erofs_blk_t blknr;
	218	unsigned int blkoff;
	219
	220	err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	221	if (unlikely(err))
	222	goto err_out;
	223
	224	/* zero out the holed page */
	225	if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) {
	226	zero_user_segment(page, 0, PAGE_SIZE);
	227	SetPageUptodate(page);
	228
	229	/* imply err = 0, see erofs_map_blocks */
	230	goto has_updated;
	231	}
	232
	233	/* for RAW access mode, m_plen must be equal to m_llen */
	234	DBG_BUGON(map.m_plen != map.m_llen);
	235
	236	blknr = erofs_blknr(map.m_pa);
	237	blkoff = erofs_blkoff(map.m_pa);
	238
	239	/* deal with inline page */
	240	if (map.m_flags & EROFS_MAP_META) {
	241	void *vsrc, *vto;
	242	struct page *ipage;
	243
	244	DBG_BUGON(map.m_plen > PAGE_SIZE);
	245
	246	ipage = erofs_get_meta_page(inode->i_sb, blknr, 0);
	247
	248	if (IS_ERR(ipage)) {
	249	err = PTR_ERR(ipage);
	250	goto err_out;
	251	}
	252
	253	vsrc = kmap_atomic(ipage);
	254	vto = kmap_atomic(page);
	255	memcpy(vto, vsrc + blkoff, map.m_plen);
	256	memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
	257	kunmap_atomic(vto);
	258	kunmap_atomic(vsrc);
	259	flush_dcache_page(page);
	260
	261	SetPageUptodate(page);
	262	/* TODO: could we unlock the page earlier? */
	263	unlock_page(ipage);
	264	put_page(ipage);
	265
	266	/* imply err = 0, see erofs_map_blocks */
	267	goto has_updated;
	268	}
	269
	270	/* pa must be block-aligned for raw reading */
	271	DBG_BUGON(erofs_blkoff(map.m_pa));
	272
	273	/* max # of continuous pages */
	274	if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
	275	nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
	276	if (nblocks > BIO_MAX_PAGES)
	277	nblocks = BIO_MAX_PAGES;
	278
	279	bio = erofs_grab_bio(sb, blknr, nblocks, sb,
	280	read_endio, false);
	281	if (IS_ERR(bio)) {
	282	err = PTR_ERR(bio);
	283	bio = NULL;
	284	goto err_out;
	285	}
	286	}
	287
	288	err = bio_add_page(bio, page, PAGE_SIZE, 0);
	289	/* out of the extent or bio is full */
	290	if (err < PAGE_SIZE)
	291	goto submit_bio_retry;
	292
	293	*last_block = current_block;
	294
	295	/* shift in advance in case of it followed by too many gaps */
	296	if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
	297	/* err should reassign to 0 after submitting */
	298	err = 0;
	299	goto submit_bio_out;
	300	}
	301
	302	return bio;
	303
	304	err_out:
	305	/* for sync reading, set page error immediately */
	306	if (!ra) {
	307	SetPageError(page);
	308	ClearPageUptodate(page);
	309	}
	310	has_updated:
	311	unlock_page(page);
	312
	313	/* if updated manually, continuous pages has a gap */
	314	if (bio)
	315	submit_bio_out:
	316	__submit_bio(bio, REQ_OP_READ, 0);
	317
	318	return unlikely(err) ? ERR_PTR(err) : NULL;
	319	}
	320
	321	/*
	322	* since we dont have write or truncate flows, so no inode
	323	* locking needs to be held at the moment.
	324	*/
	325	static int erofs_raw_access_readpage(struct file *file, struct page *page)
	326	{
	327	erofs_off_t last_block;
	328	struct bio *bio;
	329
	330	trace_erofs_readpage(page, true);
	331
	332	bio = erofs_read_raw_page(NULL, page->mapping,
	333	page, &last_block, 1, false);
	334
	335	if (IS_ERR(bio))
	336	return PTR_ERR(bio);
	337
	338	DBG_BUGON(bio); /* since we have only one bio -- must be NULL */
	339	return 0;
	340	}
	341
	342	static int erofs_raw_access_readpages(struct file *filp,
	343	struct address_space *mapping,
	344	struct list_head *pages,
	345	unsigned int nr_pages)
	346	{
	347	erofs_off_t last_block;
	348	struct bio *bio = NULL;
	349	gfp_t gfp = readahead_gfp_mask(mapping);
	350	struct page *page = list_last_entry(pages, struct page, lru);
	351
	352	trace_erofs_readpages(mapping->host, page, nr_pages, true);
	353
	354	for (; nr_pages; --nr_pages) {
	355	page = list_entry(pages->prev, struct page, lru);
	356
	357	prefetchw(&page->flags);
	358	list_del(&page->lru);
	359
	360	if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
	361	bio = erofs_read_raw_page(bio, mapping, page,
	362	&last_block, nr_pages, true);
	363
	364	/* all the page errors are ignored when readahead */
	365	if (IS_ERR(bio)) {
	366	pr_err("%s, readahead error at page %lu of nid %llu\n",
	367	__func__, page->index,
	368	EROFS_V(mapping->host)->nid);
	369
	370	bio = NULL;
	371	}
	372	}
	373
	374	/* pages could still be locked */
	375	put_page(page);
	376	}
	377	DBG_BUGON(!list_empty(pages));
	378
	379	/* the rare case (end in gaps) */
	380	if (unlikely(bio))
	381	__submit_bio(bio, REQ_OP_READ, 0);
	382	return 0;
	383	}
	384
	385	static int erofs_get_block(struct inode *inode, sector_t iblock,
	386	struct buffer_head *bh, int create)
	387	{
	388	struct erofs_map_blocks map = {
	389	.m_la = iblock << 9,
	390	};
	391	int err;
	392
	393	err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	394	if (err)
	395	return err;
	396
	397	if (map.m_flags & EROFS_MAP_MAPPED)
	398	bh->b_blocknr = erofs_blknr(map.m_pa);
	399
	400	return err;
	401	}
	402
	403	static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
	404	{
	405	struct inode *inode = mapping->host;
	406
	407	if (is_inode_flat_inline(inode)) {
	408	erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
	409
	410	if (block >> LOG_SECTORS_PER_BLOCK >= blks)
	411	return 0;
	412	}
	413
	414	return generic_block_bmap(mapping, block, erofs_get_block);
	415	}
	416
	417	/* for uncompressed (aligned) files and raw access for other files */
	418	const struct address_space_operations erofs_raw_access_aops = {
	419	.readpage = erofs_raw_access_readpage,
	420	.readpages = erofs_raw_access_readpages,
	421	.bmap = erofs_bmap,
	422	};
	423