From e7c24607b5d68a4cdc56e09d70a3c8bae5f0519f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 10 Apr 2014 20:54:51 -0400 Subject: kill iov_iter_copy_from_user() all callers can use copy_page_from_iter() and it actually simplifies them. Signed-off-by: Al Viro --- mm/iov_iter.c | 27 --------------------------- mm/process_vm_access.c | 6 +----- 2 files changed, 1 insertion(+), 32 deletions(-) (limited to 'mm') diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 10e46cd721de..22ec1ef068a8 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -129,33 +129,6 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, } EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); -/* - * This has the same sideeffects and return value as - * iov_iter_copy_from_user_atomic(). - * The difference is that it attempts to resolve faults. - * Page must not be locked. - */ -size_t iov_iter_copy_from_user(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes) -{ - char *kaddr; - size_t copied; - - kaddr = kmap(page); - if (likely(i->nr_segs == 1)) { - int left; - char __user *buf = i->iov->iov_base + i->iov_offset; - left = __copy_from_user(kaddr + offset, buf, bytes); - copied = bytes - left; - } else { - copied = __iovec_copy_from_user_inatomic(kaddr + offset, - i->iov, i->iov_offset, bytes); - } - kunmap(page); - return copied; -} -EXPORT_SYMBOL(iov_iter_copy_from_user); - void iov_iter_advance(struct iov_iter *i, size_t bytes) { BUG_ON(i->count < bytes); diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 8505c9262b35..f32b1fbbfe69 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -46,11 +46,7 @@ static int process_vm_rw_pages(struct page **pages, copy = len; if (vm_write) { - if (copy > iov_iter_count(iter)) - copy = iov_iter_count(iter); - copied = iov_iter_copy_from_user(page, iter, - offset, copy); - iov_iter_advance(iter, copied); + copied = copy_page_from_iter(page, offset, copy, iter); set_page_dirty_lock(page); } else { copied = 
copy_page_to_iter(page, offset, copy, iter); -- cgit v1.2.2 From f8579f8673b7ecdb7a81d5d5bb1d981093d9aa94 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Mar 2014 22:03:20 -0500 Subject: generic_file_direct_write(): switch to iov_iter Signed-off-by: Al Viro --- mm/filemap.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 000a220e2a41..a840890ed39f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2385,9 +2385,8 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(pagecache_write_end); ssize_t -generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long *nr_segs, loff_t pos, - size_t count, size_t ocount) +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, + loff_t pos, size_t count, size_t ocount) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -2397,9 +2396,9 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, pgoff_t end; if (count != ocount) - *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count); + from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count); - write_len = iov_length(iov, *nr_segs); + write_len = iov_length(from->iov, from->nr_segs); end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); @@ -2426,7 +2425,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, } } - written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs); + written = mapping->a_ops->direct_IO(WRITE, iocb, from->iov, pos, from->nr_segs); /* * Finally, try again to invalidate clean pages which might have been @@ -2443,6 +2442,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, if (written > 0) { pos += written; + iov_iter_advance(from, written); if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) { 
i_size_write(inode, pos); mark_inode_dirty(inode); @@ -2645,11 +2645,10 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; - written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos, + written = generic_file_direct_write(iocb, &from, pos, count, ocount); if (written < 0 || written == count) goto out; - iov_iter_advance(&from, written); /* * direct-io write to a hole: fall through to buffered I/O -- cgit v1.2.2 From cb66a7a1f149ff705fa37cad6d1252b046e0ad4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 15:24:06 -0500 Subject: kill generic_segment_checks() all callers of ->aio_read() and ->aio_write() have iov/nr_segs already checked - generic_segment_checks() done after that is just an odd way to spell iov_length(). Signed-off-by: Al Viro --- mm/filemap.c | 53 +++-------------------------------------------------- mm/shmem.c | 7 ++----- 2 files changed, 5 insertions(+), 55 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index a840890ed39f..7c1417b0bd7b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1663,45 +1663,6 @@ out: return written ? written : error; } -/* - * Performs necessary checks before doing a write - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @count: number of bytes to write - * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE - * - * Adjust number of segments and amount of bytes to write (nr_segs should be - * properly initialized first). Returns appropriate error code that caller - * should return or zero in case that write should be allowed. 
- */ -int generic_segment_checks(const struct iovec *iov, - unsigned long *nr_segs, size_t *count, int access_flags) -{ - unsigned long seg; - size_t cnt = 0; - for (seg = 0; seg < *nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - cnt += iv->iov_len; - if (unlikely((ssize_t)(cnt|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(access_flags, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - *nr_segs = seg; - cnt -= iv->iov_len; /* This segment is no good */ - break; - } - *count = cnt; - return 0; -} -EXPORT_SYMBOL(generic_segment_checks); - /** * generic_file_aio_read - generic filesystem read routine * @iocb: kernel I/O control block @@ -1717,15 +1678,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *filp = iocb->ki_filp; - ssize_t retval; + ssize_t retval = 0; size_t count; loff_t *ppos = &iocb->ki_pos; struct iov_iter i; - count = 0; - retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (retval) - return retval; + count = iov_length(iov, nr_segs); iov_iter_init(&i, iov, nr_segs, count, 0); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ @@ -2615,12 +2573,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t status; struct iov_iter from; - ocount = 0; - err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); - if (err) - return err; - - count = ocount; + count = ocount = iov_length(iov, nr_segs); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; diff --git a/mm/shmem.c b/mm/shmem.c index 9f70e02111c6..2a93e625adaf 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1412,14 +1412,11 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, unsigned long offset; enum sgp_type sgp = SGP_READ; int error = 
0; - ssize_t retval; - size_t count; + ssize_t retval = 0; + size_t count = iov_length(iov, nr_segs); loff_t *ppos = &iocb->ki_pos; struct iov_iter iter; - retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (retval) - return retval; iov_iter_init(&iter, iov, nr_segs, count, 0); /* -- cgit v1.2.2 From d8d3d94b80aa1a1c0ca75c58b8abdc7356f38418 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 21:27:34 -0500 Subject: pass iov_iter to ->direct_IO() unmodified, for now Signed-off-by: Al Viro --- mm/filemap.c | 9 ++++----- mm/page_io.c | 6 ++++-- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 7c1417b0bd7b..139641274f1e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1699,10 +1699,9 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, size = i_size_read(inode); retval = filemap_write_and_wait_range(mapping, pos, pos + iov_length(iov, nr_segs) - 1); - if (!retval) { - retval = mapping->a_ops->direct_IO(READ, iocb, - iov, pos, nr_segs); - } + if (!retval) + retval = mapping->a_ops->direct_IO(READ, iocb, &i, pos); + if (retval > 0) { *ppos = pos + retval; count -= retval; @@ -2383,7 +2382,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, } } - written = mapping->a_ops->direct_IO(WRITE, iocb, from->iov, pos, from->nr_segs); + written = mapping->a_ops->direct_IO(WRITE, iocb, from, pos); /* * Finally, try again to invalidate clean pages which might have been diff --git a/mm/page_io.c b/mm/page_io.c index 7c59ef681381..0ed0644c73db 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -263,16 +263,18 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, .iov_base = kmap(page), .iov_len = PAGE_SIZE, }; + struct iov_iter from; init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); kiocb.ki_nbytes = PAGE_SIZE; + iov_iter_init(&from, &iov, 1, PAGE_SIZE, 0); set_page_writeback(page); unlock_page(page); ret = 
mapping->a_ops->direct_IO(KERNEL_WRITE, - &kiocb, &iov, - kiocb.ki_pos, 1); + &kiocb, &from, + kiocb.ki_pos); kunmap(page); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); -- cgit v1.2.2 From a6cbcd4a4a85e2fdb0b3344b88df2e8b3d526b9e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 22:38:00 -0500 Subject: get rid of pointless iov_length() in ->direct_IO() all callers have iov_length(iter->iov, iter->nr_segs) == iov_iter_count(iter) Signed-off-by: Al Viro --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 139641274f1e..70c048ea36e0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1698,7 +1698,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, goto out; /* skip atime */ size = i_size_read(inode); retval = filemap_write_and_wait_range(mapping, pos, - pos + iov_length(iov, nr_segs) - 1); + pos + count - 1); if (!retval) retval = mapping->a_ops->direct_IO(READ, iocb, &i, pos); -- cgit v1.2.2 From 26978b8b4d83c46f4310b253db70fa9e65149e7c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Mar 2014 14:08:45 -0400 Subject: give ->direct_IO() a copy of iov_iter the thing is, we want to advance what's given to ->direct_IO() as we are forming the request; however, the callers care about the amount of data actually transferred, not the amount we tried to transfer. It's more convenient to allow ->direct_IO() instances to use iov_iter_advance() on the copy of iov_iter, leaving the actual advancing of the original to caller. 
Signed-off-by: Al Viro --- mm/filemap.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 70c048ea36e0..866f4ae8223b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1699,8 +1699,10 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, size = i_size_read(inode); retval = filemap_write_and_wait_range(mapping, pos, pos + count - 1); - if (!retval) - retval = mapping->a_ops->direct_IO(READ, iocb, &i, pos); + if (!retval) { + struct iov_iter data = i; + retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos); + } if (retval > 0) { *ppos = pos + retval; @@ -2351,6 +2353,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, ssize_t written; size_t write_len; pgoff_t end; + struct iov_iter data; if (count != ocount) from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count); @@ -2382,7 +2385,8 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, } } - written = mapping->a_ops->direct_IO(WRITE, iocb, from, pos); + data = *from; + written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos); /* * Finally, try again to invalidate clean pages which might have been -- cgit v1.2.2 From 886a39115005ced8b15ab067c9c2a8d546b40a5e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 13:50:45 -0500 Subject: new primitive: iov_iter_alignment() returns the value aligned as badly as the worst remaining segment in iov_iter is. Use instead of open-coded equivalents. 
Signed-off-by: Al Viro --- mm/iov_iter.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'mm') diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 22ec1ef068a8..2f762cc21080 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -195,3 +195,28 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i) return min(i->count, iov->iov_len - i->iov_offset); } EXPORT_SYMBOL(iov_iter_single_seg_count); + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + const struct iovec *iov = i->iov; + unsigned long res; + size_t size = i->count; + size_t n; + + if (!size) + return 0; + + res = (unsigned long)iov->iov_base + i->iov_offset; + n = iov->iov_len - i->iov_offset; + if (n >= size) + return res | size; + size -= n; + res |= n; + while (size > (++iov)->iov_len) { + res |= (unsigned long)iov->iov_base | iov->iov_len; + size -= iov->iov_len; + } + res |= (unsigned long)iov->iov_base | size; + return res; +} +EXPORT_SYMBOL(iov_iter_alignment); -- cgit v1.2.2 From ed978a811ec528dbe40243605c3afab55892f722 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 22:53:04 -0500 Subject: new helper: generic_file_read_iter() iov_iter-using variant of generic_file_aio_read(). Some callers converted. Note that it's still not quite there for use as ->read_iter() - we depend on having zero iter->iov_offset in O_DIRECT case. Fortunately, that's true for all converted callers (and for generic_file_aio_read() itself). Signed-off-by: Al Viro --- mm/filemap.c | 67 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 33 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 866f4ae8223b..a7f79e90209c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1663,55 +1663,34 @@ out: return written ? 
written : error; } -/** - * generic_file_aio_read - generic filesystem read routine - * @iocb: kernel I/O control block - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @pos: current file position - * - * This is the "read()" routine for all filesystems - * that can use the page cache directly. - */ ssize_t -generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct file *filp = iocb->ki_filp; + struct file *file = iocb->ki_filp; ssize_t retval = 0; - size_t count; loff_t *ppos = &iocb->ki_pos; - struct iov_iter i; - - count = iov_length(iov, nr_segs); - iov_iter_init(&i, iov, nr_segs, count, 0); + loff_t pos = *ppos; /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ - if (filp->f_flags & O_DIRECT) { + if (file->f_flags & O_DIRECT) { + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); loff_t size; - struct address_space *mapping; - struct inode *inode; - mapping = filp->f_mapping; - inode = mapping->host; if (!count) goto out; /* skip atime */ size = i_size_read(inode); retval = filemap_write_and_wait_range(mapping, pos, pos + count - 1); if (!retval) { - struct iov_iter data = i; + struct iov_iter data = *iter; retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos); } if (retval > 0) { *ppos = pos + retval; - count -= retval; - /* - * If we did a short DIO read we need to skip the - * section of the iov that we've already read data into. - */ - iov_iter_advance(&i, retval); + iov_iter_advance(iter, retval); } /* @@ -1722,16 +1701,38 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, * and return. Otherwise fallthrough to buffered io for * the rest of the read. 
*/ - if (retval < 0 || !count || *ppos >= size) { - file_accessed(filp); + if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) { + file_accessed(file); goto out; } } - retval = do_generic_file_read(filp, ppos, &i, retval); + retval = do_generic_file_read(file, ppos, iter, retval); out: return retval; } +EXPORT_SYMBOL(generic_file_read_iter); + +/** + * generic_file_aio_read - generic filesystem read routine + * @iocb: kernel I/O control block + * @iov: io vector request + * @nr_segs: number of segments in the iovec + * @pos: current file position + * + * This is the "read()" routine for all filesystems + * that can use the page cache directly. + */ +ssize_t +generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + size_t count = iov_length(iov, nr_segs); + struct iov_iter i; + + iov_iter_init(&i, iov, nr_segs, count, 0); + return generic_file_read_iter(iocb, &i); +} EXPORT_SYMBOL(generic_file_aio_read); #ifdef CONFIG_MMU -- cgit v1.2.2 From 71d8e532b1549a478e6a6a8a44f309d050294d00 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 19:28:09 -0500 Subject: start adding the tag to iov_iter For now, just use the same thing we pass to ->direct_IO() - it's all iovec-based at the moment. Pass it explicitly to iov_iter_init() and account for kvec vs. iovec in there, by the same kludge NFS ->direct_IO() uses. 
Signed-off-by: Al Viro --- mm/filemap.c | 4 ++-- mm/iov_iter.c | 15 +++++++++++++++ mm/page_io.c | 2 +- mm/process_vm_access.c | 4 ++-- mm/shmem.c | 2 +- 5 files changed, 21 insertions(+), 6 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index a7f79e90209c..3aeaf2df4135 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1730,7 +1730,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, size_t count = iov_length(iov, nr_segs); struct iov_iter i; - iov_iter_init(&i, iov, nr_segs, count, 0); + iov_iter_init(&i, READ, iov, nr_segs, count); return generic_file_read_iter(iocb, &i); } EXPORT_SYMBOL(generic_file_aio_read); @@ -2596,7 +2596,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - iov_iter_init(&from, iov, nr_segs, count, 0); + iov_iter_init(&from, WRITE, iov, nr_segs, count); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 2f762cc21080..e2c9a2db4350 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -220,3 +220,18 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) return res; } EXPORT_SYMBOL(iov_iter_alignment); + +void iov_iter_init(struct iov_iter *i, int direction, + const struct iovec *iov, unsigned long nr_segs, + size_t count) +{ + /* It will get better. Eventually... 
*/ + if (segment_eq(get_fs(), KERNEL_DS)) + direction |= REQ_KERNEL; + i->type = direction; + i->iov = iov; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_init); diff --git a/mm/page_io.c b/mm/page_io.c index 0ed0644c73db..313bfedb75d1 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -268,7 +268,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); kiocb.ki_nbytes = PAGE_SIZE; - iov_iter_init(&from, &iov, 1, PAGE_SIZE, 0); + iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE); set_page_writeback(page); unlock_page(page); diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index f32b1fbbfe69..5077afcd9e11 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -274,7 +274,7 @@ static ssize_t process_vm_rw(pid_t pid, if (rc <= 0) goto free_iovecs; - iov_iter_init(&iter, iov_l, liovcnt, rc, 0); + iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc); rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, iovstack_r, &iov_r); @@ -337,7 +337,7 @@ compat_process_vm_rw(compat_pid_t pid, &iov_l); if (rc <= 0) goto free_iovecs; - iov_iter_init(&iter, iov_l, liovcnt, rc, 0); + iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc); rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, iovstack_r, &iov_r); diff --git a/mm/shmem.c b/mm/shmem.c index 2a93e625adaf..e0b76696c3f9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1417,7 +1417,7 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, loff_t *ppos = &iocb->ki_pos; struct iov_iter iter; - iov_iter_init(&iter, iov, nr_segs, count, 0); + iov_iter_init(&iter, READ, iov, nr_segs, count); /* * Might this read be for a stacking filesystem? 
Then when reading -- cgit v1.2.2 From 7b2c99d15559e285384c742db52316802e24b0bd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Mar 2014 04:05:57 -0400 Subject: new helper: iov_iter_get_pages() iov_iter_get_pages(iter, pages, maxsize, &start) grabs references pinning the pages of up to maxsize of (contiguous) data from iter. Returns the amount of memory grabbed or -error. In case of success, the requested area begins at offset start in pages[0] and runs through pages[1], etc. Less than requested amount might be returned - either because the contiguous area in the beginning of iterator is smaller than requested, or because the kernel failed to pin that many pages. direct-io.c switched to using iov_iter_get_pages() Signed-off-by: Al Viro --- mm/iov_iter.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'mm') diff --git a/mm/iov_iter.c b/mm/iov_iter.c index e2c9a2db4350..45204cd5ccd8 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -235,3 +235,30 @@ void iov_iter_init(struct iov_iter *i, int direction, i->count = count; } EXPORT_SYMBOL(iov_iter_init); + +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, + size_t *start) +{ + size_t offset = i->iov_offset; + const struct iovec *iov = i->iov; + size_t len; + unsigned long addr; + int n; + int res; + + len = iov->iov_len - offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + addr = (unsigned long)iov->iov_base + offset; + len += *start = addr & (PAGE_SIZE - 1); + addr &= ~(PAGE_SIZE - 1); + n = (len + PAGE_SIZE - 1) / PAGE_SIZE; + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); + if (unlikely(res < 0)) + return res; + return (res == n ? 
len : res * PAGE_SIZE) - *start; +} +EXPORT_SYMBOL(iov_iter_get_pages); -- cgit v1.2.2 From f67da30c1d5fc9e341bc8121708874bfd7b31e45 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 19 Mar 2014 01:16:16 -0400 Subject: new helper: iov_iter_npages() counts the pages covered by iov_iter, up to given limit. do_block_direct_io() and fuse_iter_npages() switched to it. Signed-off-by: Al Viro --- mm/iov_iter.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'mm') diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 45204cd5ccd8..0b677f8f9bad 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -262,3 +262,30 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return (res == n ? len : res * PAGE_SIZE) - *start; } EXPORT_SYMBOL(iov_iter_get_pages); + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + size_t offset = i->iov_offset; + size_t size = i->count; + const struct iovec *iov = i->iov; + int npages = 0; + int n; + + for (n = 0; size && n < i->nr_segs; n++, iov++) { + unsigned long addr = (unsigned long)iov->iov_base + offset; + size_t len = iov->iov_len - offset; + offset = 0; + if (unlikely(!len)) /* empty segment */ + continue; + if (len > size) + len = size; + npages += (addr + len + PAGE_SIZE - 1) / PAGE_SIZE + - addr / PAGE_SIZE; + if (npages >= maxpages) /* don't bother going further */ + return maxpages; + size -= len; + offset = 0; + } + return min(npages, maxpages); +} +EXPORT_SYMBOL(iov_iter_npages); -- cgit v1.2.2 From 91f79c43d1b54d7154b118860d81b39bad07dfff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Mar 2014 04:58:33 -0400 Subject: new helper: iov_iter_get_pages_alloc() same as iov_iter_get_pages(), except that pages array is allocated (kmalloc if possible, vmalloc if that fails) and left for caller to free. Lustre and NFS ->direct_IO() switched to it. 
Signed-off-by: Al Viro --- mm/iov_iter.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'mm') diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 0b677f8f9bad..a5c691c1a283 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -1,6 +1,8 @@ #include #include #include +#include +#include size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) @@ -263,6 +265,44 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, } EXPORT_SYMBOL(iov_iter_get_pages); +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + size_t offset = i->iov_offset; + const struct iovec *iov = i->iov; + size_t len; + unsigned long addr; + void *p; + int n; + int res; + + len = iov->iov_len - offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + addr = (unsigned long)iov->iov_base + offset; + len += *start = addr & (PAGE_SIZE - 1); + addr &= ~(PAGE_SIZE - 1); + n = (len + PAGE_SIZE - 1) / PAGE_SIZE; + + p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); + if (!p) + p = vmalloc(n * sizeof(struct page *)); + if (!p) + return -ENOMEM; + + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); + if (unlikely(res < 0)) { + kvfree(p); + return res; + } + *pages = p; + return (res == n ? len : res * PAGE_SIZE) - *start; +} +EXPORT_SYMBOL(iov_iter_get_pages_alloc); + int iov_iter_npages(const struct iov_iter *i, int maxpages) { size_t offset = i->iov_offset; -- cgit v1.2.2 From 0c949334a9e2581646c6ff0d1470a805b1e5be99 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Mar 2014 06:51:37 -0400 Subject: iov_iter_truncate() Now It Can Be Done(tm) - we don't need to do iov_shorten() in generic_file_direct_write() anymore, now that all ->direct_IO() instances are converted to proper iov_iter methods and honour iter->count and iter->iov_offset properly. Get rid of count/ocount arguments of generic_file_direct_write(), while we are at it. 
Signed-off-by: Al Viro --- mm/filemap.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 3aeaf2df4135..c0404b763a17 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2345,8 +2345,7 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(pagecache_write_end); ssize_t -generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, - loff_t pos, size_t count, size_t ocount) +generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -2356,10 +2355,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, pgoff_t end; struct iov_iter data; - if (count != ocount) - from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count); - - write_len = iov_length(from->iov, from->nr_segs); + write_len = iov_iter_count(from); end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); @@ -2568,7 +2564,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; - size_t ocount; /* original count */ size_t count; /* after file limit checks */ struct inode *inode = mapping->host; loff_t pos = iocb->ki_pos; @@ -2577,7 +2572,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t status; struct iov_iter from; - count = ocount = iov_length(iov, nr_segs); + count = iov_length(iov, nr_segs); + iov_iter_init(&from, WRITE, iov, nr_segs, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -2588,6 +2584,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; + iov_iter_truncate(&from, count); + err = 
file_remove_suid(file); if (err) goto out; @@ -2596,14 +2594,11 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - iov_iter_init(&from, WRITE, iov, nr_segs, count); - /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; - written = generic_file_direct_write(iocb, &from, pos, - count, ocount); + written = generic_file_direct_write(iocb, &from, pos); if (written < 0 || written == count) goto out; -- cgit v1.2.2 From 2ba5bbed0cd7429dbd567fa885ae3bc7a76de3d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 Apr 2014 20:00:02 -0400 Subject: shmem: switch to ->read_iter() Signed-off-by: Al Viro --- mm/shmem.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/mm/shmem.c b/mm/shmem.c index e0b76696c3f9..edc6c7e817e9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1402,8 +1402,7 @@ shmem_write_end(struct file *file, struct address_space *mapping, return copied; } -static ssize_t shmem_file_aio_read(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, loff_t pos) +static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); @@ -1413,11 +1412,7 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, enum sgp_type sgp = SGP_READ; int error = 0; ssize_t retval = 0; - size_t count = iov_length(iov, nr_segs); loff_t *ppos = &iocb->ki_pos; - struct iov_iter iter; - - iov_iter_init(&iter, READ, iov, nr_segs, count); /* * Might this read be for a stacking filesystem? Then when reading @@ -1493,14 +1488,14 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... 
*/ - ret = copy_page_to_iter(page, offset, nr, &iter); + ret = copy_page_to_iter(page, offset, nr, to); retval += ret; offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; page_cache_release(page); - if (!iov_iter_count(&iter)) + if (!iov_iter_count(to)) break; if (ret < nr) { error = -EFAULT; @@ -2622,9 +2617,9 @@ static const struct file_operations shmem_file_operations = { .mmap = shmem_mmap, #ifdef CONFIG_TMPFS .llseek = shmem_file_llseek, - .read = do_sync_read, + .read = new_sync_read, .write = do_sync_write, - .aio_read = shmem_file_aio_read, + .read_iter = shmem_file_read_iter, .aio_write = generic_file_aio_write, .fsync = noop_fsync, .splice_read = shmem_file_splice_read, -- cgit v1.2.2 From 8174202b34c30e0c07231bf63f18ab29af634f0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 03:17:43 -0400 Subject: write_iter variants of {__,}generic_file_aio_write() Signed-off-by: Al Viro --- mm/filemap.c | 61 ++++++++++++++++++++++++++++++++++++------------------------ mm/shmem.c | 4 ++-- mm/vmscan.c | 2 +- 3 files changed, 40 insertions(+), 27 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index c0404b763a17..d2d9eeec8bf0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2542,10 +2542,9 @@ again: EXPORT_SYMBOL(generic_perform_write); /** - * __generic_file_aio_write - write data to a file + * __generic_file_write_iter - write data to a file * @iocb: IO state structure (file, offset, etc.) - * @iov: vector with data to write - * @nr_segs: number of segments in the vector + * @from: iov_iter with data to write * * This function does all the work needed for actually writing data to a * file. It does all basic checks, removes SUID from the file, updates @@ -2559,21 +2558,16 @@ EXPORT_SYMBOL(generic_perform_write); * A caller has to handle it. This is mainly due to the fact that we want to * avoid syncing under i_mutex. 
*/ -ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs) +ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; - size_t count; /* after file limit checks */ struct inode *inode = mapping->host; loff_t pos = iocb->ki_pos; ssize_t written = 0; ssize_t err; ssize_t status; - struct iov_iter from; - - count = iov_length(iov, nr_segs); - iov_iter_init(&from, WRITE, iov, nr_segs, count); + size_t count = iov_iter_count(from); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -2584,7 +2578,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; - iov_iter_truncate(&from, count); + iov_iter_truncate(from, count); err = file_remove_suid(file); if (err) @@ -2598,7 +2592,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; - written = generic_file_direct_write(iocb, &from, pos); + written = generic_file_direct_write(iocb, from, pos); if (written < 0 || written == count) goto out; @@ -2609,7 +2603,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos += written; count -= written; - status = generic_perform_write(file, &from, pos); + status = generic_perform_write(file, from, pos); /* * If generic_perform_write() returned a synchronous error * then we want to return the number of bytes which were @@ -2641,7 +2635,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, */ } } else { - written = generic_perform_write(file, &from, pos); + written = generic_perform_write(file, from, pos); if (likely(written >= 0)) iocb->ki_pos = pos + written; } @@ -2649,30 +2643,36 @@ out: current->backing_dev_info = NULL; return written ? 
written : err; } +EXPORT_SYMBOL(__generic_file_write_iter); + +ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs) +{ + size_t count = iov_length(iov, nr_segs); + struct iov_iter from; + + iov_iter_init(&from, WRITE, iov, nr_segs, count); + return __generic_file_write_iter(iocb, &from); +} EXPORT_SYMBOL(__generic_file_aio_write); /** - * generic_file_aio_write - write data to a file + * generic_file_write_iter - write data to a file * @iocb: IO state structure - * @iov: vector with data to write - * @nr_segs: number of segments in the vector - * @pos: position in file where to write + * @from: iov_iter with data to write * - * This is a wrapper around __generic_file_aio_write() to be used by most + * This is a wrapper around __generic_file_write_iter() to be used by most * filesystems. It takes care of syncing the file in case of O_SYNC file * and acquires i_mutex as needed. */ -ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; - BUG_ON(iocb->ki_pos != pos); - mutex_lock(&inode->i_mutex); - ret = __generic_file_aio_write(iocb, iov, nr_segs); + ret = __generic_file_write_iter(iocb, from); mutex_unlock(&inode->i_mutex); if (ret > 0) { @@ -2684,6 +2684,19 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } return ret; } +EXPORT_SYMBOL(generic_file_write_iter); + +ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + size_t count = iov_length(iov, nr_segs); + struct iov_iter from; + + BUG_ON(iocb->ki_pos != pos); + + iov_iter_init(&from, WRITE, iov, nr_segs, count); + return generic_file_write_iter(iocb, &from); +} EXPORT_SYMBOL(generic_file_aio_write); /** diff --git a/mm/shmem.c b/mm/shmem.c 
index edc6c7e817e9..d3e5c6fc313c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2618,9 +2618,9 @@ static const struct file_operations shmem_file_operations = { #ifdef CONFIG_TMPFS .llseek = shmem_file_llseek, .read = new_sync_read, - .write = do_sync_write, + .write = new_sync_write, .read_iter = shmem_file_read_iter, - .aio_write = generic_file_aio_write, + .write_iter = generic_file_write_iter, .fsync = noop_fsync, .splice_read = shmem_file_splice_read, .splice_write = generic_file_splice_write, diff --git a/mm/vmscan.c b/mm/vmscan.c index 32c661d66a45..9c2dba6ac685 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -458,7 +458,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, * stalls if we need to run get_block(). We could test * PagePrivate for that. * - * If this process is currently in __generic_file_aio_write() against + * If this process is currently in __generic_file_write_iter() against * this page's queue, we can perform writeback even if that * will block. 
* -- cgit v1.2.2 From a8f3550cd228b6edc5d17fce1a9af8cc7004f185 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 03:32:25 -0400 Subject: bury __generic_file_aio_write() all users converted to __generic_file_write_iter() now Signed-off-by: Al Viro --- mm/filemap.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index d2d9eeec8bf0..7dcdb9db710d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2645,17 +2645,6 @@ out: } EXPORT_SYMBOL(__generic_file_write_iter); -ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs) -{ - size_t count = iov_length(iov, nr_segs); - struct iov_iter from; - - iov_iter_init(&from, WRITE, iov, nr_segs, count); - return __generic_file_write_iter(iocb, &from); -} -EXPORT_SYMBOL(__generic_file_aio_write); - /** * generic_file_write_iter - write data to a file * @iocb: IO state structure -- cgit v1.2.2 From f0d1bec9d58d4c038d0ac958c9af82be6eb18045 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Apr 2014 15:05:18 -0400 Subject: new helper: copy_page_from_iter() parallel to copy_page_to_iter(). pipe_write() switched to it (and became ->write_iter()). 
Signed-off-by: Al Viro --- mm/iov_iter.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'mm') diff --git a/mm/iov_iter.c b/mm/iov_iter.c index a5c691c1a283..081e3273085b 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -82,6 +82,84 @@ done: } EXPORT_SYMBOL(copy_page_to_iter); +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *to; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_readable(buf, copy)) { + kaddr = kmap_atomic(page); + to = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = to - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + to = kaddr + offset; + left = __copy_from_user(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + kunmap(page); +done: + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} 
+EXPORT_SYMBOL(copy_page_from_iter); + static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { -- cgit v1.2.2 From 6abd232274fd652e4a57f486d14e52ffee6f72e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Apr 2014 14:20:57 -0400 Subject: bury generic_file_aio_{read,write} no callers left Signed-off-by: Al Viro --- mm/filemap.c | 43 ++++++++----------------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 7dcdb9db710d..2f724e3cdf24 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1663,6 +1663,14 @@ out: return written ? written : error; } +/** + * generic_file_read_iter - generic filesystem read routine + * @iocb: kernel I/O control block + * @iter: destination for the data read + * + * This is the "read_iter()" routine for all filesystems + * that can use the page cache directly. + */ ssize_t generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { @@ -1713,28 +1721,6 @@ out: } EXPORT_SYMBOL(generic_file_read_iter); -/** - * generic_file_aio_read - generic filesystem read routine - * @iocb: kernel I/O control block - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @pos: current file position - * - * This is the "read()" routine for all filesystems - * that can use the page cache directly. 
- */ -ssize_t -generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - size_t count = iov_length(iov, nr_segs); - struct iov_iter i; - - iov_iter_init(&i, READ, iov, nr_segs, count); - return generic_file_read_iter(iocb, &i); -} -EXPORT_SYMBOL(generic_file_aio_read); - #ifdef CONFIG_MMU /** * page_cache_read - adds requested page to the page cache if not already there @@ -2675,19 +2661,6 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } EXPORT_SYMBOL(generic_file_write_iter); -ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - size_t count = iov_length(iov, nr_segs); - struct iov_iter from; - - BUG_ON(iocb->ki_pos != pos); - - iov_iter_init(&from, WRITE, iov, nr_segs, count); - return generic_file_write_iter(iocb, &from); -} -EXPORT_SYMBOL(generic_file_aio_write); - /** * try_to_release_page() - release old fs-specific metadata on a page * -- cgit v1.2.2 From 81055e584f9d743cb13dc7944923d817c20f089d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Apr 2014 19:23:46 -0400 Subject: optimize copy_page_{to,from}_iter() if we'd ended up in the end of a segment, jump to the beginning of the next one (iov_offset = 0, iov++), rather than having the next primitive deal with that. Ought to be folded back... 
Signed-off-by: Al Viro --- mm/iov_iter.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'mm') diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 081e3273085b..fcdaaab438b6 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -74,6 +74,10 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, } kunmap(page); done: + if (skip == iov->iov_len) { + iov++; + skip = 0; + } i->count -= wanted - bytes; i->nr_segs -= iov - i->iov; i->iov = iov; @@ -152,6 +156,10 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, } kunmap(page); done: + if (skip == iov->iov_len) { + iov++; + skip = 0; + } i->count -= wanted - bytes; i->nr_segs -= iov - i->iov; i->iov = iov; -- cgit v1.2.2 From 62a8067a7f35dba2de501c9cb00e4cf36da90bc0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Apr 2014 23:12:29 -0400 Subject: bio_vec-backed iov_iter New variant of iov_iter - ITER_BVEC in iter->type, backed with bio_vec array instead of iovec one. Primitives taught to deal with such beasts, __swap_writepage() switched to using that kind of iov_iter. Note that bio_vec is just a triple - there's nothing block-specific about it. I've left the definition where it was, but took it from under ifdef CONFIG_BLOCK. Next target: ->splice_write()...
Signed-off-by: Al Viro --- mm/iov_iter.c | 390 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- mm/page_io.c | 19 +-- 2 files changed, 370 insertions(+), 39 deletions(-) (limited to 'mm') diff --git a/mm/iov_iter.c b/mm/iov_iter.c index fcdaaab438b6..7b5dbd1517b5 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -4,7 +4,7 @@ #include #include -size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, +static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t skip, copy, left, wanted; @@ -84,9 +84,8 @@ done: i->iov_offset = skip; return wanted - bytes; } -EXPORT_SYMBOL(copy_page_to_iter); -size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, +static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t skip, copy, left, wanted; @@ -166,7 +165,6 @@ done: i->iov_offset = skip; return wanted - bytes; } -EXPORT_SYMBOL(copy_page_from_iter); static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) @@ -195,7 +193,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr, * were successfully copied. If a fault is encountered then return the number of * bytes which were copied. 
*/ -size_t iov_iter_copy_from_user_atomic(struct page *page, +static size_t copy_from_user_atomic_iovec(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes) { char *kaddr; @@ -215,9 +213,8 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, return copied; } -EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); -void iov_iter_advance(struct iov_iter *i, size_t bytes) +static void advance_iovec(struct iov_iter *i, size_t bytes) { BUG_ON(i->count < bytes); @@ -252,7 +249,6 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes) i->nr_segs = nr_segs; } } -EXPORT_SYMBOL(iov_iter_advance); /* * Fault in the first iovec of the given iov_iter, to a maximum length @@ -265,26 +261,16 @@ EXPORT_SYMBOL(iov_iter_advance); */ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) { - char __user *buf = i->iov->iov_base + i->iov_offset; - bytes = min(bytes, i->iov->iov_len - i->iov_offset); - return fault_in_pages_readable(buf, bytes); + if (!(i->type & ITER_BVEC)) { + char __user *buf = i->iov->iov_base + i->iov_offset; + bytes = min(bytes, i->iov->iov_len - i->iov_offset); + return fault_in_pages_readable(buf, bytes); + } + return 0; } EXPORT_SYMBOL(iov_iter_fault_in_readable); -/* - * Return the count of just the current iov_iter segment. 
- */ -size_t iov_iter_single_seg_count(const struct iov_iter *i) -{ - const struct iovec *iov = i->iov; - if (i->nr_segs == 1) - return i->count; - else - return min(i->count, iov->iov_len - i->iov_offset); -} -EXPORT_SYMBOL(iov_iter_single_seg_count); - -unsigned long iov_iter_alignment(const struct iov_iter *i) +static unsigned long alignment_iovec(const struct iov_iter *i) { const struct iovec *iov = i->iov; unsigned long res; @@ -307,7 +293,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) res |= (unsigned long)iov->iov_base | size; return res; } -EXPORT_SYMBOL(iov_iter_alignment); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, @@ -315,7 +300,7 @@ void iov_iter_init(struct iov_iter *i, int direction, { /* It will get better. Eventually... */ if (segment_eq(get_fs(), KERNEL_DS)) - direction |= REQ_KERNEL; + direction |= ITER_KVEC; i->type = direction; i->iov = iov; i->nr_segs = nr_segs; @@ -324,7 +309,7 @@ void iov_iter_init(struct iov_iter *i, int direction, } EXPORT_SYMBOL(iov_iter_init); -ssize_t iov_iter_get_pages(struct iov_iter *i, +static ssize_t get_pages_iovec(struct iov_iter *i, struct page **pages, size_t maxsize, size_t *start) { @@ -349,9 +334,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return res; return (res == n ? len : res * PAGE_SIZE) - *start; } -EXPORT_SYMBOL(iov_iter_get_pages); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, +static ssize_t get_pages_alloc_iovec(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { @@ -387,9 +371,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, *pages = p; return (res == n ? 
len : res * PAGE_SIZE) - *start; } -EXPORT_SYMBOL(iov_iter_get_pages_alloc); -int iov_iter_npages(const struct iov_iter *i, int maxpages) +static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages) { size_t offset = i->iov_offset; size_t size = i->count; @@ -414,4 +397,347 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) } return min(npages, maxpages); } + +static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) +{ + char *from = kmap_atomic(page); + memcpy(to, from + offset, len); + kunmap_atomic(from); +} + +static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len) +{ + char *to = kmap_atomic(page); + memcpy(to + offset, from, len); + kunmap_atomic(to); +} + +static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, wanted; + const struct bio_vec *bvec; + void *kaddr, *from; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + bvec = i->bvec; + skip = i->iov_offset; + copy = min_t(size_t, bytes, bvec->bv_len - skip); + + kaddr = kmap_atomic(page); + from = kaddr + offset; + memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy); + skip += copy; + from += copy; + bytes -= copy; + while (bytes) { + bvec++; + copy = min(bytes, (size_t)bvec->bv_len); + memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy); + skip = copy; + from += copy; + bytes -= copy; + } + kunmap_atomic(kaddr); + if (skip == bvec->bv_len) { + bvec++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; + i->iov_offset = skip; + return wanted - bytes; +} + +static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, wanted; + const struct bio_vec *bvec; + void *kaddr, *to; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) 
+ return 0; + + wanted = bytes; + bvec = i->bvec; + skip = i->iov_offset; + + kaddr = kmap_atomic(page); + + to = kaddr + offset; + + copy = min(bytes, bvec->bv_len - skip); + + memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy); + + to += copy; + skip += copy; + bytes -= copy; + + while (bytes) { + bvec++; + copy = min(bytes, (size_t)bvec->bv_len); + memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy); + skip = copy; + to += copy; + bytes -= copy; + } + kunmap_atomic(kaddr); + if (skip == bvec->bv_len) { + bvec++; + skip = 0; + } + i->count -= wanted; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; + i->iov_offset = skip; + return wanted; +} + +static size_t copy_from_user_bvec(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + char *kaddr; + size_t left; + const struct bio_vec *bvec; + size_t base = i->iov_offset; + + kaddr = kmap_atomic(page); + for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) { + size_t copy = min(left, bvec->bv_len - base); + if (!bvec->bv_len) + continue; + memcpy_from_page(kaddr + offset, bvec->bv_page, + bvec->bv_offset + base, copy); + offset += copy; + left -= copy; + } + kunmap_atomic(kaddr); + return bytes; +} + +static void advance_bvec(struct iov_iter *i, size_t bytes) +{ + BUG_ON(i->count < bytes); + + if (likely(i->nr_segs == 1)) { + i->iov_offset += bytes; + i->count -= bytes; + } else { + const struct bio_vec *bvec = i->bvec; + size_t base = i->iov_offset; + unsigned long nr_segs = i->nr_segs; + + /* + * The !bvec->bv_len check ensures we skip over unlikely + * zero-length segments (without overrunning the bio_vec array).
+ */ + while (bytes || unlikely(i->count && !bvec->bv_len)) { + int copy; + + copy = min(bytes, bvec->bv_len - base); + BUG_ON(!i->count || i->count < copy); + i->count -= copy; + bytes -= copy; + base += copy; + if (bvec->bv_len == base) { + bvec++; + nr_segs--; + base = 0; + } + } + i->bvec = bvec; + i->iov_offset = base; + i->nr_segs = nr_segs; + } +} + +static unsigned long alignment_bvec(const struct iov_iter *i) +{ + const struct bio_vec *bvec = i->bvec; + unsigned long res; + size_t size = i->count; + size_t n; + + if (!size) + return 0; + + res = bvec->bv_offset + i->iov_offset; + n = bvec->bv_len - i->iov_offset; + if (n >= size) + return res | size; + size -= n; + res |= n; + while (size > (++bvec)->bv_len) { + res |= bvec->bv_offset | bvec->bv_len; + size -= bvec->bv_len; + } + res |= bvec->bv_offset | size; + return res; +} + +static ssize_t get_pages_bvec(struct iov_iter *i, + struct page **pages, size_t maxsize, + size_t *start) +{ + const struct bio_vec *bvec = i->bvec; + size_t len = bvec->bv_len - i->iov_offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + *start = bvec->bv_offset + i->iov_offset; + + get_page(*pages = bvec->bv_page); + + return len; +} + +static ssize_t get_pages_alloc_bvec(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + const struct bio_vec *bvec = i->bvec; + size_t len = bvec->bv_len - i->iov_offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + *start = bvec->bv_offset + i->iov_offset; + + *pages = kmalloc(sizeof(struct page *), GFP_KERNEL); + if (!*pages) + return -ENOMEM; + + get_page(**pages = bvec->bv_page); + + return len; +} + +static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages) +{ + size_t offset = i->iov_offset; + size_t size = i->count; + const struct bio_vec *bvec = i->bvec; + int npages = 0; + int n; + + for (n = 0; size && n < i->nr_segs; n++, bvec++) { + size_t len = bvec->bv_len - offset; + 
offset = 0; + if (unlikely(!len)) /* empty segment */ + continue; + if (len > size) + len = size; + npages++; + if (npages >= maxpages) /* don't bother going further */ + return maxpages; + size -= len; + offset = 0; + } + return min(npages, maxpages); +} + +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_page_to_iter_bvec(page, offset, bytes, i); + else + return copy_page_to_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_to_iter); + +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_page_from_iter_bvec(page, offset, bytes, i); + else + return copy_page_from_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_from_iter); + +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + if (i->type & ITER_BVEC) + return copy_from_user_bvec(page, i, offset, bytes); + else + return copy_from_user_atomic_iovec(page, i, offset, bytes); +} +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); + +void iov_iter_advance(struct iov_iter *i, size_t size) +{ + if (i->type & ITER_BVEC) + advance_bvec(i, size); + else + advance_iovec(i, size); +} +EXPORT_SYMBOL(iov_iter_advance); + +/* + * Return the count of just the current iov_iter segment. 
+ */ +size_t iov_iter_single_seg_count(const struct iov_iter *i) +{ + if (i->nr_segs == 1) + return i->count; + else if (i->type & ITER_BVEC) + return min(i->count, i->iov->iov_len - i->iov_offset); + else + return min(i->count, i->bvec->bv_len - i->iov_offset); +} +EXPORT_SYMBOL(iov_iter_single_seg_count); + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return alignment_bvec(i); + else + return alignment_iovec(i); +} +EXPORT_SYMBOL(iov_iter_alignment); + +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, + size_t *start) +{ + if (i->type & ITER_BVEC) + return get_pages_bvec(i, pages, maxsize, start); + else + return get_pages_iovec(i, pages, maxsize, start); +} +EXPORT_SYMBOL(iov_iter_get_pages); + +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + if (i->type & ITER_BVEC) + return get_pages_alloc_bvec(i, pages, maxsize, start); + else + return get_pages_alloc_iovec(i, pages, maxsize, start); +} +EXPORT_SYMBOL(iov_iter_get_pages_alloc); + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + if (i->type & ITER_BVEC) + return iov_iter_npages_bvec(i, maxpages); + else + return iov_iter_npages_iovec(i, maxpages); +} EXPORT_SYMBOL(iov_iter_npages); diff --git a/mm/page_io.c b/mm/page_io.c index 313bfedb75d1..33bb38c4aad7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -259,23 +259,28 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; - struct iovec iov = { - .iov_base = kmap(page), - .iov_len = PAGE_SIZE, + struct bio_vec bv = { + .bv_page = page, + .bv_len = PAGE_SIZE, + .bv_offset = 0 + }; + struct iov_iter from = { + .type = ITER_BVEC | WRITE, + .count = PAGE_SIZE, + .iov_offset = 0, + .nr_segs = 1, + .bvec = &bv }; - struct iov_iter from; init_sync_kiocb(&kiocb, 
swap_file); kiocb.ki_pos = page_file_offset(page); kiocb.ki_nbytes = PAGE_SIZE; - iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE); set_page_writeback(page); unlock_page(page); - ret = mapping->a_ops->direct_IO(KERNEL_WRITE, + ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE, &kiocb, &from, kiocb.ki_pos); - kunmap(page); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; -- cgit v1.2.2 From f6cb85d00e1eb2fc3bf27ffcd0acc9d519512bb0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Apr 2014 04:38:56 -0400 Subject: shmem: switch to iter_file_splice_write() Signed-off-by: Al Viro --- mm/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/shmem.c b/mm/shmem.c index d3e5c6fc313c..de834ab8b6b9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2623,7 +2623,7 @@ static const struct file_operations shmem_file_operations = { .write_iter = generic_file_write_iter, .fsync = noop_fsync, .splice_read = shmem_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .fallocate = shmem_fallocate, #endif }; -- cgit v1.2.2