path: root/mm
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-04-12 17:49:50 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-04-12 17:49:50 -0400
commit	5166701b368caea89d57b14bf41cf39e819dad51 (patch)
tree	c73b9d4860809e3afa9359be9d03ba2d8d98a18e /mm
parent	0a7418f5f569512e98789c439198eed4b507cce3 (diff)
parent	a786c06d9f2719203c00b3d97b21f9a96980d0b5 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro:
 "The first vfs pile, with deep apologies for being very late in this
  window.

  Assorted cleanups and fixes, plus a large preparatory part of iov_iter
  work.  There's a lot more of that, but it'll probably go into the next
  merge window - it *does* shape up nicely, removes a lot of boilerplate,
  gets rid of locking inconsistencies between aio_write and splice_write
  and I hope to get Kent's direct-io rewrite merged into the same queue,
  but some of the stuff after this point is having (mostly trivial)
  conflicts with the things already merged into mainline and with some I
  want more testing.

  This one passes LTP and xfstests without regressions, in addition to
  usual beating.  BTW, readahead02 in ltp syscalls testsuite has started
  giving failures since "mm/readahead.c: fix readahead failure for
  memoryless NUMA nodes and limit readahead pages" - might be a false
  positive, might be a real regression..."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (63 commits)
  missing bits of "splice: fix racy pipe->buffers uses"
  cifs: fix the race in cifs_writev()
  ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure
  kill generic_file_buffered_write()
  ocfs2_file_aio_write(): switch to generic_perform_write()
  ceph_aio_write(): switch to generic_perform_write()
  xfs_file_buffered_aio_write(): switch to generic_perform_write()
  export generic_perform_write(), start getting rid of generic_file_buffer_write()
  generic_file_direct_write(): get rid of ppos argument
  btrfs_file_aio_write(): get rid of ppos
  kill the 5th argument of generic_file_buffered_write()
  kill the 4th argument of __generic_file_aio_write()
  lustre: don't open-code kernel_recvmsg()
  ocfs2: don't open-code kernel_recvmsg()
  drbd: don't open-code kernel_recvmsg()
  constify blk_rq_map_user_iov() and friends
  lustre: switch to kernel_sendmsg()
  ocfs2: don't open-code kernel_sendmsg()
  take iov_iter stuff to mm/iov_iter.c
  process_vm_access: tidy up a bit
  ...
Diffstat (limited to 'mm')
-rw-r--r--	mm/Makefile	3
-rw-r--r--	mm/filemap.c	344
-rw-r--r--	mm/iov_iter.c	224
-rw-r--r--	mm/process_vm_access.c	250
-rw-r--r--	mm/shmem.c	79
5 files changed, 389 insertions, 511 deletions
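
Before the diff itself, a minimal orientation sketch of the calling convention the series converges on. This is not part of the commit: the function name example_buffered_write and its signature are invented for illustration, while iov_iter_init(), generic_perform_write() and the iocb->ki_pos update mirror the non-O_DIRECT branch of __generic_file_aio_write() in the mm/filemap.c hunks below.

#include <linux/fs.h>
#include <linux/uio.h>

/*
 * Illustrative sketch only: the buffered-write pattern that the
 * ocfs2/ceph/xfs callers in this series are switched to.  The caller
 * builds an iov_iter describing the user buffers and hands it to the
 * now-exported generic_perform_write() instead of calling the removed
 * generic_file_buffered_write().
 */
static ssize_t example_buffered_write(struct kiocb *iocb,
				      const struct iovec *iov,
				      unsigned long nr_segs,
				      loff_t pos, size_t count)
{
	struct file *file = iocb->ki_filp;
	struct iov_iter from;
	ssize_t status;

	/* describe the source buffers; last argument = bytes already consumed */
	iov_iter_init(&from, iov, nr_segs, count, 0);

	/* copy from the iterator into the page cache, page by page */
	status = generic_perform_write(file, &from, pos);
	if (status > 0)
		iocb->ki_pos = pos + status;	/* the write path now advances ki_pos itself */

	return status;
}

The iterator carries all per-segment bookkeeping, which is what lets the diff delete the open-coded iovec loops in generic_file_aio_read(), shmem_file_aio_read() and process_vm_rw_core() and replace them with copy_page_to_iter()/iov_iter_advance() on a single struct iov_iter.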
diff --git a/mm/Makefile b/mm/Makefile
index 9e5aaf92197d..b484452dac57 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -17,7 +17,8 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
17 util.o mmzone.o vmstat.o backing-dev.o \ 17 util.o mmzone.o vmstat.o backing-dev.o \
18 mm_init.o mmu_context.o percpu.o slab_common.o \ 18 mm_init.o mmu_context.o percpu.o slab_common.o \
19 compaction.o balloon_compaction.o vmacache.o \ 19 compaction.o balloon_compaction.o vmacache.o \
20 interval_tree.o list_lru.o workingset.o $(mmu-y) 20 interval_tree.o list_lru.o workingset.o \
21 iov_iter.o $(mmu-y)
21 22
22obj-y += init-mm.o 23obj-y += init-mm.o
23 24
diff --git a/mm/filemap.c b/mm/filemap.c
index 27ebc0c9571b..a82fbe4c9e8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -77,7 +77,7 @@
77 * ->mmap_sem 77 * ->mmap_sem
78 * ->lock_page (access_process_vm) 78 * ->lock_page (access_process_vm)
79 * 79 *
80 * ->i_mutex (generic_file_buffered_write) 80 * ->i_mutex (generic_perform_write)
81 * ->mmap_sem (fault_in_pages_readable->do_page_fault) 81 * ->mmap_sem (fault_in_pages_readable->do_page_fault)
82 * 82 *
83 * bdi->wb.list_lock 83 * bdi->wb.list_lock
@@ -1428,7 +1428,8 @@ static void shrink_readahead_size_eio(struct file *filp,
1428 * do_generic_file_read - generic file read routine 1428 * do_generic_file_read - generic file read routine
1429 * @filp: the file to read 1429 * @filp: the file to read
1430 * @ppos: current file position 1430 * @ppos: current file position
1431 * @desc: read_descriptor 1431 * @iter: data destination
1432 * @written: already copied
1432 * 1433 *
1433 * This is a generic file read routine, and uses the 1434 * This is a generic file read routine, and uses the
1434 * mapping->a_ops->readpage() function for the actual low-level stuff. 1435 * mapping->a_ops->readpage() function for the actual low-level stuff.
@@ -1436,8 +1437,8 @@ static void shrink_readahead_size_eio(struct file *filp,
1436 * This is really ugly. But the goto's actually try to clarify some 1437 * This is really ugly. But the goto's actually try to clarify some
1437 * of the logic when it comes to error handling etc. 1438 * of the logic when it comes to error handling etc.
1438 */ 1439 */
1439static void do_generic_file_read(struct file *filp, loff_t *ppos, 1440static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
1440 read_descriptor_t *desc) 1441 struct iov_iter *iter, ssize_t written)
1441{ 1442{
1442 struct address_space *mapping = filp->f_mapping; 1443 struct address_space *mapping = filp->f_mapping;
1443 struct inode *inode = mapping->host; 1444 struct inode *inode = mapping->host;
@@ -1447,12 +1448,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
1447 pgoff_t prev_index; 1448 pgoff_t prev_index;
1448 unsigned long offset; /* offset into pagecache page */ 1449 unsigned long offset; /* offset into pagecache page */
1449 unsigned int prev_offset; 1450 unsigned int prev_offset;
1450 int error; 1451 int error = 0;
1451 1452
1452 index = *ppos >> PAGE_CACHE_SHIFT; 1453 index = *ppos >> PAGE_CACHE_SHIFT;
1453 prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; 1454 prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
1454 prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); 1455 prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
1455 last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 1456 last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
1456 offset = *ppos & ~PAGE_CACHE_MASK; 1457 offset = *ppos & ~PAGE_CACHE_MASK;
1457 1458
1458 for (;;) { 1459 for (;;) {
@@ -1487,7 +1488,7 @@ find_page:
1487 if (!page->mapping) 1488 if (!page->mapping)
1488 goto page_not_up_to_date_locked; 1489 goto page_not_up_to_date_locked;
1489 if (!mapping->a_ops->is_partially_uptodate(page, 1490 if (!mapping->a_ops->is_partially_uptodate(page,
1490 desc, offset)) 1491 offset, iter->count))
1491 goto page_not_up_to_date_locked; 1492 goto page_not_up_to_date_locked;
1492 unlock_page(page); 1493 unlock_page(page);
1493 } 1494 }
@@ -1537,24 +1538,23 @@ page_ok:
1537 /* 1538 /*
1538 * Ok, we have the page, and it's up-to-date, so 1539 * Ok, we have the page, and it's up-to-date, so
1539 * now we can copy it to user space... 1540 * now we can copy it to user space...
1540 *
1541 * The file_read_actor routine returns how many bytes were
1542 * actually used..
1543 * NOTE! This may not be the same as how much of a user buffer
1544 * we filled up (we may be padding etc), so we can only update
1545 * "pos" here (the actor routine has to update the user buffer
1546 * pointers and the remaining count).
1547 */ 1541 */
1548 ret = file_read_actor(desc, page, offset, nr); 1542
1543 ret = copy_page_to_iter(page, offset, nr, iter);
1549 offset += ret; 1544 offset += ret;
1550 index += offset >> PAGE_CACHE_SHIFT; 1545 index += offset >> PAGE_CACHE_SHIFT;
1551 offset &= ~PAGE_CACHE_MASK; 1546 offset &= ~PAGE_CACHE_MASK;
1552 prev_offset = offset; 1547 prev_offset = offset;
1553 1548
1554 page_cache_release(page); 1549 page_cache_release(page);
1555 if (ret == nr && desc->count) 1550 written += ret;
1556 continue; 1551 if (!iov_iter_count(iter))
1557 goto out; 1552 goto out;
1553 if (ret < nr) {
1554 error = -EFAULT;
1555 goto out;
1556 }
1557 continue;
1558 1558
1559page_not_up_to_date: 1559page_not_up_to_date:
1560 /* Get exclusive access to the page ... */ 1560 /* Get exclusive access to the page ... */
@@ -1589,6 +1589,7 @@ readpage:
1589 if (unlikely(error)) { 1589 if (unlikely(error)) {
1590 if (error == AOP_TRUNCATED_PAGE) { 1590 if (error == AOP_TRUNCATED_PAGE) {
1591 page_cache_release(page); 1591 page_cache_release(page);
1592 error = 0;
1592 goto find_page; 1593 goto find_page;
1593 } 1594 }
1594 goto readpage_error; 1595 goto readpage_error;
@@ -1619,7 +1620,6 @@ readpage:
1619 1620
1620readpage_error: 1621readpage_error:
1621 /* UHHUH! A synchronous read error occurred. Report it */ 1622 /* UHHUH! A synchronous read error occurred. Report it */
1622 desc->error = error;
1623 page_cache_release(page); 1623 page_cache_release(page);
1624 goto out; 1624 goto out;
1625 1625
@@ -1630,16 +1630,17 @@ no_cached_page:
1630 */ 1630 */
1631 page = page_cache_alloc_cold(mapping); 1631 page = page_cache_alloc_cold(mapping);
1632 if (!page) { 1632 if (!page) {
1633 desc->error = -ENOMEM; 1633 error = -ENOMEM;
1634 goto out; 1634 goto out;
1635 } 1635 }
1636 error = add_to_page_cache_lru(page, mapping, 1636 error = add_to_page_cache_lru(page, mapping,
1637 index, GFP_KERNEL); 1637 index, GFP_KERNEL);
1638 if (error) { 1638 if (error) {
1639 page_cache_release(page); 1639 page_cache_release(page);
1640 if (error == -EEXIST) 1640 if (error == -EEXIST) {
1641 error = 0;
1641 goto find_page; 1642 goto find_page;
1642 desc->error = error; 1643 }
1643 goto out; 1644 goto out;
1644 } 1645 }
1645 goto readpage; 1646 goto readpage;
@@ -1652,44 +1653,7 @@ out:
1652 1653
1653 *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; 1654 *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
1654 file_accessed(filp); 1655 file_accessed(filp);
1655} 1656 return written ? written : error;
1656
1657int file_read_actor(read_descriptor_t *desc, struct page *page,
1658 unsigned long offset, unsigned long size)
1659{
1660 char *kaddr;
1661 unsigned long left, count = desc->count;
1662
1663 if (size > count)
1664 size = count;
1665
1666 /*
1667 * Faults on the destination of a read are common, so do it before
1668 * taking the kmap.
1669 */
1670 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1671 kaddr = kmap_atomic(page);
1672 left = __copy_to_user_inatomic(desc->arg.buf,
1673 kaddr + offset, size);
1674 kunmap_atomic(kaddr);
1675 if (left == 0)
1676 goto success;
1677 }
1678
1679 /* Do it the slow way */
1680 kaddr = kmap(page);
1681 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1682 kunmap(page);
1683
1684 if (left) {
1685 size -= left;
1686 desc->error = -EFAULT;
1687 }
1688success:
1689 desc->count = count - size;
1690 desc->written += size;
1691 desc->arg.buf += size;
1692 return size;
1693} 1657}
1694 1658
1695/* 1659/*
@@ -1747,14 +1711,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1747{ 1711{
1748 struct file *filp = iocb->ki_filp; 1712 struct file *filp = iocb->ki_filp;
1749 ssize_t retval; 1713 ssize_t retval;
1750 unsigned long seg = 0;
1751 size_t count; 1714 size_t count;
1752 loff_t *ppos = &iocb->ki_pos; 1715 loff_t *ppos = &iocb->ki_pos;
1716 struct iov_iter i;
1753 1717
1754 count = 0; 1718 count = 0;
1755 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); 1719 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1756 if (retval) 1720 if (retval)
1757 return retval; 1721 return retval;
1722 iov_iter_init(&i, iov, nr_segs, count, 0);
1758 1723
1759 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 1724 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
1760 if (filp->f_flags & O_DIRECT) { 1725 if (filp->f_flags & O_DIRECT) {
@@ -1776,6 +1741,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1776 if (retval > 0) { 1741 if (retval > 0) {
1777 *ppos = pos + retval; 1742 *ppos = pos + retval;
1778 count -= retval; 1743 count -= retval;
1744 /*
1745 * If we did a short DIO read we need to skip the
1746 * section of the iov that we've already read data into.
1747 */
1748 iov_iter_advance(&i, retval);
1779 } 1749 }
1780 1750
1781 /* 1751 /*
@@ -1792,39 +1762,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1792 } 1762 }
1793 } 1763 }
1794 1764
1795 count = retval; 1765 retval = do_generic_file_read(filp, ppos, &i, retval);
1796 for (seg = 0; seg < nr_segs; seg++) {
1797 read_descriptor_t desc;
1798 loff_t offset = 0;
1799
1800 /*
1801 * If we did a short DIO read we need to skip the section of the
1802 * iov that we've already read data into.
1803 */
1804 if (count) {
1805 if (count > iov[seg].iov_len) {
1806 count -= iov[seg].iov_len;
1807 continue;
1808 }
1809 offset = count;
1810 count = 0;
1811 }
1812
1813 desc.written = 0;
1814 desc.arg.buf = iov[seg].iov_base + offset;
1815 desc.count = iov[seg].iov_len - offset;
1816 if (desc.count == 0)
1817 continue;
1818 desc.error = 0;
1819 do_generic_file_read(filp, ppos, &desc);
1820 retval += desc.written;
1821 if (desc.error) {
1822 retval = retval ?: desc.error;
1823 break;
1824 }
1825 if (desc.count > 0)
1826 break;
1827 }
1828out: 1766out:
1829 return retval; 1767 return retval;
1830} 1768}
@@ -2335,150 +2273,6 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
2335} 2273}
2336EXPORT_SYMBOL(read_cache_page_gfp); 2274EXPORT_SYMBOL(read_cache_page_gfp);
2337 2275
2338static size_t __iovec_copy_from_user_inatomic(char *vaddr,
2339 const struct iovec *iov, size_t base, size_t bytes)
2340{
2341 size_t copied = 0, left = 0;
2342
2343 while (bytes) {
2344 char __user *buf = iov->iov_base + base;
2345 int copy = min(bytes, iov->iov_len - base);
2346
2347 base = 0;
2348 left = __copy_from_user_inatomic(vaddr, buf, copy);
2349 copied += copy;
2350 bytes -= copy;
2351 vaddr += copy;
2352 iov++;
2353
2354 if (unlikely(left))
2355 break;
2356 }
2357 return copied - left;
2358}
2359
2360/*
2361 * Copy as much as we can into the page and return the number of bytes which
2362 * were successfully copied. If a fault is encountered then return the number of
2363 * bytes which were copied.
2364 */
2365size_t iov_iter_copy_from_user_atomic(struct page *page,
2366 struct iov_iter *i, unsigned long offset, size_t bytes)
2367{
2368 char *kaddr;
2369 size_t copied;
2370
2371 BUG_ON(!in_atomic());
2372 kaddr = kmap_atomic(page);
2373 if (likely(i->nr_segs == 1)) {
2374 int left;
2375 char __user *buf = i->iov->iov_base + i->iov_offset;
2376 left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
2377 copied = bytes - left;
2378 } else {
2379 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
2380 i->iov, i->iov_offset, bytes);
2381 }
2382 kunmap_atomic(kaddr);
2383
2384 return copied;
2385}
2386EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
2387
2388/*
2389 * This has the same sideeffects and return value as
2390 * iov_iter_copy_from_user_atomic().
2391 * The difference is that it attempts to resolve faults.
2392 * Page must not be locked.
2393 */
2394size_t iov_iter_copy_from_user(struct page *page,
2395 struct iov_iter *i, unsigned long offset, size_t bytes)
2396{
2397 char *kaddr;
2398 size_t copied;
2399
2400 kaddr = kmap(page);
2401 if (likely(i->nr_segs == 1)) {
2402 int left;
2403 char __user *buf = i->iov->iov_base + i->iov_offset;
2404 left = __copy_from_user(kaddr + offset, buf, bytes);
2405 copied = bytes - left;
2406 } else {
2407 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
2408 i->iov, i->iov_offset, bytes);
2409 }
2410 kunmap(page);
2411 return copied;
2412}
2413EXPORT_SYMBOL(iov_iter_copy_from_user);
2414
2415void iov_iter_advance(struct iov_iter *i, size_t bytes)
2416{
2417 BUG_ON(i->count < bytes);
2418
2419 if (likely(i->nr_segs == 1)) {
2420 i->iov_offset += bytes;
2421 i->count -= bytes;
2422 } else {
2423 const struct iovec *iov = i->iov;
2424 size_t base = i->iov_offset;
2425 unsigned long nr_segs = i->nr_segs;
2426
2427 /*
2428 * The !iov->iov_len check ensures we skip over unlikely
2429 * zero-length segments (without overruning the iovec).
2430 */
2431 while (bytes || unlikely(i->count && !iov->iov_len)) {
2432 int copy;
2433
2434 copy = min(bytes, iov->iov_len - base);
2435 BUG_ON(!i->count || i->count < copy);
2436 i->count -= copy;
2437 bytes -= copy;
2438 base += copy;
2439 if (iov->iov_len == base) {
2440 iov++;
2441 nr_segs--;
2442 base = 0;
2443 }
2444 }
2445 i->iov = iov;
2446 i->iov_offset = base;
2447 i->nr_segs = nr_segs;
2448 }
2449}
2450EXPORT_SYMBOL(iov_iter_advance);
2451
2452/*
2453 * Fault in the first iovec of the given iov_iter, to a maximum length
2454 * of bytes. Returns 0 on success, or non-zero if the memory could not be
2455 * accessed (ie. because it is an invalid address).
2456 *
2457 * writev-intensive code may want this to prefault several iovecs -- that
2458 * would be possible (callers must not rely on the fact that _only_ the
2459 * first iovec will be faulted with the current implementation).
2460 */
2461int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
2462{
2463 char __user *buf = i->iov->iov_base + i->iov_offset;
2464 bytes = min(bytes, i->iov->iov_len - i->iov_offset);
2465 return fault_in_pages_readable(buf, bytes);
2466}
2467EXPORT_SYMBOL(iov_iter_fault_in_readable);
2468
2469/*
2470 * Return the count of just the current iov_iter segment.
2471 */
2472size_t iov_iter_single_seg_count(const struct iov_iter *i)
2473{
2474 const struct iovec *iov = i->iov;
2475 if (i->nr_segs == 1)
2476 return i->count;
2477 else
2478 return min(i->count, iov->iov_len - i->iov_offset);
2479}
2480EXPORT_SYMBOL(iov_iter_single_seg_count);
2481
2482/* 2276/*
2483 * Performs necessary checks before doing a write 2277 * Performs necessary checks before doing a write
2484 * 2278 *
@@ -2585,7 +2379,7 @@ EXPORT_SYMBOL(pagecache_write_end);
2585 2379
2586ssize_t 2380ssize_t
2587generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 2381generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
2588 unsigned long *nr_segs, loff_t pos, loff_t *ppos, 2382 unsigned long *nr_segs, loff_t pos,
2589 size_t count, size_t ocount) 2383 size_t count, size_t ocount)
2590{ 2384{
2591 struct file *file = iocb->ki_filp; 2385 struct file *file = iocb->ki_filp;
@@ -2646,7 +2440,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
2646 i_size_write(inode, pos); 2440 i_size_write(inode, pos);
2647 mark_inode_dirty(inode); 2441 mark_inode_dirty(inode);
2648 } 2442 }
2649 *ppos = pos; 2443 iocb->ki_pos = pos;
2650 } 2444 }
2651out: 2445out:
2652 return written; 2446 return written;
@@ -2692,7 +2486,7 @@ found:
2692} 2486}
2693EXPORT_SYMBOL(grab_cache_page_write_begin); 2487EXPORT_SYMBOL(grab_cache_page_write_begin);
2694 2488
2695static ssize_t generic_perform_write(struct file *file, 2489ssize_t generic_perform_write(struct file *file,
2696 struct iov_iter *i, loff_t pos) 2490 struct iov_iter *i, loff_t pos)
2697{ 2491{
2698 struct address_space *mapping = file->f_mapping; 2492 struct address_space *mapping = file->f_mapping;
@@ -2742,9 +2536,7 @@ again:
2742 if (mapping_writably_mapped(mapping)) 2536 if (mapping_writably_mapped(mapping))
2743 flush_dcache_page(page); 2537 flush_dcache_page(page);
2744 2538
2745 pagefault_disable();
2746 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); 2539 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
2747 pagefault_enable();
2748 flush_dcache_page(page); 2540 flush_dcache_page(page);
2749 2541
2750 mark_page_accessed(page); 2542 mark_page_accessed(page);
@@ -2782,27 +2574,7 @@ again:
2782 2574
2783 return written ? written : status; 2575 return written ? written : status;
2784} 2576}
2785 2577EXPORT_SYMBOL(generic_perform_write);
2786ssize_t
2787generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2788 unsigned long nr_segs, loff_t pos, loff_t *ppos,
2789 size_t count, ssize_t written)
2790{
2791 struct file *file = iocb->ki_filp;
2792 ssize_t status;
2793 struct iov_iter i;
2794
2795 iov_iter_init(&i, iov, nr_segs, count, written);
2796 status = generic_perform_write(file, &i, pos);
2797
2798 if (likely(status >= 0)) {
2799 written += status;
2800 *ppos = pos + status;
2801 }
2802
2803 return written ? written : status;
2804}
2805EXPORT_SYMBOL(generic_file_buffered_write);
2806 2578
2807/** 2579/**
2808 * __generic_file_aio_write - write data to a file 2580 * __generic_file_aio_write - write data to a file
@@ -2824,16 +2596,18 @@ EXPORT_SYMBOL(generic_file_buffered_write);
2824 * avoid syncing under i_mutex. 2596 * avoid syncing under i_mutex.
2825 */ 2597 */
2826ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 2598ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2827 unsigned long nr_segs, loff_t *ppos) 2599 unsigned long nr_segs)
2828{ 2600{
2829 struct file *file = iocb->ki_filp; 2601 struct file *file = iocb->ki_filp;
2830 struct address_space * mapping = file->f_mapping; 2602 struct address_space * mapping = file->f_mapping;
2831 size_t ocount; /* original count */ 2603 size_t ocount; /* original count */
2832 size_t count; /* after file limit checks */ 2604 size_t count; /* after file limit checks */
2833 struct inode *inode = mapping->host; 2605 struct inode *inode = mapping->host;
2834 loff_t pos; 2606 loff_t pos = iocb->ki_pos;
2835 ssize_t written; 2607 ssize_t written = 0;
2836 ssize_t err; 2608 ssize_t err;
2609 ssize_t status;
2610 struct iov_iter from;
2837 2611
2838 ocount = 0; 2612 ocount = 0;
2839 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); 2613 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -2841,12 +2615,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2841 return err; 2615 return err;
2842 2616
2843 count = ocount; 2617 count = ocount;
2844 pos = *ppos;
2845 2618
2846 /* We can write back this queue in page reclaim */ 2619 /* We can write back this queue in page reclaim */
2847 current->backing_dev_info = mapping->backing_dev_info; 2620 current->backing_dev_info = mapping->backing_dev_info;
2848 written = 0;
2849
2850 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 2621 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
2851 if (err) 2622 if (err)
2852 goto out; 2623 goto out;
@@ -2862,45 +2633,47 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2862 if (err) 2633 if (err)
2863 goto out; 2634 goto out;
2864 2635
2636 iov_iter_init(&from, iov, nr_segs, count, 0);
2637
2865 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 2638 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
2866 if (unlikely(file->f_flags & O_DIRECT)) { 2639 if (unlikely(file->f_flags & O_DIRECT)) {
2867 loff_t endbyte; 2640 loff_t endbyte;
2868 ssize_t written_buffered;
2869 2641
2870 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 2642 written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
2871 ppos, count, ocount); 2643 count, ocount);
2872 if (written < 0 || written == count) 2644 if (written < 0 || written == count)
2873 goto out; 2645 goto out;
2646 iov_iter_advance(&from, written);
2647
2874 /* 2648 /*
2875 * direct-io write to a hole: fall through to buffered I/O 2649 * direct-io write to a hole: fall through to buffered I/O
2876 * for completing the rest of the request. 2650 * for completing the rest of the request.
2877 */ 2651 */
2878 pos += written; 2652 pos += written;
2879 count -= written; 2653 count -= written;
2880 written_buffered = generic_file_buffered_write(iocb, iov, 2654
2881 nr_segs, pos, ppos, count, 2655 status = generic_perform_write(file, &from, pos);
2882 written);
2883 /* 2656 /*
2884 * If generic_file_buffered_write() retuned a synchronous error 2657 * If generic_perform_write() returned a synchronous error
2885 * then we want to return the number of bytes which were 2658 * then we want to return the number of bytes which were
2886 * direct-written, or the error code if that was zero. Note 2659 * direct-written, or the error code if that was zero. Note
2887 * that this differs from normal direct-io semantics, which 2660 * that this differs from normal direct-io semantics, which
2888 * will return -EFOO even if some bytes were written. 2661 * will return -EFOO even if some bytes were written.
2889 */ 2662 */
2890 if (written_buffered < 0) { 2663 if (unlikely(status < 0) && !written) {
2891 err = written_buffered; 2664 err = status;
2892 goto out; 2665 goto out;
2893 } 2666 }
2894 2667 iocb->ki_pos = pos + status;
2895 /* 2668 /*
2896 * We need to ensure that the page cache pages are written to 2669 * We need to ensure that the page cache pages are written to
2897 * disk and invalidated to preserve the expected O_DIRECT 2670 * disk and invalidated to preserve the expected O_DIRECT
2898 * semantics. 2671 * semantics.
2899 */ 2672 */
2900 endbyte = pos + written_buffered - written - 1; 2673 endbyte = pos + status - 1;
2901 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); 2674 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
2902 if (err == 0) { 2675 if (err == 0) {
2903 written = written_buffered; 2676 written += status;
2904 invalidate_mapping_pages(mapping, 2677 invalidate_mapping_pages(mapping,
2905 pos >> PAGE_CACHE_SHIFT, 2678 pos >> PAGE_CACHE_SHIFT,
2906 endbyte >> PAGE_CACHE_SHIFT); 2679 endbyte >> PAGE_CACHE_SHIFT);
@@ -2911,8 +2684,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2911 */ 2684 */
2912 } 2685 }
2913 } else { 2686 } else {
2914 written = generic_file_buffered_write(iocb, iov, nr_segs, 2687 written = generic_perform_write(file, &from, pos);
2915 pos, ppos, count, written); 2688 if (likely(written >= 0))
2689 iocb->ki_pos = pos + written;
2916 } 2690 }
2917out: 2691out:
2918 current->backing_dev_info = NULL; 2692 current->backing_dev_info = NULL;
@@ -2941,7 +2715,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2941 BUG_ON(iocb->ki_pos != pos); 2715 BUG_ON(iocb->ki_pos != pos);
2942 2716
2943 mutex_lock(&inode->i_mutex); 2717 mutex_lock(&inode->i_mutex);
2944 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 2718 ret = __generic_file_aio_write(iocb, iov, nr_segs);
2945 mutex_unlock(&inode->i_mutex); 2719 mutex_unlock(&inode->i_mutex);
2946 2720
2947 if (ret > 0) { 2721 if (ret > 0) {
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
new file mode 100644
index 000000000000..10e46cd721de
--- /dev/null
+++ b/mm/iov_iter.c
@@ -0,0 +1,224 @@
1#include <linux/export.h>
2#include <linux/uio.h>
3#include <linux/pagemap.h>
4
5size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
6 struct iov_iter *i)
7{
8 size_t skip, copy, left, wanted;
9 const struct iovec *iov;
10 char __user *buf;
11 void *kaddr, *from;
12
13 if (unlikely(bytes > i->count))
14 bytes = i->count;
15
16 if (unlikely(!bytes))
17 return 0;
18
19 wanted = bytes;
20 iov = i->iov;
21 skip = i->iov_offset;
22 buf = iov->iov_base + skip;
23 copy = min(bytes, iov->iov_len - skip);
24
25 if (!fault_in_pages_writeable(buf, copy)) {
26 kaddr = kmap_atomic(page);
27 from = kaddr + offset;
28
29 /* first chunk, usually the only one */
30 left = __copy_to_user_inatomic(buf, from, copy);
31 copy -= left;
32 skip += copy;
33 from += copy;
34 bytes -= copy;
35
36 while (unlikely(!left && bytes)) {
37 iov++;
38 buf = iov->iov_base;
39 copy = min(bytes, iov->iov_len);
40 left = __copy_to_user_inatomic(buf, from, copy);
41 copy -= left;
42 skip = copy;
43 from += copy;
44 bytes -= copy;
45 }
46 if (likely(!bytes)) {
47 kunmap_atomic(kaddr);
48 goto done;
49 }
50 offset = from - kaddr;
51 buf += copy;
52 kunmap_atomic(kaddr);
53 copy = min(bytes, iov->iov_len - skip);
54 }
55 /* Too bad - revert to non-atomic kmap */
56 kaddr = kmap(page);
57 from = kaddr + offset;
58 left = __copy_to_user(buf, from, copy);
59 copy -= left;
60 skip += copy;
61 from += copy;
62 bytes -= copy;
63 while (unlikely(!left && bytes)) {
64 iov++;
65 buf = iov->iov_base;
66 copy = min(bytes, iov->iov_len);
67 left = __copy_to_user(buf, from, copy);
68 copy -= left;
69 skip = copy;
70 from += copy;
71 bytes -= copy;
72 }
73 kunmap(page);
74done:
75 i->count -= wanted - bytes;
76 i->nr_segs -= iov - i->iov;
77 i->iov = iov;
78 i->iov_offset = skip;
79 return wanted - bytes;
80}
81EXPORT_SYMBOL(copy_page_to_iter);
82
83static size_t __iovec_copy_from_user_inatomic(char *vaddr,
84 const struct iovec *iov, size_t base, size_t bytes)
85{
86 size_t copied = 0, left = 0;
87
88 while (bytes) {
89 char __user *buf = iov->iov_base + base;
90 int copy = min(bytes, iov->iov_len - base);
91
92 base = 0;
93 left = __copy_from_user_inatomic(vaddr, buf, copy);
94 copied += copy;
95 bytes -= copy;
96 vaddr += copy;
97 iov++;
98
99 if (unlikely(left))
100 break;
101 }
102 return copied - left;
103}
104
105/*
106 * Copy as much as we can into the page and return the number of bytes which
107 * were successfully copied. If a fault is encountered then return the number of
108 * bytes which were copied.
109 */
110size_t iov_iter_copy_from_user_atomic(struct page *page,
111 struct iov_iter *i, unsigned long offset, size_t bytes)
112{
113 char *kaddr;
114 size_t copied;
115
116 kaddr = kmap_atomic(page);
117 if (likely(i->nr_segs == 1)) {
118 int left;
119 char __user *buf = i->iov->iov_base + i->iov_offset;
120 left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
121 copied = bytes - left;
122 } else {
123 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
124 i->iov, i->iov_offset, bytes);
125 }
126 kunmap_atomic(kaddr);
127
128 return copied;
129}
130EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
131
132/*
133 * This has the same sideeffects and return value as
134 * iov_iter_copy_from_user_atomic().
135 * The difference is that it attempts to resolve faults.
136 * Page must not be locked.
137 */
138size_t iov_iter_copy_from_user(struct page *page,
139 struct iov_iter *i, unsigned long offset, size_t bytes)
140{
141 char *kaddr;
142 size_t copied;
143
144 kaddr = kmap(page);
145 if (likely(i->nr_segs == 1)) {
146 int left;
147 char __user *buf = i->iov->iov_base + i->iov_offset;
148 left = __copy_from_user(kaddr + offset, buf, bytes);
149 copied = bytes - left;
150 } else {
151 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
152 i->iov, i->iov_offset, bytes);
153 }
154 kunmap(page);
155 return copied;
156}
157EXPORT_SYMBOL(iov_iter_copy_from_user);
158
159void iov_iter_advance(struct iov_iter *i, size_t bytes)
160{
161 BUG_ON(i->count < bytes);
162
163 if (likely(i->nr_segs == 1)) {
164 i->iov_offset += bytes;
165 i->count -= bytes;
166 } else {
167 const struct iovec *iov = i->iov;
168 size_t base = i->iov_offset;
169 unsigned long nr_segs = i->nr_segs;
170
171 /*
172 * The !iov->iov_len check ensures we skip over unlikely
173 * zero-length segments (without overruning the iovec).
174 */
175 while (bytes || unlikely(i->count && !iov->iov_len)) {
176 int copy;
177
178 copy = min(bytes, iov->iov_len - base);
179 BUG_ON(!i->count || i->count < copy);
180 i->count -= copy;
181 bytes -= copy;
182 base += copy;
183 if (iov->iov_len == base) {
184 iov++;
185 nr_segs--;
186 base = 0;
187 }
188 }
189 i->iov = iov;
190 i->iov_offset = base;
191 i->nr_segs = nr_segs;
192 }
193}
194EXPORT_SYMBOL(iov_iter_advance);
195
196/*
197 * Fault in the first iovec of the given iov_iter, to a maximum length
198 * of bytes. Returns 0 on success, or non-zero if the memory could not be
199 * accessed (ie. because it is an invalid address).
200 *
201 * writev-intensive code may want this to prefault several iovecs -- that
202 * would be possible (callers must not rely on the fact that _only_ the
203 * first iovec will be faulted with the current implementation).
204 */
205int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
206{
207 char __user *buf = i->iov->iov_base + i->iov_offset;
208 bytes = min(bytes, i->iov->iov_len - i->iov_offset);
209 return fault_in_pages_readable(buf, bytes);
210}
211EXPORT_SYMBOL(iov_iter_fault_in_readable);
212
213/*
214 * Return the count of just the current iov_iter segment.
215 */
216size_t iov_iter_single_seg_count(const struct iov_iter *i)
217{
218 const struct iovec *iov = i->iov;
219 if (i->nr_segs == 1)
220 return i->count;
221 else
222 return min(i->count, iov->iov_len - i->iov_offset);
223}
224EXPORT_SYMBOL(iov_iter_single_seg_count);
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index cb79065c19e5..8505c9262b35 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -23,129 +23,44 @@
23 23
24/** 24/**
25 * process_vm_rw_pages - read/write pages from task specified 25 * process_vm_rw_pages - read/write pages from task specified
26 * @task: task to read/write from 26 * @pages: array of pointers to pages we want to copy
27 * @mm: mm for task
28 * @process_pages: struct pages area that can store at least
29 * nr_pages_to_copy struct page pointers
30 * @pa: address of page in task to start copying from/to
31 * @start_offset: offset in page to start copying from/to 27 * @start_offset: offset in page to start copying from/to
32 * @len: number of bytes to copy 28 * @len: number of bytes to copy
33 * @lvec: iovec array specifying where to copy to/from 29 * @iter: where to copy to/from locally
34 * @lvec_cnt: number of elements in iovec array
35 * @lvec_current: index in iovec array we are up to
36 * @lvec_offset: offset in bytes from current iovec iov_base we are up to
37 * @vm_write: 0 means copy from, 1 means copy to 30 * @vm_write: 0 means copy from, 1 means copy to
38 * @nr_pages_to_copy: number of pages to copy
39 * @bytes_copied: returns number of bytes successfully copied
40 * Returns 0 on success, error code otherwise 31 * Returns 0 on success, error code otherwise
41 */ 32 */
42static int process_vm_rw_pages(struct task_struct *task, 33static int process_vm_rw_pages(struct page **pages,
43 struct mm_struct *mm, 34 unsigned offset,
44 struct page **process_pages, 35 size_t len,
45 unsigned long pa, 36 struct iov_iter *iter,
46 unsigned long start_offset, 37 int vm_write)
47 unsigned long len,
48 const struct iovec *lvec,
49 unsigned long lvec_cnt,
50 unsigned long *lvec_current,
51 size_t *lvec_offset,
52 int vm_write,
53 unsigned int nr_pages_to_copy,
54 ssize_t *bytes_copied)
55{ 38{
56 int pages_pinned;
57 void *target_kaddr;
58 int pgs_copied = 0;
59 int j;
60 int ret;
61 ssize_t bytes_to_copy;
62 ssize_t rc = 0;
63
64 *bytes_copied = 0;
65
66 /* Get the pages we're interested in */
67 down_read(&mm->mmap_sem);
68 pages_pinned = get_user_pages(task, mm, pa,
69 nr_pages_to_copy,
70 vm_write, 0, process_pages, NULL);
71 up_read(&mm->mmap_sem);
72
73 if (pages_pinned != nr_pages_to_copy) {
74 rc = -EFAULT;
75 goto end;
76 }
77
78 /* Do the copy for each page */ 39 /* Do the copy for each page */
79 for (pgs_copied = 0; 40 while (len && iov_iter_count(iter)) {
80 (pgs_copied < nr_pages_to_copy) && (*lvec_current < lvec_cnt); 41 struct page *page = *pages++;
81 pgs_copied++) { 42 size_t copy = PAGE_SIZE - offset;
82 /* Make sure we have a non zero length iovec */ 43 size_t copied;
83 while (*lvec_current < lvec_cnt 44
84 && lvec[*lvec_current].iov_len == 0) 45 if (copy > len)
85 (*lvec_current)++; 46 copy = len;
86 if (*lvec_current == lvec_cnt) 47
87 break; 48 if (vm_write) {
88 49 if (copy > iov_iter_count(iter))
89 /* 50 copy = iov_iter_count(iter);
90 * Will copy smallest of: 51 copied = iov_iter_copy_from_user(page, iter,
91 * - bytes remaining in page 52 offset, copy);
92 * - bytes remaining in destination iovec 53 iov_iter_advance(iter, copied);
93 */ 54 set_page_dirty_lock(page);
94 bytes_to_copy = min_t(ssize_t, PAGE_SIZE - start_offset,
95 len - *bytes_copied);
96 bytes_to_copy = min_t(ssize_t, bytes_to_copy,
97 lvec[*lvec_current].iov_len
98 - *lvec_offset);
99
100 target_kaddr = kmap(process_pages[pgs_copied]) + start_offset;
101
102 if (vm_write)
103 ret = copy_from_user(target_kaddr,
104 lvec[*lvec_current].iov_base
105 + *lvec_offset,
106 bytes_to_copy);
107 else
108 ret = copy_to_user(lvec[*lvec_current].iov_base
109 + *lvec_offset,
110 target_kaddr, bytes_to_copy);
111 kunmap(process_pages[pgs_copied]);
112 if (ret) {
113 *bytes_copied += bytes_to_copy - ret;
114 pgs_copied++;
115 rc = -EFAULT;
116 goto end;
117 }
118 *bytes_copied += bytes_to_copy;
119 *lvec_offset += bytes_to_copy;
120 if (*lvec_offset == lvec[*lvec_current].iov_len) {
121 /*
122 * Need to copy remaining part of page into the
123 * next iovec if there are any bytes left in page
124 */
125 (*lvec_current)++;
126 *lvec_offset = 0;
127 start_offset = (start_offset + bytes_to_copy)
128 % PAGE_SIZE;
129 if (start_offset)
130 pgs_copied--;
131 } else { 55 } else {
132 start_offset = 0; 56 copied = copy_page_to_iter(page, offset, copy, iter);
133 }
134 }
135
136end:
137 if (vm_write) {
138 for (j = 0; j < pages_pinned; j++) {
139 if (j < pgs_copied)
140 set_page_dirty_lock(process_pages[j]);
141 put_page(process_pages[j]);
142 } 57 }
143 } else { 58 len -= copied;
144 for (j = 0; j < pages_pinned; j++) 59 if (copied < copy && iov_iter_count(iter))
145 put_page(process_pages[j]); 60 return -EFAULT;
61 offset = 0;
146 } 62 }
147 63 return 0;
148 return rc;
149} 64}
150 65
151/* Maximum number of pages kmalloc'd to hold struct page's during copy */ 66/* Maximum number of pages kmalloc'd to hold struct page's during copy */
@@ -155,67 +70,60 @@ end:
155 * process_vm_rw_single_vec - read/write pages from task specified 70 * process_vm_rw_single_vec - read/write pages from task specified
156 * @addr: start memory address of target process 71 * @addr: start memory address of target process
157 * @len: size of area to copy to/from 72 * @len: size of area to copy to/from
158 * @lvec: iovec array specifying where to copy to/from locally 73 * @iter: where to copy to/from locally
159 * @lvec_cnt: number of elements in iovec array
160 * @lvec_current: index in iovec array we are up to
161 * @lvec_offset: offset in bytes from current iovec iov_base we are up to
162 * @process_pages: struct pages area that can store at least 74 * @process_pages: struct pages area that can store at least
163 * nr_pages_to_copy struct page pointers 75 * nr_pages_to_copy struct page pointers
164 * @mm: mm for task 76 * @mm: mm for task
165 * @task: task to read/write from 77 * @task: task to read/write from
166 * @vm_write: 0 means copy from, 1 means copy to 78 * @vm_write: 0 means copy from, 1 means copy to
167 * @bytes_copied: returns number of bytes successfully copied
168 * Returns 0 on success or on failure error code 79 * Returns 0 on success or on failure error code
169 */ 80 */
170static int process_vm_rw_single_vec(unsigned long addr, 81static int process_vm_rw_single_vec(unsigned long addr,
171 unsigned long len, 82 unsigned long len,
172 const struct iovec *lvec, 83 struct iov_iter *iter,
173 unsigned long lvec_cnt,
174 unsigned long *lvec_current,
175 size_t *lvec_offset,
176 struct page **process_pages, 84 struct page **process_pages,
177 struct mm_struct *mm, 85 struct mm_struct *mm,
178 struct task_struct *task, 86 struct task_struct *task,
179 int vm_write, 87 int vm_write)
180 ssize_t *bytes_copied)
181{ 88{
182 unsigned long pa = addr & PAGE_MASK; 89 unsigned long pa = addr & PAGE_MASK;
183 unsigned long start_offset = addr - pa; 90 unsigned long start_offset = addr - pa;
184 unsigned long nr_pages; 91 unsigned long nr_pages;
185 ssize_t bytes_copied_loop;
186 ssize_t rc = 0; 92 ssize_t rc = 0;
187 unsigned long nr_pages_copied = 0;
188 unsigned long nr_pages_to_copy;
189 unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES 93 unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
190 / sizeof(struct pages *); 94 / sizeof(struct pages *);
191 95
192 *bytes_copied = 0;
193
194 /* Work out address and page range required */ 96 /* Work out address and page range required */
195 if (len == 0) 97 if (len == 0)
196 return 0; 98 return 0;
197 nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; 99 nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
198 100
199 while ((nr_pages_copied < nr_pages) && (*lvec_current < lvec_cnt)) { 101 while (!rc && nr_pages && iov_iter_count(iter)) {
200 nr_pages_to_copy = min(nr_pages - nr_pages_copied, 102 int pages = min(nr_pages, max_pages_per_loop);
201 max_pages_per_loop); 103 size_t bytes;
202 104
203 rc = process_vm_rw_pages(task, mm, process_pages, pa, 105 /* Get the pages we're interested in */
204 start_offset, len, 106 down_read(&mm->mmap_sem);
205 lvec, lvec_cnt, 107 pages = get_user_pages(task, mm, pa, pages,
206 lvec_current, lvec_offset, 108 vm_write, 0, process_pages, NULL);
207 vm_write, nr_pages_to_copy, 109 up_read(&mm->mmap_sem);
208 &bytes_copied_loop);
209 start_offset = 0;
210 *bytes_copied += bytes_copied_loop;
211 110
212 if (rc < 0) { 111 if (pages <= 0)
213 return rc; 112 return -EFAULT;
214 } else { 113
215 len -= bytes_copied_loop; 114 bytes = pages * PAGE_SIZE - start_offset;
216 nr_pages_copied += nr_pages_to_copy; 115 if (bytes > len)
217 pa += nr_pages_to_copy * PAGE_SIZE; 116 bytes = len;
218 } 117
118 rc = process_vm_rw_pages(process_pages,
119 start_offset, bytes, iter,
120 vm_write);
121 len -= bytes;
122 start_offset = 0;
123 nr_pages -= pages;
124 pa += pages * PAGE_SIZE;
125 while (pages)
126 put_page(process_pages[--pages]);
219 } 127 }
220 128
221 return rc; 129 return rc;
@@ -228,8 +136,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
228/** 136/**
229 * process_vm_rw_core - core of reading/writing pages from task specified 137 * process_vm_rw_core - core of reading/writing pages from task specified
230 * @pid: PID of process to read/write from/to 138 * @pid: PID of process to read/write from/to
231 * @lvec: iovec array specifying where to copy to/from locally 139 * @iter: where to copy to/from locally
232 * @liovcnt: size of lvec array
233 * @rvec: iovec array specifying where to copy to/from in the other process 140 * @rvec: iovec array specifying where to copy to/from in the other process
234 * @riovcnt: size of rvec array 141 * @riovcnt: size of rvec array
235 * @flags: currently unused 142 * @flags: currently unused
@@ -238,8 +145,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
238 * return less bytes than expected if an error occurs during the copying 145 * return less bytes than expected if an error occurs during the copying
239 * process. 146 * process.
240 */ 147 */
241static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec, 148static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter,
242 unsigned long liovcnt,
243 const struct iovec *rvec, 149 const struct iovec *rvec,
244 unsigned long riovcnt, 150 unsigned long riovcnt,
245 unsigned long flags, int vm_write) 151 unsigned long flags, int vm_write)
@@ -250,13 +156,10 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
250 struct mm_struct *mm; 156 struct mm_struct *mm;
251 unsigned long i; 157 unsigned long i;
252 ssize_t rc = 0; 158 ssize_t rc = 0;
253 ssize_t bytes_copied_loop;
254 ssize_t bytes_copied = 0;
255 unsigned long nr_pages = 0; 159 unsigned long nr_pages = 0;
256 unsigned long nr_pages_iov; 160 unsigned long nr_pages_iov;
257 unsigned long iov_l_curr_idx = 0;
258 size_t iov_l_curr_offset = 0;
259 ssize_t iov_len; 161 ssize_t iov_len;
162 size_t total_len = iov_iter_count(iter);
260 163
261 /* 164 /*
262 * Work out how many pages of struct pages we're going to need 165 * Work out how many pages of struct pages we're going to need
@@ -310,24 +213,20 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
310 goto put_task_struct; 213 goto put_task_struct;
311 } 214 }
312 215
313 for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) { 216 for (i = 0; i < riovcnt && iov_iter_count(iter) && !rc; i++)
314 rc = process_vm_rw_single_vec( 217 rc = process_vm_rw_single_vec(
315 (unsigned long)rvec[i].iov_base, rvec[i].iov_len, 218 (unsigned long)rvec[i].iov_base, rvec[i].iov_len,
316 lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset, 219 iter, process_pages, mm, task, vm_write);
317 process_pages, mm, task, vm_write, &bytes_copied_loop); 220
318 bytes_copied += bytes_copied_loop; 221 /* copied = space before - space after */
319 if (rc != 0) { 222 total_len -= iov_iter_count(iter);
320 /* If we have managed to copy any data at all then 223
321 we return the number of bytes copied. Otherwise 224 /* If we have managed to copy any data at all then
322 we return the error code */ 225 we return the number of bytes copied. Otherwise
323 if (bytes_copied) 226 we return the error code */
324 rc = bytes_copied; 227 if (total_len)
325 goto put_mm; 228 rc = total_len;
326 }
327 }
328 229
329 rc = bytes_copied;
330put_mm:
331 mmput(mm); 230 mmput(mm);
332 231
333put_task_struct: 232put_task_struct:
@@ -363,6 +262,7 @@ static ssize_t process_vm_rw(pid_t pid,
363 struct iovec iovstack_r[UIO_FASTIOV]; 262 struct iovec iovstack_r[UIO_FASTIOV];
364 struct iovec *iov_l = iovstack_l; 263 struct iovec *iov_l = iovstack_l;
365 struct iovec *iov_r = iovstack_r; 264 struct iovec *iov_r = iovstack_r;
265 struct iov_iter iter;
366 ssize_t rc; 266 ssize_t rc;
367 267
368 if (flags != 0) 268 if (flags != 0)
@@ -378,13 +278,14 @@ static ssize_t process_vm_rw(pid_t pid,
378 if (rc <= 0) 278 if (rc <= 0)
379 goto free_iovecs; 279 goto free_iovecs;
380 280
281 iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
282
381 rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, 283 rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
382 iovstack_r, &iov_r); 284 iovstack_r, &iov_r);
383 if (rc <= 0) 285 if (rc <= 0)
384 goto free_iovecs; 286 goto free_iovecs;
385 287
386 rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags, 288 rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
387 vm_write);
388 289
389free_iovecs: 290free_iovecs:
390 if (iov_r != iovstack_r) 291 if (iov_r != iovstack_r)
@@ -424,6 +325,7 @@ compat_process_vm_rw(compat_pid_t pid,
424 struct iovec iovstack_r[UIO_FASTIOV]; 325 struct iovec iovstack_r[UIO_FASTIOV];
425 struct iovec *iov_l = iovstack_l; 326 struct iovec *iov_l = iovstack_l;
426 struct iovec *iov_r = iovstack_r; 327 struct iovec *iov_r = iovstack_r;
328 struct iov_iter iter;
427 ssize_t rc = -EFAULT; 329 ssize_t rc = -EFAULT;
428 330
429 if (flags != 0) 331 if (flags != 0)
@@ -439,14 +341,14 @@ compat_process_vm_rw(compat_pid_t pid,
439 &iov_l); 341 &iov_l);
440 if (rc <= 0) 342 if (rc <= 0)
441 goto free_iovecs; 343 goto free_iovecs;
344 iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
442 rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, 345 rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
443 UIO_FASTIOV, iovstack_r, 346 UIO_FASTIOV, iovstack_r,
444 &iov_r); 347 &iov_r);
445 if (rc <= 0) 348 if (rc <= 0)
446 goto free_iovecs; 349 goto free_iovecs;
447 350
448 rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags, 351 rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
449 vm_write);
450 352
451free_iovecs: 353free_iovecs:
452 if (iov_r != iovstack_r) 354 if (iov_r != iovstack_r)
diff --git a/mm/shmem.c b/mm/shmem.c
index 70273f8df586..8f1a95406bae 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1402,13 +1402,25 @@ shmem_write_end(struct file *file, struct address_space *mapping,
1402 return copied; 1402 return copied;
1403} 1403}
1404 1404
1405static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) 1405static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1406 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1406{ 1407{
1407 struct inode *inode = file_inode(filp); 1408 struct file *file = iocb->ki_filp;
1409 struct inode *inode = file_inode(file);
1408 struct address_space *mapping = inode->i_mapping; 1410 struct address_space *mapping = inode->i_mapping;
1409 pgoff_t index; 1411 pgoff_t index;
1410 unsigned long offset; 1412 unsigned long offset;
1411 enum sgp_type sgp = SGP_READ; 1413 enum sgp_type sgp = SGP_READ;
1414 int error;
1415 ssize_t retval;
1416 size_t count;
1417 loff_t *ppos = &iocb->ki_pos;
1418 struct iov_iter iter;
1419
1420 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1421 if (retval)
1422 return retval;
1423 iov_iter_init(&iter, iov, nr_segs, count, 0);
1412 1424
1413 /* 1425 /*
1414 * Might this read be for a stacking filesystem? Then when reading 1426 * Might this read be for a stacking filesystem? Then when reading
@@ -1436,10 +1448,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
1436 break; 1448 break;
1437 } 1449 }
1438 1450
1439 desc->error = shmem_getpage(inode, index, &page, sgp, NULL); 1451 error = shmem_getpage(inode, index, &page, sgp, NULL);
1440 if (desc->error) { 1452 if (error) {
1441 if (desc->error == -EINVAL) 1453 if (error == -EINVAL)
1442 desc->error = 0; 1454 error = 0;
1443 break; 1455 break;
1444 } 1456 }
1445 if (page) 1457 if (page)
@@ -1483,61 +1495,26 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
1483 /* 1495 /*
1484 * Ok, we have the page, and it's up-to-date, so 1496 * Ok, we have the page, and it's up-to-date, so
1485 * now we can copy it to user space... 1497 * now we can copy it to user space...
1486 *
1487 * The actor routine returns how many bytes were actually used..
1488 * NOTE! This may not be the same as how much of a user buffer
1489 * we filled up (we may be padding etc), so we can only update
1490 * "pos" here (the actor routine has to update the user buffer
1491 * pointers and the remaining count).
1492 */ 1498 */
1493 ret = actor(desc, page, offset, nr); 1499 ret = copy_page_to_iter(page, offset, nr, &iter);
1500 retval += ret;
1494 offset += ret; 1501 offset += ret;
1495 index += offset >> PAGE_CACHE_SHIFT; 1502 index += offset >> PAGE_CACHE_SHIFT;
1496 offset &= ~PAGE_CACHE_MASK; 1503 offset &= ~PAGE_CACHE_MASK;
1497 1504
1498 page_cache_release(page); 1505 page_cache_release(page);
1499 if (ret != nr || !desc->count) 1506 if (!iov_iter_count(&iter))
1500 break; 1507 break;
1501 1508 if (ret < nr) {
1509 error = -EFAULT;
1510 break;
1511 }
1502 cond_resched(); 1512 cond_resched();
1503 } 1513 }
1504 1514
1505 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; 1515 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1506 file_accessed(filp); 1516 file_accessed(file);
1507} 1517 return retval ? retval : error;
1508
1509static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1510 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1511{
1512 struct file *filp = iocb->ki_filp;
1513 ssize_t retval;
1514 unsigned long seg;
1515 size_t count;
1516 loff_t *ppos = &iocb->ki_pos;
1517
1518 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1519 if (retval)
1520 return retval;
1521
1522 for (seg = 0; seg < nr_segs; seg++) {
1523 read_descriptor_t desc;
1524
1525 desc.written = 0;
1526 desc.arg.buf = iov[seg].iov_base;
1527 desc.count = iov[seg].iov_len;
1528 if (desc.count == 0)
1529 continue;
1530 desc.error = 0;
1531 do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1532 retval += desc.written;
1533 if (desc.error) {
1534 retval = retval ?: desc.error;
1535 break;
1536 }
1537 if (desc.count > 0)
1538 break;
1539 }
1540 return retval;
1541} 1518}
1542 1519
1543static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, 1520static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
@@ -1576,7 +1553,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1576 index = *ppos >> PAGE_CACHE_SHIFT; 1553 index = *ppos >> PAGE_CACHE_SHIFT;
1577 loff = *ppos & ~PAGE_CACHE_MASK; 1554 loff = *ppos & ~PAGE_CACHE_MASK;
1578 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1555 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1579 nr_pages = min(req_pages, pipe->buffers); 1556 nr_pages = min(req_pages, spd.nr_pages_max);
1580 1557
1581 spd.nr_pages = find_get_pages_contig(mapping, index, 1558 spd.nr_pages = find_get_pages_contig(mapping, index,
1582 nr_pages, spd.pages); 1559 nr_pages, spd.pages);