diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-12 13:30:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-12 13:30:18 -0400 |
commit | 16b9057804c02e2d351e9c8f606e909b43cbd9e7 (patch) | |
tree | a3ac6e1d9d57a8abf4267e5ead3f2de1309335eb /mm/filemap.c | |
parent | 5c02c392cd2320e8d612376d6b72b6548a680923 (diff) | |
parent | c2338f2dc7c1e9f6202f370c64ffd7f44f3d4b51 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro:
"This is the bunch that sat in -next + lock_parent() fix. This is the
minimal set; there's more pending stuff.
In particular, I really hope to get acct.c fixes merged this cycle -
we need that to deal sanely with delayed-mntput stuff. In the next
pile, hopefully - that series is fairly short and localized
(kernel/acct.c, fs/super.c and fs/namespace.c). In this pile: more
iov_iter work. Most of prereqs for ->splice_write with sane locking
order are there and Kent's dio rewrite would also fit nicely on top of
this pile"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (70 commits)
lock_parent: don't step on stale ->d_parent of all-but-freed one
kill generic_file_splice_write()
ceph: switch to iter_file_splice_write()
shmem: switch to iter_file_splice_write()
nfs: switch to iter_file_splice_write()
fs/splice.c: remove unneeded exports
ocfs2: switch to iter_file_splice_write()
->splice_write() via ->write_iter()
bio_vec-backed iov_iter
optimize copy_page_{to,from}_iter()
bury generic_file_aio_{read,write}
lustre: get rid of messing with iovecs
ceph: switch to ->write_iter()
ceph_sync_direct_write: stop poking into iov_iter guts
ceph_sync_read: stop poking into iov_iter guts
new helper: copy_page_from_iter()
fuse: switch to ->write_iter()
btrfs: switch to ->write_iter()
ocfs2: switch to ->write_iter()
xfs: switch to ->write_iter()
...
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 158 |
1 files changed, 42 insertions, 116 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 7fadf1c62838..dafb06f70a09 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1665,96 +1665,42 @@ out: | |||
1665 | return written ? written : error; | 1665 | return written ? written : error; |
1666 | } | 1666 | } |
1667 | 1667 | ||
1668 | /* | ||
1669 | * Performs necessary checks before doing a write | ||
1670 | * @iov: io vector request | ||
1671 | * @nr_segs: number of segments in the iovec | ||
1672 | * @count: number of bytes to write | ||
1673 | * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE | ||
1674 | * | ||
1675 | * Adjust number of segments and amount of bytes to write (nr_segs should be | ||
1676 | * properly initialized first). Returns appropriate error code that caller | ||
1677 | * should return or zero in case that write should be allowed. | ||
1678 | */ | ||
1679 | int generic_segment_checks(const struct iovec *iov, | ||
1680 | unsigned long *nr_segs, size_t *count, int access_flags) | ||
1681 | { | ||
1682 | unsigned long seg; | ||
1683 | size_t cnt = 0; | ||
1684 | for (seg = 0; seg < *nr_segs; seg++) { | ||
1685 | const struct iovec *iv = &iov[seg]; | ||
1686 | |||
1687 | /* | ||
1688 | * If any segment has a negative length, or the cumulative | ||
1689 | * length ever wraps negative then return -EINVAL. | ||
1690 | */ | ||
1691 | cnt += iv->iov_len; | ||
1692 | if (unlikely((ssize_t)(cnt|iv->iov_len) < 0)) | ||
1693 | return -EINVAL; | ||
1694 | if (access_ok(access_flags, iv->iov_base, iv->iov_len)) | ||
1695 | continue; | ||
1696 | if (seg == 0) | ||
1697 | return -EFAULT; | ||
1698 | *nr_segs = seg; | ||
1699 | cnt -= iv->iov_len; /* This segment is no good */ | ||
1700 | break; | ||
1701 | } | ||
1702 | *count = cnt; | ||
1703 | return 0; | ||
1704 | } | ||
1705 | EXPORT_SYMBOL(generic_segment_checks); | ||
1706 | |||
1707 | /** | 1668 | /** |
1708 | * generic_file_aio_read - generic filesystem read routine | 1669 | * generic_file_read_iter - generic filesystem read routine |
1709 | * @iocb: kernel I/O control block | 1670 | * @iocb: kernel I/O control block |
1710 | * @iov: io vector request | 1671 | * @iter: destination for the data read |
1711 | * @nr_segs: number of segments in the iovec | ||
1712 | * @pos: current file position | ||
1713 | * | 1672 | * |
1714 | * This is the "read()" routine for all filesystems | 1673 | * This is the "read_iter()" routine for all filesystems |
1715 | * that can use the page cache directly. | 1674 | * that can use the page cache directly. |
1716 | */ | 1675 | */ |
1717 | ssize_t | 1676 | ssize_t |
1718 | generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | 1677 | generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) |
1719 | unsigned long nr_segs, loff_t pos) | ||
1720 | { | 1678 | { |
1721 | struct file *filp = iocb->ki_filp; | 1679 | struct file *file = iocb->ki_filp; |
1722 | ssize_t retval; | 1680 | ssize_t retval = 0; |
1723 | size_t count; | ||
1724 | loff_t *ppos = &iocb->ki_pos; | 1681 | loff_t *ppos = &iocb->ki_pos; |
1725 | struct iov_iter i; | 1682 | loff_t pos = *ppos; |
1726 | |||
1727 | count = 0; | ||
1728 | retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); | ||
1729 | if (retval) | ||
1730 | return retval; | ||
1731 | iov_iter_init(&i, iov, nr_segs, count, 0); | ||
1732 | 1683 | ||
1733 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 1684 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ |
1734 | if (filp->f_flags & O_DIRECT) { | 1685 | if (file->f_flags & O_DIRECT) { |
1686 | struct address_space *mapping = file->f_mapping; | ||
1687 | struct inode *inode = mapping->host; | ||
1688 | size_t count = iov_iter_count(iter); | ||
1735 | loff_t size; | 1689 | loff_t size; |
1736 | struct address_space *mapping; | ||
1737 | struct inode *inode; | ||
1738 | 1690 | ||
1739 | mapping = filp->f_mapping; | ||
1740 | inode = mapping->host; | ||
1741 | if (!count) | 1691 | if (!count) |
1742 | goto out; /* skip atime */ | 1692 | goto out; /* skip atime */ |
1743 | size = i_size_read(inode); | 1693 | size = i_size_read(inode); |
1744 | retval = filemap_write_and_wait_range(mapping, pos, | 1694 | retval = filemap_write_and_wait_range(mapping, pos, |
1745 | pos + iov_length(iov, nr_segs) - 1); | 1695 | pos + count - 1); |
1746 | if (!retval) { | 1696 | if (!retval) { |
1747 | retval = mapping->a_ops->direct_IO(READ, iocb, | 1697 | struct iov_iter data = *iter; |
1748 | iov, pos, nr_segs); | 1698 | retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos); |
1749 | } | 1699 | } |
1700 | |||
1750 | if (retval > 0) { | 1701 | if (retval > 0) { |
1751 | *ppos = pos + retval; | 1702 | *ppos = pos + retval; |
1752 | count -= retval; | 1703 | iov_iter_advance(iter, retval); |
1753 | /* | ||
1754 | * If we did a short DIO read we need to skip the | ||
1755 | * section of the iov that we've already read data into. | ||
1756 | */ | ||
1757 | iov_iter_advance(&i, retval); | ||
1758 | } | 1704 | } |
1759 | 1705 | ||
1760 | /* | 1706 | /* |
@@ -1765,17 +1711,17 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1765 | * and return. Otherwise fallthrough to buffered io for | 1711 | * and return. Otherwise fallthrough to buffered io for |
1766 | * the rest of the read. | 1712 | * the rest of the read. |
1767 | */ | 1713 | */ |
1768 | if (retval < 0 || !count || *ppos >= size) { | 1714 | if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) { |
1769 | file_accessed(filp); | 1715 | file_accessed(file); |
1770 | goto out; | 1716 | goto out; |
1771 | } | 1717 | } |
1772 | } | 1718 | } |
1773 | 1719 | ||
1774 | retval = do_generic_file_read(filp, ppos, &i, retval); | 1720 | retval = do_generic_file_read(file, ppos, iter, retval); |
1775 | out: | 1721 | out: |
1776 | return retval; | 1722 | return retval; |
1777 | } | 1723 | } |
1778 | EXPORT_SYMBOL(generic_file_aio_read); | 1724 | EXPORT_SYMBOL(generic_file_read_iter); |
1779 | 1725 | ||
1780 | #ifdef CONFIG_MMU | 1726 | #ifdef CONFIG_MMU |
1781 | /** | 1727 | /** |
@@ -2386,9 +2332,7 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, | |||
2386 | EXPORT_SYMBOL(pagecache_write_end); | 2332 | EXPORT_SYMBOL(pagecache_write_end); |
2387 | 2333 | ||
2388 | ssize_t | 2334 | ssize_t |
2389 | generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | 2335 | generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) |
2390 | unsigned long *nr_segs, loff_t pos, | ||
2391 | size_t count, size_t ocount) | ||
2392 | { | 2336 | { |
2393 | struct file *file = iocb->ki_filp; | 2337 | struct file *file = iocb->ki_filp; |
2394 | struct address_space *mapping = file->f_mapping; | 2338 | struct address_space *mapping = file->f_mapping; |
@@ -2396,11 +2340,9 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
2396 | ssize_t written; | 2340 | ssize_t written; |
2397 | size_t write_len; | 2341 | size_t write_len; |
2398 | pgoff_t end; | 2342 | pgoff_t end; |
2343 | struct iov_iter data; | ||
2399 | 2344 | ||
2400 | if (count != ocount) | 2345 | write_len = iov_iter_count(from); |
2401 | *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count); | ||
2402 | |||
2403 | write_len = iov_length(iov, *nr_segs); | ||
2404 | end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; | 2346 | end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; |
2405 | 2347 | ||
2406 | written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); | 2348 | written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); |
@@ -2427,7 +2369,8 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
2427 | } | 2369 | } |
2428 | } | 2370 | } |
2429 | 2371 | ||
2430 | written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs); | 2372 | data = *from; |
2373 | written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos); | ||
2431 | 2374 | ||
2432 | /* | 2375 | /* |
2433 | * Finally, try again to invalidate clean pages which might have been | 2376 | * Finally, try again to invalidate clean pages which might have been |
@@ -2444,6 +2387,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
2444 | 2387 | ||
2445 | if (written > 0) { | 2388 | if (written > 0) { |
2446 | pos += written; | 2389 | pos += written; |
2390 | iov_iter_advance(from, written); | ||
2447 | if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) { | 2391 | if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) { |
2448 | i_size_write(inode, pos); | 2392 | i_size_write(inode, pos); |
2449 | mark_inode_dirty(inode); | 2393 | mark_inode_dirty(inode); |
@@ -2568,10 +2512,9 @@ again: | |||
2568 | EXPORT_SYMBOL(generic_perform_write); | 2512 | EXPORT_SYMBOL(generic_perform_write); |
2569 | 2513 | ||
2570 | /** | 2514 | /** |
2571 | * __generic_file_aio_write - write data to a file | 2515 | * __generic_file_write_iter - write data to a file |
2572 | * @iocb: IO state structure (file, offset, etc.) | 2516 | * @iocb: IO state structure (file, offset, etc.) |
2573 | * @iov: vector with data to write | 2517 | * @from: iov_iter with data to write |
2574 | * @nr_segs: number of segments in the vector | ||
2575 | * | 2518 | * |
2576 | * This function does all the work needed for actually writing data to a | 2519 | * This function does all the work needed for actually writing data to a |
2577 | * file. It does all basic checks, removes SUID from the file, updates | 2520 | * file. It does all basic checks, removes SUID from the file, updates |
@@ -2585,26 +2528,16 @@ EXPORT_SYMBOL(generic_perform_write); | |||
2585 | * A caller has to handle it. This is mainly due to the fact that we want to | 2528 | * A caller has to handle it. This is mainly due to the fact that we want to |
2586 | * avoid syncing under i_mutex. | 2529 | * avoid syncing under i_mutex. |
2587 | */ | 2530 | */ |
2588 | ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2531 | ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
2589 | unsigned long nr_segs) | ||
2590 | { | 2532 | { |
2591 | struct file *file = iocb->ki_filp; | 2533 | struct file *file = iocb->ki_filp; |
2592 | struct address_space * mapping = file->f_mapping; | 2534 | struct address_space * mapping = file->f_mapping; |
2593 | size_t ocount; /* original count */ | ||
2594 | size_t count; /* after file limit checks */ | ||
2595 | struct inode *inode = mapping->host; | 2535 | struct inode *inode = mapping->host; |
2596 | loff_t pos = iocb->ki_pos; | 2536 | loff_t pos = iocb->ki_pos; |
2597 | ssize_t written = 0; | 2537 | ssize_t written = 0; |
2598 | ssize_t err; | 2538 | ssize_t err; |
2599 | ssize_t status; | 2539 | ssize_t status; |
2600 | struct iov_iter from; | 2540 | size_t count = iov_iter_count(from); |
2601 | |||
2602 | ocount = 0; | ||
2603 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
2604 | if (err) | ||
2605 | return err; | ||
2606 | |||
2607 | count = ocount; | ||
2608 | 2541 | ||
2609 | /* We can write back this queue in page reclaim */ | 2542 | /* We can write back this queue in page reclaim */ |
2610 | current->backing_dev_info = mapping->backing_dev_info; | 2543 | current->backing_dev_info = mapping->backing_dev_info; |
@@ -2615,6 +2548,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2615 | if (count == 0) | 2548 | if (count == 0) |
2616 | goto out; | 2549 | goto out; |
2617 | 2550 | ||
2551 | iov_iter_truncate(from, count); | ||
2552 | |||
2618 | err = file_remove_suid(file); | 2553 | err = file_remove_suid(file); |
2619 | if (err) | 2554 | if (err) |
2620 | goto out; | 2555 | goto out; |
@@ -2623,17 +2558,13 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2623 | if (err) | 2558 | if (err) |
2624 | goto out; | 2559 | goto out; |
2625 | 2560 | ||
2626 | iov_iter_init(&from, iov, nr_segs, count, 0); | ||
2627 | |||
2628 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 2561 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ |
2629 | if (unlikely(file->f_flags & O_DIRECT)) { | 2562 | if (unlikely(file->f_flags & O_DIRECT)) { |
2630 | loff_t endbyte; | 2563 | loff_t endbyte; |
2631 | 2564 | ||
2632 | written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos, | 2565 | written = generic_file_direct_write(iocb, from, pos); |
2633 | count, ocount); | ||
2634 | if (written < 0 || written == count) | 2566 | if (written < 0 || written == count) |
2635 | goto out; | 2567 | goto out; |
2636 | iov_iter_advance(&from, written); | ||
2637 | 2568 | ||
2638 | /* | 2569 | /* |
2639 | * direct-io write to a hole: fall through to buffered I/O | 2570 | * direct-io write to a hole: fall through to buffered I/O |
@@ -2642,7 +2573,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2642 | pos += written; | 2573 | pos += written; |
2643 | count -= written; | 2574 | count -= written; |
2644 | 2575 | ||
2645 | status = generic_perform_write(file, &from, pos); | 2576 | status = generic_perform_write(file, from, pos); |
2646 | /* | 2577 | /* |
2647 | * If generic_perform_write() returned a synchronous error | 2578 | * If generic_perform_write() returned a synchronous error |
2648 | * then we want to return the number of bytes which were | 2579 | * then we want to return the number of bytes which were |
@@ -2674,7 +2605,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2674 | */ | 2605 | */ |
2675 | } | 2606 | } |
2676 | } else { | 2607 | } else { |
2677 | written = generic_perform_write(file, &from, pos); | 2608 | written = generic_perform_write(file, from, pos); |
2678 | if (likely(written >= 0)) | 2609 | if (likely(written >= 0)) |
2679 | iocb->ki_pos = pos + written; | 2610 | iocb->ki_pos = pos + written; |
2680 | } | 2611 | } |
@@ -2682,30 +2613,25 @@ out: | |||
2682 | current->backing_dev_info = NULL; | 2613 | current->backing_dev_info = NULL; |
2683 | return written ? written : err; | 2614 | return written ? written : err; |
2684 | } | 2615 | } |
2685 | EXPORT_SYMBOL(__generic_file_aio_write); | 2616 | EXPORT_SYMBOL(__generic_file_write_iter); |
2686 | 2617 | ||
2687 | /** | 2618 | /** |
2688 | * generic_file_aio_write - write data to a file | 2619 | * generic_file_write_iter - write data to a file |
2689 | * @iocb: IO state structure | 2620 | * @iocb: IO state structure |
2690 | * @iov: vector with data to write | 2621 | * @from: iov_iter with data to write |
2691 | * @nr_segs: number of segments in the vector | ||
2692 | * @pos: position in file where to write | ||
2693 | * | 2622 | * |
2694 | * This is a wrapper around __generic_file_aio_write() to be used by most | 2623 | * This is a wrapper around __generic_file_write_iter() to be used by most |
2695 | * filesystems. It takes care of syncing the file in case of O_SYNC file | 2624 | * filesystems. It takes care of syncing the file in case of O_SYNC file |
2696 | * and acquires i_mutex as needed. | 2625 | * and acquires i_mutex as needed. |
2697 | */ | 2626 | */ |
2698 | ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2627 | ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) |
2699 | unsigned long nr_segs, loff_t pos) | ||
2700 | { | 2628 | { |
2701 | struct file *file = iocb->ki_filp; | 2629 | struct file *file = iocb->ki_filp; |
2702 | struct inode *inode = file->f_mapping->host; | 2630 | struct inode *inode = file->f_mapping->host; |
2703 | ssize_t ret; | 2631 | ssize_t ret; |
2704 | 2632 | ||
2705 | BUG_ON(iocb->ki_pos != pos); | ||
2706 | |||
2707 | mutex_lock(&inode->i_mutex); | 2633 | mutex_lock(&inode->i_mutex); |
2708 | ret = __generic_file_aio_write(iocb, iov, nr_segs); | 2634 | ret = __generic_file_write_iter(iocb, from); |
2709 | mutex_unlock(&inode->i_mutex); | 2635 | mutex_unlock(&inode->i_mutex); |
2710 | 2636 | ||
2711 | if (ret > 0) { | 2637 | if (ret > 0) { |
@@ -2717,7 +2643,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2717 | } | 2643 | } |
2718 | return ret; | 2644 | return ret; |
2719 | } | 2645 | } |
2720 | EXPORT_SYMBOL(generic_file_aio_write); | 2646 | EXPORT_SYMBOL(generic_file_write_iter); |
2721 | 2647 | ||
2722 | /** | 2648 | /** |
2723 | * try_to_release_page() - release old fs-specific metadata on a page | 2649 | * try_to_release_page() - release old fs-specific metadata on a page |