author		Nick Piggin <npiggin@suse.de>	2007-10-16 04:24:59 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-10-16 12:42:55 -0400
commit		2f718ffc16c43a435d12919c75dbfad518abd056
tree		55588cb2815d844e9d0b2404cf8ceafe98b5c55d
parent		08291429cfa6258c4cd95d8833beb40f828b194e
mm: buffered write iterator
Add an iterator data structure to operate over an iovec. Add usercopy
operators needed by generic_file_buffered_write, and convert that function
over.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/fs.h	33
-rw-r--r--	mm/filemap.c	144
-rw-r--r--	mm/filemap.h	103
3 files changed, 150 insertions, 130 deletions
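
The whole patch revolves around one small cursor: struct iov_iter keeps the iovec array being written from, the segment currently in progress, the offset inside that segment, and the number of bytes still to go. The snippet below is a userspace mirror of that structure and of the init/advance helpers added further down in mm/filemap.c; it is meant only to illustrate how iov_iter_init() folds an already-completed portion of the write (the written argument, e.g. a partial direct-IO write) into the cursor, and how advancing steps across segment boundaries. It compiles as plain C and is not the kernel code itself.

/*
 * Userspace mirror of the iov_iter cursor and helpers this patch adds.
 * Illustration only: names follow the patch, but this is ordinary C.
 */
#include <assert.h>
#include <stdio.h>
#include <sys/uio.h>			/* struct iovec */

struct iov_iter {
	const struct iovec *iov;	/* current segment */
	unsigned long nr_segs;
	size_t iov_offset;		/* offset into the current segment */
	size_t count;			/* bytes left in the whole iteration */
};

/* Same segment walk as __iov_iter_advance_iov() in the patch. */
static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes)
{
	if (i->nr_segs == 1) {
		i->iov_offset += bytes;
	} else {
		const struct iovec *iov = i->iov;
		size_t base = i->iov_offset;

		while (bytes) {
			size_t copy = iov->iov_len - base;

			if (copy > bytes)
				copy = bytes;
			bytes -= copy;
			base += copy;
			if (iov->iov_len == base) {	/* segment exhausted */
				iov++;
				base = 0;
			}
		}
		i->iov = iov;
		i->iov_offset = base;
	}
}

static void iov_iter_advance(struct iov_iter *i, size_t bytes)
{
	assert(i->count >= bytes);	/* BUG_ON() in the kernel version */
	__iov_iter_advance_iov(i, bytes);
	i->count -= bytes;
}

static size_t iov_iter_count(const struct iov_iter *i)
{
	return i->count;
}

/* written = bytes already consumed, e.g. by a partial direct-IO write. */
static void iov_iter_init(struct iov_iter *i, const struct iovec *iov,
			  unsigned long nr_segs, size_t count, size_t written)
{
	i->iov = iov;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count + written;
	iov_iter_advance(i, written);
}

int main(void)
{
	char a[10], b[20];
	struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
	struct iov_iter i;

	/* 30 bytes total, 4 of which were already written before we start. */
	iov_iter_init(&i, iov, 2, sizeof(a) + sizeof(b), 4);
	printf("remaining=%zu offset=%zu\n", iov_iter_count(&i), i.iov_offset);

	iov_iter_advance(&i, 16);	/* crosses from segment 0 into segment 1 */
	printf("remaining=%zu segment=%ld offset=%zu\n",
	       iov_iter_count(&i), (long)(i.iov - iov), i.iov_offset);
	return 0;
}

Running it shows count dropping by exactly the advanced amount while iov and iov_offset track the position inside the iovec array, which is the bookkeeping generic_file_buffered_write() previously did by hand with cur_iov and iov_offset.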
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 500ffc0e4ac7..86ce27c72554 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -401,6 +401,39 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+struct iov_iter {
+	const struct iovec *iov;
+	unsigned long nr_segs;
+	size_t iov_offset;
+	size_t count;
+};
+
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes);
+size_t iov_iter_copy_from_user(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes);
+void iov_iter_advance(struct iov_iter *i, size_t bytes);
+int iov_iter_fault_in_readable(struct iov_iter *i);
+size_t iov_iter_single_seg_count(struct iov_iter *i);
+
+static inline void iov_iter_init(struct iov_iter *i,
+			const struct iovec *iov, unsigned long nr_segs,
+			size_t count, size_t written)
+{
+	i->iov = iov;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count + written;
+
+	iov_iter_advance(i, written);
+}
+
+static inline size_t iov_iter_count(struct iov_iter *i)
+{
+	return i->count;
+}
+
+
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*readpage)(struct file *, struct page *);
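
Note that two of the declarations above, iov_iter_fault_in_readable() and iov_iter_single_seg_count(), deliberately look at the current segment only, bounding it by min(iov_len - iov_offset, count). Only that first segment is guaranteed to be faulted in before the copy, so the copy helpers report how many bytes they actually managed rather than assuming success. A one-function mirror of that bound, reusing the struct iov_iter mirror from the sketch above (illustration only, not the kernel code):

/* Mirror of iov_iter_single_seg_count(): bytes usable without crossing a
 * segment boundary; the fault-in helper applies the same bound. */
static size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	size_t seg = i->iov->iov_len - i->iov_offset;

	if (i->nr_segs == 1)
		return i->count;
	return seg < i->count ? seg : i->count;
}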
diff --git a/mm/filemap.c b/mm/filemap.c
index 557fd887254f..67a03a0a9aee 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -30,7 +30,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
-#include "filemap.h"
+#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include "internal.h"
 
 /*
@@ -1635,8 +1635,7 @@ int remove_suid(struct dentry *dentry)
 }
 EXPORT_SYMBOL(remove_suid);
 
-size_t
-__filemap_copy_from_user_iovec_inatomic(char *vaddr,
+static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
 	size_t copied = 0, left = 0;
@@ -1659,6 +1658,110 @@ __filemap_copy_from_user_iovec_inatomic(char *vaddr,
 }
 
 /*
+ * Copy as much as we can into the page and return the number of bytes which
+ * were sucessfully copied. If a fault is encountered then return the number of
+ * bytes which were copied.
+ */
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t copied;
+
+	BUG_ON(!in_atomic());
+	kaddr = kmap_atomic(page, KM_USER0);
+	if (likely(i->nr_segs == 1)) {
+		int left;
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		left = __copy_from_user_inatomic_nocache(kaddr + offset,
+							buf, bytes);
+		copied = bytes - left;
+	} else {
+		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
+						i->iov, i->iov_offset, bytes);
+	}
+	kunmap_atomic(kaddr, KM_USER0);
+
+	return copied;
+}
+
+/*
+ * This has the same sideeffects and return value as
+ * iov_iter_copy_from_user_atomic().
+ * The difference is that it attempts to resolve faults.
+ * Page must not be locked.
+ */
+size_t iov_iter_copy_from_user(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t copied;
+
+	kaddr = kmap(page);
+	if (likely(i->nr_segs == 1)) {
+		int left;
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
+		copied = bytes - left;
+	} else {
+		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
+						i->iov, i->iov_offset, bytes);
+	}
+	kunmap(page);
+	return copied;
+}
+
+static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes)
+{
+	if (likely(i->nr_segs == 1)) {
+		i->iov_offset += bytes;
+	} else {
+		const struct iovec *iov = i->iov;
+		size_t base = i->iov_offset;
+
+		while (bytes) {
+			int copy = min(bytes, iov->iov_len - base);
+
+			bytes -= copy;
+			base += copy;
+			if (iov->iov_len == base) {
+				iov++;
+				base = 0;
+			}
+		}
+		i->iov = iov;
+		i->iov_offset = base;
+	}
+}
+
+void iov_iter_advance(struct iov_iter *i, size_t bytes)
+{
+	BUG_ON(i->count < bytes);
+
+	__iov_iter_advance_iov(i, bytes);
+	i->count -= bytes;
+}
+
+int iov_iter_fault_in_readable(struct iov_iter *i)
+{
+	size_t seglen = min(i->iov->iov_len - i->iov_offset, i->count);
+	char __user *buf = i->iov->iov_base + i->iov_offset;
+	return fault_in_pages_readable(buf, seglen);
+}
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(struct iov_iter *i)
+{
+	const struct iovec *iov = i->iov;
+	if (i->nr_segs == 1)
+		return i->count;
+	else
+		return min(i->count, iov->iov_len - i->iov_offset);
+}
+
+/*
  * Performs necessary checks before doing a write
  *
  * Can adjust writing position or amount of bytes to write.
@@ -1816,30 +1919,22 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode *inode = mapping->host;
 	long status = 0;
-	const struct iovec *cur_iov = iov; /* current iovec */
-	size_t iov_offset = 0;	/* offset in the current iovec */
-	char __user *buf;
+	struct iov_iter i;
 
-	/*
-	 * handle partial DIO write. Adjust cur_iov if needed.
-	 */
-	filemap_set_next_iovec(&cur_iov, nr_segs, &iov_offset, written);
+	iov_iter_init(&i, iov, nr_segs, count, written);
 
 	do {
 		struct page *src_page;
 		struct page *page;
 		pgoff_t index;		/* Pagecache index for current page */
 		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long seglen;	/* Bytes remaining in current iovec */
 		unsigned long bytes;	/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
 
-		buf = cur_iov->iov_base + iov_offset;
 		offset = (pos & (PAGE_CACHE_SIZE - 1));
 		index = pos >> PAGE_CACHE_SHIFT;
-		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
+		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+						iov_iter_count(&i));
 
 		/*
 		 * a non-NULL src_page indicates that we're doing the
@@ -1847,10 +1942,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		 */
 		src_page = NULL;
 
-		seglen = cur_iov->iov_len - iov_offset;
-		if (seglen > bytes)
-			seglen = bytes;
-
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
@@ -1861,7 +1952,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		 * to check that the address is actually valid, when atomic
 		 * usercopies are used, below.
 		 */
-		if (unlikely(fault_in_pages_readable(buf, seglen))) {
+		if (unlikely(iov_iter_fault_in_readable(&i))) {
 			status = -EFAULT;
 			break;
 		}
@@ -1892,8 +1983,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			 * same reason as we can't take a page fault with a
 			 * page locked (as explained below).
 			 */
-			copied = filemap_copy_from_user(src_page, offset,
-					cur_iov, nr_segs, iov_offset, bytes);
+			copied = iov_iter_copy_from_user(src_page, &i,
+							offset, bytes);
 			if (unlikely(copied == 0)) {
 				status = -EFAULT;
 				page_cache_release(page);
@@ -1939,8 +2030,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			 * really matter.
 			 */
 			pagefault_disable();
-			copied = filemap_copy_from_user_atomic(page, offset,
-					cur_iov, nr_segs, iov_offset, bytes);
+			copied = iov_iter_copy_from_user_atomic(page, &i,
							offset, bytes);
 			pagefault_enable();
 		} else {
 			void *src, *dst;
@@ -1965,10 +2056,9 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		if (src_page)
 			page_cache_release(src_page);
 
+		iov_iter_advance(&i, copied);
 		written += copied;
-		count -= copied;
 		pos += copied;
-		filemap_set_next_iovec(&cur_iov, nr_segs, &iov_offset, copied);
 
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
@@ -1992,7 +2082,7 @@ fs_write_aop_error:
 			continue;
 		else
 			break;
-	} while (count);
+	} while (iov_iter_count(&i));
 	*ppos = pos;
 
 	/*
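
After the conversion, generic_file_buffered_write() follows a fixed rhythm per page: size the copy from the remaining page space and iov_iter_count(), fault the source in before taking the page lock, copy with page faults disabled, then iov_iter_advance() by however much was actually copied. The sketch below is a hedged userspace rendering of that loop shape, reusing the iov_iter mirror from the first sketch; iov_iter_copy_sim is an invented stand-in that always succeeds, whereas the kernel helpers map the page and may stop early at a fault.

#include <string.h>		/* in addition to the headers in the first sketch */

#define PAGE_SIZE_SIM 4096	/* stand-in for PAGE_CACHE_SIZE */

/* Rough stand-in for iov_iter_copy_from_user(): copy up to bytes from the
 * iterator's current position into dst, possibly spanning segments. */
static size_t iov_iter_copy_sim(void *dst, const struct iov_iter *i, size_t bytes)
{
	const struct iovec *iov = i->iov;
	size_t base = i->iov_offset;
	size_t copied = 0;

	while (bytes) {
		size_t chunk = iov->iov_len - base;

		if (chunk > bytes)
			chunk = bytes;
		memcpy((char *)dst + copied, (char *)iov->iov_base + base, chunk);
		copied += chunk;
		bytes -= chunk;
		base += chunk;
		if (base == iov->iov_len) {	/* move to the next segment */
			iov++;
			base = 0;
		}
	}
	return copied;
}

/* Shape of the converted write loop: size, copy, advance by what was copied. */
static void buffered_write_sim(struct iov_iter *i, long long pos)
{
	char page[PAGE_SIZE_SIM];

	while (iov_iter_count(i)) {
		size_t offset = pos & (PAGE_SIZE_SIM - 1);
		size_t bytes = PAGE_SIZE_SIM - offset;

		if (bytes > iov_iter_count(i))
			bytes = iov_iter_count(i);
		/* kernel: fault-in, lock the page, pagefault_disable(), atomic copy */
		size_t copied = iov_iter_copy_sim(page + offset, i, bytes);

		iov_iter_advance(i, copied);	/* a short copy simply retries */
		pos += copied;
	}
}

Calling buffered_write_sim() with an iterator initialized as in the first sketch walks the entire iovec one page-sized chunk at a time; in the kernel hunks above, a copy that returns zero bytes is instead treated as -EFAULT.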
diff --git a/mm/filemap.h b/mm/filemap.h
deleted file mode 100644
index b500d936cec5..000000000000
--- a/mm/filemap.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- *	linux/mm/filemap.h
- *
- * Copyright (C) 1994-1999 Linus Torvalds
- */
-
-#ifndef __FILEMAP_H
-#define __FILEMAP_H
-
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/uio.h>
-#include <linux/uaccess.h>
-
-size_t
-__filemap_copy_from_user_iovec_inatomic(char *vaddr,
-					const struct iovec *iov,
-					size_t base,
-					size_t bytes);
-
-/*
- * Copy as much as we can into the page and return the number of bytes which
- * were sucessfully copied. If a fault is encountered then return the number of
- * bytes which were copied.
- */
-static inline size_t
-filemap_copy_from_user_atomic(struct page *page, unsigned long offset,
-			const struct iovec *iov, unsigned long nr_segs,
-			size_t base, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap_atomic(page, KM_USER0);
-	if (likely(nr_segs == 1)) {
-		int left;
-		char __user *buf = iov->iov_base + base;
-		left = __copy_from_user_inatomic_nocache(kaddr + offset,
-							buf, bytes);
-		copied = bytes - left;
-	} else {
-		copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset,
-							iov, base, bytes);
-	}
-	kunmap_atomic(kaddr, KM_USER0);
-
-	return copied;
-}
-
-/*
- * This has the same sideeffects and return value as
- * filemap_copy_from_user_atomic().
- * The difference is that it attempts to resolve faults.
- */
-static inline size_t
-filemap_copy_from_user(struct page *page, unsigned long offset,
-			const struct iovec *iov, unsigned long nr_segs,
-			size_t base, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap(page);
-	if (likely(nr_segs == 1)) {
-		int left;
-		char __user *buf = iov->iov_base + base;
-		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
-		copied = bytes - left;
-	} else {
-		copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset,
-							iov, base, bytes);
-	}
-	kunmap(page);
-	return copied;
-}
-
-static inline void
-filemap_set_next_iovec(const struct iovec **iovp, unsigned long nr_segs,
-		       size_t *basep, size_t bytes)
-{
-	if (likely(nr_segs == 1)) {
-		*basep += bytes;
-	} else {
-		const struct iovec *iov = *iovp;
-		size_t base = *basep;
-
-		while (bytes) {
-			int copy = min(bytes, iov->iov_len - base);
-
-			bytes -= copy;
-			base += copy;
-			if (iov->iov_len == base) {
-				iov++;
-				base = 0;
-			}
-		}
-		*iovp = iov;
-		*basep = base;
-	}
-}
-#endif