diff options
| -rw-r--r-- | Documentation/filesystems/Locking | 12 | ||||
| -rw-r--r-- | Documentation/filesystems/vfs.txt | 39 | ||||
| -rw-r--r-- | drivers/block/loop.c | 5 | ||||
| -rw-r--r-- | fs/fat/inode.c | 2 | ||||
| -rw-r--r-- | fs/libfs.c | 2 | ||||
| -rw-r--r-- | fs/ocfs2/file.c | 3 | ||||
| -rw-r--r-- | fs/splice.c | 4 | ||||
| -rw-r--r-- | include/linux/fs.h | 7 | ||||
| -rw-r--r-- | mm/filemap.c | 242 |
9 files changed, 23 insertions, 293 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8362860e21a7..23d2f4460deb 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
| @@ -161,8 +161,12 @@ prototypes: | |||
| 161 | int (*set_page_dirty)(struct page *page); | 161 | int (*set_page_dirty)(struct page *page); |
| 162 | int (*readpages)(struct file *filp, struct address_space *mapping, | 162 | int (*readpages)(struct file *filp, struct address_space *mapping, |
| 163 | struct list_head *pages, unsigned nr_pages); | 163 | struct list_head *pages, unsigned nr_pages); |
| 164 | int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); | 164 | int (*write_begin)(struct file *, struct address_space *mapping, |
| 165 | int (*commit_write)(struct file *, struct page *, unsigned, unsigned); | 165 | loff_t pos, unsigned len, unsigned flags, |
| 166 | struct page **pagep, void **fsdata); | ||
| 167 | int (*write_end)(struct file *, struct address_space *mapping, | ||
| 168 | loff_t pos, unsigned len, unsigned copied, | ||
| 169 | struct page *page, void *fsdata); | ||
| 166 | sector_t (*bmap)(struct address_space *, sector_t); | 170 | sector_t (*bmap)(struct address_space *, sector_t); |
| 167 | int (*invalidatepage) (struct page *, unsigned long); | 171 | int (*invalidatepage) (struct page *, unsigned long); |
| 168 | int (*releasepage) (struct page *, int); | 172 | int (*releasepage) (struct page *, int); |
| @@ -180,8 +184,6 @@ sync_page: no maybe | |||
| 180 | writepages: no | 184 | writepages: no |
| 181 | set_page_dirty no no | 185 | set_page_dirty no no |
| 182 | readpages: no | 186 | readpages: no |
| 183 | prepare_write: no yes yes | ||
| 184 | commit_write: no yes yes | ||
| 185 | write_begin: no locks the page yes | 187 | write_begin: no locks the page yes |
| 186 | write_end: no yes, unlocks yes | 188 | write_end: no yes, unlocks yes |
| 187 | perform_write: no n/a yes | 189 | perform_write: no n/a yes |
| @@ -191,7 +193,7 @@ releasepage: no yes | |||
| 191 | direct_IO: no | 193 | direct_IO: no |
| 192 | launder_page: no yes | 194 | launder_page: no yes |
| 193 | 195 | ||
| 194 | ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage() | 196 | ->write_begin(), ->write_end(), ->sync_page() and ->readpage() |
| 195 | may be called from the request handler (/dev/loop). | 197 | may be called from the request handler (/dev/loop). |
| 196 | 198 | ||
| 197 | ->readpage() unlocks the page, either synchronously or via I/O | 199 | ->readpage() unlocks the page, either synchronously or via I/O |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c4d348dabe94..5579bda58a6d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
| @@ -492,7 +492,7 @@ written-back to storage typically in whole pages, however the | |||
| 492 | address_space has finer control of write sizes. | 492 | address_space has finer control of write sizes. |
| 493 | 493 | ||
| 494 | The read process essentially only requires 'readpage'. The write | 494 | The read process essentially only requires 'readpage'. The write |
| 495 | process is more complicated and uses prepare_write/commit_write or | 495 | process is more complicated and uses write_begin/write_end or |
| 496 | set_page_dirty to write data into the address_space, and writepage, | 496 | set_page_dirty to write data into the address_space, and writepage, |
| 497 | sync_page, and writepages to writeback data to storage. | 497 | sync_page, and writepages to writeback data to storage. |
| 498 | 498 | ||
| @@ -521,8 +521,6 @@ struct address_space_operations { | |||
| 521 | int (*set_page_dirty)(struct page *page); | 521 | int (*set_page_dirty)(struct page *page); |
| 522 | int (*readpages)(struct file *filp, struct address_space *mapping, | 522 | int (*readpages)(struct file *filp, struct address_space *mapping, |
| 523 | struct list_head *pages, unsigned nr_pages); | 523 | struct list_head *pages, unsigned nr_pages); |
| 524 | int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); | ||
| 525 | int (*commit_write)(struct file *, struct page *, unsigned, unsigned); | ||
| 526 | int (*write_begin)(struct file *, struct address_space *mapping, | 524 | int (*write_begin)(struct file *, struct address_space *mapping, |
| 527 | loff_t pos, unsigned len, unsigned flags, | 525 | loff_t pos, unsigned len, unsigned flags, |
| 528 | struct page **pagep, void **fsdata); | 526 | struct page **pagep, void **fsdata); |
| @@ -598,37 +596,7 @@ struct address_space_operations { | |||
| 598 | readpages is only used for read-ahead, so read errors are | 596 | readpages is only used for read-ahead, so read errors are |
| 599 | ignored. If anything goes wrong, feel free to give up. | 597 | ignored. If anything goes wrong, feel free to give up. |
| 600 | 598 | ||
| 601 | prepare_write: called by the generic write path in VM to set up a write | 599 | write_begin: |
| 602 | request for a page. This indicates to the address space that | ||
| 603 | the given range of bytes is about to be written. The | ||
| 604 | address_space should check that the write will be able to | ||
| 605 | complete, by allocating space if necessary and doing any other | ||
| 606 | internal housekeeping. If the write will update parts of | ||
| 607 | any basic-blocks on storage, then those blocks should be | ||
| 608 | pre-read (if they haven't been read already) so that the | ||
| 609 | updated blocks can be written out properly. | ||
| 610 | The page will be locked. | ||
| 611 | |||
| 612 | Note: the page _must not_ be marked uptodate in this function | ||
| 613 | (or anywhere else) unless it actually is uptodate right now. As | ||
| 614 | soon as a page is marked uptodate, it is possible for a concurrent | ||
| 615 | read(2) to copy it to userspace. | ||
| 616 | |||
| 617 | commit_write: If prepare_write succeeds, new data will be copied | ||
| 618 | into the page and then commit_write will be called. It will | ||
| 619 | typically update the size of the file (if appropriate) and | ||
| 620 | mark the inode as dirty, and do any other related housekeeping | ||
| 621 | operations. It should avoid returning an error if possible - | ||
| 622 | errors should have been handled by prepare_write. | ||
| 623 | |||
| 624 | write_begin: This is intended as a replacement for prepare_write. The | ||
| 625 | key differences being that: | ||
| 626 | - it returns a locked page (in *pagep) rather than being | ||
| 627 | given a pre locked page; | ||
| 628 | - it must be able to cope with short writes (where the | ||
| 629 | length passed to write_begin is greater than the number | ||
| 630 | of bytes copied into the page). | ||
| 631 | |||
| 632 | Called by the generic buffered write code to ask the filesystem to | 600 | Called by the generic buffered write code to ask the filesystem to |
| 633 | prepare to write len bytes at the given offset in the file. The | 601 | prepare to write len bytes at the given offset in the file. The |
| 634 | address_space should check that the write will be able to complete, | 602 | address_space should check that the write will be able to complete, |
| @@ -640,6 +608,9 @@ struct address_space_operations { | |||
| 640 | The filesystem must return the locked pagecache page for the specified | 608 | The filesystem must return the locked pagecache page for the specified |
| 641 | offset, in *pagep, for the caller to write into. | 609 | offset, in *pagep, for the caller to write into. |
| 642 | 610 | ||
| 611 | It must be able to cope with short writes (where the length passed to | ||
| 612 | write_begin is greater than the number of bytes copied into the page). | ||
| 613 | |||
| 643 | flags is a field for AOP_FLAG_xxx flags, described in | 614 | flags is a field for AOP_FLAG_xxx flags, described in |
| 644 | include/linux/fs.h. | 615 | include/linux/fs.h. |
| 645 | 616 | ||
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3f09cd8bcc38..5c4ee70d5cf3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
| @@ -40,8 +40,7 @@ | |||
| 40 | * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 | 40 | * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 |
| 41 | * | 41 | * |
| 42 | * Support for falling back on the write file operation when the address space | 42 | * Support for falling back on the write file operation when the address space |
| 43 | * operations prepare_write and/or commit_write are not available on the | 43 | * operation write_begin is not available on the backing filesystem. |
| 44 | * backing filesystem. | ||
| 45 | * Anton Altaparmakov, 16 Feb 2005 | 44 | * Anton Altaparmakov, 16 Feb 2005 |
| 46 | * | 45 | * |
| 47 | * Still To Fix: | 46 | * Still To Fix: |
| @@ -765,7 +764,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, | |||
| 765 | */ | 764 | */ |
| 766 | if (!file->f_op->splice_read) | 765 | if (!file->f_op->splice_read) |
| 767 | goto out_putf; | 766 | goto out_putf; |
| 768 | if (aops->prepare_write || aops->write_begin) | 767 | if (aops->write_begin) |
| 769 | lo_flags |= LO_FLAGS_USE_AOPS; | 768 | lo_flags |= LO_FLAGS_USE_AOPS; |
| 770 | if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) | 769 | if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) |
| 771 | lo_flags |= LO_FLAGS_READ_ONLY; | 770 | lo_flags |= LO_FLAGS_READ_ONLY; |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 19eafbe3c379..2b2eec1283bf 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -175,7 +175,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, | |||
| 175 | 175 | ||
| 176 | if (rw == WRITE) { | 176 | if (rw == WRITE) { |
| 177 | /* | 177 | /* |
| 178 | * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), | 178 | * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), |
| 179 | * so we need to update the ->mmu_private to block boundary. | 179 | * so we need to update the ->mmu_private to block boundary. |
| 180 | * | 180 | * |
| 181 | * But we must fill the remaining area or hole by nul for | 181 | * But we must fill the remaining area or hole by nul for |
diff --git a/fs/libfs.c b/fs/libfs.c index 74688598bcf7..e960a8321902 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
| @@ -814,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr); | |||
| 814 | EXPORT_SYMBOL(simple_link); | 814 | EXPORT_SYMBOL(simple_link); |
| 815 | EXPORT_SYMBOL(simple_lookup); | 815 | EXPORT_SYMBOL(simple_lookup); |
| 816 | EXPORT_SYMBOL(simple_pin_fs); | 816 | EXPORT_SYMBOL(simple_pin_fs); |
| 817 | EXPORT_SYMBOL(simple_prepare_write); | 817 | EXPORT_UNUSED_SYMBOL(simple_prepare_write); |
| 818 | EXPORT_SYMBOL(simple_readpage); | 818 | EXPORT_SYMBOL(simple_readpage); |
| 819 | EXPORT_SYMBOL(simple_release_fs); | 819 | EXPORT_SYMBOL(simple_release_fs); |
| 820 | EXPORT_SYMBOL(simple_rename); | 820 | EXPORT_SYMBOL(simple_rename); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8d3225a78073..7efe937a415f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -679,8 +679,7 @@ leave: | |||
| 679 | 679 | ||
| 680 | /* Some parts of this taken from generic_cont_expand, which turned out | 680 | /* Some parts of this taken from generic_cont_expand, which turned out |
| 681 | * to be too fragile to do exactly what we need without us having to | 681 | * to be too fragile to do exactly what we need without us having to |
| 682 | * worry about recursive locking in ->prepare_write() and | 682 | * worry about recursive locking in ->write_begin() and ->write_end(). */ |
| 683 | * ->commit_write(). */ | ||
| 684 | static int ocfs2_write_zero_page(struct inode *inode, | 683 | static int ocfs2_write_zero_page(struct inode *inode, |
| 685 | u64 size) | 684 | u64 size) |
| 686 | { | 685 | { |
diff --git a/fs/splice.c b/fs/splice.c index a1e701c27156..1abab5cee4ba 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
| 731 | }; | 731 | }; |
| 732 | 732 | ||
| 733 | /* | 733 | /* |
| 734 | * The actor worker might be calling ->prepare_write and | 734 | * The actor worker might be calling ->write_begin and |
| 735 | * ->commit_write. Most of the time, these expect i_mutex to | 735 | * ->write_end. Most of the time, these expect i_mutex to |
| 736 | * be held. Since this may result in an ABBA deadlock with | 736 | * be held. Since this may result in an ABBA deadlock with |
| 737 | * pipe->inode, we have to order lock acquisition here. | 737 | * pipe->inode, we have to order lock acquisition here. |
| 738 | */ | 738 | */ |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b248d61430c..0dcdd9458f4b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -489,13 +489,6 @@ struct address_space_operations { | |||
| 489 | int (*readpages)(struct file *filp, struct address_space *mapping, | 489 | int (*readpages)(struct file *filp, struct address_space *mapping, |
| 490 | struct list_head *pages, unsigned nr_pages); | 490 | struct list_head *pages, unsigned nr_pages); |
| 491 | 491 | ||
| 492 | /* | ||
| 493 | * ext3 requires that a successful prepare_write() call be followed | ||
| 494 | * by a commit_write() call - they must be balanced | ||
| 495 | */ | ||
| 496 | int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); | ||
| 497 | int (*commit_write)(struct file *, struct page *, unsigned, unsigned); | ||
| 498 | |||
| 499 | int (*write_begin)(struct file *, struct address_space *mapping, | 492 | int (*write_begin)(struct file *, struct address_space *mapping, |
| 500 | loff_t pos, unsigned len, unsigned flags, | 493 | loff_t pos, unsigned len, unsigned flags, |
| 501 | struct page **pagep, void **fsdata); | 494 | struct page **pagep, void **fsdata); |
diff --git a/mm/filemap.c b/mm/filemap.c index ab8553658af3..f3e5f8944d17 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping, | |||
| 2029 | { | 2029 | { |
| 2030 | const struct address_space_operations *aops = mapping->a_ops; | 2030 | const struct address_space_operations *aops = mapping->a_ops; |
| 2031 | 2031 | ||
| 2032 | if (aops->write_begin) { | 2032 | return aops->write_begin(file, mapping, pos, len, flags, |
| 2033 | return aops->write_begin(file, mapping, pos, len, flags, | ||
| 2034 | pagep, fsdata); | 2033 | pagep, fsdata); |
| 2035 | } else { | ||
| 2036 | int ret; | ||
| 2037 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
| 2038 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); | ||
| 2039 | struct inode *inode = mapping->host; | ||
| 2040 | struct page *page; | ||
| 2041 | again: | ||
| 2042 | page = __grab_cache_page(mapping, index); | ||
| 2043 | *pagep = page; | ||
| 2044 | if (!page) | ||
| 2045 | return -ENOMEM; | ||
| 2046 | |||
| 2047 | if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) { | ||
| 2048 | /* | ||
| 2049 | * There is no way to resolve a short write situation | ||
| 2050 | * for a !Uptodate page (except by double copying in | ||
| 2051 | * the caller done by generic_perform_write_2copy). | ||
| 2052 | * | ||
| 2053 | * Instead, we have to bring it uptodate here. | ||
| 2054 | */ | ||
| 2055 | ret = aops->readpage(file, page); | ||
| 2056 | page_cache_release(page); | ||
| 2057 | if (ret) { | ||
| 2058 | if (ret == AOP_TRUNCATED_PAGE) | ||
| 2059 | goto again; | ||
| 2060 | return ret; | ||
| 2061 | } | ||
| 2062 | goto again; | ||
| 2063 | } | ||
| 2064 | |||
| 2065 | ret = aops->prepare_write(file, page, offset, offset+len); | ||
| 2066 | if (ret) { | ||
| 2067 | unlock_page(page); | ||
| 2068 | page_cache_release(page); | ||
| 2069 | if (pos + len > inode->i_size) | ||
| 2070 | vmtruncate(inode, inode->i_size); | ||
| 2071 | } | ||
| 2072 | return ret; | ||
| 2073 | } | ||
| 2074 | } | 2034 | } |
| 2075 | EXPORT_SYMBOL(pagecache_write_begin); | 2035 | EXPORT_SYMBOL(pagecache_write_begin); |
| 2076 | 2036 | ||
| @@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, | |||
| 2079 | struct page *page, void *fsdata) | 2039 | struct page *page, void *fsdata) |
| 2080 | { | 2040 | { |
| 2081 | const struct address_space_operations *aops = mapping->a_ops; | 2041 | const struct address_space_operations *aops = mapping->a_ops; |
| 2082 | int ret; | ||
| 2083 | |||
| 2084 | if (aops->write_end) { | ||
| 2085 | mark_page_accessed(page); | ||
| 2086 | ret = aops->write_end(file, mapping, pos, len, copied, | ||
| 2087 | page, fsdata); | ||
| 2088 | } else { | ||
| 2089 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); | ||
| 2090 | struct inode *inode = mapping->host; | ||
| 2091 | |||
| 2092 | flush_dcache_page(page); | ||
| 2093 | ret = aops->commit_write(file, page, offset, offset+len); | ||
| 2094 | unlock_page(page); | ||
| 2095 | mark_page_accessed(page); | ||
| 2096 | page_cache_release(page); | ||
| 2097 | |||
| 2098 | if (ret < 0) { | ||
| 2099 | if (pos + len > inode->i_size) | ||
| 2100 | vmtruncate(inode, inode->i_size); | ||
| 2101 | } else if (ret > 0) | ||
| 2102 | ret = min_t(size_t, copied, ret); | ||
| 2103 | else | ||
| 2104 | ret = copied; | ||
| 2105 | } | ||
| 2106 | 2042 | ||
| 2107 | return ret; | 2043 | mark_page_accessed(page); |
| 2044 | return aops->write_end(file, mapping, pos, len, copied, page, fsdata); | ||
| 2108 | } | 2045 | } |
| 2109 | EXPORT_SYMBOL(pagecache_write_end); | 2046 | EXPORT_SYMBOL(pagecache_write_end); |
| 2110 | 2047 | ||
| @@ -2226,174 +2163,6 @@ repeat: | |||
| 2226 | } | 2163 | } |
| 2227 | EXPORT_SYMBOL(__grab_cache_page); | 2164 | EXPORT_SYMBOL(__grab_cache_page); |
| 2228 | 2165 | ||
| 2229 | static ssize_t generic_perform_write_2copy(struct file *file, | ||
| 2230 | struct iov_iter *i, loff_t pos) | ||
| 2231 | { | ||
| 2232 | struct address_space *mapping = file->f_mapping; | ||
| 2233 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
| 2234 | struct inode *inode = mapping->host; | ||
| 2235 | long status = 0; | ||
| 2236 | ssize_t written = 0; | ||
| 2237 | |||
| 2238 | do { | ||
| 2239 | struct page *src_page; | ||
| 2240 | struct page *page; | ||
| 2241 | pgoff_t index; /* Pagecache index for current page */ | ||
| 2242 | unsigned long offset; /* Offset into pagecache page */ | ||
| 2243 | unsigned long bytes; /* Bytes to write to page */ | ||
| 2244 | size_t copied; /* Bytes copied from user */ | ||
| 2245 | |||
| 2246 | offset = (pos & (PAGE_CACHE_SIZE - 1)); | ||
| 2247 | index = pos >> PAGE_CACHE_SHIFT; | ||
| 2248 | bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, | ||
| 2249 | iov_iter_count(i)); | ||
| 2250 | |||
| 2251 | /* | ||
| 2252 | * a non-NULL src_page indicates that we're doing the | ||
| 2253 | * copy via get_user_pages and kmap. | ||
| 2254 | */ | ||
| 2255 | src_page = NULL; | ||
| 2256 | |||
| 2257 | /* | ||
| 2258 | * Bring in the user page that we will copy from _first_. | ||
| 2259 | * Otherwise there's a nasty deadlock on copying from the | ||
| 2260 | * same page as we're writing to, without it being marked | ||
| 2261 | * up-to-date. | ||
| 2262 | * | ||
| 2263 | * Not only is this an optimisation, but it is also required | ||
| 2264 | * to check that the address is actually valid, when atomic | ||
| 2265 | * usercopies are used, below. | ||
| 2266 | */ | ||
| 2267 | if (unlikely(iov_iter_fault_in_readable(i, bytes))) { | ||
| 2268 | status = -EFAULT; | ||
| 2269 | break; | ||
| 2270 | } | ||
| 2271 | |||
| 2272 | page = __grab_cache_page(mapping, index); | ||
| 2273 | if (!page) { | ||
| 2274 | status = -ENOMEM; | ||
| 2275 | break; | ||
| 2276 | } | ||
| 2277 | |||
| 2278 | /* | ||
| 2279 | * non-uptodate pages cannot cope with short copies, and we | ||
| 2280 | * cannot take a pagefault with the destination page locked. | ||
| 2281 | * So pin the source page to copy it. | ||
| 2282 | */ | ||
| 2283 | if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) { | ||
| 2284 | unlock_page(page); | ||
| 2285 | |||
| 2286 | src_page = alloc_page(GFP_KERNEL); | ||
| 2287 | if (!src_page) { | ||
| 2288 | page_cache_release(page); | ||
| 2289 | status = -ENOMEM; | ||
| 2290 | break; | ||
| 2291 | } | ||
| 2292 | |||
| 2293 | /* | ||
| 2294 | * Cannot get_user_pages with a page locked for the | ||
| 2295 | * same reason as we can't take a page fault with a | ||
| 2296 | * page locked (as explained below). | ||
| 2297 | */ | ||
| 2298 | copied = iov_iter_copy_from_user(src_page, i, | ||
| 2299 | offset, bytes); | ||
| 2300 | if (unlikely(copied == 0)) { | ||
| 2301 | status = -EFAULT; | ||
| 2302 | page_cache_release(page); | ||
| 2303 | page_cache_release(src_page); | ||
| 2304 | break; | ||
| 2305 | } | ||
| 2306 | bytes = copied; | ||
| 2307 | |||
| 2308 | lock_page(page); | ||
| 2309 | /* | ||
| 2310 | * Can't handle the page going uptodate here, because | ||
| 2311 | * that means we would use non-atomic usercopies, which | ||
| 2312 | * zero out the tail of the page, which can cause | ||
| 2313 | * zeroes to become transiently visible. We could just | ||
| 2314 | * use a non-zeroing copy, but the APIs aren't too | ||
| 2315 | * consistent. | ||
| 2316 | */ | ||
| 2317 | if (unlikely(!page->mapping || PageUptodate(page))) { | ||
| 2318 | unlock_page(page); | ||
| 2319 | page_cache_release(page); | ||
| 2320 | page_cache_release(src_page); | ||
| 2321 | continue; | ||
| 2322 | } | ||
| 2323 | } | ||
| 2324 | |||
| 2325 | status = a_ops->prepare_write(file, page, offset, offset+bytes); | ||
| 2326 | if (unlikely(status)) | ||
| 2327 | goto fs_write_aop_error; | ||
| 2328 | |||
| 2329 | if (!src_page) { | ||
| 2330 | /* | ||
| 2331 | * Must not enter the pagefault handler here, because | ||
| 2332 | * we hold the page lock, so we might recursively | ||
| 2333 | * deadlock on the same lock, or get an ABBA deadlock | ||
| 2334 | * against a different lock, or against the mmap_sem | ||
| 2335 | * (which nests outside the page lock). So increment | ||
| 2336 | * preempt count, and use _atomic usercopies. | ||
| 2337 | * | ||
| 2338 | * The page is uptodate so we are OK to encounter a | ||
| 2339 | * short copy: if unmodified parts of the page are | ||
| 2340 | * marked dirty and written out to disk, it doesn't | ||
| 2341 | * really matter. | ||
| 2342 | */ | ||
| 2343 | pagefault_disable(); | ||
| 2344 | copied = iov_iter_copy_from_user_atomic(page, i, | ||
| 2345 | offset, bytes); | ||
| 2346 | pagefault_enable(); | ||
| 2347 | } else { | ||
| 2348 | void *src, *dst; | ||
| 2349 | src = kmap_atomic(src_page, KM_USER0); | ||
| 2350 | dst = kmap_atomic(page, KM_USER1); | ||
| 2351 | memcpy(dst + offset, src + offset, bytes); | ||
| 2352 | kunmap_atomic(dst, KM_USER1); | ||
| 2353 | kunmap_atomic(src, KM_USER0); | ||
| 2354 | copied = bytes; | ||
| 2355 | } | ||
| 2356 | flush_dcache_page(page); | ||
| 2357 | |||
| 2358 | status = a_ops->commit_write(file, page, offset, offset+bytes); | ||
| 2359 | if (unlikely(status < 0)) | ||
| 2360 | goto fs_write_aop_error; | ||
| 2361 | if (unlikely(status > 0)) /* filesystem did partial write */ | ||
| 2362 | copied = min_t(size_t, copied, status); | ||
| 2363 | |||
| 2364 | unlock_page(page); | ||
| 2365 | mark_page_accessed(page); | ||
| 2366 | page_cache_release(page); | ||
| 2367 | if (src_page) | ||
| 2368 | page_cache_release(src_page); | ||
| 2369 | |||
| 2370 | iov_iter_advance(i, copied); | ||
| 2371 | pos += copied; | ||
| 2372 | written += copied; | ||
| 2373 | |||
| 2374 | balance_dirty_pages_ratelimited(mapping); | ||
| 2375 | cond_resched(); | ||
| 2376 | continue; | ||
| 2377 | |||
| 2378 | fs_write_aop_error: | ||
| 2379 | unlock_page(page); | ||
| 2380 | page_cache_release(page); | ||
| 2381 | if (src_page) | ||
| 2382 | page_cache_release(src_page); | ||
| 2383 | |||
| 2384 | /* | ||
| 2385 | * prepare_write() may have instantiated a few blocks | ||
| 2386 | * outside i_size. Trim these off again. Don't need | ||
| 2387 | * i_size_read because we hold i_mutex. | ||
| 2388 | */ | ||
| 2389 | if (pos + bytes > inode->i_size) | ||
| 2390 | vmtruncate(inode, inode->i_size); | ||
| 2391 | break; | ||
| 2392 | } while (iov_iter_count(i)); | ||
| 2393 | |||
| 2394 | return written ? written : status; | ||
| 2395 | } | ||
| 2396 | |||
| 2397 | static ssize_t generic_perform_write(struct file *file, | 2166 | static ssize_t generic_perform_write(struct file *file, |
| 2398 | struct iov_iter *i, loff_t pos) | 2167 | struct iov_iter *i, loff_t pos) |
| 2399 | { | 2168 | { |
| @@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 2494 | struct iov_iter i; | 2263 | struct iov_iter i; |
| 2495 | 2264 | ||
| 2496 | iov_iter_init(&i, iov, nr_segs, count, written); | 2265 | iov_iter_init(&i, iov, nr_segs, count, written); |
| 2497 | if (a_ops->write_begin) | 2266 | status = generic_perform_write(file, &i, pos); |
| 2498 | status = generic_perform_write(file, &i, pos); | ||
| 2499 | else | ||
| 2500 | status = generic_perform_write_2copy(file, &i, pos); | ||
| 2501 | 2267 | ||
| 2502 | if (likely(status >= 0)) { | 2268 | if (likely(status >= 0)) { |
| 2503 | written += status; | 2269 | written += status; |
