diff options
author | Matthew Wilcox <matthew.r.wilcox@intel.com> | 2015-02-16 18:58:56 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-16 20:56:03 -0500 |
commit | d475c6346a38aef3058eba96867bfa726a3cc940 (patch) | |
tree | 4d69d0f50a4a8e649a751dca8f710485848c0249 | |
parent | fbbbad4bc2101e452b24e6e65d3d5e11314a0b5f (diff) |
dax,ext2: replace XIP read and write with DAX I/O
Use the generic AIO infrastructure instead of custom read and write
methods. In addition to giving us support for AIO, this adds the missing
locking between read() and truncate().
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | MAINTAINERS | 6 | ||||
-rw-r--r-- | fs/Makefile | 1 | ||||
-rw-r--r-- | fs/dax.c | 186 | ||||
-rw-r--r-- | fs/ext2/file.c | 6 | ||||
-rw-r--r-- | fs/ext2/inode.c | 8 | ||||
-rw-r--r-- | include/linux/fs.h | 12 | ||||
-rw-r--r-- | mm/filemap.c | 6 | ||||
-rw-r--r-- | mm/filemap_xip.c | 234 |
8 files changed, 214 insertions, 245 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 348f5c16ef50..8670c224c833 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS | |||
@@ -3151,6 +3151,12 @@ L: linux-i2c@vger.kernel.org | |||
3151 | S: Maintained | 3151 | S: Maintained |
3152 | F: drivers/i2c/busses/i2c-diolan-u2c.c | 3152 | F: drivers/i2c/busses/i2c-diolan-u2c.c |
3153 | 3153 | ||
3154 | DIRECT ACCESS (DAX) | ||
3155 | M: Matthew Wilcox <willy@linux.intel.com> | ||
3156 | L: linux-fsdevel@vger.kernel.org | ||
3157 | S: Supported | ||
3158 | F: fs/dax.c | ||
3159 | |||
3154 | DIRECTORY NOTIFICATION (DNOTIFY) | 3160 | DIRECTORY NOTIFICATION (DNOTIFY) |
3155 | M: Eric Paris <eparis@parisplace.org> | 3161 | M: Eric Paris <eparis@parisplace.org> |
3156 | S: Maintained | 3162 | S: Maintained |
diff --git a/fs/Makefile b/fs/Makefile
index bedff48e8fdc..0534444e257c 100644
--- a/fs/Makefile
+++ b/fs/Makefile | |||
@@ -28,6 +28,7 @@ obj-$(CONFIG_SIGNALFD) += signalfd.o | |||
28 | obj-$(CONFIG_TIMERFD) += timerfd.o | 28 | obj-$(CONFIG_TIMERFD) += timerfd.o |
29 | obj-$(CONFIG_EVENTFD) += eventfd.o | 29 | obj-$(CONFIG_EVENTFD) += eventfd.o |
30 | obj-$(CONFIG_AIO) += aio.o | 30 | obj-$(CONFIG_AIO) += aio.o |
31 | obj-$(CONFIG_FS_XIP) += dax.o | ||
31 | obj-$(CONFIG_FILE_LOCKING) += locks.o | 32 | obj-$(CONFIG_FILE_LOCKING) += locks.o |
32 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o | 33 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o |
33 | obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o | 34 | obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o |
diff --git a/fs/dax.c b/fs/dax.c
new file mode 100644
index 000000000000..1a2bdbfa3ea9
--- /dev/null
+++ b/fs/dax.c | |||
@@ -0,0 +1,186 @@ | |||
1 | /* | ||
2 | * fs/dax.c - Direct Access filesystem code | ||
3 | * Copyright (c) 2013-2014 Intel Corporation | ||
4 | * Author: Matthew Wilcox <matthew.r.wilcox@intel.com> | ||
5 | * Author: Ross Zwisler <ross.zwisler@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | */ | ||
16 | |||
17 | #include <linux/atomic.h> | ||
18 | #include <linux/blkdev.h> | ||
19 | #include <linux/buffer_head.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/genhd.h> | ||
22 | #include <linux/mutex.h> | ||
23 | #include <linux/uio.h> | ||
24 | |||
25 | static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits) | ||
26 | { | ||
27 | unsigned long pfn; | ||
28 | sector_t sector = bh->b_blocknr << (blkbits - 9); | ||
29 | return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size); | ||
30 | } | ||
31 | |||
32 | static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos, | ||
33 | loff_t end) | ||
34 | { | ||
35 | loff_t final = end - pos + first; /* The final byte of the buffer */ | ||
36 | |||
37 | if (first > 0) | ||
38 | memset(addr, 0, first); | ||
39 | if (final < size) | ||
40 | memset(addr + final, 0, size - final); | ||
41 | } | ||
42 | |||
43 | static bool buffer_written(struct buffer_head *bh) | ||
44 | { | ||
45 | return buffer_mapped(bh) && !buffer_unwritten(bh); | ||
46 | } | ||
47 | |||
48 | /* | ||
49 | * When ext4 encounters a hole, it returns without modifying the buffer_head | ||
50 | * which means that we can't trust b_size. To cope with this, we set b_state | ||
51 | * to 0 before calling get_block and, if any bit is set, we know we can trust | ||
52 | * b_size. Unfortunate, really, since ext4 knows precisely how long a hole is | ||
53 | * and would save us time calling get_block repeatedly. | ||
54 | */ | ||
55 | static bool buffer_size_valid(struct buffer_head *bh) | ||
56 | { | ||
57 | return bh->b_state != 0; | ||
58 | } | ||
59 | |||
60 | static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, | ||
61 | loff_t start, loff_t end, get_block_t get_block, | ||
62 | struct buffer_head *bh) | ||
63 | { | ||
64 | ssize_t retval = 0; | ||
65 | loff_t pos = start; | ||
66 | loff_t max = start; | ||
67 | loff_t bh_max = start; | ||
68 | void *addr; | ||
69 | bool hole = false; | ||
70 | |||
71 | if (rw != WRITE) | ||
72 | end = min(end, i_size_read(inode)); | ||
73 | |||
74 | while (pos < end) { | ||
75 | unsigned len; | ||
76 | if (pos == max) { | ||
77 | unsigned blkbits = inode->i_blkbits; | ||
78 | sector_t block = pos >> blkbits; | ||
79 | unsigned first = pos - (block << blkbits); | ||
80 | long size; | ||
81 | |||
82 | if (pos == bh_max) { | ||
83 | bh->b_size = PAGE_ALIGN(end - pos); | ||
84 | bh->b_state = 0; | ||
85 | retval = get_block(inode, block, bh, | ||
86 | rw == WRITE); | ||
87 | if (retval) | ||
88 | break; | ||
89 | if (!buffer_size_valid(bh)) | ||
90 | bh->b_size = 1 << blkbits; | ||
91 | bh_max = pos - first + bh->b_size; | ||
92 | } else { | ||
93 | unsigned done = bh->b_size - | ||
94 | (bh_max - (pos - first)); | ||
95 | bh->b_blocknr += done >> blkbits; | ||
96 | bh->b_size -= done; | ||
97 | } | ||
98 | |||
99 | hole = (rw != WRITE) && !buffer_written(bh); | ||
100 | if (hole) { | ||
101 | addr = NULL; | ||
102 | size = bh->b_size - first; | ||
103 | } else { | ||
104 | retval = dax_get_addr(bh, &addr, blkbits); | ||
105 | if (retval < 0) | ||
106 | break; | ||
107 | if (buffer_unwritten(bh) || buffer_new(bh)) | ||
108 | dax_new_buf(addr, retval, first, pos, | ||
109 | end); | ||
110 | addr += first; | ||
111 | size = retval - first; | ||
112 | } | ||
113 | max = min(pos + size, end); | ||
114 | } | ||
115 | |||
116 | if (rw == WRITE) | ||
117 | len = copy_from_iter(addr, max - pos, iter); | ||
118 | else if (!hole) | ||
119 | len = copy_to_iter(addr, max - pos, iter); | ||
120 | else | ||
121 | len = iov_iter_zero(max - pos, iter); | ||
122 | |||
123 | if (!len) | ||
124 | break; | ||
125 | |||
126 | pos += len; | ||
127 | addr += len; | ||
128 | } | ||
129 | |||
130 | return (pos == start) ? retval : pos - start; | ||
131 | } | ||
132 | |||
133 | /** | ||
134 | * dax_do_io - Perform I/O to a DAX file | ||
135 | * @rw: READ to read or WRITE to write | ||
136 | * @iocb: The control block for this I/O | ||
137 | * @inode: The file which the I/O is directed at | ||
138 | * @iter: The addresses to do I/O from or to | ||
139 | * @pos: The file offset where the I/O starts | ||
140 | * @get_block: The filesystem method used to translate file offsets to blocks | ||
141 | * @end_io: A filesystem callback for I/O completion | ||
142 | * @flags: See below | ||
143 | * | ||
144 | * This function uses the same locking scheme as do_blockdev_direct_IO: | ||
145 | * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the | ||
146 | * caller for writes. For reads, we take and release the i_mutex ourselves. | ||
147 | * If DIO_LOCKING is not set, the filesystem takes care of its own locking. | ||
148 | * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O | ||
149 | * is in progress. | ||
150 | */ | ||
151 | ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, | ||
152 | struct iov_iter *iter, loff_t pos, | ||
153 | get_block_t get_block, dio_iodone_t end_io, int flags) | ||
154 | { | ||
155 | struct buffer_head bh; | ||
156 | ssize_t retval = -EINVAL; | ||
157 | loff_t end = pos + iov_iter_count(iter); | ||
158 | |||
159 | memset(&bh, 0, sizeof(bh)); | ||
160 | |||
161 | if ((flags & DIO_LOCKING) && (rw == READ)) { | ||
162 | struct address_space *mapping = inode->i_mapping; | ||
163 | mutex_lock(&inode->i_mutex); | ||
164 | retval = filemap_write_and_wait_range(mapping, pos, end - 1); | ||
165 | if (retval) { | ||
166 | mutex_unlock(&inode->i_mutex); | ||
167 | goto out; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | /* Protects against truncate */ | ||
172 | atomic_inc(&inode->i_dio_count); | ||
173 | |||
174 | retval = dax_io(rw, inode, iter, pos, end, get_block, &bh); | ||
175 | |||
176 | if ((flags & DIO_LOCKING) && (rw == READ)) | ||
177 | mutex_unlock(&inode->i_mutex); | ||
178 | |||
179 | if ((retval > 0) && end_io) | ||
180 | end_io(iocb, pos, retval, bh.b_private); | ||
181 | |||
182 | inode_dio_done(inode); | ||
183 | out: | ||
184 | return retval; | ||
185 | } | ||
186 | EXPORT_SYMBOL_GPL(dax_do_io); | ||
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 7c87b22a7228..a247123fd798 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c | |||
@@ -81,8 +81,10 @@ const struct file_operations ext2_file_operations = { | |||
81 | #ifdef CONFIG_EXT2_FS_XIP | 81 | #ifdef CONFIG_EXT2_FS_XIP |
82 | const struct file_operations ext2_xip_file_operations = { | 82 | const struct file_operations ext2_xip_file_operations = { |
83 | .llseek = generic_file_llseek, | 83 | .llseek = generic_file_llseek, |
84 | .read = xip_file_read, | 84 | .read = new_sync_read, |
85 | .write = xip_file_write, | 85 | .write = new_sync_write, |
86 | .read_iter = generic_file_read_iter, | ||
87 | .write_iter = generic_file_write_iter, | ||
86 | .unlocked_ioctl = ext2_ioctl, | 88 | .unlocked_ioctl = ext2_ioctl, |
87 | #ifdef CONFIG_COMPAT | 89 | #ifdef CONFIG_COMPAT |
88 | .compat_ioctl = ext2_compat_ioctl, | 90 | .compat_ioctl = ext2_compat_ioctl, |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0cb04486577d..3ccd5fd47d66 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c | |||
@@ -859,7 +859,12 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, | |||
859 | size_t count = iov_iter_count(iter); | 859 | size_t count = iov_iter_count(iter); |
860 | ssize_t ret; | 860 | ssize_t ret; |
861 | 861 | ||
862 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block); | 862 | if (IS_DAX(inode)) |
863 | ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block, | ||
864 | NULL, DIO_LOCKING); | ||
865 | else | ||
866 | ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, | ||
867 | ext2_get_block); | ||
863 | if (ret < 0 && (rw & WRITE)) | 868 | if (ret < 0 && (rw & WRITE)) |
864 | ext2_write_failed(mapping, offset + count); | 869 | ext2_write_failed(mapping, offset + count); |
865 | return ret; | 870 | return ret; |
@@ -888,6 +893,7 @@ const struct address_space_operations ext2_aops = { | |||
888 | const struct address_space_operations ext2_aops_xip = { | 893 | const struct address_space_operations ext2_aops_xip = { |
889 | .bmap = ext2_bmap, | 894 | .bmap = ext2_bmap, |
890 | .get_xip_mem = ext2_get_xip_mem, | 895 | .get_xip_mem = ext2_get_xip_mem, |
896 | .direct_IO = ext2_direct_IO, | ||
891 | }; | 897 | }; |
892 | 898 | ||
893 | const struct address_space_operations ext2_nobh_aops = { | 899 | const struct address_space_operations ext2_nobh_aops = { |
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fb373bb5cf03..241c3c030fb5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h | |||
@@ -2587,12 +2587,11 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, | |||
2587 | extern int generic_file_open(struct inode * inode, struct file * filp); | 2587 | extern int generic_file_open(struct inode * inode, struct file * filp); |
2588 | extern int nonseekable_open(struct inode * inode, struct file * filp); | 2588 | extern int nonseekable_open(struct inode * inode, struct file * filp); |
2589 | 2589 | ||
2590 | ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, | ||
2591 | loff_t, get_block_t, dio_iodone_t, int flags); | ||
2592 | |||
2590 | #ifdef CONFIG_FS_XIP | 2593 | #ifdef CONFIG_FS_XIP |
2591 | extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, | ||
2592 | loff_t *ppos); | ||
2593 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); | 2594 | extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma); |
2594 | extern ssize_t xip_file_write(struct file *filp, const char __user *buf, | ||
2595 | size_t len, loff_t *ppos); | ||
2596 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); | 2595 | extern int xip_truncate_page(struct address_space *mapping, loff_t from); |
2597 | #else | 2596 | #else |
2598 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) | 2597 | static inline int xip_truncate_page(struct address_space *mapping, loff_t from) |
@@ -2756,6 +2755,11 @@ extern int generic_show_options(struct seq_file *m, struct dentry *root); | |||
2756 | extern void save_mount_options(struct super_block *sb, char *options); | 2755 | extern void save_mount_options(struct super_block *sb, char *options); |
2757 | extern void replace_mount_options(struct super_block *sb, char *options); | 2756 | extern void replace_mount_options(struct super_block *sb, char *options); |
2758 | 2757 | ||
2758 | static inline bool io_is_direct(struct file *filp) | ||
2759 | { | ||
2760 | return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp)); | ||
2761 | } | ||
2762 | |||
2759 | static inline ino_t parent_ino(struct dentry *dentry) | 2763 | static inline ino_t parent_ino(struct dentry *dentry) |
2760 | { | 2764 | { |
2761 | ino_t res; | 2765 | ino_t res; |
diff --git a/mm/filemap.c b/mm/filemap.c
index 1578c224285e..ad7242043bdb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c | |||
@@ -1695,8 +1695,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) | |||
1695 | loff_t *ppos = &iocb->ki_pos; | 1695 | loff_t *ppos = &iocb->ki_pos; |
1696 | loff_t pos = *ppos; | 1696 | loff_t pos = *ppos; |
1697 | 1697 | ||
1698 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 1698 | if (io_is_direct(file)) { |
1699 | if (file->f_flags & O_DIRECT) { | ||
1700 | struct address_space *mapping = file->f_mapping; | 1699 | struct address_space *mapping = file->f_mapping; |
1701 | struct inode *inode = mapping->host; | 1700 | struct inode *inode = mapping->host; |
1702 | size_t count = iov_iter_count(iter); | 1701 | size_t count = iov_iter_count(iter); |
@@ -2584,8 +2583,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
2584 | if (err) | 2583 | if (err) |
2585 | goto out; | 2584 | goto out; |
2586 | 2585 | ||
2587 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 2586 | if (io_is_direct(file)) { |
2588 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
2589 | loff_t endbyte; | 2587 | loff_t endbyte; |
2590 | 2588 | ||
2591 | written = generic_file_direct_write(iocb, from, pos); | 2589 | written = generic_file_direct_write(iocb, from, pos); |
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 59e1c5585748..9c869f402c07 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c | |||
@@ -43,119 +43,6 @@ static struct page *xip_sparse_page(void) | |||
43 | } | 43 | } |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * This is a file read routine for execute in place files, and uses | ||
47 | * the mapping->a_ops->get_xip_mem() function for the actual low-level | ||
48 | * stuff. | ||
49 | * | ||
50 | * Note the struct file* is not used at all. It may be NULL. | ||
51 | */ | ||
52 | static ssize_t | ||
53 | do_xip_mapping_read(struct address_space *mapping, | ||
54 | struct file_ra_state *_ra, | ||
55 | struct file *filp, | ||
56 | char __user *buf, | ||
57 | size_t len, | ||
58 | loff_t *ppos) | ||
59 | { | ||
60 | struct inode *inode = mapping->host; | ||
61 | pgoff_t index, end_index; | ||
62 | unsigned long offset; | ||
63 | loff_t isize, pos; | ||
64 | size_t copied = 0, error = 0; | ||
65 | |||
66 | BUG_ON(!mapping->a_ops->get_xip_mem); | ||
67 | |||
68 | pos = *ppos; | ||
69 | index = pos >> PAGE_CACHE_SHIFT; | ||
70 | offset = pos & ~PAGE_CACHE_MASK; | ||
71 | |||
72 | isize = i_size_read(inode); | ||
73 | if (!isize) | ||
74 | goto out; | ||
75 | |||
76 | end_index = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
77 | do { | ||
78 | unsigned long nr, left; | ||
79 | void *xip_mem; | ||
80 | unsigned long xip_pfn; | ||
81 | int zero = 0; | ||
82 | |||
83 | /* nr is the maximum number of bytes to copy from this page */ | ||
84 | nr = PAGE_CACHE_SIZE; | ||
85 | if (index >= end_index) { | ||
86 | if (index > end_index) | ||
87 | goto out; | ||
88 | nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; | ||
89 | if (nr <= offset) { | ||
90 | goto out; | ||
91 | } | ||
92 | } | ||
93 | nr = nr - offset; | ||
94 | if (nr > len - copied) | ||
95 | nr = len - copied; | ||
96 | |||
97 | error = mapping->a_ops->get_xip_mem(mapping, index, 0, | ||
98 | &xip_mem, &xip_pfn); | ||
99 | if (unlikely(error)) { | ||
100 | if (error == -ENODATA) { | ||
101 | /* sparse */ | ||
102 | zero = 1; | ||
103 | } else | ||
104 | goto out; | ||
105 | } | ||
106 | |||
107 | /* If users can be writing to this page using arbitrary | ||
108 | * virtual addresses, take care about potential aliasing | ||
109 | * before reading the page on the kernel side. | ||
110 | */ | ||
111 | if (mapping_writably_mapped(mapping)) | ||
112 | /* address based flush */ ; | ||
113 | |||
114 | /* | ||
115 | * Ok, we have the mem, so now we can copy it to user space... | ||
116 | * | ||
117 | * The actor routine returns how many bytes were actually used.. | ||
118 | * NOTE! This may not be the same as how much of a user buffer | ||
119 | * we filled up (we may be padding etc), so we can only update | ||
120 | * "pos" here (the actor routine has to update the user buffer | ||
121 | * pointers and the remaining count). | ||
122 | */ | ||
123 | if (!zero) | ||
124 | left = __copy_to_user(buf+copied, xip_mem+offset, nr); | ||
125 | else | ||
126 | left = __clear_user(buf + copied, nr); | ||
127 | |||
128 | if (left) { | ||
129 | error = -EFAULT; | ||
130 | goto out; | ||
131 | } | ||
132 | |||
133 | copied += (nr - left); | ||
134 | offset += (nr - left); | ||
135 | index += offset >> PAGE_CACHE_SHIFT; | ||
136 | offset &= ~PAGE_CACHE_MASK; | ||
137 | } while (copied < len); | ||
138 | |||
139 | out: | ||
140 | *ppos = pos + copied; | ||
141 | if (filp) | ||
142 | file_accessed(filp); | ||
143 | |||
144 | return (copied ? copied : error); | ||
145 | } | ||
146 | |||
147 | ssize_t | ||
148 | xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) | ||
149 | { | ||
150 | if (!access_ok(VERIFY_WRITE, buf, len)) | ||
151 | return -EFAULT; | ||
152 | |||
153 | return do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp, | ||
154 | buf, len, ppos); | ||
155 | } | ||
156 | EXPORT_SYMBOL_GPL(xip_file_read); | ||
157 | |||
158 | /* | ||
159 | * __xip_unmap is invoked from xip_unmap and xip_write | 46 | * __xip_unmap is invoked from xip_unmap and xip_write |
160 | * | 47 | * |
161 | * This function walks all vmas of the address_space and unmaps the | 48 | * This function walks all vmas of the address_space and unmaps the |
@@ -341,127 +228,6 @@ int xip_file_mmap(struct file * file, struct vm_area_struct * vma) | |||
341 | } | 228 | } |
342 | EXPORT_SYMBOL_GPL(xip_file_mmap); | 229 | EXPORT_SYMBOL_GPL(xip_file_mmap); |
343 | 230 | ||
344 | static ssize_t | ||
345 | __xip_file_write(struct file *filp, const char __user *buf, | ||
346 | size_t count, loff_t pos, loff_t *ppos) | ||
347 | { | ||
348 | struct address_space * mapping = filp->f_mapping; | ||
349 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
350 | struct inode *inode = mapping->host; | ||
351 | long status = 0; | ||
352 | size_t bytes; | ||
353 | ssize_t written = 0; | ||
354 | |||
355 | BUG_ON(!mapping->a_ops->get_xip_mem); | ||
356 | |||
357 | do { | ||
358 | unsigned long index; | ||
359 | unsigned long offset; | ||
360 | size_t copied; | ||
361 | void *xip_mem; | ||
362 | unsigned long xip_pfn; | ||
363 | |||
364 | offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ | ||
365 | index = pos >> PAGE_CACHE_SHIFT; | ||
366 | bytes = PAGE_CACHE_SIZE - offset; | ||
367 | if (bytes > count) | ||
368 | bytes = count; | ||
369 | |||
370 | status = a_ops->get_xip_mem(mapping, index, 0, | ||
371 | &xip_mem, &xip_pfn); | ||
372 | if (status == -ENODATA) { | ||
373 | /* we allocate a new page unmap it */ | ||
374 | mutex_lock(&xip_sparse_mutex); | ||
375 | status = a_ops->get_xip_mem(mapping, index, 1, | ||
376 | &xip_mem, &xip_pfn); | ||
377 | mutex_unlock(&xip_sparse_mutex); | ||
378 | if (!status) | ||
379 | /* unmap page at pgoff from all other vmas */ | ||
380 | __xip_unmap(mapping, index); | ||
381 | } | ||
382 | |||
383 | if (status) | ||
384 | break; | ||
385 | |||
386 | copied = bytes - | ||
387 | __copy_from_user_nocache(xip_mem + offset, buf, bytes); | ||
388 | |||
389 | if (likely(copied > 0)) { | ||
390 | status = copied; | ||
391 | |||
392 | if (status >= 0) { | ||
393 | written += status; | ||
394 | count -= status; | ||
395 | pos += status; | ||
396 | buf += status; | ||
397 | } | ||
398 | } | ||
399 | if (unlikely(copied != bytes)) | ||
400 | if (status >= 0) | ||
401 | status = -EFAULT; | ||
402 | if (status < 0) | ||
403 | break; | ||
404 | } while (count); | ||
405 | *ppos = pos; | ||
406 | /* | ||
407 | * No need to use i_size_read() here, the i_size | ||
408 | * cannot change under us because we hold i_mutex. | ||
409 | */ | ||
410 | if (pos > inode->i_size) { | ||
411 | i_size_write(inode, pos); | ||
412 | mark_inode_dirty(inode); | ||
413 | } | ||
414 | |||
415 | return written ? written : status; | ||
416 | } | ||
417 | |||
418 | ssize_t | ||
419 | xip_file_write(struct file *filp, const char __user *buf, size_t len, | ||
420 | loff_t *ppos) | ||
421 | { | ||
422 | struct address_space *mapping = filp->f_mapping; | ||
423 | struct inode *inode = mapping->host; | ||
424 | size_t count; | ||
425 | loff_t pos; | ||
426 | ssize_t ret; | ||
427 | |||
428 | mutex_lock(&inode->i_mutex); | ||
429 | |||
430 | if (!access_ok(VERIFY_READ, buf, len)) { | ||
431 | ret=-EFAULT; | ||
432 | goto out_up; | ||
433 | } | ||
434 | |||
435 | pos = *ppos; | ||
436 | count = len; | ||
437 | |||
438 | /* We can write back this queue in page reclaim */ | ||
439 | current->backing_dev_info = inode_to_bdi(inode); | ||
440 | |||
441 | ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode)); | ||
442 | if (ret) | ||
443 | goto out_backing; | ||
444 | if (count == 0) | ||
445 | goto out_backing; | ||
446 | |||
447 | ret = file_remove_suid(filp); | ||
448 | if (ret) | ||
449 | goto out_backing; | ||
450 | |||
451 | ret = file_update_time(filp); | ||
452 | if (ret) | ||
453 | goto out_backing; | ||
454 | |||
455 | ret = __xip_file_write (filp, buf, count, pos, ppos); | ||
456 | |||
457 | out_backing: | ||
458 | current->backing_dev_info = NULL; | ||
459 | out_up: | ||
460 | mutex_unlock(&inode->i_mutex); | ||
461 | return ret; | ||
462 | } | ||
463 | EXPORT_SYMBOL_GPL(xip_file_write); | ||
464 | |||
465 | /* | 231 | /* |
466 | * truncate a page used for execute in place | 232 | * truncate a page used for execute in place |
467 | * functionality is analog to block_truncate_page but does use get_xip_mem | 233 | * functionality is analog to block_truncate_page but does use get_xip_mem |