commit     9817d207dc13e3a9fc0287bbd36bdfa3cffe5ed4
tree       38f87e68f1cd36159fe5b9c03e9047b2fe374324
parent     cf105601df49ba0ea5ac04a6154c6c1442994c74
parent     7afa6fd037e51e95d322990cb127bb2b1217251a
author     Linus Torvalds <torvalds@g5.osdl.org>  2006-05-01 21:33:40 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>  2006-05-01 21:33:40 -0400
Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
[PATCH] vmsplice: allow user to pass in gift pages
[PATCH] pipe: enable atomic copying of pipe data to/from user space
[PATCH] splice: call handle_ra_miss() on failure to lookup page
[PATCH] Add ->splice_read/splice_write to def_blk_fops
[PATCH] pipe: introduce ->pin() buffer operation
[PATCH] splice: fix bugs in pipe_to_file()
[PATCH] splice: fix bugs with stealing regular pipe pages
 fs/block_dev.c            |   2
 fs/pipe.c                 | 184
 fs/splice.c               | 166
 include/linux/pipe_fs_i.h |  29
 4 files changed, 255 insertions(+), 126 deletions(-)
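The user-visible feature in this merge is SPLICE_F_GIFT. Before the per-file diffs, a minimal userspace sketch of the intended use — assuming a libc that exposes the vmsplice(2) wrapper and SPLICE_F_GIFT via <fcntl.h> (at the time of this merge a raw syscall(2) wrapper would have been needed instead):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>

int main(void)
{
        long pagesize = sysconf(_SC_PAGESIZE);
        struct iovec iov;
        void *buf;
        int pfd[2];

        if (pipe(pfd))
                return 1;

        /*
         * Gifted segments must be page-aligned and a whole number of
         * pages, or vmsplice() fails with EINVAL (see the new
         * "aligned" check in get_iovec_page_array() below).
         */
        if (posix_memalign(&buf, pagesize, pagesize))
                return 1;
        memset(buf, 'x', pagesize);

        iov.iov_base = buf;
        iov.iov_len = pagesize;

        if (vmsplice(pfd[1], &iov, 1, SPLICE_F_GIFT) < 0)
                return 1;

        /*
         * The page is now a gift to the pipe: a downstream
         * splice(..., SPLICE_F_MOVE) may steal it, so we must not
         * touch buf again.
         */
        return 0;
}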
diff --git a/fs/block_dev.c b/fs/block_dev.c
index af88c43043d5..f5958f413bd1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1104,6 +1104,8 @@ const struct file_operations def_blk_fops = {
 	.readv		= generic_file_readv,
 	.writev		= generic_file_write_nolock,
 	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
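With ->splice_read/->splice_write wired into def_blk_fops, a block device can sit on either end of a splice. A hypothetical userspace use, assuming the glibc splice(2) wrapper and an illustrative device path:

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int pfd[2], fd;
        ssize_t n;

        if (pipe(pfd))
                return 1;

        fd = open("/dev/sda", O_RDONLY);        /* illustrative path */
        if (fd < 0)
                return 1;

        /* move up to 64 KiB from the device into the pipe, no user copy */
        n = splice(fd, NULL, pfd[1], NULL, 65536, 0);

        return n < 0;
}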
diff --git a/fs/pipe.c b/fs/pipe.c
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe)
 }
 
 static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
+pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
+			int atomic)
 {
 	unsigned long copy;
 
@@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
 		iov++;
 		copy = min_t(unsigned long, len, iov->iov_len);
 
-		if (copy_from_user(to, iov->iov_base, copy))
-			return -EFAULT;
+		if (atomic) {
+			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
+				return -EFAULT;
+		} else {
+			if (copy_from_user(to, iov->iov_base, copy))
+				return -EFAULT;
+		}
 		to += copy;
 		len -= copy;
 		iov->iov_base += copy;
@@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
 }
 
 static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
+pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
+		      int atomic)
 {
 	unsigned long copy;
 
@@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
 		iov++;
 		copy = min_t(unsigned long, len, iov->iov_len);
 
-		if (copy_to_user(iov->iov_base, from, copy))
-			return -EFAULT;
+		if (atomic) {
+			if (__copy_to_user_inatomic(iov->iov_base, from, copy))
+				return -EFAULT;
+		} else {
+			if (copy_to_user(iov->iov_base, from, copy))
+				return -EFAULT;
+		}
 		from += copy;
 		len -= copy;
 		iov->iov_base += copy;
@@ -94,13 +106,52 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
 	return 0;
 }
 
+/*
+ * Attempt to pre-fault in the user memory, so we can use atomic copies.
+ * Returns the number of bytes not faulted in.
+ */
+static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
+{
+	while (!iov->iov_len)
+		iov++;
+
+	while (len > 0) {
+		unsigned long this_len;
+
+		this_len = min_t(unsigned long, len, iov->iov_len);
+		if (fault_in_pages_writeable(iov->iov_base, this_len))
+			break;
+
+		len -= this_len;
+		iov++;
+	}
+
+	return len;
+}
+
+/*
+ * Pre-fault in the user memory, so we can use atomic copies.
+ */
+static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
+{
+	while (!iov->iov_len)
+		iov++;
+
+	while (len > 0) {
+		unsigned long this_len;
+
+		this_len = min_t(unsigned long, len, iov->iov_len);
+		fault_in_pages_readable(iov->iov_base, this_len);
+		len -= this_len;
+		iov++;
+	}
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 
-	buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
-
 	/*
 	 * If nobody else uses this page, and we don't already have a
 	 * temporary page, let's keep track of it as a one-deep
@@ -112,38 +163,58 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 	page_cache_release(page);
 }
 
-static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
-				struct pipe_buffer *buf)
+void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
+			   struct pipe_buffer *buf, int atomic)
 {
+	if (atomic) {
+		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
+		return kmap_atomic(buf->page, KM_USER0);
+	}
+
 	return kmap(buf->page);
 }
 
-static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
-				struct pipe_buffer *buf)
+void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
+			    struct pipe_buffer *buf, void *map_data)
 {
-	kunmap(buf->page);
+	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
+		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
+		kunmap_atomic(map_data, KM_USER0);
+	} else
+		kunmap(buf->page);
 }
 
 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
 			       struct pipe_buffer *buf)
 {
-	buf->flags |= PIPE_BUF_FLAG_STOLEN;
-	return 0;
+	struct page *page = buf->page;
+
+	if (page_count(page) == 1) {
+		lock_page(page);
+		return 0;
+	}
+
+	return 1;
 }
 
-static void anon_pipe_buf_get(struct pipe_inode_info *info,
-			      struct pipe_buffer *buf)
+void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
 {
 	page_cache_get(buf->page);
 }
 
+int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	return 0;
+}
+
 static struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
-	.map = anon_pipe_buf_map,
-	.unmap = anon_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = generic_pipe_buf_pin,
 	.release = anon_pipe_buf_release,
 	.steal = anon_pipe_buf_steal,
-	.get = anon_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 static ssize_t
@@ -174,22 +245,33 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 			struct pipe_buf_operations *ops = buf->ops;
 			void *addr;
 			size_t chars = buf->len;
-			int error;
+			int error, atomic;
 
 			if (chars > total_len)
 				chars = total_len;
 
-			addr = ops->map(filp, pipe, buf);
-			if (IS_ERR(addr)) {
+			error = ops->pin(pipe, buf);
+			if (error) {
 				if (!ret)
-					ret = PTR_ERR(addr);
+					error = ret;
 				break;
 			}
-			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
-			ops->unmap(pipe, buf);
+
+			atomic = !iov_fault_in_pages_write(iov, chars);
+redo:
+			addr = ops->map(pipe, buf, atomic);
+			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
+			ops->unmap(pipe, buf, addr);
 			if (unlikely(error)) {
+				/*
+				 * Just retry with the slow path if we failed.
+				 */
+				if (atomic) {
+					atomic = 0;
+					goto redo;
+				}
 				if (!ret)
-					ret = -EFAULT;
+					ret = error;
 				break;
 			}
 			ret += chars;
@@ -293,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 		int offset = buf->offset + buf->len;
 
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
+			int error, atomic = 1;
 			void *addr;
-			int error;
 
-			addr = ops->map(filp, pipe, buf);
-			if (IS_ERR(addr)) {
-				error = PTR_ERR(addr);
+			error = ops->pin(pipe, buf);
+			if (error)
 				goto out;
-			}
+
+			iov_fault_in_pages_read(iov, chars);
+redo1:
+			addr = ops->map(pipe, buf, atomic);
 			error = pipe_iov_copy_from_user(offset + addr, iov,
-							chars);
-			ops->unmap(pipe, buf);
+							chars, atomic);
+			ops->unmap(pipe, buf, addr);
 			ret = error;
 			do_wakeup = 1;
-			if (error)
+			if (error) {
+				if (atomic) {
+					atomic = 0;
+					goto redo1;
+				}
 				goto out;
+			}
 			buf->len += chars;
 			total_len -= chars;
 			ret = chars;
@@ -330,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pipe->tmp_page;
-			int error;
+			char *src;
+			int error, atomic = 1;
 
 			if (!page) {
 				page = alloc_page(GFP_HIGHUSER);
@@ -350,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 			if (chars > total_len)
 				chars = total_len;
 
-			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
-			kunmap(page);
+			iov_fault_in_pages_read(iov, chars);
+redo2:
+			if (atomic)
+				src = kmap_atomic(page, KM_USER0);
+			else
+				src = kmap(page);
+
+			error = pipe_iov_copy_from_user(src, iov, chars,
+							atomic);
+			if (atomic)
+				kunmap_atomic(src, KM_USER0);
+			else
+				kunmap(page);
+
 			if (unlikely(error)) {
+				if (atomic) {
+					atomic = 0;
+					goto redo2;
+				}
 				if (!ret)
-					ret = -EFAULT;
+					ret = error;
 				break;
 			}
 			ret += chars;
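All three fs/pipe.c call sites above follow the same pattern: pre-fault the user pages, attempt a no-fault copy under kmap_atomic(), and fall back to the sleeping kmap()/copy path only if the atomic copy faults anyway. Distilled into a standalone helper — a sketch with a hypothetical name, not code from this commit:

#include <linux/highmem.h>	/* kmap, kmap_atomic, ... */
#include <linux/pagemap.h>	/* fault_in_pages_writeable */
#include <asm/uaccess.h>	/* copy_to_user and friends */

static int copy_page_to_user(struct page *page, char __user *to,
			     unsigned long len)
{
	char *src;
	int left;

	/* fault_in_pages_writeable() returns 0 if the pages are now in */
	if (!fault_in_pages_writeable(to, len)) {
		/* fast path: no-fault copy under an atomic kmap */
		src = kmap_atomic(page, KM_USER0);
		left = __copy_to_user_inatomic(to, src, len);
		kunmap_atomic(src, KM_USER0);
		if (!left)
			return 0;
	}

	/* slow path: kmap() and copy_to_user() may both sleep */
	src = kmap(page);
	left = copy_to_user(to, src, len);
	kunmap(page);

	return left ? -EFAULT : 0;
}

The payoff is that kmap_atomic() is far cheaper than kmap() on highmem machines, and the fast path almost always succeeds because the pages were just faulted in.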
diff --git a/fs/splice.c b/fs/splice.c
index a46ddd28561e..b150493b6fc3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -78,7 +78,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 		return 1;
 	}
 
-	buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
+	buf->flags |= PIPE_BUF_FLAG_LRU;
 	return 0;
 }
 
@@ -87,12 +87,11 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 {
 	page_cache_release(buf->page);
 	buf->page = NULL;
-	buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU);
+	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
-static void *page_cache_pipe_buf_map(struct file *file,
-				     struct pipe_inode_info *info,
-				     struct pipe_buffer *buf)
+static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
+				   struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 	int err;
@@ -118,64 +117,44 @@ static void *page_cache_pipe_buf_map(struct file *file,
 		}
 
 		/*
-		 * Page is ok afterall, fall through to mapping.
+		 * Page is ok afterall, we are done.
 		 */
 		unlock_page(page);
 	}
 
-	return kmap(page);
+	return 0;
 error:
 	unlock_page(page);
-	return ERR_PTR(err);
-}
-
-static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
-				      struct pipe_buffer *buf)
-{
-	kunmap(buf->page);
-}
-
-static void *user_page_pipe_buf_map(struct file *file,
-				    struct pipe_inode_info *pipe,
-				    struct pipe_buffer *buf)
-{
-	return kmap(buf->page);
-}
-
-static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe,
-				     struct pipe_buffer *buf)
-{
-	kunmap(buf->page);
-}
-
-static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
-				    struct pipe_buffer *buf)
-{
-	page_cache_get(buf->page);
+	return err;
 }
 
 static struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
-	.map = page_cache_pipe_buf_map,
-	.unmap = page_cache_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = page_cache_pipe_buf_pin,
 	.release = page_cache_pipe_buf_release,
 	.steal = page_cache_pipe_buf_steal,
-	.get = page_cache_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 				    struct pipe_buffer *buf)
 {
-	return 1;
+	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
+		return 1;
+
+	return 0;
 }
 
 static struct pipe_buf_operations user_page_pipe_buf_ops = {
 	.can_merge = 0,
-	.map = user_page_pipe_buf_map,
-	.unmap = user_page_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = generic_pipe_buf_pin,
 	.release = page_cache_pipe_buf_release,
 	.steal = user_page_pipe_buf_steal,
-	.get = page_cache_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 /*
@@ -210,6 +189,9 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 			buf->offset = spd->partial[page_nr].offset;
 			buf->len = spd->partial[page_nr].len;
 			buf->ops = spd->ops;
+			if (spd->flags & SPLICE_F_GIFT)
+				buf->flags |= PIPE_BUF_FLAG_GIFT;
+
 			pipe->nrbufs++;
 			page_nr++;
 			ret += buf->len;
@@ -326,6 +308,12 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 		page = find_get_page(mapping, index);
 		if (!page) {
 			/*
+			 * Make sure the read-ahead engine is notified
+			 * about this failure.
+			 */
+			handle_ra_miss(mapping, &in->f_ra, index);
+
+			/*
 			 * page didn't exist, allocate one.
 			 */
 			page = page_cache_alloc_cold(mapping);
@@ -517,26 +505,16 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
 {
 	struct file *file = sd->file;
 	loff_t pos = sd->pos;
-	ssize_t ret;
-	void *ptr;
-	int more;
-
-	/*
-	 * Sub-optimal, but we are limited by the pipe ->map. We don't
-	 * need a kmap'ed buffer here, we just want to make sure we
-	 * have the page pinned if the pipe page originates from the
-	 * page cache.
-	 */
-	ptr = buf->ops->map(file, info, buf);
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
+	int ret, more;
 
-	more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+	ret = buf->ops->pin(info, buf);
+	if (!ret) {
+		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
 
-	ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len,
-				   &pos, more);
+		ret = file->f_op->sendpage(file, buf->page, buf->offset,
+					   sd->len, &pos, more);
+	}
 
-	buf->ops->unmap(info, buf);
 	return ret;
 }
 
@@ -569,15 +547,14 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	unsigned int offset, this_len;
 	struct page *page;
 	pgoff_t index;
-	char *src;
 	int ret;
 
 	/*
 	 * make sure the data in this buffer is uptodate
 	 */
-	src = buf->ops->map(file, info, buf);
-	if (IS_ERR(src))
-		return PTR_ERR(src);
+	ret = buf->ops->pin(info, buf);
+	if (unlikely(ret))
+		return ret;
 
 	index = sd->pos >> PAGE_CACHE_SHIFT;
 	offset = sd->pos & ~PAGE_CACHE_MASK;
@@ -587,9 +564,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 		this_len = PAGE_CACHE_SIZE - offset;
 
 	/*
-	 * Reuse buf page, if SPLICE_F_MOVE is set.
+	 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
+	 * page.
 	 */
-	if (sd->flags & SPLICE_F_MOVE) {
+	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
 		/*
 		 * If steal succeeds, buf->page is now pruned from the vm
 		 * side (LRU and page cache) and we can reuse it. The page
@@ -599,8 +577,12 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 			goto find_page;
 
 		page = buf->page;
-		if (add_to_page_cache(page, mapping, index, gfp_mask))
+		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
+			unlock_page(page);
 			goto find_page;
+		}
+
+		page_cache_get(page);
 
 		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 			lru_cache_add(page);
@@ -660,34 +642,36 @@ find_page:
 	} else if (ret)
 		goto out;
 
-	if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
-		char *dst = kmap_atomic(page, KM_USER0);
+	if (buf->page != page) {
+		/*
+		 * Careful, ->map() uses KM_USER0!
+		 */
+		char *src = buf->ops->map(info, buf, 1);
+		char *dst = kmap_atomic(page, KM_USER1);
 
 		memcpy(dst + offset, src + buf->offset, this_len);
 		flush_dcache_page(page);
-		kunmap_atomic(dst, KM_USER0);
+		kunmap_atomic(dst, KM_USER1);
+		buf->ops->unmap(info, buf, src);
 	}
 
 	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
-	if (ret == AOP_TRUNCATED_PAGE) {
+	if (!ret) {
+		/*
+		 * Return the number of bytes written and mark page as
+		 * accessed, we are now done!
+		 */
+		ret = this_len;
+		mark_page_accessed(page);
+		balance_dirty_pages_ratelimited(mapping);
+	} else if (ret == AOP_TRUNCATED_PAGE) {
 		page_cache_release(page);
 		goto find_page;
-	} else if (ret)
-		goto out;
-
-	/*
-	 * Return the number of bytes written.
-	 */
-	ret = this_len;
-	mark_page_accessed(page);
-	balance_dirty_pages_ratelimited(mapping);
+	}
 out:
-	if (!(buf->flags & PIPE_BUF_FLAG_STOLEN))
-		page_cache_release(page);
-
+	page_cache_release(page);
 	unlock_page(page);
 out_nomem:
-	buf->ops->unmap(info, buf);
 	return ret;
 }
 
@@ -1095,7 +1079,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
  */
 static int get_iovec_page_array(const struct iovec __user *iov,
 				unsigned int nr_vecs, struct page **pages,
-				struct partial_page *partial)
+				struct partial_page *partial, int aligned)
 {
 	int buffers = 0, error = 0;
 
@@ -1135,6 +1119,15 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 		 * in the user pages.
 		 */
 		off = (unsigned long) base & ~PAGE_MASK;
+
+		/*
+		 * If asked for alignment, the offset must be zero and the
+		 * length a multiple of the PAGE_SIZE.
+		 */
+		error = -EINVAL;
+		if (aligned && (off || len & ~PAGE_MASK))
+			break;
+
 		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		if (npages > PIPE_BUFFERS - buffers)
 			npages = PIPE_BUFFERS - buffers;
@@ -1228,7 +1221,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 	else if (unlikely(!nr_segs))
 		return 0;
 
-	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial);
+	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
+					    flags & SPLICE_F_GIFT);
 	if (spd.nr_pages <= 0)
 		return spd.nr_pages;
 
@@ -1336,6 +1330,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		obuf = opipe->bufs + nbuf;
 		*obuf = *ibuf;
 
+		/*
+		 * Don't inherit the gift flag, we need to
+		 * prevent multiple steals of this page.
+		 */
+		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+
 		if (obuf->len > len)
 			obuf->len = len;
 
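The header change below documents the ordering contract fs/splice.c now relies on: ->pin() first, then ->map()/->unmap() (or ->steal()) on the pinned buffer. A hypothetical in-kernel consumer following the same flow as pipe_to_file() above:

#include <linux/pipe_fs_i.h>

static int consume_buf(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	char *src;
	int ret;

	/* make sure the buffer's page is uptodate before touching it */
	ret = buf->ops->pin(info, buf);
	if (unlikely(ret))
		return ret;

	/* atomic map; ->map() uses KM_USER0, so nest any other map in KM_USER1 */
	src = buf->ops->map(info, buf, 1);
	/* ... consume buf->len bytes starting at src + buf->offset ... */
	buf->ops->unmap(info, buf, src);

	return 0;
}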
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 0008d4bd4059..df4d3fa7d3dc 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -5,8 +5,9 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_STOLEN	0x01
-#define PIPE_BUF_FLAG_LRU	0x02
+#define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
+#define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
+#define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
 
 struct pipe_buffer {
 	struct page *page;
@@ -15,10 +16,23 @@ struct pipe_buffer {
 	unsigned int flags;
 };
 
+/*
+ * Note on the nesting of these functions:
+ *
+ * ->pin()
+ *	->steal()
+ *	...
+ *	->map()
+ *	...
+ *	->unmap()
+ *
+ * That is, ->map() must be called on a pinned buffer, same goes for ->steal().
+ */
 struct pipe_buf_operations {
 	int can_merge;
-	void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *);
-	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
+	void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
+	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
+	int (*pin)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
 	int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
@@ -51,6 +65,12 @@ struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
 void free_pipe_info(struct inode * inode);
 void __free_pipe_info(struct pipe_inode_info *);
 
+/* Generic pipe buffer ops functions */
+void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
+void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
+void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
+
 /*
  * splice is tied to pipes as a transport (at least for now), so we'll just
  * add the splice flags here.
@@ -60,6 +80,7 @@ void __free_pipe_info(struct pipe_inode_info *);
 				 /* we may still block on the fd we splice */
 				 /* from/to, of course */
 #define SPLICE_F_MORE	(0x04)	/* expect more data */
+#define SPLICE_F_GIFT	(0x08)	/* pages passed in are a gift */
 
 /*
  * Passed to the actors
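Since generic_pipe_buf_{map,unmap,get,pin} are now exported through this header, a new pipe buffer type only has to implement the operations that actually differ. A hypothetical example (not from this commit):

#include <linux/pipe_fs_i.h>
#include <linux/pagemap.h>	/* page_cache_release */

static void example_buf_release(struct pipe_inode_info *info,
				struct pipe_buffer *buf)
{
	page_cache_release(buf->page);
}

static int example_buf_steal(struct pipe_inode_info *info,
			     struct pipe_buffer *buf)
{
	return 1;	/* nonzero: never let this page be stolen */
}

static struct pipe_buf_operations example_pipe_buf_ops = {
	.can_merge	= 0,
	.map		= generic_pipe_buf_map,
	.unmap		= generic_pipe_buf_unmap,
	.pin		= generic_pipe_buf_pin,
	.release	= example_buf_release,
	.steal		= example_buf_steal,
	.get		= generic_pipe_buf_get,
};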