author    Linus Torvalds <torvalds@g5.osdl.org>  2006-05-01 21:33:40 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>  2006-05-01 21:33:40 -0400
commit    9817d207dc13e3a9fc0287bbd36bdfa3cffe5ed4 (patch)
tree      38f87e68f1cd36159fe5b9c03e9047b2fe374324
parent    cf105601df49ba0ea5ac04a6154c6c1442994c74 (diff)
parent    7afa6fd037e51e95d322990cb127bb2b1217251a (diff)
Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block
* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
  [PATCH] vmsplice: allow user to pass in gift pages
  [PATCH] pipe: enable atomic copying of pipe data to/from user space
  [PATCH] splice: call handle_ra_miss() on failure to lookup page
  [PATCH] Add ->splice_read/splice_write to def_blk_fops
  [PATCH] pipe: introduce ->pin() buffer operation
  [PATCH] splice: fix bugs in pipe_to_file()
  [PATCH] splice: fix bugs with stealing regular pipe pages
-rw-r--r--  fs/block_dev.c               2
-rw-r--r--  fs/pipe.c                  184
-rw-r--r--  fs/splice.c                166
-rw-r--r--  include/linux/pipe_fs_i.h   29

4 files changed, 255 insertions(+), 126 deletions(-)
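The user-visible piece of this merge is SPLICE_F_GIFT for vmsplice(2): gifted pages may be stolen by the kernel instead of copied. A minimal userspace sketch follows, assuming a kernel with this branch merged; it invokes the syscall directly via syscall(2) because glibc had no vmsplice() wrapper at the time, and the SPLICE_F_GIFT value mirrors the pipe_fs_i.h hunk below.

/*
 * Hedged sketch, not part of this commit: gift one page to a pipe
 * with vmsplice().
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/uio.h>

#ifndef SPLICE_F_GIFT
#define SPLICE_F_GIFT	0x08	/* value from the pipe_fs_i.h hunk below */
#endif

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	struct iovec iov;
	void *buf;
	int pfd[2];

	if (pipe(pfd) || posix_memalign(&buf, psz, psz))
		return 1;

	memset(buf, 'x', psz);

	/*
	 * Gifted segments must be page-aligned and a multiple of
	 * PAGE_SIZE; get_iovec_page_array() below returns -EINVAL
	 * otherwise.
	 */
	iov.iov_base = buf;
	iov.iov_len = psz;

	if (syscall(__NR_vmsplice, pfd[1], &iov, 1, SPLICE_F_GIFT) < 0) {
		perror("vmsplice");
		return 1;
	}

	/* The page is now the pipe's: a gifted page must not be reused. */
	return 0;
}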
diff --git a/fs/block_dev.c b/fs/block_dev.c
index af88c43043d5..f5958f413bd1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1104,6 +1104,8 @@ const struct file_operations def_blk_fops = {
 	.readv		= generic_file_readv,
 	.writev		= generic_file_write_nolock,
 	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/pipe.c b/fs/pipe.c
index 7fefb10db8d9..3941a7f78b5d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe)
 }
 
 static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
+pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
+			int atomic)
 {
 	unsigned long copy;
 
@@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
 		iov++;
 		copy = min_t(unsigned long, len, iov->iov_len);
 
-		if (copy_from_user(to, iov->iov_base, copy))
-			return -EFAULT;
+		if (atomic) {
+			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
+				return -EFAULT;
+		} else {
+			if (copy_from_user(to, iov->iov_base, copy))
+				return -EFAULT;
+		}
 		to += copy;
 		len -= copy;
 		iov->iov_base += copy;
@@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
 }
 
 static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
+pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
+		      int atomic)
 {
 	unsigned long copy;
 
@@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
 		iov++;
 		copy = min_t(unsigned long, len, iov->iov_len);
 
-		if (copy_to_user(iov->iov_base, from, copy))
-			return -EFAULT;
+		if (atomic) {
+			if (__copy_to_user_inatomic(iov->iov_base, from, copy))
+				return -EFAULT;
+		} else {
+			if (copy_to_user(iov->iov_base, from, copy))
+				return -EFAULT;
+		}
 		from += copy;
 		len -= copy;
 		iov->iov_base += copy;
@@ -94,13 +106,52 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
 	return 0;
 }
 
+/*
+ * Attempt to pre-fault in the user memory, so we can use atomic copies.
+ * Returns the number of bytes not faulted in.
+ */
+static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
+{
+	while (!iov->iov_len)
+		iov++;
+
+	while (len > 0) {
+		unsigned long this_len;
+
+		this_len = min_t(unsigned long, len, iov->iov_len);
+		if (fault_in_pages_writeable(iov->iov_base, this_len))
+			break;
+
+		len -= this_len;
+		iov++;
+	}
+
+	return len;
+}
+
+/*
+ * Pre-fault in the user memory, so we can use atomic copies.
+ */
+static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
+{
+	while (!iov->iov_len)
+		iov++;
+
+	while (len > 0) {
+		unsigned long this_len;
+
+		this_len = min_t(unsigned long, len, iov->iov_len);
+		fault_in_pages_readable(iov->iov_base, this_len);
+		len -= this_len;
+		iov++;
+	}
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 
-	buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
-
 	/*
 	 * If nobody else uses this page, and we don't already have a
 	 * temporary page, let's keep track of it as a one-deep
@@ -112,38 +163,58 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 		page_cache_release(page);
 }
 
-static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
-				struct pipe_buffer *buf)
+void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
+			   struct pipe_buffer *buf, int atomic)
 {
+	if (atomic) {
+		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
+		return kmap_atomic(buf->page, KM_USER0);
+	}
+
 	return kmap(buf->page);
 }
 
-static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
-				struct pipe_buffer *buf)
+void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
+			    struct pipe_buffer *buf, void *map_data)
 {
-	kunmap(buf->page);
+	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
+		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
+		kunmap_atomic(map_data, KM_USER0);
+	} else
+		kunmap(buf->page);
 }
 
 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
 			       struct pipe_buffer *buf)
 {
-	buf->flags |= PIPE_BUF_FLAG_STOLEN;
-	return 0;
+	struct page *page = buf->page;
+
+	if (page_count(page) == 1) {
+		lock_page(page);
+		return 0;
+	}
+
+	return 1;
 }
 
-static void anon_pipe_buf_get(struct pipe_inode_info *info,
-			      struct pipe_buffer *buf)
+void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
 {
 	page_cache_get(buf->page);
 }
 
+int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	return 0;
+}
+
 static struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
-	.map = anon_pipe_buf_map,
-	.unmap = anon_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = generic_pipe_buf_pin,
 	.release = anon_pipe_buf_release,
 	.steal = anon_pipe_buf_steal,
-	.get = anon_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 static ssize_t
@@ -174,22 +245,33 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 			struct pipe_buf_operations *ops = buf->ops;
 			void *addr;
 			size_t chars = buf->len;
-			int error;
+			int error, atomic;
 
 			if (chars > total_len)
 				chars = total_len;
 
-			addr = ops->map(filp, pipe, buf);
-			if (IS_ERR(addr)) {
+			error = ops->pin(pipe, buf);
+			if (error) {
 				if (!ret)
-					ret = PTR_ERR(addr);
+					error = ret;
 				break;
 			}
-			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
-			ops->unmap(pipe, buf);
+
+			atomic = !iov_fault_in_pages_write(iov, chars);
+redo:
+			addr = ops->map(pipe, buf, atomic);
+			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
+			ops->unmap(pipe, buf, addr);
 			if (unlikely(error)) {
+				/*
+				 * Just retry with the slow path if we failed.
+				 */
+				if (atomic) {
+					atomic = 0;
+					goto redo;
+				}
 				if (!ret)
-					ret = -EFAULT;
+					ret = error;
 				break;
 			}
 			ret += chars;
@@ -293,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 		int offset = buf->offset + buf->len;
 
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
+			int error, atomic = 1;
 			void *addr;
-			int error;
 
-			addr = ops->map(filp, pipe, buf);
-			if (IS_ERR(addr)) {
-				error = PTR_ERR(addr);
+			error = ops->pin(pipe, buf);
+			if (error)
 				goto out;
-			}
+
+			iov_fault_in_pages_read(iov, chars);
+redo1:
+			addr = ops->map(pipe, buf, atomic);
 			error = pipe_iov_copy_from_user(offset + addr, iov,
-							chars);
-			ops->unmap(pipe, buf);
+							chars, atomic);
+			ops->unmap(pipe, buf, addr);
 			ret = error;
 			do_wakeup = 1;
-			if (error)
+			if (error) {
+				if (atomic) {
+					atomic = 0;
+					goto redo1;
+				}
 				goto out;
+			}
 			buf->len += chars;
 			total_len -= chars;
 			ret = chars;
@@ -330,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pipe->tmp_page;
-			int error;
+			char *src;
+			int error, atomic = 1;
 
 			if (!page) {
 				page = alloc_page(GFP_HIGHUSER);
@@ -350,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 			if (chars > total_len)
 				chars = total_len;
 
-			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
-			kunmap(page);
+			iov_fault_in_pages_read(iov, chars);
+redo2:
+			if (atomic)
+				src = kmap_atomic(page, KM_USER0);
+			else
+				src = kmap(page);
+
+			error = pipe_iov_copy_from_user(src, iov, chars,
+							atomic);
+			if (atomic)
+				kunmap_atomic(src, KM_USER0);
+			else
+				kunmap(page);
+
 			if (unlikely(error)) {
+				if (atomic) {
+					atomic = 0;
+					goto redo2;
+				}
 				if (!ret)
-					ret = -EFAULT;
+					ret = error;
 				break;
 			}
 			ret += chars;
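All three copy sites patched in fs/pipe.c above share one pattern: pre-fault the user pages, attempt the copy under an atomic kmap (where faulting is not allowed, hence __copy_to_user_inatomic()/__copy_from_user_inatomic()), and fall back once to the sleeping kmap()/copy path if the atomic copy still faults. Condensed from the pipe_readv() hunk, the control flow is:

	/* Condensed from the pipe_readv() hunk; not a standalone function. */
	atomic = !iov_fault_in_pages_write(iov, chars);	/* everything resident? */
redo:
	addr = ops->map(pipe, buf, atomic);	/* kmap_atomic() when atomic */
	error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
	ops->unmap(pipe, buf, addr);
	if (unlikely(error) && atomic) {
		atomic = 0;	/* page went away again: retry sleeping */
		goto redo;
	}

The pre-fault makes the atomic fast path succeed in the common case; the retry preserves correctness when a page is reclaimed between the fault-in and the copy.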
diff --git a/fs/splice.c b/fs/splice.c
index a46ddd28561e..b150493b6fc3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -78,7 +78,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 		return 1;
 	}
 
-	buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
+	buf->flags |= PIPE_BUF_FLAG_LRU;
 	return 0;
 }
 
@@ -87,12 +87,11 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 {
 	page_cache_release(buf->page);
 	buf->page = NULL;
-	buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU);
+	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
-static void *page_cache_pipe_buf_map(struct file *file,
-				     struct pipe_inode_info *info,
-				     struct pipe_buffer *buf)
+static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
+				   struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 	int err;
@@ -118,64 +117,44 @@ static void *page_cache_pipe_buf_map(struct file *file,
 		}
 
 		/*
-		 * Page is ok afterall, fall through to mapping.
+		 * Page is ok afterall, we are done.
 		 */
 		unlock_page(page);
 	}
 
-	return kmap(page);
+	return 0;
 error:
 	unlock_page(page);
-	return ERR_PTR(err);
-}
-
-static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
-				      struct pipe_buffer *buf)
-{
-	kunmap(buf->page);
-}
-
-static void *user_page_pipe_buf_map(struct file *file,
-				    struct pipe_inode_info *pipe,
-				    struct pipe_buffer *buf)
-{
-	return kmap(buf->page);
-}
-
-static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe,
-				     struct pipe_buffer *buf)
-{
-	kunmap(buf->page);
-}
-
-static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
-				    struct pipe_buffer *buf)
-{
-	page_cache_get(buf->page);
+	return err;
 }
 
 static struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
-	.map = page_cache_pipe_buf_map,
-	.unmap = page_cache_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = page_cache_pipe_buf_pin,
 	.release = page_cache_pipe_buf_release,
 	.steal = page_cache_pipe_buf_steal,
-	.get = page_cache_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 				    struct pipe_buffer *buf)
 {
-	return 1;
+	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
+		return 1;
+
+	return 0;
 }
 
 static struct pipe_buf_operations user_page_pipe_buf_ops = {
 	.can_merge = 0,
-	.map = user_page_pipe_buf_map,
-	.unmap = user_page_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = generic_pipe_buf_pin,
 	.release = page_cache_pipe_buf_release,
 	.steal = user_page_pipe_buf_steal,
-	.get = page_cache_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 /*
@@ -210,6 +189,9 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 			buf->offset = spd->partial[page_nr].offset;
 			buf->len = spd->partial[page_nr].len;
 			buf->ops = spd->ops;
+			if (spd->flags & SPLICE_F_GIFT)
+				buf->flags |= PIPE_BUF_FLAG_GIFT;
+
 			pipe->nrbufs++;
 			page_nr++;
 			ret += buf->len;
@@ -326,6 +308,12 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 		page = find_get_page(mapping, index);
 		if (!page) {
 			/*
+			 * Make sure the read-ahead engine is notified
+			 * about this failure.
+			 */
+			handle_ra_miss(mapping, &in->f_ra, index);
+
+			/*
 			 * page didn't exist, allocate one.
 			 */
 			page = page_cache_alloc_cold(mapping);
@@ -517,26 +505,16 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
 {
 	struct file *file = sd->file;
 	loff_t pos = sd->pos;
-	ssize_t ret;
-	void *ptr;
-	int more;
+	int ret, more;
 
-	/*
-	 * Sub-optimal, but we are limited by the pipe ->map. We don't
-	 * need a kmap'ed buffer here, we just want to make sure we
-	 * have the page pinned if the pipe page originates from the
-	 * page cache.
-	 */
-	ptr = buf->ops->map(file, info, buf);
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
+	ret = buf->ops->pin(info, buf);
+	if (!ret) {
+		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
 
-	more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
-
-	ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len,
-				   &pos, more);
+		ret = file->f_op->sendpage(file, buf->page, buf->offset,
+					   sd->len, &pos, more);
+	}
 
-	buf->ops->unmap(info, buf);
 	return ret;
 }
 
@@ -569,15 +547,14 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	unsigned int offset, this_len;
 	struct page *page;
 	pgoff_t index;
-	char *src;
 	int ret;
 
 	/*
 	 * make sure the data in this buffer is uptodate
 	 */
-	src = buf->ops->map(file, info, buf);
-	if (IS_ERR(src))
-		return PTR_ERR(src);
+	ret = buf->ops->pin(info, buf);
+	if (unlikely(ret))
+		return ret;
 
 	index = sd->pos >> PAGE_CACHE_SHIFT;
 	offset = sd->pos & ~PAGE_CACHE_MASK;
@@ -587,9 +564,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 		this_len = PAGE_CACHE_SIZE - offset;
 
 	/*
-	 * Reuse buf page, if SPLICE_F_MOVE is set.
+	 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
+	 * page.
 	 */
-	if (sd->flags & SPLICE_F_MOVE) {
+	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
 		/*
 		 * If steal succeeds, buf->page is now pruned from the vm
 		 * side (LRU and page cache) and we can reuse it. The page
@@ -599,8 +577,12 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 			goto find_page;
 
 		page = buf->page;
-		if (add_to_page_cache(page, mapping, index, gfp_mask))
+		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
+			unlock_page(page);
 			goto find_page;
+		}
+
+		page_cache_get(page);
 
 		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 			lru_cache_add(page);
@@ -660,34 +642,36 @@ find_page:
 	} else if (ret)
 		goto out;
 
-	if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
-		char *dst = kmap_atomic(page, KM_USER0);
+	if (buf->page != page) {
+		/*
+		 * Careful, ->map() uses KM_USER0!
+		 */
+		char *src = buf->ops->map(info, buf, 1);
+		char *dst = kmap_atomic(page, KM_USER1);
 
 		memcpy(dst + offset, src + buf->offset, this_len);
 		flush_dcache_page(page);
-		kunmap_atomic(dst, KM_USER0);
+		kunmap_atomic(dst, KM_USER1);
+		buf->ops->unmap(info, buf, src);
 	}
 
 	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
-	if (ret == AOP_TRUNCATED_PAGE) {
+	if (!ret) {
+		/*
+		 * Return the number of bytes written and mark page as
+		 * accessed, we are now done!
+		 */
+		ret = this_len;
+		mark_page_accessed(page);
+		balance_dirty_pages_ratelimited(mapping);
+	} else if (ret == AOP_TRUNCATED_PAGE) {
 		page_cache_release(page);
 		goto find_page;
-	} else if (ret)
-		goto out;
-
-	/*
-	 * Return the number of bytes written.
-	 */
-	ret = this_len;
-	mark_page_accessed(page);
-	balance_dirty_pages_ratelimited(mapping);
+	}
 out:
-	if (!(buf->flags & PIPE_BUF_FLAG_STOLEN))
-		page_cache_release(page);
-
+	page_cache_release(page);
 	unlock_page(page);
 out_nomem:
-	buf->ops->unmap(info, buf);
 	return ret;
 }
 
@@ -1095,7 +1079,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
  */
 static int get_iovec_page_array(const struct iovec __user *iov,
 				unsigned int nr_vecs, struct page **pages,
-				struct partial_page *partial)
+				struct partial_page *partial, int aligned)
 {
 	int buffers = 0, error = 0;
 
@@ -1135,6 +1119,15 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 		 * in the user pages.
 		 */
 		off = (unsigned long) base & ~PAGE_MASK;
+
+		/*
+		 * If asked for alignment, the offset must be zero and the
+		 * length a multiple of the PAGE_SIZE.
+		 */
+		error = -EINVAL;
+		if (aligned && (off || len & ~PAGE_MASK))
+			break;
+
 		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		if (npages > PIPE_BUFFERS - buffers)
 			npages = PIPE_BUFFERS - buffers;
@@ -1228,7 +1221,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 	else if (unlikely(!nr_segs))
 		return 0;
 
-	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial);
+	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
+					    flags & SPLICE_F_GIFT);
 	if (spd.nr_pages <= 0)
 		return spd.nr_pages;
 
@@ -1336,6 +1330,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		obuf = opipe->bufs + nbuf;
 		*obuf = *ibuf;
 
+		/*
+		 * Don't inherit the gift flag, we need to
+		 * prevent multiple steals of this page.
+		 */
+		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+
 		if (obuf->len > len)
 			obuf->len = len;
 
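Taken together with the gift flag, the pipe_to_file() changes above make page stealing reachable end to end: a gifted, full-page pipe buffer spliced out with SPLICE_F_MOVE can be migrated into the page cache via ->steal() instead of being memcpy'd. A hedged userspace continuation of the vmsplice sketch earlier; move_to_file() and its arguments are illustrative, not part of this commit.

#define _GNU_SOURCE
#include <sys/types.h>
#include <unistd.h>
#include <sys/syscall.h>

#define SPLICE_F_MOVE	0x01	/* pre-existing flag, see pipe_fs_i.h */

/*
 * Hypothetical helper: pipe_rfd is the read side of the pipe holding
 * the gifted page; file_fd is a regular file opened for writing.
 */
static long move_to_file(int pipe_rfd, int file_fd, size_t len)
{
	loff_t off = 0;

	/*
	 * With SPLICE_F_MOVE and a full PAGE_CACHE_SIZE buffer,
	 * pipe_to_file() above may steal the gifted page via ->steal()
	 * instead of copying; partial pages always fall back to a copy.
	 */
	return syscall(__NR_splice, pipe_rfd, NULL, file_fd, &off,
		       len, SPLICE_F_MOVE);
}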
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 0008d4bd4059..df4d3fa7d3dc 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -5,8 +5,9 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_STOLEN	0x01
-#define PIPE_BUF_FLAG_LRU	0x02
+#define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
+#define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
+#define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
 
 struct pipe_buffer {
 	struct page *page;
@@ -15,10 +16,23 @@ struct pipe_buffer {
 	unsigned int flags;
 };
 
+/*
+ * Note on the nesting of these functions:
+ *
+ * ->pin()
+ *	->steal()
+ *	...
+ *	->map()
+ *	...
+ *	->unmap()
+ *
+ * That is, ->map() must be called on a pinned buffer, same goes for ->steal().
+ */
 struct pipe_buf_operations {
 	int can_merge;
-	void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *);
-	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
+	void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
+	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
+	int (*pin)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
 	int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
@@ -51,6 +65,12 @@ struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
 void free_pipe_info(struct inode * inode);
 void __free_pipe_info(struct pipe_inode_info *);
 
+/* Generic pipe buffer ops functions */
+void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
+void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
+void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
+
 /*
  * splice is tied to pipes as a transport (at least for now), so we'll just
  * add the splice flags here.
@@ -60,6 +80,7 @@ void __free_pipe_info(struct pipe_inode_info *);
 					/* we may still block on the fd we splice */
 					/* from/to, of course */
 #define SPLICE_F_MORE	(0x04)		/* expect more data */
+#define SPLICE_F_GIFT	(0x08)		/* pages passed in are a gift */
 
 /*
  * Passed to the actors
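For any new consumer of pipe buffers, the nesting note above reduces to: call ->pin() first, and only then ->map()/->unmap() (or ->steal()). A hedged sketch of a hypothetical in-kernel consumer, not part of this patch:

/*
 * Hypothetical consumer following the ->pin()/->map()/->unmap()
 * protocol; error handling trimmed.
 */
static int consume_pipe_buf(struct pipe_inode_info *pipe,
			    struct pipe_buffer *buf)
{
	void *addr;
	int error;

	error = buf->ops->pin(pipe, buf);	/* make the page uptodate */
	if (unlikely(error))
		return error;

	addr = buf->ops->map(pipe, buf, 0);	/* 0: the sleeping kmap() path */
	/* ... consume buf->len bytes starting at addr + buf->offset ... */
	buf->ops->unmap(pipe, buf, addr);

	return 0;
}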