aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/compat.c4
-rw-r--r--fs/ext3/ioctl.c18
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/fuse/dev.c35
-rw-r--r--fs/fuse/fuse_i.h12
-rw-r--r--fs/fuse/inode.c40
-rw-r--r--fs/splice.c355
8 files changed, 352 insertions, 119 deletions
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index aaf151cb5822..d2ec806a4f32 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3447,10 +3447,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3447 pSesInfo->server->secMode, 3447 pSesInfo->server->secMode,
3448 pSesInfo->server->capabilities, 3448 pSesInfo->server->capabilities,
3449 pSesInfo->server->timeZone)); 3449 pSesInfo->server->timeZone));
3450#ifdef CONFIG_CIFS_EXPERIMENTAL
3450 if(experimEnabled > 1) 3451 if(experimEnabled > 1)
3451 rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */, 3452 rc = CIFS_SessSetup(xid, pSesInfo, CIFS_NTLM /* type */,
3452 &ntlmv2_flag, nls_info); 3453 &ntlmv2_flag, nls_info);
3453 else if (extended_security 3454 else
3455#endif
3456 if (extended_security
3454 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3457 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3455 && (pSesInfo->server->secType == NTLMSSP)) { 3458 && (pSesInfo->server->secType == NTLMSSP)) {
3456 cFYI(1, ("New style sesssetup")); 3459 cFYI(1, ("New style sesssetup"));
diff --git a/fs/compat.c b/fs/compat.c
index 7f8e26ea427c..2e32bd340474 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1217,6 +1217,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1217 if (ret < 0) 1217 if (ret < 0)
1218 goto out; 1218 goto out;
1219 1219
1220 ret = security_file_permission(file, type == READ ? MAY_READ:MAY_WRITE);
1221 if (ret)
1222 goto out;
1223
1220 fnv = NULL; 1224 fnv = NULL;
1221 if (type == READ) { 1225 if (type == READ) {
1222 fn = file->f_op->read; 1226 fn = file->f_op->read;
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index aaf1da17b6d4..8c22aa9a7fbb 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -48,6 +48,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
48 if (!S_ISDIR(inode->i_mode)) 48 if (!S_ISDIR(inode->i_mode))
49 flags &= ~EXT3_DIRSYNC_FL; 49 flags &= ~EXT3_DIRSYNC_FL;
50 50
51 mutex_lock(&inode->i_mutex);
51 oldflags = ei->i_flags; 52 oldflags = ei->i_flags;
52 53
53 /* The JOURNAL_DATA flag is modifiable only by root */ 54 /* The JOURNAL_DATA flag is modifiable only by root */
@@ -60,8 +61,10 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
60 * This test looks nicer. Thanks to Pauline Middelink 61 * This test looks nicer. Thanks to Pauline Middelink
61 */ 62 */
62 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { 63 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
63 if (!capable(CAP_LINUX_IMMUTABLE)) 64 if (!capable(CAP_LINUX_IMMUTABLE)) {
65 mutex_unlock(&inode->i_mutex);
64 return -EPERM; 66 return -EPERM;
67 }
65 } 68 }
66 69
67 /* 70 /*
@@ -69,14 +72,18 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
69 * the relevant capability. 72 * the relevant capability.
70 */ 73 */
71 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { 74 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
72 if (!capable(CAP_SYS_RESOURCE)) 75 if (!capable(CAP_SYS_RESOURCE)) {
76 mutex_unlock(&inode->i_mutex);
73 return -EPERM; 77 return -EPERM;
78 }
74 } 79 }
75 80
76 81
77 handle = ext3_journal_start(inode, 1); 82 handle = ext3_journal_start(inode, 1);
78 if (IS_ERR(handle)) 83 if (IS_ERR(handle)) {
84 mutex_unlock(&inode->i_mutex);
79 return PTR_ERR(handle); 85 return PTR_ERR(handle);
86 }
80 if (IS_SYNC(inode)) 87 if (IS_SYNC(inode))
81 handle->h_sync = 1; 88 handle->h_sync = 1;
82 err = ext3_reserve_inode_write(handle, inode, &iloc); 89 err = ext3_reserve_inode_write(handle, inode, &iloc);
@@ -93,11 +100,14 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
93 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 100 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
94flags_err: 101flags_err:
95 ext3_journal_stop(handle); 102 ext3_journal_stop(handle);
96 if (err) 103 if (err) {
104 mutex_unlock(&inode->i_mutex);
97 return err; 105 return err;
106 }
98 107
99 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) 108 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
100 err = ext3_change_inode_journal_flag(inode, jflag); 109 err = ext3_change_inode_journal_flag(inode, jflag);
110 mutex_unlock(&inode->i_mutex);
101 return err; 111 return err;
102 } 112 }
103 case EXT3_IOC_GETVERSION: 113 case EXT3_IOC_GETVERSION:
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index c5ffa8523968..8aac5334680d 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -213,7 +213,7 @@ static int setup_new_group_blocks(struct super_block *sb,
213 goto exit_bh; 213 goto exit_bh;
214 } 214 }
215 lock_buffer(bh); 215 lock_buffer(bh);
216 memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size); 216 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
217 set_buffer_uptodate(gdb); 217 set_buffer_uptodate(gdb);
218 unlock_buffer(bh); 218 unlock_buffer(bh);
219 ext3_journal_dirty_metadata(handle, gdb); 219 ext3_journal_dirty_metadata(handle, gdb);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cc750c68fe70..104a62dadb94 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -128,14 +128,24 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
128 } 128 }
129} 129}
130 130
131void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req) 131/*
132 * Called with sbput_sem held for read (request_end) or write
133 * (fuse_put_super). By the time fuse_put_super() is finished, all
134 * inodes belonging to background requests must be released, so the
135 * iputs have to be done within the locked region.
136 */
137void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req)
132{ 138{
133 list_del_init(&req->bg_entry); 139 iput(req->inode);
140 iput(req->inode2);
141 spin_lock(&fc->lock);
142 list_del(&req->bg_entry);
134 if (fc->num_background == FUSE_MAX_BACKGROUND) { 143 if (fc->num_background == FUSE_MAX_BACKGROUND) {
135 fc->blocked = 0; 144 fc->blocked = 0;
136 wake_up_all(&fc->blocked_waitq); 145 wake_up_all(&fc->blocked_waitq);
137 } 146 }
138 fc->num_background--; 147 fc->num_background--;
148 spin_unlock(&fc->lock);
139} 149}
140 150
141/* 151/*
@@ -165,27 +175,22 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
165 wake_up(&req->waitq); 175 wake_up(&req->waitq);
166 fuse_put_request(fc, req); 176 fuse_put_request(fc, req);
167 } else { 177 } else {
168 struct inode *inode = req->inode;
169 struct inode *inode2 = req->inode2;
170 struct file *file = req->file;
171 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 178 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
172 req->end = NULL; 179 req->end = NULL;
173 req->inode = NULL;
174 req->inode2 = NULL;
175 req->file = NULL;
176 if (!list_empty(&req->bg_entry))
177 fuse_remove_background(fc, req);
178 spin_unlock(&fc->lock); 180 spin_unlock(&fc->lock);
181 down_read(&fc->sbput_sem);
182 if (fc->mounted)
183 fuse_release_background(fc, req);
184 up_read(&fc->sbput_sem);
185
186 /* fput must go outside sbput_sem, otherwise it can deadlock */
187 if (req->file)
188 fput(req->file);
179 189
180 if (end) 190 if (end)
181 end(fc, req); 191 end(fc, req);
182 else 192 else
183 fuse_put_request(fc, req); 193 fuse_put_request(fc, req);
184
185 if (file)
186 fput(file);
187 iput(inode);
188 iput(inode2);
189 } 194 }
190} 195}
191 196
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 59661c481d9d..0474202cb5dc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -258,9 +258,15 @@ struct fuse_conn {
258 /** waitq for blocked connection */ 258 /** waitq for blocked connection */
259 wait_queue_head_t blocked_waitq; 259 wait_queue_head_t blocked_waitq;
260 260
261 /** RW semaphore for exclusion with fuse_put_super() */
262 struct rw_semaphore sbput_sem;
263
261 /** The next unique request id */ 264 /** The next unique request id */
262 u64 reqctr; 265 u64 reqctr;
263 266
267 /** Mount is active */
268 unsigned mounted;
269
264 /** Connection established, cleared on umount, connection 270 /** Connection established, cleared on umount, connection
265 abort and device release */ 271 abort and device release */
266 unsigned connected; 272 unsigned connected;
@@ -471,11 +477,11 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
471void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 477void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
472 478
473/** 479/**
474 * Remove request from the the background list 480 * Release inodes and file associated with background request
475 */ 481 */
476void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req); 482void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req);
477 483
478/** Abort all requests */ 484/* Abort all requests */
479void fuse_abort_conn(struct fuse_conn *fc); 485void fuse_abort_conn(struct fuse_conn *fc);
480 486
481/** 487/**
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 43a6fc0db8a7..7627022446b2 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -204,26 +204,17 @@ static void fuse_put_super(struct super_block *sb)
204{ 204{
205 struct fuse_conn *fc = get_fuse_conn_super(sb); 205 struct fuse_conn *fc = get_fuse_conn_super(sb);
206 206
207 down_write(&fc->sbput_sem);
208 while (!list_empty(&fc->background))
209 fuse_release_background(fc,
210 list_entry(fc->background.next,
211 struct fuse_req, bg_entry));
212
207 spin_lock(&fc->lock); 213 spin_lock(&fc->lock);
214 fc->mounted = 0;
208 fc->connected = 0; 215 fc->connected = 0;
209 while (!list_empty(&fc->background)) {
210 struct fuse_req *req = list_entry(fc->background.next,
211 struct fuse_req, bg_entry);
212 struct inode *inode = req->inode;
213 struct inode *inode2 = req->inode2;
214
215 /* File would hold a reference to vfsmount */
216 BUG_ON(req->file);
217 req->inode = NULL;
218 req->inode2 = NULL;
219 fuse_remove_background(fc, req);
220
221 spin_unlock(&fc->lock);
222 iput(inode);
223 iput(inode2);
224 spin_lock(&fc->lock);
225 }
226 spin_unlock(&fc->lock); 216 spin_unlock(&fc->lock);
217 up_write(&fc->sbput_sem);
227 /* Flush all readers on this fs */ 218 /* Flush all readers on this fs */
228 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 219 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
229 wake_up_all(&fc->waitq); 220 wake_up_all(&fc->waitq);
@@ -395,6 +386,7 @@ static struct fuse_conn *new_conn(void)
395 INIT_LIST_HEAD(&fc->processing); 386 INIT_LIST_HEAD(&fc->processing);
396 INIT_LIST_HEAD(&fc->io); 387 INIT_LIST_HEAD(&fc->io);
397 INIT_LIST_HEAD(&fc->background); 388 INIT_LIST_HEAD(&fc->background);
389 init_rwsem(&fc->sbput_sem);
398 kobj_set_kset_s(fc, connections_subsys); 390 kobj_set_kset_s(fc, connections_subsys);
399 kobject_init(&fc->kobj); 391 kobject_init(&fc->kobj);
400 atomic_set(&fc->num_waiting, 0); 392 atomic_set(&fc->num_waiting, 0);
@@ -508,11 +500,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
508 if (file->f_op != &fuse_dev_operations) 500 if (file->f_op != &fuse_dev_operations)
509 return -EINVAL; 501 return -EINVAL;
510 502
511 /* Setting file->private_data can't race with other mount()
512 instances, since BKL is held for ->get_sb() */
513 if (file->private_data)
514 return -EINVAL;
515
516 fc = new_conn(); 503 fc = new_conn();
517 if (!fc) 504 if (!fc)
518 return -ENOMEM; 505 return -ENOMEM;
@@ -548,7 +535,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
548 if (err) 535 if (err)
549 goto err_free_req; 536 goto err_free_req;
550 537
538 /* Setting file->private_data can't race with other mount()
539 instances, since BKL is held for ->get_sb() */
540 err = -EINVAL;
541 if (file->private_data)
542 goto err_kobject_del;
543
551 sb->s_root = root_dentry; 544 sb->s_root = root_dentry;
545 fc->mounted = 1;
552 fc->connected = 1; 546 fc->connected = 1;
553 kobject_get(&fc->kobj); 547 kobject_get(&fc->kobj);
554 file->private_data = fc; 548 file->private_data = fc;
@@ -563,6 +557,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
563 557
564 return 0; 558 return 0;
565 559
560 err_kobject_del:
561 kobject_del(&fc->kobj);
566 err_free_req: 562 err_free_req:
567 fuse_request_free(init_req); 563 fuse_request_free(init_req);
568 err_put_root: 564 err_put_root:
diff --git a/fs/splice.c b/fs/splice.c
index 0559e7577a04..447ebc0a37f3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -27,15 +27,22 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/uio.h>
31
32struct partial_page {
33 unsigned int offset;
34 unsigned int len;
35};
30 36
31/* 37/*
32 * Passed to the actors 38 * Passed to splice_to_pipe
33 */ 39 */
34struct splice_desc { 40struct splice_pipe_desc {
35 unsigned int len, total_len; /* current and remaining length */ 41 struct page **pages; /* page map */
42 struct partial_page *partial; /* pages[] may not be contig */
43 int nr_pages; /* number of pages in map */
36 unsigned int flags; /* splice flags */ 44 unsigned int flags; /* splice flags */
37 struct file *file; /* file to read/write */ 45 struct pipe_buf_operations *ops;/* ops associated with output pipe */
38 loff_t pos; /* file position */
39}; 46};
40 47
41/* 48/*
@@ -128,6 +135,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
128 kunmap(buf->page); 135 kunmap(buf->page);
129} 136}
130 137
138static void *user_page_pipe_buf_map(struct file *file,
139 struct pipe_inode_info *pipe,
140 struct pipe_buffer *buf)
141{
142 return kmap(buf->page);
143}
144
145static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe,
146 struct pipe_buffer *buf)
147{
148 kunmap(buf->page);
149}
150
131static void page_cache_pipe_buf_get(struct pipe_inode_info *info, 151static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
132 struct pipe_buffer *buf) 152 struct pipe_buffer *buf)
133{ 153{
@@ -143,19 +163,33 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
143 .get = page_cache_pipe_buf_get, 163 .get = page_cache_pipe_buf_get,
144}; 164};
145 165
166static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
167 struct pipe_buffer *buf)
168{
169 return 1;
170}
171
172static struct pipe_buf_operations user_page_pipe_buf_ops = {
173 .can_merge = 0,
174 .map = user_page_pipe_buf_map,
175 .unmap = user_page_pipe_buf_unmap,
176 .release = page_cache_pipe_buf_release,
177 .steal = user_page_pipe_buf_steal,
178 .get = page_cache_pipe_buf_get,
179};
180
146/* 181/*
147 * Pipe output worker. This sets up our pipe format with the page cache 182 * Pipe output worker. This sets up our pipe format with the page cache
148 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 183 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
149 */ 184 */
150static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 185static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
151 int nr_pages, unsigned long len, 186 struct splice_pipe_desc *spd)
152 unsigned int offset, unsigned int flags)
153{ 187{
154 int ret, do_wakeup, i; 188 int ret, do_wakeup, page_nr;
155 189
156 ret = 0; 190 ret = 0;
157 do_wakeup = 0; 191 do_wakeup = 0;
158 i = 0; 192 page_nr = 0;
159 193
160 if (pipe->inode) 194 if (pipe->inode)
161 mutex_lock(&pipe->inode->i_mutex); 195 mutex_lock(&pipe->inode->i_mutex);
@@ -171,27 +205,19 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
171 if (pipe->nrbufs < PIPE_BUFFERS) { 205 if (pipe->nrbufs < PIPE_BUFFERS) {
172 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1); 206 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
173 struct pipe_buffer *buf = pipe->bufs + newbuf; 207 struct pipe_buffer *buf = pipe->bufs + newbuf;
174 struct page *page = pages[i++];
175 unsigned long this_len;
176 208
177 this_len = PAGE_CACHE_SIZE - offset; 209 buf->page = spd->pages[page_nr];
178 if (this_len > len) 210 buf->offset = spd->partial[page_nr].offset;
179 this_len = len; 211 buf->len = spd->partial[page_nr].len;
180 212 buf->ops = spd->ops;
181 buf->page = page;
182 buf->offset = offset;
183 buf->len = this_len;
184 buf->ops = &page_cache_pipe_buf_ops;
185 pipe->nrbufs++; 213 pipe->nrbufs++;
214 page_nr++;
215 ret += buf->len;
216
186 if (pipe->inode) 217 if (pipe->inode)
187 do_wakeup = 1; 218 do_wakeup = 1;
188 219
189 ret += this_len; 220 if (!--spd->nr_pages)
190 len -= this_len;
191 offset = 0;
192 if (!--nr_pages)
193 break;
194 if (!len)
195 break; 221 break;
196 if (pipe->nrbufs < PIPE_BUFFERS) 222 if (pipe->nrbufs < PIPE_BUFFERS)
197 continue; 223 continue;
@@ -199,7 +225,7 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
199 break; 225 break;
200 } 226 }
201 227
202 if (flags & SPLICE_F_NONBLOCK) { 228 if (spd->flags & SPLICE_F_NONBLOCK) {
203 if (!ret) 229 if (!ret)
204 ret = -EAGAIN; 230 ret = -EAGAIN;
205 break; 231 break;
@@ -234,8 +260,8 @@ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
234 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 260 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
235 } 261 }
236 262
237 while (i < nr_pages) 263 while (page_nr < spd->nr_pages)
238 page_cache_release(pages[i++]); 264 page_cache_release(spd->pages[page_nr++]);
239 265
240 return ret; 266 return ret;
241} 267}
@@ -246,17 +272,24 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
246 unsigned int flags) 272 unsigned int flags)
247{ 273{
248 struct address_space *mapping = in->f_mapping; 274 struct address_space *mapping = in->f_mapping;
249 unsigned int loff, offset, nr_pages; 275 unsigned int loff, nr_pages;
250 struct page *pages[PIPE_BUFFERS]; 276 struct page *pages[PIPE_BUFFERS];
277 struct partial_page partial[PIPE_BUFFERS];
251 struct page *page; 278 struct page *page;
252 pgoff_t index, end_index; 279 pgoff_t index, end_index;
253 loff_t isize; 280 loff_t isize;
254 size_t bytes; 281 size_t total_len;
255 int i, error; 282 int error;
283 struct splice_pipe_desc spd = {
284 .pages = pages,
285 .partial = partial,
286 .flags = flags,
287 .ops = &page_cache_pipe_buf_ops,
288 };
256 289
257 index = *ppos >> PAGE_CACHE_SHIFT; 290 index = *ppos >> PAGE_CACHE_SHIFT;
258 loff = offset = *ppos & ~PAGE_CACHE_MASK; 291 loff = *ppos & ~PAGE_CACHE_MASK;
259 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 292 nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
260 293
261 if (nr_pages > PIPE_BUFFERS) 294 if (nr_pages > PIPE_BUFFERS)
262 nr_pages = PIPE_BUFFERS; 295 nr_pages = PIPE_BUFFERS;
@@ -266,15 +299,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
266 * read-ahead if this is a non-zero offset (we are likely doing small 299 * read-ahead if this is a non-zero offset (we are likely doing small
267 * chunk splice and the page is already there) for a single page. 300 * chunk splice and the page is already there) for a single page.
268 */ 301 */
269 if (!offset || nr_pages > 1) 302 if (!loff || spd.nr_pages > 1)
270 do_page_cache_readahead(mapping, in, index, nr_pages); 303 do_page_cache_readahead(mapping, in, index, spd.nr_pages);
271 304
272 /* 305 /*
273 * Now fill in the holes: 306 * Now fill in the holes:
274 */ 307 */
275 error = 0; 308 error = 0;
276 bytes = 0; 309 total_len = 0;
277 for (i = 0; i < nr_pages; i++, index++) { 310 for (spd.nr_pages = 0; spd.nr_pages < nr_pages; spd.nr_pages++, index++) {
278 unsigned int this_len; 311 unsigned int this_len;
279 312
280 if (!len) 313 if (!len)
@@ -283,7 +316,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
283 /* 316 /*
284 * this_len is the max we'll use from this page 317 * this_len is the max we'll use from this page
285 */ 318 */
286 this_len = min(len, PAGE_CACHE_SIZE - loff); 319 this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
287find_page: 320find_page:
288 /* 321 /*
289 * lookup the page for this index 322 * lookup the page for this index
@@ -367,26 +400,29 @@ readpage:
367 */ 400 */
368 if (end_index == index) { 401 if (end_index == index) {
369 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); 402 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
370 if (bytes + loff > isize) { 403 if (total_len + loff > isize) {
371 page_cache_release(page); 404 page_cache_release(page);
372 break; 405 break;
373 } 406 }
374 /* 407 /*
375 * force quit after adding this page 408 * force quit after adding this page
376 */ 409 */
377 nr_pages = i; 410 nr_pages = spd.nr_pages;
378 this_len = min(this_len, loff); 411 this_len = min(this_len, loff);
412 loff = 0;
379 } 413 }
380 } 414 }
381fill_it: 415fill_it:
382 pages[i] = page; 416 pages[spd.nr_pages] = page;
383 bytes += this_len; 417 partial[spd.nr_pages].offset = loff;
418 partial[spd.nr_pages].len = this_len;
384 len -= this_len; 419 len -= this_len;
420 total_len += this_len;
385 loff = 0; 421 loff = 0;
386 } 422 }
387 423
388 if (i) 424 if (spd.nr_pages)
389 return move_to_pipe(pipe, pages, i, bytes, offset, flags); 425 return splice_to_pipe(pipe, &spd);
390 426
391 return error; 427 return error;
392} 428}
@@ -439,14 +475,13 @@ EXPORT_SYMBOL(generic_file_splice_read);
439 475
440/* 476/*
441 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 477 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
442 * using sendpage(). 478 * using sendpage(). Return the number of bytes sent.
443 */ 479 */
444static int pipe_to_sendpage(struct pipe_inode_info *info, 480static int pipe_to_sendpage(struct pipe_inode_info *info,
445 struct pipe_buffer *buf, struct splice_desc *sd) 481 struct pipe_buffer *buf, struct splice_desc *sd)
446{ 482{
447 struct file *file = sd->file; 483 struct file *file = sd->file;
448 loff_t pos = sd->pos; 484 loff_t pos = sd->pos;
449 unsigned int offset;
450 ssize_t ret; 485 ssize_t ret;
451 void *ptr; 486 void *ptr;
452 int more; 487 int more;
@@ -461,16 +496,13 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
461 if (IS_ERR(ptr)) 496 if (IS_ERR(ptr))
462 return PTR_ERR(ptr); 497 return PTR_ERR(ptr);
463 498
464 offset = pos & ~PAGE_CACHE_MASK;
465 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 499 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
466 500
467 ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 501 ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len,
502 &pos, more);
468 503
469 buf->ops->unmap(info, buf); 504 buf->ops->unmap(info, buf);
470 if (ret == sd->len) 505 return ret;
471 return 0;
472
473 return -EIO;
474} 506}
475 507
476/* 508/*
@@ -499,7 +531,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
499 struct file *file = sd->file; 531 struct file *file = sd->file;
500 struct address_space *mapping = file->f_mapping; 532 struct address_space *mapping = file->f_mapping;
501 gfp_t gfp_mask = mapping_gfp_mask(mapping); 533 gfp_t gfp_mask = mapping_gfp_mask(mapping);
502 unsigned int offset; 534 unsigned int offset, this_len;
503 struct page *page; 535 struct page *page;
504 pgoff_t index; 536 pgoff_t index;
505 char *src; 537 char *src;
@@ -515,6 +547,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
515 index = sd->pos >> PAGE_CACHE_SHIFT; 547 index = sd->pos >> PAGE_CACHE_SHIFT;
516 offset = sd->pos & ~PAGE_CACHE_MASK; 548 offset = sd->pos & ~PAGE_CACHE_MASK;
517 549
550 this_len = sd->len;
551 if (this_len + offset > PAGE_CACHE_SIZE)
552 this_len = PAGE_CACHE_SIZE - offset;
553
518 /* 554 /*
519 * Reuse buf page, if SPLICE_F_MOVE is set. 555 * Reuse buf page, if SPLICE_F_MOVE is set.
520 */ 556 */
@@ -558,7 +594,7 @@ find_page:
558 * the full page. 594 * the full page.
559 */ 595 */
560 if (!PageUptodate(page)) { 596 if (!PageUptodate(page)) {
561 if (sd->len < PAGE_CACHE_SIZE) { 597 if (this_len < PAGE_CACHE_SIZE) {
562 ret = mapping->a_ops->readpage(file, page); 598 ret = mapping->a_ops->readpage(file, page);
563 if (unlikely(ret)) 599 if (unlikely(ret))
564 goto out; 600 goto out;
@@ -582,7 +618,7 @@ find_page:
582 } 618 }
583 } 619 }
584 620
585 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 621 ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
586 if (ret == AOP_TRUNCATED_PAGE) { 622 if (ret == AOP_TRUNCATED_PAGE) {
587 page_cache_release(page); 623 page_cache_release(page);
588 goto find_page; 624 goto find_page;
@@ -592,18 +628,22 @@ find_page:
592 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 628 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
593 char *dst = kmap_atomic(page, KM_USER0); 629 char *dst = kmap_atomic(page, KM_USER0);
594 630
595 memcpy(dst + offset, src + buf->offset, sd->len); 631 memcpy(dst + offset, src + buf->offset, this_len);
596 flush_dcache_page(page); 632 flush_dcache_page(page);
597 kunmap_atomic(dst, KM_USER0); 633 kunmap_atomic(dst, KM_USER0);
598 } 634 }
599 635
600 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 636 ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
601 if (ret == AOP_TRUNCATED_PAGE) { 637 if (ret == AOP_TRUNCATED_PAGE) {
602 page_cache_release(page); 638 page_cache_release(page);
603 goto find_page; 639 goto find_page;
604 } else if (ret) 640 } else if (ret)
605 goto out; 641 goto out;
606 642
643 /*
644 * Return the number of bytes written.
645 */
646 ret = this_len;
607 mark_page_accessed(page); 647 mark_page_accessed(page);
608 balance_dirty_pages_ratelimited(mapping); 648 balance_dirty_pages_ratelimited(mapping);
609out: 649out:
@@ -616,17 +656,14 @@ out_nomem:
616 return ret; 656 return ret;
617} 657}
618 658
619typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
620 struct splice_desc *);
621
622/* 659/*
623 * Pipe input worker. Most of this logic works like a regular pipe, the 660 * Pipe input worker. Most of this logic works like a regular pipe, the
624 * key here is the 'actor' worker passed in that actually moves the data 661 * key here is the 'actor' worker passed in that actually moves the data
625 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 662 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
626 */ 663 */
627static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out, 664ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
628 loff_t *ppos, size_t len, unsigned int flags, 665 loff_t *ppos, size_t len, unsigned int flags,
629 splice_actor *actor) 666 splice_actor *actor)
630{ 667{
631 int ret, do_wakeup, err; 668 int ret, do_wakeup, err;
632 struct splice_desc sd; 669 struct splice_desc sd;
@@ -652,16 +689,22 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
652 sd.len = sd.total_len; 689 sd.len = sd.total_len;
653 690
654 err = actor(pipe, buf, &sd); 691 err = actor(pipe, buf, &sd);
655 if (err) { 692 if (err <= 0) {
656 if (!ret && err != -ENODATA) 693 if (!ret && err != -ENODATA)
657 ret = err; 694 ret = err;
658 695
659 break; 696 break;
660 } 697 }
661 698
662 ret += sd.len; 699 ret += err;
663 buf->offset += sd.len; 700 buf->offset += err;
664 buf->len -= sd.len; 701 buf->len -= err;
702
703 sd.len -= err;
704 sd.pos += err;
705 sd.total_len -= err;
706 if (sd.len)
707 continue;
665 708
666 if (!buf->len) { 709 if (!buf->len) {
667 buf->ops = NULL; 710 buf->ops = NULL;
@@ -672,8 +715,6 @@ static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
672 do_wakeup = 1; 715 do_wakeup = 1;
673 } 716 }
674 717
675 sd.pos += sd.len;
676 sd.total_len -= sd.len;
677 if (!sd.total_len) 718 if (!sd.total_len)
678 break; 719 break;
679 } 720 }
@@ -741,7 +782,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
741 struct address_space *mapping = out->f_mapping; 782 struct address_space *mapping = out->f_mapping;
742 ssize_t ret; 783 ssize_t ret;
743 784
744 ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 785 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
745 if (ret > 0) { 786 if (ret > 0) {
746 struct inode *inode = mapping->host; 787 struct inode *inode = mapping->host;
747 788
@@ -783,7 +824,7 @@ EXPORT_SYMBOL(generic_file_splice_write);
783ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 824ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
784 loff_t *ppos, size_t len, unsigned int flags) 825 loff_t *ppos, size_t len, unsigned int flags)
785{ 826{
786 return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); 827 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
787} 828}
788 829
789EXPORT_SYMBOL(generic_splice_sendpage); 830EXPORT_SYMBOL(generic_splice_sendpage);
@@ -870,7 +911,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
870 911
871 /* 912 /*
872 * We don't have an immediate reader, but we'll read the stuff 913 * We don't have an immediate reader, but we'll read the stuff
873 * out of the pipe right after the move_to_pipe(). So set 914 * out of the pipe right after the splice_to_pipe(). So set
874 * PIPE_READERS appropriately. 915 * PIPE_READERS appropriately.
875 */ 916 */
876 pipe->readers = 1; 917 pipe->readers = 1;
@@ -1010,6 +1051,174 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1010 return -EINVAL; 1051 return -EINVAL;
1011} 1052}
1012 1053
1054/*
1055 * Map an iov into an array of pages and offset/length tupples. With the
1056 * partial_page structure, we can map several non-contiguous ranges into
1057 * our ones pages[] map instead of splitting that operation into pieces.
1058 * Could easily be exported as a generic helper for other users, in which
1059 * case one would probably want to add a 'max_nr_pages' parameter as well.
1060 */
1061static int get_iovec_page_array(const struct iovec __user *iov,
1062 unsigned int nr_vecs, struct page **pages,
1063 struct partial_page *partial)
1064{
1065 int buffers = 0, error = 0;
1066
1067 /*
1068 * It's ok to take the mmap_sem for reading, even
1069 * across a "get_user()".
1070 */
1071 down_read(&current->mm->mmap_sem);
1072
1073 while (nr_vecs) {
1074 unsigned long off, npages;
1075 void __user *base;
1076 size_t len;
1077 int i;
1078
1079 /*
1080 * Get user address base and length for this iovec.
1081 */
1082 error = get_user(base, &iov->iov_base);
1083 if (unlikely(error))
1084 break;
1085 error = get_user(len, &iov->iov_len);
1086 if (unlikely(error))
1087 break;
1088
1089 /*
1090 * Sanity check this iovec. 0 read succeeds.
1091 */
1092 if (unlikely(!len))
1093 break;
1094 error = -EFAULT;
1095 if (unlikely(!base))
1096 break;
1097
1098 /*
1099 * Get this base offset and number of pages, then map
1100 * in the user pages.
1101 */
1102 off = (unsigned long) base & ~PAGE_MASK;
1103 npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1104 if (npages > PIPE_BUFFERS - buffers)
1105 npages = PIPE_BUFFERS - buffers;
1106
1107 error = get_user_pages(current, current->mm,
1108 (unsigned long) base, npages, 0, 0,
1109 &pages[buffers], NULL);
1110
1111 if (unlikely(error <= 0))
1112 break;
1113
1114 /*
1115 * Fill this contiguous range into the partial page map.
1116 */
1117 for (i = 0; i < error; i++) {
1118 const int plen = min_t(size_t, len, PAGE_SIZE) - off;
1119
1120 partial[buffers].offset = off;
1121 partial[buffers].len = plen;
1122
1123 off = 0;
1124 len -= plen;
1125 buffers++;
1126 }
1127
1128 /*
1129 * We didn't complete this iov, stop here since it probably
1130 * means we have to move some of this into a pipe to
1131 * be able to continue.
1132 */
1133 if (len)
1134 break;
1135
1136 /*
1137 * Don't continue if we mapped fewer pages than we asked for,
1138 * or if we mapped the max number of pages that we have
1139 * room for.
1140 */
1141 if (error < npages || buffers == PIPE_BUFFERS)
1142 break;
1143
1144 nr_vecs--;
1145 iov++;
1146 }
1147
1148 up_read(&current->mm->mmap_sem);
1149
1150 if (buffers)
1151 return buffers;
1152
1153 return error;
1154}
1155
1156/*
1157 * vmsplice splices a user address range into a pipe. It can be thought of
1158 * as splice-from-memory, where the regular splice is splice-from-file (or
1159 * to file). In both cases the output is a pipe, naturally.
1160 *
1161 * Note that vmsplice only supports splicing _from_ user memory to a pipe,
1162 * not the other way around. Splicing from user memory is a simple operation
1163 * that can be supported without any funky alignment restrictions or nasty
1164 * vm tricks. We simply map in the user memory and fill them into a pipe.
1165 * The reverse isn't quite as easy, though. There are two possible solutions
1166 * for that:
1167 *
1168 * - memcpy() the data internally, at which point we might as well just
1169 * do a regular read() on the buffer anyway.
1170 * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1171 * has restriction limitations on both ends of the pipe).
1172 *
1173 * Alas, it isn't here.
1174 *
1175 */
1176static long do_vmsplice(struct file *file, const struct iovec __user *iov,
1177 unsigned long nr_segs, unsigned int flags)
1178{
1179 struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
1180 struct page *pages[PIPE_BUFFERS];
1181 struct partial_page partial[PIPE_BUFFERS];
1182 struct splice_pipe_desc spd = {
1183 .pages = pages,
1184 .partial = partial,
1185 .flags = flags,
1186 .ops = &user_page_pipe_buf_ops,
1187 };
1188
1189 if (unlikely(!pipe))
1190 return -EBADF;
1191 if (unlikely(nr_segs > UIO_MAXIOV))
1192 return -EINVAL;
1193 else if (unlikely(!nr_segs))
1194 return 0;
1195
1196 spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial);
1197 if (spd.nr_pages <= 0)
1198 return spd.nr_pages;
1199
1200 return splice_to_pipe(pipe, &spd);
1201}
1202
1203asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
1204 unsigned long nr_segs, unsigned int flags)
1205{
1206 struct file *file;
1207 long error;
1208 int fput;
1209
1210 error = -EBADF;
1211 file = fget_light(fd, &fput);
1212 if (file) {
1213 if (file->f_mode & FMODE_WRITE)
1214 error = do_vmsplice(file, iov, nr_segs, flags);
1215
1216 fput_light(file, fput);
1217 }
1218
1219 return error;
1220}
1221
1013asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, 1222asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
1014 int fd_out, loff_t __user *off_out, 1223 int fd_out, loff_t __user *off_out,
1015 size_t len, unsigned int flags) 1224 size_t len, unsigned int flags)