aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig6
-rw-r--r--fs/exec.c2
-rw-r--r--fs/locks.c9
-rw-r--r--fs/proc/base.c21
-rw-r--r--fs/splice.c185
5 files changed, 156 insertions, 67 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 2524629dc835..f9b5842c8d2d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -842,6 +842,12 @@ config TMPFS
842config HUGETLBFS 842config HUGETLBFS
843 bool "HugeTLB file system support" 843 bool "HugeTLB file system support"
844 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN 844 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
845 help
846 hugetlbfs is a filesystem backing for HugeTLB pages, based on
847 ramfs. For architectures that support it, say Y here and read
848 <file:Documentation/vm/hugetlbpage.txt> for details.
849
850 If unsure, say N.
845 851
846config HUGETLB_PAGE 852config HUGETLB_PAGE
847 def_bool HUGETLBFS 853 def_bool HUGETLBFS
diff --git a/fs/exec.c b/fs/exec.c
index 4121bb559739..3a79d97ac234 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -712,7 +712,7 @@ static int de_thread(struct task_struct *tsk)
712 attach_pid(current, PIDTYPE_PID, current->pid); 712 attach_pid(current, PIDTYPE_PID, current->pid);
713 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); 713 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
714 attach_pid(current, PIDTYPE_SID, current->signal->session); 714 attach_pid(current, PIDTYPE_SID, current->signal->session);
715 list_add_tail(&current->tasks, &init_task.tasks); 715 list_add_tail_rcu(&current->tasks, &init_task.tasks);
716 716
717 current->group_leader = current; 717 current->group_leader = current;
718 leader->group_leader = current; 718 leader->group_leader = current;
diff --git a/fs/locks.c b/fs/locks.c
index dda83d6cd48b..efad798824dc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2230,7 +2230,12 @@ void steal_locks(fl_owner_t from)
2230 2230
2231 lock_kernel(); 2231 lock_kernel();
2232 j = 0; 2232 j = 0;
2233 rcu_read_lock(); 2233
2234 /*
2235 * We are not taking a ref to the file structures, so
2236 * we need to acquire ->file_lock.
2237 */
2238 spin_lock(&files->file_lock);
2234 fdt = files_fdtable(files); 2239 fdt = files_fdtable(files);
2235 for (;;) { 2240 for (;;) {
2236 unsigned long set; 2241 unsigned long set;
@@ -2248,7 +2253,7 @@ void steal_locks(fl_owner_t from)
2248 set >>= 1; 2253 set >>= 1;
2249 } 2254 }
2250 } 2255 }
2251 rcu_read_unlock(); 2256 spin_unlock(&files->file_lock);
2252 unlock_kernel(); 2257 unlock_kernel();
2253} 2258}
2254EXPORT_SYMBOL(steal_locks); 2259EXPORT_SYMBOL(steal_locks);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a3a3eecef689..6cc77dc3f3ff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -297,16 +297,20 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
297 297
298 files = get_files_struct(task); 298 files = get_files_struct(task);
299 if (files) { 299 if (files) {
300 rcu_read_lock(); 300 /*
301 * We are not taking a ref to the file structure, so we must
302 * hold ->file_lock.
303 */
304 spin_lock(&files->file_lock);
301 file = fcheck_files(files, fd); 305 file = fcheck_files(files, fd);
302 if (file) { 306 if (file) {
303 *mnt = mntget(file->f_vfsmnt); 307 *mnt = mntget(file->f_vfsmnt);
304 *dentry = dget(file->f_dentry); 308 *dentry = dget(file->f_dentry);
305 rcu_read_unlock(); 309 spin_unlock(&files->file_lock);
306 put_files_struct(files); 310 put_files_struct(files);
307 return 0; 311 return 0;
308 } 312 }
309 rcu_read_unlock(); 313 spin_unlock(&files->file_lock);
310 put_files_struct(files); 314 put_files_struct(files);
311 } 315 }
312 return -ENOENT; 316 return -ENOENT;
@@ -1523,7 +1527,12 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1523 if (!files) 1527 if (!files)
1524 goto out_unlock; 1528 goto out_unlock;
1525 inode->i_mode = S_IFLNK; 1529 inode->i_mode = S_IFLNK;
1526 rcu_read_lock(); 1530
1531 /*
1532 * We are not taking a ref to the file structure, so we must
1533 * hold ->file_lock.
1534 */
1535 spin_lock(&files->file_lock);
1527 file = fcheck_files(files, fd); 1536 file = fcheck_files(files, fd);
1528 if (!file) 1537 if (!file)
1529 goto out_unlock2; 1538 goto out_unlock2;
@@ -1531,7 +1540,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1531 inode->i_mode |= S_IRUSR | S_IXUSR; 1540 inode->i_mode |= S_IRUSR | S_IXUSR;
1532 if (file->f_mode & 2) 1541 if (file->f_mode & 2)
1533 inode->i_mode |= S_IWUSR | S_IXUSR; 1542 inode->i_mode |= S_IWUSR | S_IXUSR;
1534 rcu_read_unlock(); 1543 spin_unlock(&files->file_lock);
1535 put_files_struct(files); 1544 put_files_struct(files);
1536 inode->i_op = &proc_pid_link_inode_operations; 1545 inode->i_op = &proc_pid_link_inode_operations;
1537 inode->i_size = 64; 1546 inode->i_size = 64;
@@ -1541,7 +1550,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1541 return NULL; 1550 return NULL;
1542 1551
1543out_unlock2: 1552out_unlock2:
1544 rcu_read_unlock(); 1553 spin_unlock(&files->file_lock);
1545 put_files_struct(files); 1554 put_files_struct(files);
1546out_unlock: 1555out_unlock:
1547 iput(inode); 1556 iput(inode);
diff --git a/fs/splice.c b/fs/splice.c
index 8d57e89924a6..22fac87e90b3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -50,7 +50,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
50 struct page *page = buf->page; 50 struct page *page = buf->page;
51 struct address_space *mapping = page_mapping(page); 51 struct address_space *mapping = page_mapping(page);
52 52
53 WARN_ON(!PageLocked(page)); 53 lock_page(page);
54
54 WARN_ON(!PageUptodate(page)); 55 WARN_ON(!PageUptodate(page));
55 56
56 /* 57 /*
@@ -65,8 +66,10 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
65 if (PagePrivate(page)) 66 if (PagePrivate(page))
66 try_to_release_page(page, mapping_gfp_mask(mapping)); 67 try_to_release_page(page, mapping_gfp_mask(mapping));
67 68
68 if (!remove_mapping(mapping, page)) 69 if (!remove_mapping(mapping, page)) {
70 unlock_page(page);
69 return 1; 71 return 1;
72 }
70 73
71 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 74 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
72 return 0; 75 return 0;
@@ -145,8 +148,8 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
145 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 148 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
146 */ 149 */
147static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, 150static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
148 int nr_pages, unsigned long offset, 151 int nr_pages, unsigned long len,
149 unsigned long len, unsigned int flags) 152 unsigned int offset, unsigned int flags)
150{ 153{
151 int ret, do_wakeup, i; 154 int ret, do_wakeup, i;
152 155
@@ -243,14 +246,16 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
243 unsigned int flags) 246 unsigned int flags)
244{ 247{
245 struct address_space *mapping = in->f_mapping; 248 struct address_space *mapping = in->f_mapping;
246 unsigned int offset, nr_pages; 249 unsigned int loff, offset, nr_pages;
247 struct page *pages[PIPE_BUFFERS]; 250 struct page *pages[PIPE_BUFFERS];
248 struct page *page; 251 struct page *page;
249 pgoff_t index; 252 pgoff_t index, end_index;
253 loff_t isize;
254 size_t bytes;
250 int i, error; 255 int i, error;
251 256
252 index = *ppos >> PAGE_CACHE_SHIFT; 257 index = *ppos >> PAGE_CACHE_SHIFT;
253 offset = *ppos & ~PAGE_CACHE_MASK; 258 loff = offset = *ppos & ~PAGE_CACHE_MASK;
254 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 259 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
255 260
256 if (nr_pages > PIPE_BUFFERS) 261 if (nr_pages > PIPE_BUFFERS)
@@ -268,6 +273,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
268 * Now fill in the holes: 273 * Now fill in the holes:
269 */ 274 */
270 error = 0; 275 error = 0;
276 bytes = 0;
271 for (i = 0; i < nr_pages; i++, index++) { 277 for (i = 0; i < nr_pages; i++, index++) {
272find_page: 278find_page:
273 /* 279 /*
@@ -276,14 +282,6 @@ find_page:
276 page = find_get_page(mapping, index); 282 page = find_get_page(mapping, index);
277 if (!page) { 283 if (!page) {
278 /* 284 /*
279 * If in nonblock mode then dont block on
280 * readpage (we've kicked readahead so there
281 * will be asynchronous progress):
282 */
283 if (flags & SPLICE_F_NONBLOCK)
284 break;
285
286 /*
287 * page didn't exist, allocate one 285 * page didn't exist, allocate one
288 */ 286 */
289 page = page_cache_alloc_cold(mapping); 287 page = page_cache_alloc_cold(mapping);
@@ -304,6 +302,13 @@ find_page:
304 * If the page isn't uptodate, we may need to start io on it 302 * If the page isn't uptodate, we may need to start io on it
305 */ 303 */
306 if (!PageUptodate(page)) { 304 if (!PageUptodate(page)) {
305 /*
306 * If in nonblock mode then don't block on waiting
307 * for an in-flight io page
308 */
309 if (flags & SPLICE_F_NONBLOCK)
310 break;
311
307 lock_page(page); 312 lock_page(page);
308 313
309 /* 314 /*
@@ -336,13 +341,41 @@ readpage:
336 goto find_page; 341 goto find_page;
337 break; 342 break;
338 } 343 }
344
345 /*
346 * i_size must be checked after ->readpage().
347 */
348 isize = i_size_read(mapping->host);
349 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
350 if (unlikely(!isize || index > end_index)) {
351 page_cache_release(page);
352 break;
353 }
354
355 /*
356 * if this is the last page, see if we need to shrink
357 * the length and stop
358 */
359 if (end_index == index) {
360 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
361 if (bytes + loff > isize) {
362 page_cache_release(page);
363 break;
364 }
365 /*
366 * force quit after adding this page
367 */
368 nr_pages = i;
369 }
339 } 370 }
340fill_it: 371fill_it:
341 pages[i] = page; 372 pages[i] = page;
373 bytes += PAGE_CACHE_SIZE - loff;
374 loff = 0;
342 } 375 }
343 376
344 if (i) 377 if (i)
345 return move_to_pipe(pipe, pages, i, offset, len, flags); 378 return move_to_pipe(pipe, pages, i, bytes, offset, flags);
346 379
347 return error; 380 return error;
348} 381}
@@ -369,17 +402,20 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
369 while (len) { 402 while (len) {
370 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 403 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
371 404
372 if (ret <= 0) 405 if (ret < 0)
373 break; 406 break;
407 else if (!ret) {
408 if (spliced)
409 break;
410 if (flags & SPLICE_F_NONBLOCK) {
411 ret = -EAGAIN;
412 break;
413 }
414 }
374 415
375 *ppos += ret; 416 *ppos += ret;
376 len -= ret; 417 len -= ret;
377 spliced += ret; 418 spliced += ret;
378
379 if (!(flags & SPLICE_F_NONBLOCK))
380 continue;
381 ret = -EAGAIN;
382 break;
383 } 419 }
384 420
385 if (spliced) 421 if (spliced)
@@ -474,14 +510,12 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
474 if (sd->flags & SPLICE_F_MOVE) { 510 if (sd->flags & SPLICE_F_MOVE) {
475 /* 511 /*
476 * If steal succeeds, buf->page is now pruned from the vm 512 * If steal succeeds, buf->page is now pruned from the vm
477 * side (LRU and page cache) and we can reuse it. 513 * side (LRU and page cache) and we can reuse it. The page
514 * will also be locked on successful return.
478 */ 515 */
479 if (buf->ops->steal(info, buf)) 516 if (buf->ops->steal(info, buf))
480 goto find_page; 517 goto find_page;
481 518
482 /*
483 * this will also set the page locked
484 */
485 page = buf->page; 519 page = buf->page;
486 if (add_to_page_cache(page, mapping, index, gfp_mask)) 520 if (add_to_page_cache(page, mapping, index, gfp_mask))
487 goto find_page; 521 goto find_page;
@@ -490,15 +524,27 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
490 lru_cache_add(page); 524 lru_cache_add(page);
491 } else { 525 } else {
492find_page: 526find_page:
493 ret = -ENOMEM; 527 page = find_lock_page(mapping, index);
494 page = find_or_create_page(mapping, index, gfp_mask); 528 if (!page) {
495 if (!page) 529 ret = -ENOMEM;
496 goto out_nomem; 530 page = page_cache_alloc_cold(mapping);
531 if (unlikely(!page))
532 goto out_nomem;
533
534 /*
535 * This will also lock the page
536 */
537 ret = add_to_page_cache_lru(page, mapping, index,
538 gfp_mask);
539 if (unlikely(ret))
540 goto out;
541 }
497 542
498 /* 543 /*
499 * If the page is uptodate, it is also locked. If it isn't 544 * We get here with the page locked. If the page is also
500 * uptodate, we can mark it uptodate if we are filling the 545 * uptodate, we don't need to do more. If it isn't, we
501 * full page. Otherwise we need to read it in first... 546 * may need to bring it in if we are not going to overwrite
547 * the full page.
502 */ 548 */
503 if (!PageUptodate(page)) { 549 if (!PageUptodate(page)) {
504 if (sd->len < PAGE_CACHE_SIZE) { 550 if (sd->len < PAGE_CACHE_SIZE) {
@@ -520,10 +566,8 @@ find_page:
520 ret = -EIO; 566 ret = -EIO;
521 goto out; 567 goto out;
522 } 568 }
523 } else { 569 } else
524 WARN_ON(!PageLocked(page));
525 SetPageUptodate(page); 570 SetPageUptodate(page);
526 }
527 } 571 }
528 } 572 }
529 573
@@ -552,10 +596,10 @@ find_page:
552 mark_page_accessed(page); 596 mark_page_accessed(page);
553 balance_dirty_pages_ratelimited(mapping); 597 balance_dirty_pages_ratelimited(mapping);
554out: 598out:
555 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 599 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN))
556 page_cache_release(page); 600 page_cache_release(page);
557 unlock_page(page); 601
558 } 602 unlock_page(page);
559out_nomem: 603out_nomem:
560 buf->ops->unmap(info, buf); 604 buf->ops->unmap(info, buf);
561 return ret; 605 return ret;
@@ -687,22 +731,26 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
687 ssize_t ret; 731 ssize_t ret;
688 732
689 ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 733 ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
690 734 if (ret > 0) {
691 /*
692 * If file or inode is SYNC and we actually wrote some data, sync it.
693 */
694 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
695 && ret > 0) {
696 struct inode *inode = mapping->host; 735 struct inode *inode = mapping->host;
697 int err;
698 736
699 mutex_lock(&inode->i_mutex); 737 *ppos += ret;
700 err = generic_osync_inode(mapping->host, mapping, 738
701 OSYNC_METADATA|OSYNC_DATA); 739 /*
702 mutex_unlock(&inode->i_mutex); 740 * If file or inode is SYNC and we actually wrote some data,
741 * sync it.
742 */
743 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
744 int err;
745
746 mutex_lock(&inode->i_mutex);
747 err = generic_osync_inode(inode, mapping,
748 OSYNC_METADATA|OSYNC_DATA);
749 mutex_unlock(&inode->i_mutex);
703 750
704 if (err) 751 if (err)
705 ret = err; 752 ret = err;
753 }
706 } 754 }
707 755
708 return ret; 756 return ret;
@@ -904,6 +952,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
904{ 952{
905 struct pipe_inode_info *pipe; 953 struct pipe_inode_info *pipe;
906 loff_t offset, *off; 954 loff_t offset, *off;
955 long ret;
907 956
908 pipe = in->f_dentry->d_inode->i_pipe; 957 pipe = in->f_dentry->d_inode->i_pipe;
909 if (pipe) { 958 if (pipe) {
@@ -918,7 +967,12 @@ static long do_splice(struct file *in, loff_t __user *off_in,
918 } else 967 } else
919 off = &out->f_pos; 968 off = &out->f_pos;
920 969
921 return do_splice_from(pipe, out, off, len, flags); 970 ret = do_splice_from(pipe, out, off, len, flags);
971
972 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
973 ret = -EFAULT;
974
975 return ret;
922 } 976 }
923 977
924 pipe = out->f_dentry->d_inode->i_pipe; 978 pipe = out->f_dentry->d_inode->i_pipe;
@@ -934,7 +988,12 @@ static long do_splice(struct file *in, loff_t __user *off_in,
934 } else 988 } else
935 off = &in->f_pos; 989 off = &in->f_pos;
936 990
937 return do_splice_to(in, off, pipe, len, flags); 991 ret = do_splice_to(in, off, pipe, len, flags);
992
993 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
994 ret = -EFAULT;
995
996 return ret;
938 } 997 }
939 998
940 return -EINVAL; 999 return -EINVAL;
@@ -979,7 +1038,9 @@ static int link_pipe(struct pipe_inode_info *ipipe,
979 size_t len, unsigned int flags) 1038 size_t len, unsigned int flags)
980{ 1039{
981 struct pipe_buffer *ibuf, *obuf; 1040 struct pipe_buffer *ibuf, *obuf;
982 int ret = 0, do_wakeup = 0, i; 1041 int ret, do_wakeup, i, ipipe_first;
1042
1043 ret = do_wakeup = ipipe_first = 0;
983 1044
984 /* 1045 /*
985 * Potential ABBA deadlock, work around it by ordering lock 1046 * Potential ABBA deadlock, work around it by ordering lock
@@ -987,6 +1048,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
987 * could deadlock (one doing tee from A -> B, the other from B -> A). 1048 * could deadlock (one doing tee from A -> B, the other from B -> A).
988 */ 1049 */
989 if (ipipe->inode < opipe->inode) { 1050 if (ipipe->inode < opipe->inode) {
1051 ipipe_first = 1;
990 mutex_lock(&ipipe->inode->i_mutex); 1052 mutex_lock(&ipipe->inode->i_mutex);
991 mutex_lock(&opipe->inode->i_mutex); 1053 mutex_lock(&opipe->inode->i_mutex);
992 } else { 1054 } else {
@@ -1035,9 +1097,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1035 1097
1036 /* 1098 /*
1037 * We have input available, but no output room. 1099 * We have input available, but no output room.
1038 * If we already copied data, return that. 1100 * If we already copied data, return that. If we
1101 * need to drop the opipe lock, it must be ordered
1102 * last to avoid deadlocks.
1039 */ 1103 */
1040 if (flags & SPLICE_F_NONBLOCK) { 1104 if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
1041 if (!ret) 1105 if (!ret)
1042 ret = -EAGAIN; 1106 ret = -EAGAIN;
1043 break; 1107 break;
@@ -1071,7 +1135,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1071 if (ret) 1135 if (ret)
1072 break; 1136 break;
1073 } 1137 }
1074 if (flags & SPLICE_F_NONBLOCK) { 1138 /*
1139 * pipe_wait() drops the ipipe mutex. To avoid deadlocks
1140 * with another process, we can only safely do that if
1141 * the ipipe lock is ordered last.
1142 */
1143 if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
1075 if (!ret) 1144 if (!ret)
1076 ret = -EAGAIN; 1145 ret = -EAGAIN;
1077 break; 1146 break;