aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2006-04-21 12:52:36 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2006-04-21 12:52:36 -0400
commita748422ee45725e04e1d3792fa19dfa90ddfd116 (patch)
tree978e12895468baaa9f7ab2747b9f7d50beaf1717 /fs
parentc63e31c2cc1ec67372920b5e1aff8204d04dd172 (diff)
parentf4ffaa452e71495a06376f12f772342bc57051fc (diff)
Merge branch 'master'
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c13
-rw-r--r--fs/Kconfig9
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c46
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/fifo.c65
-rw-r--r--fs/fuse/dev.c286
-rw-r--r--fs/fuse/dir.c118
-rw-r--r--fs/fuse/file.c66
-rw-r--r--fs/fuse/fuse_i.h72
-rw-r--r--fs/fuse/inode.c158
-rw-r--r--fs/inotify.c2
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/locks.c9
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/direct.c8
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/nfs4proc.c10
-rw-r--r--fs/nfsd/auth.c46
-rw-r--r--fs/nfsd/export.c3
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs4acl.c8
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c150
-rw-r--r--fs/nfsd/nfs4xdr.c62
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/vfs.c6
-rw-r--r--fs/ocfs2/cluster/heartbeat.c40
-rw-r--r--fs/ocfs2/dlm/userdlm.c74
-rw-r--r--fs/ocfs2/file.c19
-rw-r--r--fs/open.c24
-rw-r--r--fs/partitions/check.c43
-rw-r--r--fs/pipe.c317
-rw-r--r--fs/proc/base.c21
-rw-r--r--fs/proc/vmcore.c4
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/select.c30
-rw-r--r--fs/splice.c866
-rw-r--r--fs/sync.c4
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/sysfs/file.c76
-rw-r--r--fs/sysfs/sysfs.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h16
-rw-r--r--fs/xfs/xfs_ialloc.c15
-rw-r--r--fs/xfs/xfs_iget.c29
-rw-r--r--fs/xfs/xfs_inode.c27
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_mount.c2
58 files changed, 1758 insertions, 1106 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index b0a0ae509c00..61c599b4a1e3 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -127,12 +127,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
127 127
128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { 128 if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
129 dprintk(DEBUG_ERROR, "problem initiating session\n"); 129 dprintk(DEBUG_ERROR, "problem initiating session\n");
130 kfree(v9ses); 130 sb = ERR_PTR(newfid);
131 return ERR_PTR(newfid); 131 goto out_free_session;
132 } 132 }
133 133
134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 134 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
135 135 if (IS_ERR(sb))
136 goto out_close_session;
136 v9fs_fill_super(sb, v9ses, flags); 137 v9fs_fill_super(sb, v9ses, flags);
137 138
138 inode = v9fs_get_inode(sb, S_IFDIR | mode); 139 inode = v9fs_get_inode(sb, S_IFDIR | mode);
@@ -185,6 +186,12 @@ static struct super_block *v9fs_get_sb(struct file_system_type
185 186
186 return sb; 187 return sb;
187 188
189out_close_session:
190 v9fs_session_close(v9ses);
191out_free_session:
192 kfree(v9ses);
193 return sb;
194
188put_back_sb: 195put_back_sb:
189 /* deactivate_super calls v9fs_kill_super which will frees the rest */ 196 /* deactivate_super calls v9fs_kill_super which will frees the rest */
190 up_write(&sb->s_umount); 197 up_write(&sb->s_umount);
diff --git a/fs/Kconfig b/fs/Kconfig
index 62ee097776f0..563a59e5e694 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -800,6 +800,7 @@ config PROC_KCORE
800config PROC_VMCORE 800config PROC_VMCORE
801 bool "/proc/vmcore support (EXPERIMENTAL)" 801 bool "/proc/vmcore support (EXPERIMENTAL)"
802 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP 802 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP
803 default y
803 help 804 help
804 Exports the dump image of crashed kernel in ELF format. 805 Exports the dump image of crashed kernel in ELF format.
805 806
@@ -842,6 +843,12 @@ config TMPFS
842config HUGETLBFS 843config HUGETLBFS
843 bool "HugeTLB file system support" 844 bool "HugeTLB file system support"
844 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN 845 depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
846 help
847 hugetlbfs is a filesystem backing for HugeTLB pages, based on
848 ramfs. For architectures that support it, say Y here and read
849 <file:Documentation/vm/hugetlbpage.txt> for details.
850
851 If unsure, say N.
845 852
846config HUGETLB_PAGE 853config HUGETLB_PAGE
847 def_bool HUGETLBFS 854 def_bool HUGETLBFS
@@ -862,7 +869,7 @@ config RAMFS
862 869
863config CONFIGFS_FS 870config CONFIGFS_FS
864 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" 871 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
865 depends on EXPERIMENTAL 872 depends on SYSFS && EXPERIMENTAL
866 help 873 help
867 configfs is a ram-based filesystem that provides the converse 874 configfs is a ram-based filesystem that provides the converse
868 of sysfs's functionality. Where sysfs is a filesystem-based 875 of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8ed9b06a9828..5638c8f9362f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -504,7 +504,7 @@ static int populate_groups(struct config_group *group)
504 int ret = 0; 504 int ret = 0;
505 int i; 505 int i;
506 506
507 if (group && group->default_groups) { 507 if (group->default_groups) {
508 /* FYI, we're faking mkdir here 508 /* FYI, we're faking mkdir here
509 * I'm not sure we need this semaphore, as we're called 509 * I'm not sure we need this semaphore, as we're called
510 * from our parent's mkdir. That holds our parent's 510 * from our parent's mkdir. That holds our parent's
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 242fe1a66ce5..1b4491cdd115 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -599,7 +599,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
599 switch (op) { 599 switch (op) {
600 case EPOLL_CTL_ADD: 600 case EPOLL_CTL_ADD:
601 if (!epi) { 601 if (!epi) {
602 epds.events |= POLLERR | POLLHUP | POLLRDHUP; 602 epds.events |= POLLERR | POLLHUP;
603 603
604 error = ep_insert(ep, &epds, tfile, fd); 604 error = ep_insert(ep, &epds, tfile, fd);
605 } else 605 } else
@@ -613,7 +613,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
613 break; 613 break;
614 case EPOLL_CTL_MOD: 614 case EPOLL_CTL_MOD:
615 if (epi) { 615 if (epi) {
616 epds.events |= POLLERR | POLLHUP | POLLRDHUP; 616 epds.events |= POLLERR | POLLHUP;
617 error = ep_modify(ep, epi, &epds); 617 error = ep_modify(ep, epi, &epds);
618 } else 618 } else
619 error = -ENOENT; 619 error = -ENOENT;
diff --git a/fs/exec.c b/fs/exec.c
index 0291a68a3626..3a79d97ac234 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -665,9 +665,7 @@ static int de_thread(struct task_struct *tsk)
665 * and to assume its PID: 665 * and to assume its PID:
666 */ 666 */
667 if (!thread_group_leader(current)) { 667 if (!thread_group_leader(current)) {
668 struct task_struct *parent;
669 struct dentry *proc_dentry1, *proc_dentry2; 668 struct dentry *proc_dentry1, *proc_dentry2;
670 unsigned long ptrace;
671 669
672 /* 670 /*
673 * Wait for the thread group leader to be a zombie. 671 * Wait for the thread group leader to be a zombie.
@@ -678,6 +676,18 @@ static int de_thread(struct task_struct *tsk)
678 while (leader->exit_state != EXIT_ZOMBIE) 676 while (leader->exit_state != EXIT_ZOMBIE)
679 yield(); 677 yield();
680 678
679 /*
680 * The only record we have of the real-time age of a
681 * process, regardless of execs it's done, is start_time.
682 * All the past CPU time is accumulated in signal_struct
683 * from sister threads now dead. But in this non-leader
684 * exec, nothing survives from the original leader thread,
685 * whose birth marks the true age of this process now.
686 * When we take on its identity by switching to its PID, we
687 * also take its birthdate (always earlier than our own).
688 */
689 current->start_time = leader->start_time;
690
681 spin_lock(&leader->proc_lock); 691 spin_lock(&leader->proc_lock);
682 spin_lock(&current->proc_lock); 692 spin_lock(&current->proc_lock);
683 proc_dentry1 = proc_pid_unhash(current); 693 proc_dentry1 = proc_pid_unhash(current);
@@ -692,22 +702,6 @@ static int de_thread(struct task_struct *tsk)
692 * two threads with a switched PID, and release 702 * two threads with a switched PID, and release
693 * the former thread group leader: 703 * the former thread group leader:
694 */ 704 */
695 ptrace = leader->ptrace;
696 parent = leader->parent;
697 if (unlikely(ptrace) && unlikely(parent == current)) {
698 /*
699 * Joker was ptracing his own group leader,
700 * and now he wants to be his own parent!
701 * We can't have that.
702 */
703 ptrace = 0;
704 }
705
706 ptrace_unlink(current);
707 ptrace_unlink(leader);
708 remove_parent(current);
709 remove_parent(leader);
710
711 705
712 /* Become a process group leader with the old leader's pid. 706 /* Become a process group leader with the old leader's pid.
713 * Note: The old leader also uses thispid until release_task 707 * Note: The old leader also uses thispid until release_task
@@ -718,19 +712,15 @@ static int de_thread(struct task_struct *tsk)
718 attach_pid(current, PIDTYPE_PID, current->pid); 712 attach_pid(current, PIDTYPE_PID, current->pid);
719 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); 713 attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
720 attach_pid(current, PIDTYPE_SID, current->signal->session); 714 attach_pid(current, PIDTYPE_SID, current->signal->session);
721 list_add_tail(&current->tasks, &init_task.tasks); 715 list_add_tail_rcu(&current->tasks, &init_task.tasks);
722 716
723 current->parent = current->real_parent = leader->real_parent;
724 leader->parent = leader->real_parent = child_reaper;
725 current->group_leader = current; 717 current->group_leader = current;
726 leader->group_leader = leader; 718 leader->group_leader = current;
727 719
728 add_parent(current); 720 /* Reduce leader to a thread */
729 add_parent(leader); 721 detach_pid(leader, PIDTYPE_PGID);
730 if (ptrace) { 722 detach_pid(leader, PIDTYPE_SID);
731 current->ptrace = ptrace; 723 list_del_init(&leader->tasks);
732 __ptrace_link(current, parent);
733 }
734 724
735 current->exit_signal = SIGCHLD; 725 current->exit_signal = SIGCHLD;
736 726
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 1041dab6de2f..c5ffa8523968 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -767,6 +767,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
767 if (input->group != sbi->s_groups_count) { 767 if (input->group != sbi->s_groups_count) {
768 ext3_warning(sb, __FUNCTION__, 768 ext3_warning(sb, __FUNCTION__,
769 "multiple resizers run on filesystem!"); 769 "multiple resizers run on filesystem!");
770 unlock_super(sb);
770 err = -EBUSY; 771 err = -EBUSY;
771 goto exit_journal; 772 goto exit_journal;
772 } 773 }
@@ -974,6 +975,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
974 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 975 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
975 ext3_warning(sb, __FUNCTION__, 976 ext3_warning(sb, __FUNCTION__,
976 "multiple resizers run on filesystem!"); 977 "multiple resizers run on filesystem!");
978 unlock_super(sb);
977 err = -EBUSY; 979 err = -EBUSY;
978 goto exit_put; 980 goto exit_put;
979 } 981 }
diff --git a/fs/fifo.c b/fs/fifo.c
index 889f722ee36d..49035b174b48 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -15,30 +15,35 @@
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/pipe_fs_i.h> 16#include <linux/pipe_fs_i.h>
17 17
18static void wait_for_partner(struct inode* inode, unsigned int* cnt) 18static void wait_for_partner(struct inode* inode, unsigned int *cnt)
19{ 19{
20 int cur = *cnt; 20 int cur = *cnt;
21 while(cur == *cnt) { 21
22 pipe_wait(inode); 22 while (cur == *cnt) {
23 if(signal_pending(current)) 23 pipe_wait(inode->i_pipe);
24 if (signal_pending(current))
24 break; 25 break;
25 } 26 }
26} 27}
27 28
28static void wake_up_partner(struct inode* inode) 29static void wake_up_partner(struct inode* inode)
29{ 30{
30 wake_up_interruptible(PIPE_WAIT(*inode)); 31 wake_up_interruptible(&inode->i_pipe->wait);
31} 32}
32 33
33static int fifo_open(struct inode *inode, struct file *filp) 34static int fifo_open(struct inode *inode, struct file *filp)
34{ 35{
36 struct pipe_inode_info *pipe;
35 int ret; 37 int ret;
36 38
37 mutex_lock(PIPE_MUTEX(*inode)); 39 mutex_lock(&inode->i_mutex);
38 if (!inode->i_pipe) { 40 pipe = inode->i_pipe;
41 if (!pipe) {
39 ret = -ENOMEM; 42 ret = -ENOMEM;
40 if(!pipe_new(inode)) 43 pipe = alloc_pipe_info(inode);
44 if (!pipe)
41 goto err_nocleanup; 45 goto err_nocleanup;
46 inode->i_pipe = pipe;
42 } 47 }
43 filp->f_version = 0; 48 filp->f_version = 0;
44 49
@@ -53,18 +58,18 @@ static int fifo_open(struct inode *inode, struct file *filp)
53 * opened, even when there is no process writing the FIFO. 58 * opened, even when there is no process writing the FIFO.
54 */ 59 */
55 filp->f_op = &read_fifo_fops; 60 filp->f_op = &read_fifo_fops;
56 PIPE_RCOUNTER(*inode)++; 61 pipe->r_counter++;
57 if (PIPE_READERS(*inode)++ == 0) 62 if (pipe->readers++ == 0)
58 wake_up_partner(inode); 63 wake_up_partner(inode);
59 64
60 if (!PIPE_WRITERS(*inode)) { 65 if (!pipe->writers) {
61 if ((filp->f_flags & O_NONBLOCK)) { 66 if ((filp->f_flags & O_NONBLOCK)) {
62 /* suppress POLLHUP until we have 67 /* suppress POLLHUP until we have
63 * seen a writer */ 68 * seen a writer */
64 filp->f_version = PIPE_WCOUNTER(*inode); 69 filp->f_version = pipe->w_counter;
65 } else 70 } else
66 { 71 {
67 wait_for_partner(inode, &PIPE_WCOUNTER(*inode)); 72 wait_for_partner(inode, &pipe->w_counter);
68 if(signal_pending(current)) 73 if(signal_pending(current))
69 goto err_rd; 74 goto err_rd;
70 } 75 }
@@ -78,16 +83,16 @@ static int fifo_open(struct inode *inode, struct file *filp)
78 * errno=ENXIO when there is no process reading the FIFO. 83 * errno=ENXIO when there is no process reading the FIFO.
79 */ 84 */
80 ret = -ENXIO; 85 ret = -ENXIO;
81 if ((filp->f_flags & O_NONBLOCK) && !PIPE_READERS(*inode)) 86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
82 goto err; 87 goto err;
83 88
84 filp->f_op = &write_fifo_fops; 89 filp->f_op = &write_fifo_fops;
85 PIPE_WCOUNTER(*inode)++; 90 pipe->w_counter++;
86 if (!PIPE_WRITERS(*inode)++) 91 if (!pipe->writers++)
87 wake_up_partner(inode); 92 wake_up_partner(inode);
88 93
89 if (!PIPE_READERS(*inode)) { 94 if (!pipe->readers) {
90 wait_for_partner(inode, &PIPE_RCOUNTER(*inode)); 95 wait_for_partner(inode, &pipe->r_counter);
91 if (signal_pending(current)) 96 if (signal_pending(current))
92 goto err_wr; 97 goto err_wr;
93 } 98 }
@@ -102,11 +107,11 @@ static int fifo_open(struct inode *inode, struct file *filp)
102 */ 107 */
103 filp->f_op = &rdwr_fifo_fops; 108 filp->f_op = &rdwr_fifo_fops;
104 109
105 PIPE_READERS(*inode)++; 110 pipe->readers++;
106 PIPE_WRITERS(*inode)++; 111 pipe->writers++;
107 PIPE_RCOUNTER(*inode)++; 112 pipe->r_counter++;
108 PIPE_WCOUNTER(*inode)++; 113 pipe->w_counter++;
109 if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1) 114 if (pipe->readers == 1 || pipe->writers == 1)
110 wake_up_partner(inode); 115 wake_up_partner(inode);
111 break; 116 break;
112 117
@@ -116,27 +121,27 @@ static int fifo_open(struct inode *inode, struct file *filp)
116 } 121 }
117 122
118 /* Ok! */ 123 /* Ok! */
119 mutex_unlock(PIPE_MUTEX(*inode)); 124 mutex_unlock(&inode->i_mutex);
120 return 0; 125 return 0;
121 126
122err_rd: 127err_rd:
123 if (!--PIPE_READERS(*inode)) 128 if (!--pipe->readers)
124 wake_up_interruptible(PIPE_WAIT(*inode)); 129 wake_up_interruptible(&pipe->wait);
125 ret = -ERESTARTSYS; 130 ret = -ERESTARTSYS;
126 goto err; 131 goto err;
127 132
128err_wr: 133err_wr:
129 if (!--PIPE_WRITERS(*inode)) 134 if (!--pipe->writers)
130 wake_up_interruptible(PIPE_WAIT(*inode)); 135 wake_up_interruptible(&pipe->wait);
131 ret = -ERESTARTSYS; 136 ret = -ERESTARTSYS;
132 goto err; 137 goto err;
133 138
134err: 139err:
135 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) 140 if (!pipe->readers && !pipe->writers)
136 free_pipe_info(inode); 141 free_pipe_info(inode);
137 142
138err_nocleanup: 143err_nocleanup:
139 mutex_unlock(PIPE_MUTEX(*inode)); 144 mutex_unlock(&inode->i_mutex);
140 return ret; 145 return ret;
141} 146}
142 147
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 23d1f52eb1b8..cc750c68fe70 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -23,13 +23,11 @@ static kmem_cache_t *fuse_req_cachep;
23 23
24static struct fuse_conn *fuse_get_conn(struct file *file) 24static struct fuse_conn *fuse_get_conn(struct file *file)
25{ 25{
26 struct fuse_conn *fc; 26 /*
27 spin_lock(&fuse_lock); 27 * Lockless access is OK, because file->private data is set
28 fc = file->private_data; 28 * once during mount and is valid until the file is released.
29 if (fc && !fc->connected) 29 */
30 fc = NULL; 30 return file->private_data;
31 spin_unlock(&fuse_lock);
32 return fc;
33} 31}
34 32
35static void fuse_request_init(struct fuse_req *req) 33static void fuse_request_init(struct fuse_req *req)
@@ -74,10 +72,8 @@ static void restore_sigs(sigset_t *oldset)
74 */ 72 */
75void fuse_reset_request(struct fuse_req *req) 73void fuse_reset_request(struct fuse_req *req)
76{ 74{
77 int preallocated = req->preallocated;
78 BUG_ON(atomic_read(&req->count) != 1); 75 BUG_ON(atomic_read(&req->count) != 1);
79 fuse_request_init(req); 76 fuse_request_init(req);
80 req->preallocated = preallocated;
81} 77}
82 78
83static void __fuse_get_request(struct fuse_req *req) 79static void __fuse_get_request(struct fuse_req *req)
@@ -92,80 +88,54 @@ static void __fuse_put_request(struct fuse_req *req)
92 atomic_dec(&req->count); 88 atomic_dec(&req->count);
93} 89}
94 90
95static struct fuse_req *do_get_request(struct fuse_conn *fc) 91struct fuse_req *fuse_get_req(struct fuse_conn *fc)
96{ 92{
97 struct fuse_req *req; 93 struct fuse_req *req;
98
99 spin_lock(&fuse_lock);
100 BUG_ON(list_empty(&fc->unused_list));
101 req = list_entry(fc->unused_list.next, struct fuse_req, list);
102 list_del_init(&req->list);
103 spin_unlock(&fuse_lock);
104 fuse_request_init(req);
105 req->preallocated = 1;
106 req->in.h.uid = current->fsuid;
107 req->in.h.gid = current->fsgid;
108 req->in.h.pid = current->pid;
109 return req;
110}
111
112/* This can return NULL, but only in case it's interrupted by a SIGKILL */
113struct fuse_req *fuse_get_request(struct fuse_conn *fc)
114{
115 int intr;
116 sigset_t oldset; 94 sigset_t oldset;
95 int intr;
96 int err;
117 97
118 atomic_inc(&fc->num_waiting); 98 atomic_inc(&fc->num_waiting);
119 block_sigs(&oldset); 99 block_sigs(&oldset);
120 intr = down_interruptible(&fc->outstanding_sem); 100 intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
121 restore_sigs(&oldset); 101 restore_sigs(&oldset);
122 if (intr) { 102 err = -EINTR;
123 atomic_dec(&fc->num_waiting); 103 if (intr)
124 return NULL; 104 goto out;
125 }
126 return do_get_request(fc);
127}
128 105
129/* Must be called with fuse_lock held */ 106 req = fuse_request_alloc();
130static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) 107 err = -ENOMEM;
131{ 108 if (!req)
132 if (req->preallocated) { 109 goto out;
133 atomic_dec(&fc->num_waiting);
134 list_add(&req->list, &fc->unused_list);
135 } else
136 fuse_request_free(req);
137 110
138 /* If we are in debt decrease that first */ 111 req->in.h.uid = current->fsuid;
139 if (fc->outstanding_debt) 112 req->in.h.gid = current->fsgid;
140 fc->outstanding_debt--; 113 req->in.h.pid = current->pid;
141 else 114 req->waiting = 1;
142 up(&fc->outstanding_sem); 115 return req;
116
117 out:
118 atomic_dec(&fc->num_waiting);
119 return ERR_PTR(err);
143} 120}
144 121
145void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 122void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
146{ 123{
147 if (atomic_dec_and_test(&req->count)) { 124 if (atomic_dec_and_test(&req->count)) {
148 spin_lock(&fuse_lock); 125 if (req->waiting)
149 fuse_putback_request(fc, req); 126 atomic_dec(&fc->num_waiting);
150 spin_unlock(&fuse_lock); 127 fuse_request_free(req);
151 } 128 }
152} 129}
153 130
154static void fuse_put_request_locked(struct fuse_conn *fc, struct fuse_req *req) 131void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req)
155{
156 if (atomic_dec_and_test(&req->count))
157 fuse_putback_request(fc, req);
158}
159
160void fuse_release_background(struct fuse_req *req)
161{ 132{
162 iput(req->inode); 133 list_del_init(&req->bg_entry);
163 iput(req->inode2); 134 if (fc->num_background == FUSE_MAX_BACKGROUND) {
164 if (req->file) 135 fc->blocked = 0;
165 fput(req->file); 136 wake_up_all(&fc->blocked_waitq);
166 spin_lock(&fuse_lock); 137 }
167 list_del(&req->bg_entry); 138 fc->num_background--;
168 spin_unlock(&fuse_lock);
169} 139}
170 140
171/* 141/*
@@ -184,28 +154,38 @@ void fuse_release_background(struct fuse_req *req)
184 * interrupted and put in the background, it will return with an error 154 * interrupted and put in the background, it will return with an error
185 * and hence never be reset and reused. 155 * and hence never be reset and reused.
186 * 156 *
187 * Called with fuse_lock, unlocks it 157 * Called with fc->lock, unlocks it
188 */ 158 */
189static void request_end(struct fuse_conn *fc, struct fuse_req *req) 159static void request_end(struct fuse_conn *fc, struct fuse_req *req)
190{ 160{
191 list_del(&req->list); 161 list_del(&req->list);
192 req->state = FUSE_REQ_FINISHED; 162 req->state = FUSE_REQ_FINISHED;
193 if (!req->background) { 163 if (!req->background) {
164 spin_unlock(&fc->lock);
194 wake_up(&req->waitq); 165 wake_up(&req->waitq);
195 fuse_put_request_locked(fc, req); 166 fuse_put_request(fc, req);
196 spin_unlock(&fuse_lock);
197 } else { 167 } else {
168 struct inode *inode = req->inode;
169 struct inode *inode2 = req->inode2;
170 struct file *file = req->file;
198 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 171 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
199 req->end = NULL; 172 req->end = NULL;
200 spin_unlock(&fuse_lock); 173 req->inode = NULL;
201 down_read(&fc->sbput_sem); 174 req->inode2 = NULL;
202 if (fc->mounted) 175 req->file = NULL;
203 fuse_release_background(req); 176 if (!list_empty(&req->bg_entry))
204 up_read(&fc->sbput_sem); 177 fuse_remove_background(fc, req);
178 spin_unlock(&fc->lock);
179
205 if (end) 180 if (end)
206 end(fc, req); 181 end(fc, req);
207 else 182 else
208 fuse_put_request(fc, req); 183 fuse_put_request(fc, req);
184
185 if (file)
186 fput(file);
187 iput(inode);
188 iput(inode2);
209 } 189 }
210} 190}
211 191
@@ -242,6 +222,9 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
242{ 222{
243 req->background = 1; 223 req->background = 1;
244 list_add(&req->bg_entry, &fc->background); 224 list_add(&req->bg_entry, &fc->background);
225 fc->num_background++;
226 if (fc->num_background == FUSE_MAX_BACKGROUND)
227 fc->blocked = 1;
245 if (req->inode) 228 if (req->inode)
246 req->inode = igrab(req->inode); 229 req->inode = igrab(req->inode);
247 if (req->inode2) 230 if (req->inode2)
@@ -250,16 +233,16 @@ static void background_request(struct fuse_conn *fc, struct fuse_req *req)
250 get_file(req->file); 233 get_file(req->file);
251} 234}
252 235
253/* Called with fuse_lock held. Releases, and then reacquires it. */ 236/* Called with fc->lock held. Releases, and then reacquires it. */
254static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 237static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
255{ 238{
256 sigset_t oldset; 239 sigset_t oldset;
257 240
258 spin_unlock(&fuse_lock); 241 spin_unlock(&fc->lock);
259 block_sigs(&oldset); 242 block_sigs(&oldset);
260 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); 243 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
261 restore_sigs(&oldset); 244 restore_sigs(&oldset);
262 spin_lock(&fuse_lock); 245 spin_lock(&fc->lock);
263 if (req->state == FUSE_REQ_FINISHED && !req->interrupted) 246 if (req->state == FUSE_REQ_FINISHED && !req->interrupted)
264 return; 247 return;
265 248
@@ -273,9 +256,9 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
273 locked state, there mustn't be any filesystem 256 locked state, there mustn't be any filesystem
274 operation (e.g. page fault), since that could lead 257 operation (e.g. page fault), since that could lead
275 to deadlock */ 258 to deadlock */
276 spin_unlock(&fuse_lock); 259 spin_unlock(&fc->lock);
277 wait_event(req->waitq, !req->locked); 260 wait_event(req->waitq, !req->locked);
278 spin_lock(&fuse_lock); 261 spin_lock(&fc->lock);
279 } 262 }
280 if (req->state == FUSE_REQ_PENDING) { 263 if (req->state == FUSE_REQ_PENDING) {
281 list_del(&req->list); 264 list_del(&req->list);
@@ -304,19 +287,14 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
304 req->in.h.unique = fc->reqctr; 287 req->in.h.unique = fc->reqctr;
305 req->in.h.len = sizeof(struct fuse_in_header) + 288 req->in.h.len = sizeof(struct fuse_in_header) +
306 len_args(req->in.numargs, (struct fuse_arg *) req->in.args); 289 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
307 if (!req->preallocated) {
308 /* If request is not preallocated (either FORGET or
309 RELEASE), then still decrease outstanding_sem, so
310 user can't open infinite number of files while not
311 processing the RELEASE requests. However for
312 efficiency do it without blocking, so if down()
313 would block, just increase the debt instead */
314 if (down_trylock(&fc->outstanding_sem))
315 fc->outstanding_debt++;
316 }
317 list_add_tail(&req->list, &fc->pending); 290 list_add_tail(&req->list, &fc->pending);
318 req->state = FUSE_REQ_PENDING; 291 req->state = FUSE_REQ_PENDING;
292 if (!req->waiting) {
293 req->waiting = 1;
294 atomic_inc(&fc->num_waiting);
295 }
319 wake_up(&fc->waitq); 296 wake_up(&fc->waitq);
297 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
320} 298}
321 299
322/* 300/*
@@ -325,7 +303,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
325void request_send(struct fuse_conn *fc, struct fuse_req *req) 303void request_send(struct fuse_conn *fc, struct fuse_req *req)
326{ 304{
327 req->isreply = 1; 305 req->isreply = 1;
328 spin_lock(&fuse_lock); 306 spin_lock(&fc->lock);
329 if (!fc->connected) 307 if (!fc->connected)
330 req->out.h.error = -ENOTCONN; 308 req->out.h.error = -ENOTCONN;
331 else if (fc->conn_error) 309 else if (fc->conn_error)
@@ -338,15 +316,16 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req)
338 316
339 request_wait_answer(fc, req); 317 request_wait_answer(fc, req);
340 } 318 }
341 spin_unlock(&fuse_lock); 319 spin_unlock(&fc->lock);
342} 320}
343 321
344static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) 322static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
345{ 323{
346 spin_lock(&fuse_lock); 324 spin_lock(&fc->lock);
325 background_request(fc, req);
347 if (fc->connected) { 326 if (fc->connected) {
348 queue_request(fc, req); 327 queue_request(fc, req);
349 spin_unlock(&fuse_lock); 328 spin_unlock(&fc->lock);
350 } else { 329 } else {
351 req->out.h.error = -ENOTCONN; 330 req->out.h.error = -ENOTCONN;
352 request_end(fc, req); 331 request_end(fc, req);
@@ -362,9 +341,6 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
362void request_send_background(struct fuse_conn *fc, struct fuse_req *req) 341void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
363{ 342{
364 req->isreply = 1; 343 req->isreply = 1;
365 spin_lock(&fuse_lock);
366 background_request(fc, req);
367 spin_unlock(&fuse_lock);
368 request_send_nowait(fc, req); 344 request_send_nowait(fc, req);
369} 345}
370 346
@@ -373,16 +349,16 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
373 * anything that could cause a page-fault. If the request was already 349 * anything that could cause a page-fault. If the request was already
374 * interrupted bail out. 350 * interrupted bail out.
375 */ 351 */
376static int lock_request(struct fuse_req *req) 352static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
377{ 353{
378 int err = 0; 354 int err = 0;
379 if (req) { 355 if (req) {
380 spin_lock(&fuse_lock); 356 spin_lock(&fc->lock);
381 if (req->interrupted) 357 if (req->interrupted)
382 err = -ENOENT; 358 err = -ENOENT;
383 else 359 else
384 req->locked = 1; 360 req->locked = 1;
385 spin_unlock(&fuse_lock); 361 spin_unlock(&fc->lock);
386 } 362 }
387 return err; 363 return err;
388} 364}
@@ -392,18 +368,19 @@ static int lock_request(struct fuse_req *req)
392 * requester thread is currently waiting for it to be unlocked, so 368 * requester thread is currently waiting for it to be unlocked, so
393 * wake it up. 369 * wake it up.
394 */ 370 */
395static void unlock_request(struct fuse_req *req) 371static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
396{ 372{
397 if (req) { 373 if (req) {
398 spin_lock(&fuse_lock); 374 spin_lock(&fc->lock);
399 req->locked = 0; 375 req->locked = 0;
400 if (req->interrupted) 376 if (req->interrupted)
401 wake_up(&req->waitq); 377 wake_up(&req->waitq);
402 spin_unlock(&fuse_lock); 378 spin_unlock(&fc->lock);
403 } 379 }
404} 380}
405 381
406struct fuse_copy_state { 382struct fuse_copy_state {
383 struct fuse_conn *fc;
407 int write; 384 int write;
408 struct fuse_req *req; 385 struct fuse_req *req;
409 const struct iovec *iov; 386 const struct iovec *iov;
@@ -416,11 +393,12 @@ struct fuse_copy_state {
416 unsigned len; 393 unsigned len;
417}; 394};
418 395
419static void fuse_copy_init(struct fuse_copy_state *cs, int write, 396static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
420 struct fuse_req *req, const struct iovec *iov, 397 int write, struct fuse_req *req,
421 unsigned long nr_segs) 398 const struct iovec *iov, unsigned long nr_segs)
422{ 399{
423 memset(cs, 0, sizeof(*cs)); 400 memset(cs, 0, sizeof(*cs));
401 cs->fc = fc;
424 cs->write = write; 402 cs->write = write;
425 cs->req = req; 403 cs->req = req;
426 cs->iov = iov; 404 cs->iov = iov;
@@ -450,7 +428,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
450 unsigned long offset; 428 unsigned long offset;
451 int err; 429 int err;
452 430
453 unlock_request(cs->req); 431 unlock_request(cs->fc, cs->req);
454 fuse_copy_finish(cs); 432 fuse_copy_finish(cs);
455 if (!cs->seglen) { 433 if (!cs->seglen) {
456 BUG_ON(!cs->nr_segs); 434 BUG_ON(!cs->nr_segs);
@@ -473,7 +451,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
473 cs->seglen -= cs->len; 451 cs->seglen -= cs->len;
474 cs->addr += cs->len; 452 cs->addr += cs->len;
475 453
476 return lock_request(cs->req); 454 return lock_request(cs->fc, cs->req);
477} 455}
478 456
479/* Do as much copy to/from userspace buffer as we can */ 457/* Do as much copy to/from userspace buffer as we can */
@@ -585,9 +563,9 @@ static void request_wait(struct fuse_conn *fc)
585 if (signal_pending(current)) 563 if (signal_pending(current))
586 break; 564 break;
587 565
588 spin_unlock(&fuse_lock); 566 spin_unlock(&fc->lock);
589 schedule(); 567 schedule();
590 spin_lock(&fuse_lock); 568 spin_lock(&fc->lock);
591 } 569 }
592 set_current_state(TASK_RUNNING); 570 set_current_state(TASK_RUNNING);
593 remove_wait_queue(&fc->waitq, &wait); 571 remove_wait_queue(&fc->waitq, &wait);
@@ -606,18 +584,21 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
606 unsigned long nr_segs, loff_t *off) 584 unsigned long nr_segs, loff_t *off)
607{ 585{
608 int err; 586 int err;
609 struct fuse_conn *fc;
610 struct fuse_req *req; 587 struct fuse_req *req;
611 struct fuse_in *in; 588 struct fuse_in *in;
612 struct fuse_copy_state cs; 589 struct fuse_copy_state cs;
613 unsigned reqsize; 590 unsigned reqsize;
591 struct fuse_conn *fc = fuse_get_conn(file);
592 if (!fc)
593 return -EPERM;
614 594
615 restart: 595 restart:
616 spin_lock(&fuse_lock); 596 spin_lock(&fc->lock);
617 fc = file->private_data; 597 err = -EAGAIN;
618 err = -EPERM; 598 if ((file->f_flags & O_NONBLOCK) && fc->connected &&
619 if (!fc) 599 list_empty(&fc->pending))
620 goto err_unlock; 600 goto err_unlock;
601
621 request_wait(fc); 602 request_wait(fc);
622 err = -ENODEV; 603 err = -ENODEV;
623 if (!fc->connected) 604 if (!fc->connected)
@@ -641,14 +622,14 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
641 request_end(fc, req); 622 request_end(fc, req);
642 goto restart; 623 goto restart;
643 } 624 }
644 spin_unlock(&fuse_lock); 625 spin_unlock(&fc->lock);
645 fuse_copy_init(&cs, 1, req, iov, nr_segs); 626 fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
646 err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); 627 err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
647 if (!err) 628 if (!err)
648 err = fuse_copy_args(&cs, in->numargs, in->argpages, 629 err = fuse_copy_args(&cs, in->numargs, in->argpages,
649 (struct fuse_arg *) in->args, 0); 630 (struct fuse_arg *) in->args, 0);
650 fuse_copy_finish(&cs); 631 fuse_copy_finish(&cs);
651 spin_lock(&fuse_lock); 632 spin_lock(&fc->lock);
652 req->locked = 0; 633 req->locked = 0;
653 if (!err && req->interrupted) 634 if (!err && req->interrupted)
654 err = -ENOENT; 635 err = -ENOENT;
@@ -663,12 +644,12 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
663 else { 644 else {
664 req->state = FUSE_REQ_SENT; 645 req->state = FUSE_REQ_SENT;
665 list_move_tail(&req->list, &fc->processing); 646 list_move_tail(&req->list, &fc->processing);
666 spin_unlock(&fuse_lock); 647 spin_unlock(&fc->lock);
667 } 648 }
668 return reqsize; 649 return reqsize;
669 650
670 err_unlock: 651 err_unlock:
671 spin_unlock(&fuse_lock); 652 spin_unlock(&fc->lock);
672 return err; 653 return err;
673} 654}
674 655
@@ -735,9 +716,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
735 struct fuse_copy_state cs; 716 struct fuse_copy_state cs;
736 struct fuse_conn *fc = fuse_get_conn(file); 717 struct fuse_conn *fc = fuse_get_conn(file);
737 if (!fc) 718 if (!fc)
738 return -ENODEV; 719 return -EPERM;
739 720
740 fuse_copy_init(&cs, 0, NULL, iov, nr_segs); 721 fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
741 if (nbytes < sizeof(struct fuse_out_header)) 722 if (nbytes < sizeof(struct fuse_out_header))
742 return -EINVAL; 723 return -EINVAL;
743 724
@@ -749,7 +730,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
749 oh.len != nbytes) 730 oh.len != nbytes)
750 goto err_finish; 731 goto err_finish;
751 732
752 spin_lock(&fuse_lock); 733 spin_lock(&fc->lock);
753 err = -ENOENT; 734 err = -ENOENT;
754 if (!fc->connected) 735 if (!fc->connected)
755 goto err_unlock; 736 goto err_unlock;
@@ -760,9 +741,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
760 goto err_unlock; 741 goto err_unlock;
761 742
762 if (req->interrupted) { 743 if (req->interrupted) {
763 spin_unlock(&fuse_lock); 744 spin_unlock(&fc->lock);
764 fuse_copy_finish(&cs); 745 fuse_copy_finish(&cs);
765 spin_lock(&fuse_lock); 746 spin_lock(&fc->lock);
766 request_end(fc, req); 747 request_end(fc, req);
767 return -ENOENT; 748 return -ENOENT;
768 } 749 }
@@ -770,12 +751,12 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
770 req->out.h = oh; 751 req->out.h = oh;
771 req->locked = 1; 752 req->locked = 1;
772 cs.req = req; 753 cs.req = req;
773 spin_unlock(&fuse_lock); 754 spin_unlock(&fc->lock);
774 755
775 err = copy_out_args(&cs, &req->out, nbytes); 756 err = copy_out_args(&cs, &req->out, nbytes);
776 fuse_copy_finish(&cs); 757 fuse_copy_finish(&cs);
777 758
778 spin_lock(&fuse_lock); 759 spin_lock(&fc->lock);
779 req->locked = 0; 760 req->locked = 0;
780 if (!err) { 761 if (!err) {
781 if (req->interrupted) 762 if (req->interrupted)
@@ -787,7 +768,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
787 return err ? err : nbytes; 768 return err ? err : nbytes;
788 769
789 err_unlock: 770 err_unlock:
790 spin_unlock(&fuse_lock); 771 spin_unlock(&fc->lock);
791 err_finish: 772 err_finish:
792 fuse_copy_finish(&cs); 773 fuse_copy_finish(&cs);
793 return err; 774 return err;
@@ -804,18 +785,19 @@ static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
804 785
805static unsigned fuse_dev_poll(struct file *file, poll_table *wait) 786static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
806{ 787{
807 struct fuse_conn *fc = fuse_get_conn(file);
808 unsigned mask = POLLOUT | POLLWRNORM; 788 unsigned mask = POLLOUT | POLLWRNORM;
809 789 struct fuse_conn *fc = fuse_get_conn(file);
810 if (!fc) 790 if (!fc)
811 return -ENODEV; 791 return POLLERR;
812 792
813 poll_wait(file, &fc->waitq, wait); 793 poll_wait(file, &fc->waitq, wait);
814 794
815 spin_lock(&fuse_lock); 795 spin_lock(&fc->lock);
816 if (!list_empty(&fc->pending)) 796 if (!fc->connected)
817 mask |= POLLIN | POLLRDNORM; 797 mask = POLLERR;
818 spin_unlock(&fuse_lock); 798 else if (!list_empty(&fc->pending))
799 mask |= POLLIN | POLLRDNORM;
800 spin_unlock(&fc->lock);
819 801
820 return mask; 802 return mask;
821} 803}
@@ -823,7 +805,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
823/* 805/*
824 * Abort all requests on the given list (pending or processing) 806 * Abort all requests on the given list (pending or processing)
825 * 807 *
826 * This function releases and reacquires fuse_lock 808 * This function releases and reacquires fc->lock
827 */ 809 */
828static void end_requests(struct fuse_conn *fc, struct list_head *head) 810static void end_requests(struct fuse_conn *fc, struct list_head *head)
829{ 811{
@@ -832,7 +814,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
832 req = list_entry(head->next, struct fuse_req, list); 814 req = list_entry(head->next, struct fuse_req, list);
833 req->out.h.error = -ECONNABORTED; 815 req->out.h.error = -ECONNABORTED;
834 request_end(fc, req); 816 request_end(fc, req);
835 spin_lock(&fuse_lock); 817 spin_lock(&fc->lock);
836 } 818 }
837} 819}
838 820
@@ -863,10 +845,10 @@ static void end_io_requests(struct fuse_conn *fc)
863 req->end = NULL; 845 req->end = NULL;
864 /* The end function will consume this reference */ 846 /* The end function will consume this reference */
865 __fuse_get_request(req); 847 __fuse_get_request(req);
866 spin_unlock(&fuse_lock); 848 spin_unlock(&fc->lock);
867 wait_event(req->waitq, !req->locked); 849 wait_event(req->waitq, !req->locked);
868 end(fc, req); 850 end(fc, req);
869 spin_lock(&fuse_lock); 851 spin_lock(&fc->lock);
870 } 852 }
871 } 853 }
872} 854}
@@ -893,35 +875,44 @@ static void end_io_requests(struct fuse_conn *fc)
893 */ 875 */
894void fuse_abort_conn(struct fuse_conn *fc) 876void fuse_abort_conn(struct fuse_conn *fc)
895{ 877{
896 spin_lock(&fuse_lock); 878 spin_lock(&fc->lock);
897 if (fc->connected) { 879 if (fc->connected) {
898 fc->connected = 0; 880 fc->connected = 0;
899 end_io_requests(fc); 881 end_io_requests(fc);
900 end_requests(fc, &fc->pending); 882 end_requests(fc, &fc->pending);
901 end_requests(fc, &fc->processing); 883 end_requests(fc, &fc->processing);
902 wake_up_all(&fc->waitq); 884 wake_up_all(&fc->waitq);
885 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
903 } 886 }
904 spin_unlock(&fuse_lock); 887 spin_unlock(&fc->lock);
905} 888}
906 889
907static int fuse_dev_release(struct inode *inode, struct file *file) 890static int fuse_dev_release(struct inode *inode, struct file *file)
908{ 891{
909 struct fuse_conn *fc; 892 struct fuse_conn *fc = fuse_get_conn(file);
910
911 spin_lock(&fuse_lock);
912 fc = file->private_data;
913 if (fc) { 893 if (fc) {
894 spin_lock(&fc->lock);
914 fc->connected = 0; 895 fc->connected = 0;
915 end_requests(fc, &fc->pending); 896 end_requests(fc, &fc->pending);
916 end_requests(fc, &fc->processing); 897 end_requests(fc, &fc->processing);
917 } 898 spin_unlock(&fc->lock);
918 spin_unlock(&fuse_lock); 899 fasync_helper(-1, file, 0, &fc->fasync);
919 if (fc)
920 kobject_put(&fc->kobj); 900 kobject_put(&fc->kobj);
901 }
921 902
922 return 0; 903 return 0;
923} 904}
924 905
906static int fuse_dev_fasync(int fd, struct file *file, int on)
907{
908 struct fuse_conn *fc = fuse_get_conn(file);
909 if (!fc)
910 return -EPERM;
911
912 /* No locking - fasync_helper does its own locking */
913 return fasync_helper(fd, file, on, &fc->fasync);
914}
915
925const struct file_operations fuse_dev_operations = { 916const struct file_operations fuse_dev_operations = {
926 .owner = THIS_MODULE, 917 .owner = THIS_MODULE,
927 .llseek = no_llseek, 918 .llseek = no_llseek,
@@ -931,6 +922,7 @@ const struct file_operations fuse_dev_operations = {
931 .writev = fuse_dev_writev, 922 .writev = fuse_dev_writev,
932 .poll = fuse_dev_poll, 923 .poll = fuse_dev_poll,
933 .release = fuse_dev_release, 924 .release = fuse_dev_release,
925 .fasync = fuse_dev_fasync,
934}; 926};
935 927
936static struct miscdevice fuse_miscdevice = { 928static struct miscdevice fuse_miscdevice = {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 256355b80256..8d7546e832e8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -117,8 +117,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
117 return 0; 117 return 0;
118 118
119 fc = get_fuse_conn(inode); 119 fc = get_fuse_conn(inode);
120 req = fuse_get_request(fc); 120 req = fuse_get_req(fc);
121 if (!req) 121 if (IS_ERR(req))
122 return 0; 122 return 0;
123 123
124 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg); 124 fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
@@ -188,9 +188,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
188 if (entry->d_name.len > FUSE_NAME_MAX) 188 if (entry->d_name.len > FUSE_NAME_MAX)
189 return ERR_PTR(-ENAMETOOLONG); 189 return ERR_PTR(-ENAMETOOLONG);
190 190
191 req = fuse_get_request(fc); 191 req = fuse_get_req(fc);
192 if (!req) 192 if (IS_ERR(req))
193 return ERR_PTR(-EINTR); 193 return ERR_PTR(PTR_ERR(req));
194 194
195 fuse_lookup_init(req, dir, entry, &outarg); 195 fuse_lookup_init(req, dir, entry, &outarg);
196 request_send(fc, req); 196 request_send(fc, req);
@@ -244,15 +244,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
244 struct file *file; 244 struct file *file;
245 int flags = nd->intent.open.flags - 1; 245 int flags = nd->intent.open.flags - 1;
246 246
247 err = -ENOSYS;
248 if (fc->no_create) 247 if (fc->no_create)
249 goto out; 248 return -ENOSYS;
250 249
251 err = -EINTR; 250 req = fuse_get_req(fc);
252 req = fuse_get_request(fc); 251 if (IS_ERR(req))
253 if (!req) 252 return PTR_ERR(req);
254 goto out;
255 253
254 err = -ENOMEM;
256 ff = fuse_file_alloc(); 255 ff = fuse_file_alloc();
257 if (!ff) 256 if (!ff)
258 goto out_put_request; 257 goto out_put_request;
@@ -314,7 +313,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
314 fuse_file_free(ff); 313 fuse_file_free(ff);
315 out_put_request: 314 out_put_request:
316 fuse_put_request(fc, req); 315 fuse_put_request(fc, req);
317 out:
318 return err; 316 return err;
319} 317}
320 318
@@ -375,9 +373,9 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
375{ 373{
376 struct fuse_mknod_in inarg; 374 struct fuse_mknod_in inarg;
377 struct fuse_conn *fc = get_fuse_conn(dir); 375 struct fuse_conn *fc = get_fuse_conn(dir);
378 struct fuse_req *req = fuse_get_request(fc); 376 struct fuse_req *req = fuse_get_req(fc);
379 if (!req) 377 if (IS_ERR(req))
380 return -EINTR; 378 return PTR_ERR(req);
381 379
382 memset(&inarg, 0, sizeof(inarg)); 380 memset(&inarg, 0, sizeof(inarg));
383 inarg.mode = mode; 381 inarg.mode = mode;
@@ -407,9 +405,9 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
407{ 405{
408 struct fuse_mkdir_in inarg; 406 struct fuse_mkdir_in inarg;
409 struct fuse_conn *fc = get_fuse_conn(dir); 407 struct fuse_conn *fc = get_fuse_conn(dir);
410 struct fuse_req *req = fuse_get_request(fc); 408 struct fuse_req *req = fuse_get_req(fc);
411 if (!req) 409 if (IS_ERR(req))
412 return -EINTR; 410 return PTR_ERR(req);
413 411
414 memset(&inarg, 0, sizeof(inarg)); 412 memset(&inarg, 0, sizeof(inarg));
415 inarg.mode = mode; 413 inarg.mode = mode;
@@ -427,9 +425,9 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
427{ 425{
428 struct fuse_conn *fc = get_fuse_conn(dir); 426 struct fuse_conn *fc = get_fuse_conn(dir);
429 unsigned len = strlen(link) + 1; 427 unsigned len = strlen(link) + 1;
430 struct fuse_req *req = fuse_get_request(fc); 428 struct fuse_req *req = fuse_get_req(fc);
431 if (!req) 429 if (IS_ERR(req))
432 return -EINTR; 430 return PTR_ERR(req);
433 431
434 req->in.h.opcode = FUSE_SYMLINK; 432 req->in.h.opcode = FUSE_SYMLINK;
435 req->in.numargs = 2; 433 req->in.numargs = 2;
@@ -444,9 +442,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
444{ 442{
445 int err; 443 int err;
446 struct fuse_conn *fc = get_fuse_conn(dir); 444 struct fuse_conn *fc = get_fuse_conn(dir);
447 struct fuse_req *req = fuse_get_request(fc); 445 struct fuse_req *req = fuse_get_req(fc);
448 if (!req) 446 if (IS_ERR(req))
449 return -EINTR; 447 return PTR_ERR(req);
450 448
451 req->in.h.opcode = FUSE_UNLINK; 449 req->in.h.opcode = FUSE_UNLINK;
452 req->in.h.nodeid = get_node_id(dir); 450 req->in.h.nodeid = get_node_id(dir);
@@ -476,9 +474,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
476{ 474{
477 int err; 475 int err;
478 struct fuse_conn *fc = get_fuse_conn(dir); 476 struct fuse_conn *fc = get_fuse_conn(dir);
479 struct fuse_req *req = fuse_get_request(fc); 477 struct fuse_req *req = fuse_get_req(fc);
480 if (!req) 478 if (IS_ERR(req))
481 return -EINTR; 479 return PTR_ERR(req);
482 480
483 req->in.h.opcode = FUSE_RMDIR; 481 req->in.h.opcode = FUSE_RMDIR;
484 req->in.h.nodeid = get_node_id(dir); 482 req->in.h.nodeid = get_node_id(dir);
@@ -504,9 +502,9 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
504 int err; 502 int err;
505 struct fuse_rename_in inarg; 503 struct fuse_rename_in inarg;
506 struct fuse_conn *fc = get_fuse_conn(olddir); 504 struct fuse_conn *fc = get_fuse_conn(olddir);
507 struct fuse_req *req = fuse_get_request(fc); 505 struct fuse_req *req = fuse_get_req(fc);
508 if (!req) 506 if (IS_ERR(req))
509 return -EINTR; 507 return PTR_ERR(req);
510 508
511 memset(&inarg, 0, sizeof(inarg)); 509 memset(&inarg, 0, sizeof(inarg));
512 inarg.newdir = get_node_id(newdir); 510 inarg.newdir = get_node_id(newdir);
@@ -553,9 +551,9 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
553 struct fuse_link_in inarg; 551 struct fuse_link_in inarg;
554 struct inode *inode = entry->d_inode; 552 struct inode *inode = entry->d_inode;
555 struct fuse_conn *fc = get_fuse_conn(inode); 553 struct fuse_conn *fc = get_fuse_conn(inode);
556 struct fuse_req *req = fuse_get_request(fc); 554 struct fuse_req *req = fuse_get_req(fc);
557 if (!req) 555 if (IS_ERR(req))
558 return -EINTR; 556 return PTR_ERR(req);
559 557
560 memset(&inarg, 0, sizeof(inarg)); 558 memset(&inarg, 0, sizeof(inarg));
561 inarg.oldnodeid = get_node_id(inode); 559 inarg.oldnodeid = get_node_id(inode);
@@ -583,9 +581,9 @@ int fuse_do_getattr(struct inode *inode)
583 int err; 581 int err;
584 struct fuse_attr_out arg; 582 struct fuse_attr_out arg;
585 struct fuse_conn *fc = get_fuse_conn(inode); 583 struct fuse_conn *fc = get_fuse_conn(inode);
586 struct fuse_req *req = fuse_get_request(fc); 584 struct fuse_req *req = fuse_get_req(fc);
587 if (!req) 585 if (IS_ERR(req))
588 return -EINTR; 586 return PTR_ERR(req);
589 587
590 req->in.h.opcode = FUSE_GETATTR; 588 req->in.h.opcode = FUSE_GETATTR;
591 req->in.h.nodeid = get_node_id(inode); 589 req->in.h.nodeid = get_node_id(inode);
@@ -673,9 +671,9 @@ static int fuse_access(struct inode *inode, int mask)
673 if (fc->no_access) 671 if (fc->no_access)
674 return 0; 672 return 0;
675 673
676 req = fuse_get_request(fc); 674 req = fuse_get_req(fc);
677 if (!req) 675 if (IS_ERR(req))
678 return -EINTR; 676 return PTR_ERR(req);
679 677
680 memset(&inarg, 0, sizeof(inarg)); 678 memset(&inarg, 0, sizeof(inarg));
681 inarg.mask = mask; 679 inarg.mask = mask;
@@ -780,9 +778,9 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
780 if (is_bad_inode(inode)) 778 if (is_bad_inode(inode))
781 return -EIO; 779 return -EIO;
782 780
783 req = fuse_get_request(fc); 781 req = fuse_get_req(fc);
784 if (!req) 782 if (IS_ERR(req))
785 return -EINTR; 783 return PTR_ERR(req);
786 784
787 page = alloc_page(GFP_KERNEL); 785 page = alloc_page(GFP_KERNEL);
788 if (!page) { 786 if (!page) {
@@ -809,11 +807,11 @@ static char *read_link(struct dentry *dentry)
809{ 807{
810 struct inode *inode = dentry->d_inode; 808 struct inode *inode = dentry->d_inode;
811 struct fuse_conn *fc = get_fuse_conn(inode); 809 struct fuse_conn *fc = get_fuse_conn(inode);
812 struct fuse_req *req = fuse_get_request(fc); 810 struct fuse_req *req = fuse_get_req(fc);
813 char *link; 811 char *link;
814 812
815 if (!req) 813 if (IS_ERR(req))
816 return ERR_PTR(-EINTR); 814 return ERR_PTR(PTR_ERR(req));
817 815
818 link = (char *) __get_free_page(GFP_KERNEL); 816 link = (char *) __get_free_page(GFP_KERNEL);
819 if (!link) { 817 if (!link) {
@@ -933,9 +931,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
933 } 931 }
934 } 932 }
935 933
936 req = fuse_get_request(fc); 934 req = fuse_get_req(fc);
937 if (!req) 935 if (IS_ERR(req))
938 return -EINTR; 936 return PTR_ERR(req);
939 937
940 memset(&inarg, 0, sizeof(inarg)); 938 memset(&inarg, 0, sizeof(inarg));
941 iattr_to_fattr(attr, &inarg); 939 iattr_to_fattr(attr, &inarg);
@@ -995,9 +993,9 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
995 if (fc->no_setxattr) 993 if (fc->no_setxattr)
996 return -EOPNOTSUPP; 994 return -EOPNOTSUPP;
997 995
998 req = fuse_get_request(fc); 996 req = fuse_get_req(fc);
999 if (!req) 997 if (IS_ERR(req))
1000 return -EINTR; 998 return PTR_ERR(req);
1001 999
1002 memset(&inarg, 0, sizeof(inarg)); 1000 memset(&inarg, 0, sizeof(inarg));
1003 inarg.size = size; 1001 inarg.size = size;
@@ -1035,9 +1033,9 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1035 if (fc->no_getxattr) 1033 if (fc->no_getxattr)
1036 return -EOPNOTSUPP; 1034 return -EOPNOTSUPP;
1037 1035
1038 req = fuse_get_request(fc); 1036 req = fuse_get_req(fc);
1039 if (!req) 1037 if (IS_ERR(req))
1040 return -EINTR; 1038 return PTR_ERR(req);
1041 1039
1042 memset(&inarg, 0, sizeof(inarg)); 1040 memset(&inarg, 0, sizeof(inarg));
1043 inarg.size = size; 1041 inarg.size = size;
@@ -1085,9 +1083,9 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1085 if (fc->no_listxattr) 1083 if (fc->no_listxattr)
1086 return -EOPNOTSUPP; 1084 return -EOPNOTSUPP;
1087 1085
1088 req = fuse_get_request(fc); 1086 req = fuse_get_req(fc);
1089 if (!req) 1087 if (IS_ERR(req))
1090 return -EINTR; 1088 return PTR_ERR(req);
1091 1089
1092 memset(&inarg, 0, sizeof(inarg)); 1090 memset(&inarg, 0, sizeof(inarg));
1093 inarg.size = size; 1091 inarg.size = size;
@@ -1131,9 +1129,9 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1131 if (fc->no_removexattr) 1129 if (fc->no_removexattr)
1132 return -EOPNOTSUPP; 1130 return -EOPNOTSUPP;
1133 1131
1134 req = fuse_get_request(fc); 1132 req = fuse_get_req(fc);
1135 if (!req) 1133 if (IS_ERR(req))
1136 return -EINTR; 1134 return PTR_ERR(req);
1137 1135
1138 req->in.h.opcode = FUSE_REMOVEXATTR; 1136 req->in.h.opcode = FUSE_REMOVEXATTR;
1139 req->in.h.nodeid = get_node_id(inode); 1137 req->in.h.nodeid = get_node_id(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 975f2697e866..fc342cf7c2cc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -22,9 +22,9 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
22 struct fuse_req *req; 22 struct fuse_req *req;
23 int err; 23 int err;
24 24
25 req = fuse_get_request(fc); 25 req = fuse_get_req(fc);
26 if (!req) 26 if (IS_ERR(req))
27 return -EINTR; 27 return PTR_ERR(req);
28 28
29 memset(&inarg, 0, sizeof(inarg)); 29 memset(&inarg, 0, sizeof(inarg));
30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 30 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -184,9 +184,9 @@ static int fuse_flush(struct file *file)
184 if (fc->no_flush) 184 if (fc->no_flush)
185 return 0; 185 return 0;
186 186
187 req = fuse_get_request(fc); 187 req = fuse_get_req(fc);
188 if (!req) 188 if (IS_ERR(req))
189 return -EINTR; 189 return PTR_ERR(req);
190 190
191 memset(&inarg, 0, sizeof(inarg)); 191 memset(&inarg, 0, sizeof(inarg));
192 inarg.fh = ff->fh; 192 inarg.fh = ff->fh;
@@ -223,9 +223,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
223 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 223 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
224 return 0; 224 return 0;
225 225
226 req = fuse_get_request(fc); 226 req = fuse_get_req(fc);
227 if (!req) 227 if (IS_ERR(req))
228 return -EINTR; 228 return PTR_ERR(req);
229 229
230 memset(&inarg, 0, sizeof(inarg)); 230 memset(&inarg, 0, sizeof(inarg));
231 inarg.fh = ff->fh; 231 inarg.fh = ff->fh;
@@ -297,9 +297,9 @@ static int fuse_readpage(struct file *file, struct page *page)
297 if (is_bad_inode(inode)) 297 if (is_bad_inode(inode))
298 goto out; 298 goto out;
299 299
300 err = -EINTR; 300 req = fuse_get_req(fc);
301 req = fuse_get_request(fc); 301 err = PTR_ERR(req);
302 if (!req) 302 if (IS_ERR(req))
303 goto out; 303 goto out;
304 304
305 req->out.page_zeroing = 1; 305 req->out.page_zeroing = 1;
@@ -368,10 +368,10 @@ static int fuse_readpages_fill(void *_data, struct page *page)
368 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 368 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
369 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 369 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
370 fuse_send_readpages(req, data->file, inode); 370 fuse_send_readpages(req, data->file, inode);
371 data->req = req = fuse_get_request(fc); 371 data->req = req = fuse_get_req(fc);
372 if (!req) { 372 if (IS_ERR(req)) {
373 unlock_page(page); 373 unlock_page(page);
374 return -EINTR; 374 return PTR_ERR(req);
375 } 375 }
376 } 376 }
377 req->pages[req->num_pages] = page; 377 req->pages[req->num_pages] = page;
@@ -392,13 +392,17 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
392 392
393 data.file = file; 393 data.file = file;
394 data.inode = inode; 394 data.inode = inode;
395 data.req = fuse_get_request(fc); 395 data.req = fuse_get_req(fc);
396 if (!data.req) 396 if (IS_ERR(data.req))
397 return -EINTR; 397 return PTR_ERR(data.req);
398 398
399 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 399 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
400 if (!err) 400 if (!err) {
401 fuse_send_readpages(data.req, file, inode); 401 if (data.req->num_pages)
402 fuse_send_readpages(data.req, file, inode);
403 else
404 fuse_put_request(fc, data.req);
405 }
402 return err; 406 return err;
403} 407}
404 408
@@ -451,9 +455,9 @@ static int fuse_commit_write(struct file *file, struct page *page,
451 if (is_bad_inode(inode)) 455 if (is_bad_inode(inode))
452 return -EIO; 456 return -EIO;
453 457
454 req = fuse_get_request(fc); 458 req = fuse_get_req(fc);
455 if (!req) 459 if (IS_ERR(req))
456 return -EINTR; 460 return PTR_ERR(req);
457 461
458 req->num_pages = 1; 462 req->num_pages = 1;
459 req->pages[0] = page; 463 req->pages[0] = page;
@@ -528,9 +532,9 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
528 if (is_bad_inode(inode)) 532 if (is_bad_inode(inode))
529 return -EIO; 533 return -EIO;
530 534
531 req = fuse_get_request(fc); 535 req = fuse_get_req(fc);
532 if (!req) 536 if (IS_ERR(req))
533 return -EINTR; 537 return PTR_ERR(req);
534 538
535 while (count) { 539 while (count) {
536 size_t nres; 540 size_t nres;
@@ -561,8 +565,12 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
561 buf += nres; 565 buf += nres;
562 if (nres != nbytes) 566 if (nres != nbytes)
563 break; 567 break;
564 if (count) 568 if (count) {
565 fuse_reset_request(req); 569 fuse_put_request(fc, req);
570 req = fuse_get_req(fc);
571 if (IS_ERR(req))
572 break;
573 }
566 } 574 }
567 fuse_put_request(fc, req); 575 fuse_put_request(fc, req);
568 if (res > 0) { 576 if (res > 0) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a16a04fcf41e..59661c481d9d 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -18,8 +18,8 @@
18/** Max number of pages that can be used in a single read request */ 18/** Max number of pages that can be used in a single read request */
19#define FUSE_MAX_PAGES_PER_REQ 32 19#define FUSE_MAX_PAGES_PER_REQ 32
20 20
21/** If more requests are outstanding, then the operation will block */ 21/** Maximum number of outstanding background requests */
22#define FUSE_MAX_OUTSTANDING 10 22#define FUSE_MAX_BACKGROUND 10
23 23
24/** It could be as large as PATH_MAX, but would that have any uses? */ 24/** It could be as large as PATH_MAX, but would that have any uses? */
25#define FUSE_NAME_MAX 1024 25#define FUSE_NAME_MAX 1024
@@ -131,8 +131,8 @@ struct fuse_conn;
131 * A request to the client 131 * A request to the client
132 */ 132 */
133struct fuse_req { 133struct fuse_req {
134 /** This can be on either unused_list, pending processing or 134 /** This can be on either pending processing or io lists in
135 io lists in fuse_conn */ 135 fuse_conn */
136 struct list_head list; 136 struct list_head list;
137 137
138 /** Entry on the background list */ 138 /** Entry on the background list */
@@ -144,15 +144,12 @@ struct fuse_req {
144 /* 144 /*
145 * The following bitfields are either set once before the 145 * The following bitfields are either set once before the
146 * request is queued or setting/clearing them is protected by 146 * request is queued or setting/clearing them is protected by
147 * fuse_lock 147 * fuse_conn->lock
148 */ 148 */
149 149
150 /** True if the request has reply */ 150 /** True if the request has reply */
151 unsigned isreply:1; 151 unsigned isreply:1;
152 152
153 /** The request is preallocated */
154 unsigned preallocated:1;
155
156 /** The request was interrupted */ 153 /** The request was interrupted */
157 unsigned interrupted:1; 154 unsigned interrupted:1;
158 155
@@ -162,6 +159,9 @@ struct fuse_req {
162 /** Data is being copied to/from the request */ 159 /** Data is being copied to/from the request */
163 unsigned locked:1; 160 unsigned locked:1;
164 161
162 /** Request is counted as "waiting" */
163 unsigned waiting:1;
164
165 /** State of the request */ 165 /** State of the request */
166 enum fuse_req_state state; 166 enum fuse_req_state state;
167 167
@@ -213,6 +213,9 @@ struct fuse_req {
213 * unmounted. 213 * unmounted.
214 */ 214 */
215struct fuse_conn { 215struct fuse_conn {
216 /** Lock protecting accesses to members of this structure */
217 spinlock_t lock;
218
216 /** The user id for this mount */ 219 /** The user id for this mount */
217 uid_t user_id; 220 uid_t user_id;
218 221
@@ -244,25 +247,20 @@ struct fuse_conn {
244 interrupted request) */ 247 interrupted request) */
245 struct list_head background; 248 struct list_head background;
246 249
247 /** Controls the maximum number of outstanding requests */ 250 /** Number of requests currently in the background */
248 struct semaphore outstanding_sem; 251 unsigned num_background;
249 252
250 /** This counts the number of outstanding requests if 253 /** Flag indicating if connection is blocked. This will be
251 outstanding_sem would go negative */ 254 the case before the INIT reply is received, and if there
252 unsigned outstanding_debt; 255 are too many outstanding background requests */
256 int blocked;
253 257
254 /** RW semaphore for exclusion with fuse_put_super() */ 258 /** waitq for blocked connection */
255 struct rw_semaphore sbput_sem; 259 wait_queue_head_t blocked_waitq;
256
257 /** The list of unused requests */
258 struct list_head unused_list;
259 260
260 /** The next unique request id */ 261 /** The next unique request id */
261 u64 reqctr; 262 u64 reqctr;
262 263
263 /** Mount is active */
264 unsigned mounted;
265
266 /** Connection established, cleared on umount, connection 264 /** Connection established, cleared on umount, connection
267 abort and device release */ 265 abort and device release */
268 unsigned connected; 266 unsigned connected;
@@ -318,6 +316,9 @@ struct fuse_conn {
318 316
319 /** kobject */ 317 /** kobject */
320 struct kobject kobj; 318 struct kobject kobj;
319
320 /** O_ASYNC requests */
321 struct fasync_struct *fasync;
321}; 322};
322 323
323static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 324static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -349,21 +350,6 @@ static inline u64 get_node_id(struct inode *inode)
349extern const struct file_operations fuse_dev_operations; 350extern const struct file_operations fuse_dev_operations;
350 351
351/** 352/**
352 * This is the single global spinlock which protects FUSE's structures
353 *
354 * The following data is protected by this lock:
355 *
356 * - the private_data field of the device file
357 * - the s_fs_info field of the super block
358 * - unused_list, pending, processing lists in fuse_conn
359 * - background list in fuse_conn
360 * - the unique request ID counter reqctr in fuse_conn
361 * - the sb (super_block) field in fuse_conn
362 * - the file (device file) field in fuse_conn
363 */
364extern spinlock_t fuse_lock;
365
366/**
367 * Get a filled in inode 353 * Get a filled in inode
368 */ 354 */
369struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 355struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
@@ -461,11 +447,11 @@ void fuse_reset_request(struct fuse_req *req);
461/** 447/**
462 * Reserve a preallocated request 448 * Reserve a preallocated request
463 */ 449 */
464struct fuse_req *fuse_get_request(struct fuse_conn *fc); 450struct fuse_req *fuse_get_req(struct fuse_conn *fc);
465 451
466/** 452/**
467 * Decrement reference count of a request. If count goes to zero put 453 * Decrement reference count of a request. If count goes to zero free
468 * on unused list (preallocated) or free request (not preallocated). 454 * the request.
469 */ 455 */
470void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req); 456void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
471 457
@@ -485,11 +471,11 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
485void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 471void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
486 472
487/** 473/**
488 * Release inodes and file associated with background request 474 * Remove request from the background list
489 */ 475 */
490void fuse_release_background(struct fuse_req *req); 476void fuse_remove_background(struct fuse_conn *fc, struct fuse_req *req);
491 477
492/* Abort all requests */ 478/** Abort all requests */
493void fuse_abort_conn(struct fuse_conn *fc); 479void fuse_abort_conn(struct fuse_conn *fc);
494 480
495/** 481/**
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 879e6fba9480..43a6fc0db8a7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1,6 +1,6 @@
1/* 1/*
2 FUSE: Filesystem in Userspace 2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu> 3 Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu>
4 4
5 This program can be distributed under the terms of the GNU GPL. 5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING. 6 See the file COPYING.
@@ -22,7 +22,6 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Filesystem in Userspace"); 22MODULE_DESCRIPTION("Filesystem in Userspace");
23MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
24 24
25spinlock_t fuse_lock;
26static kmem_cache_t *fuse_inode_cachep; 25static kmem_cache_t *fuse_inode_cachep;
27static struct subsystem connections_subsys; 26static struct subsystem connections_subsys;
28 27
@@ -205,17 +204,28 @@ static void fuse_put_super(struct super_block *sb)
205{ 204{
206 struct fuse_conn *fc = get_fuse_conn_super(sb); 205 struct fuse_conn *fc = get_fuse_conn_super(sb);
207 206
208 down_write(&fc->sbput_sem); 207 spin_lock(&fc->lock);
209 while (!list_empty(&fc->background))
210 fuse_release_background(list_entry(fc->background.next,
211 struct fuse_req, bg_entry));
212
213 spin_lock(&fuse_lock);
214 fc->mounted = 0;
215 fc->connected = 0; 208 fc->connected = 0;
216 spin_unlock(&fuse_lock); 209 while (!list_empty(&fc->background)) {
217 up_write(&fc->sbput_sem); 210 struct fuse_req *req = list_entry(fc->background.next,
211 struct fuse_req, bg_entry);
212 struct inode *inode = req->inode;
213 struct inode *inode2 = req->inode2;
214
215 /* File would hold a reference to vfsmount */
216 BUG_ON(req->file);
217 req->inode = NULL;
218 req->inode2 = NULL;
219 fuse_remove_background(fc, req);
220
221 spin_unlock(&fc->lock);
222 iput(inode);
223 iput(inode2);
224 spin_lock(&fc->lock);
225 }
226 spin_unlock(&fc->lock);
218 /* Flush all readers on this fs */ 227 /* Flush all readers on this fs */
228 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
219 wake_up_all(&fc->waitq); 229 wake_up_all(&fc->waitq);
220 kobject_del(&fc->kobj); 230 kobject_del(&fc->kobj);
221 kobject_put(&fc->kobj); 231 kobject_put(&fc->kobj);
@@ -242,9 +252,9 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
242 struct fuse_statfs_out outarg; 252 struct fuse_statfs_out outarg;
243 int err; 253 int err;
244 254
245 req = fuse_get_request(fc); 255 req = fuse_get_req(fc);
246 if (!req) 256 if (IS_ERR(req))
247 return -EINTR; 257 return PTR_ERR(req);
248 258
249 memset(&outarg, 0, sizeof(outarg)); 259 memset(&outarg, 0, sizeof(outarg));
250 req->in.numargs = 0; 260 req->in.numargs = 0;
@@ -369,15 +379,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
369 379
370static void fuse_conn_release(struct kobject *kobj) 380static void fuse_conn_release(struct kobject *kobj)
371{ 381{
372 struct fuse_conn *fc = get_fuse_conn_kobj(kobj); 382 kfree(get_fuse_conn_kobj(kobj));
373
374 while (!list_empty(&fc->unused_list)) {
375 struct fuse_req *req;
376 req = list_entry(fc->unused_list.next, struct fuse_req, list);
377 list_del(&req->list);
378 fuse_request_free(req);
379 }
380 kfree(fc);
381} 383}
382 384
383static struct fuse_conn *new_conn(void) 385static struct fuse_conn *new_conn(void)
@@ -386,64 +388,24 @@ static struct fuse_conn *new_conn(void)
386 388
387 fc = kzalloc(sizeof(*fc), GFP_KERNEL); 389 fc = kzalloc(sizeof(*fc), GFP_KERNEL);
388 if (fc) { 390 if (fc) {
389 int i; 391 spin_lock_init(&fc->lock);
390 init_waitqueue_head(&fc->waitq); 392 init_waitqueue_head(&fc->waitq);
393 init_waitqueue_head(&fc->blocked_waitq);
391 INIT_LIST_HEAD(&fc->pending); 394 INIT_LIST_HEAD(&fc->pending);
392 INIT_LIST_HEAD(&fc->processing); 395 INIT_LIST_HEAD(&fc->processing);
393 INIT_LIST_HEAD(&fc->io); 396 INIT_LIST_HEAD(&fc->io);
394 INIT_LIST_HEAD(&fc->unused_list);
395 INIT_LIST_HEAD(&fc->background); 397 INIT_LIST_HEAD(&fc->background);
396 sema_init(&fc->outstanding_sem, 1); /* One for INIT */
397 init_rwsem(&fc->sbput_sem);
398 kobj_set_kset_s(fc, connections_subsys); 398 kobj_set_kset_s(fc, connections_subsys);
399 kobject_init(&fc->kobj); 399 kobject_init(&fc->kobj);
400 atomic_set(&fc->num_waiting, 0); 400 atomic_set(&fc->num_waiting, 0);
401 for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
402 struct fuse_req *req = fuse_request_alloc();
403 if (!req) {
404 kobject_put(&fc->kobj);
405 return NULL;
406 }
407 list_add(&req->list, &fc->unused_list);
408 }
409 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 401 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
410 fc->bdi.unplug_io_fn = default_unplug_io_fn; 402 fc->bdi.unplug_io_fn = default_unplug_io_fn;
411 fc->reqctr = 0; 403 fc->reqctr = 0;
404 fc->blocked = 1;
412 } 405 }
413 return fc; 406 return fc;
414} 407}
415 408
416static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
417{
418 struct fuse_conn *fc;
419 int err;
420
421 err = -EINVAL;
422 if (file->f_op != &fuse_dev_operations)
423 goto out_err;
424
425 err = -ENOMEM;
426 fc = new_conn();
427 if (!fc)
428 goto out_err;
429
430 spin_lock(&fuse_lock);
431 err = -EINVAL;
432 if (file->private_data)
433 goto out_unlock;
434
435 kobject_get(&fc->kobj);
436 file->private_data = fc;
437 spin_unlock(&fuse_lock);
438 return fc;
439
440 out_unlock:
441 spin_unlock(&fuse_lock);
442 kobject_put(&fc->kobj);
443 out_err:
444 return ERR_PTR(err);
445}
446
447static struct inode *get_root_inode(struct super_block *sb, unsigned mode) 409static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
448{ 410{
449 struct fuse_attr attr; 411 struct fuse_attr attr;
@@ -467,7 +429,6 @@ static struct super_operations fuse_super_operations = {
467 429
468static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 430static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
469{ 431{
470 int i;
471 struct fuse_init_out *arg = &req->misc.init_out; 432 struct fuse_init_out *arg = &req->misc.init_out;
472 433
473 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) 434 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
@@ -486,22 +447,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
486 fc->minor = arg->minor; 447 fc->minor = arg->minor;
487 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 448 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
488 } 449 }
489
490 /* After INIT reply is received other requests can go
491 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
492 up()s on outstanding_sem. The last up() is done in
493 fuse_putback_request() */
494 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
495 up(&fc->outstanding_sem);
496
497 fuse_put_request(fc, req); 450 fuse_put_request(fc, req);
451 fc->blocked = 0;
452 wake_up_all(&fc->blocked_waitq);
498} 453}
499 454
500static void fuse_send_init(struct fuse_conn *fc) 455static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
501{ 456{
502 /* This is called from fuse_read_super() so there's guaranteed
503 to be exactly one request available */
504 struct fuse_req *req = fuse_get_request(fc);
505 struct fuse_init_in *arg = &req->misc.init_in; 457 struct fuse_init_in *arg = &req->misc.init_in;
506 458
507 arg->major = FUSE_KERNEL_VERSION; 459 arg->major = FUSE_KERNEL_VERSION;
@@ -525,12 +477,9 @@ static void fuse_send_init(struct fuse_conn *fc)
525 477
526static unsigned long long conn_id(void) 478static unsigned long long conn_id(void)
527{ 479{
480 /* BKL is held for ->get_sb() */
528 static unsigned long long ctr = 1; 481 static unsigned long long ctr = 1;
529 unsigned long long val; 482 return ctr++;
530 spin_lock(&fuse_lock);
531 val = ctr++;
532 spin_unlock(&fuse_lock);
533 return val;
534} 483}
535 484
536static int fuse_fill_super(struct super_block *sb, void *data, int silent) 485static int fuse_fill_super(struct super_block *sb, void *data, int silent)
@@ -540,6 +489,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
540 struct fuse_mount_data d; 489 struct fuse_mount_data d;
541 struct file *file; 490 struct file *file;
542 struct dentry *root_dentry; 491 struct dentry *root_dentry;
492 struct fuse_req *init_req;
543 int err; 493 int err;
544 494
545 if (!parse_fuse_opt((char *) data, &d)) 495 if (!parse_fuse_opt((char *) data, &d))
@@ -555,10 +505,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
555 if (!file) 505 if (!file)
556 return -EINVAL; 506 return -EINVAL;
557 507
558 fc = get_conn(file, sb); 508 if (file->f_op != &fuse_dev_operations)
559 fput(file); 509 return -EINVAL;
560 if (IS_ERR(fc)) 510
561 return PTR_ERR(fc); 511 /* Setting file->private_data can't race with other mount()
512 instances, since BKL is held for ->get_sb() */
513 if (file->private_data)
514 return -EINVAL;
515
516 fc = new_conn();
517 if (!fc)
518 return -ENOMEM;
562 519
563 fc->flags = d.flags; 520 fc->flags = d.flags;
564 fc->user_id = d.user_id; 521 fc->user_id = d.user_id;
@@ -579,27 +536,39 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
579 goto err; 536 goto err;
580 } 537 }
581 538
539 init_req = fuse_request_alloc();
540 if (!init_req)
541 goto err_put_root;
542
582 err = kobject_set_name(&fc->kobj, "%llu", conn_id()); 543 err = kobject_set_name(&fc->kobj, "%llu", conn_id());
583 if (err) 544 if (err)
584 goto err_put_root; 545 goto err_free_req;
585 546
586 err = kobject_add(&fc->kobj); 547 err = kobject_add(&fc->kobj);
587 if (err) 548 if (err)
588 goto err_put_root; 549 goto err_free_req;
589 550
590 sb->s_root = root_dentry; 551 sb->s_root = root_dentry;
591 spin_lock(&fuse_lock);
592 fc->mounted = 1;
593 fc->connected = 1; 552 fc->connected = 1;
594 spin_unlock(&fuse_lock); 553 kobject_get(&fc->kobj);
554 file->private_data = fc;
555 /*
556 * atomic_dec_and_test() in fput() provides the necessary
557 * memory barrier for file->private_data to be visible on all
558 * CPUs after this
559 */
560 fput(file);
595 561
596 fuse_send_init(fc); 562 fuse_send_init(fc, init_req);
597 563
598 return 0; 564 return 0;
599 565
566 err_free_req:
567 fuse_request_free(init_req);
600 err_put_root: 568 err_put_root:
601 dput(root_dentry); 569 dput(root_dentry);
602 err: 570 err:
571 fput(file);
603 kobject_put(&fc->kobj); 572 kobject_put(&fc->kobj);
604 return err; 573 return err;
605} 574}
@@ -753,7 +722,6 @@ static int __init fuse_init(void)
753 printk("fuse init (API version %i.%i)\n", 722 printk("fuse init (API version %i.%i)\n",
754 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 723 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
755 724
756 spin_lock_init(&fuse_lock);
757 res = fuse_fs_init(); 725 res = fuse_fs_init();
758 if (res) 726 if (res)
759 goto err; 727 goto err;
diff --git a/fs/inotify.c b/fs/inotify.c
index 367c487c014b..1f50302849c5 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -538,7 +538,7 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); 538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
539 spin_lock(&entry->d_lock); 539 spin_lock(&entry->d_lock);
540 parent = entry->d_parent; 540 parent = entry->d_parent;
541 if (inotify_inode_watched(parent->d_inode)) 541 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; 542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
543 spin_unlock(&entry->d_lock); 543 spin_unlock(&entry->d_lock);
544} 544}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index d2b66bad7d50..3ef739120dff 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -650,7 +650,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
650 svc_wake_up(block->b_daemon); 650 svc_wake_up(block->b_daemon);
651} 651}
652 652
653void nlmsvc_grant_release(void *data) 653static void nlmsvc_grant_release(void *data)
654{ 654{
655 struct nlm_rqst *call = data; 655 struct nlm_rqst *call = data;
656 656
diff --git a/fs/locks.c b/fs/locks.c
index dda83d6cd48b..efad798824dc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2230,7 +2230,12 @@ void steal_locks(fl_owner_t from)
2230 2230
2231 lock_kernel(); 2231 lock_kernel();
2232 j = 0; 2232 j = 0;
2233 rcu_read_lock(); 2233
2234 /*
2235 * We are not taking a ref to the file structures, so
2236 * we need to acquire ->file_lock.
2237 */
2238 spin_lock(&files->file_lock);
2234 fdt = files_fdtable(files); 2239 fdt = files_fdtable(files);
2235 for (;;) { 2240 for (;;) {
2236 unsigned long set; 2241 unsigned long set;
@@ -2248,7 +2253,7 @@ void steal_locks(fl_owner_t from)
2248 set >>= 1; 2253 set >>= 1;
2249 } 2254 }
2250 } 2255 }
2251 rcu_read_unlock(); 2256 spin_unlock(&files->file_lock);
2252 unlock_kernel(); 2257 unlock_kernel();
2253} 2258}
2254EXPORT_SYMBOL(steal_locks); 2259EXPORT_SYMBOL(steal_locks);
diff --git a/fs/namespace.c b/fs/namespace.c
index bf478addb852..2c5f1f80bdc2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -899,11 +899,13 @@ static int do_change_type(struct nameidata *nd, int flag)
899/* 899/*
900 * do loopback mount. 900 * do loopback mount.
901 */ 901 */
902static int do_loopback(struct nameidata *nd, char *old_name, int recurse) 902static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
903{ 903{
904 struct nameidata old_nd; 904 struct nameidata old_nd;
905 struct vfsmount *mnt = NULL; 905 struct vfsmount *mnt = NULL;
906 int recurse = flags & MS_REC;
906 int err = mount_is_safe(nd); 907 int err = mount_is_safe(nd);
908
907 if (err) 909 if (err)
908 return err; 910 return err;
909 if (!old_name || !*old_name) 911 if (!old_name || !*old_name)
@@ -937,6 +939,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
937 spin_unlock(&vfsmount_lock); 939 spin_unlock(&vfsmount_lock);
938 release_mounts(&umount_list); 940 release_mounts(&umount_list);
939 } 941 }
942 mnt->mnt_flags = mnt_flags;
940 943
941out: 944out:
942 up_write(&namespace_sem); 945 up_write(&namespace_sem);
@@ -1350,7 +1353,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1350 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, 1353 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
1351 data_page); 1354 data_page);
1352 else if (flags & MS_BIND) 1355 else if (flags & MS_BIND)
1353 retval = do_loopback(&nd, dev_name, flags & MS_REC); 1356 retval = do_loopback(&nd, dev_name, flags, mnt_flags);
1354 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 1357 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1355 retval = do_change_type(&nd, flags); 1358 retval = do_change_type(&nd, flags);
1356 else if (flags & MS_MOVE) 1359 else if (flags & MS_MOVE)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a23f34894167..cae74dd4c7f5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -128,15 +128,14 @@ struct inode_operations nfs4_dir_inode_operations = {
128static int 128static int
129nfs_opendir(struct inode *inode, struct file *filp) 129nfs_opendir(struct inode *inode, struct file *filp)
130{ 130{
131 int res = 0; 131 int res;
132 132
133 dfprintk(VFS, "NFS: opendir(%s/%ld)\n", 133 dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
134 inode->i_sb->s_id, inode->i_ino); 134 inode->i_sb->s_id, inode->i_ino);
135 135
136 lock_kernel(); 136 lock_kernel();
137 /* Call generic open code in order to cache credentials */ 137 /* Call generic open code in order to cache credentials */
138 if (!res) 138 res = nfs_open(inode, filp);
139 res = nfs_open(inode, filp);
140 unlock_kernel(); 139 unlock_kernel();
141 return res; 140 return res;
142} 141}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0f583cb16ddb..3c72b0c07283 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -112,10 +112,9 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
112 */ 112 */
113ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) 113ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
114{ 114{
115 struct dentry *dentry = iocb->ki_filp->f_dentry;
116
117 dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", 115 dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
118 dentry->d_name.name, (long long) pos, nr_segs); 116 iocb->ki_filp->f_dentry->d_name.name,
117 (long long) pos, nr_segs);
119 118
120 return -EINVAL; 119 return -EINVAL;
121} 120}
@@ -468,7 +467,6 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
468static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 467static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
469{ 468{
470 struct nfs_write_data *data = dreq->commit_data; 469 struct nfs_write_data *data = dreq->commit_data;
471 struct rpc_task *task = &data->task;
472 470
473 data->inode = dreq->inode; 471 data->inode = dreq->inode;
474 data->cred = dreq->ctx->cred; 472 data->cred = dreq->ctx->cred;
@@ -489,7 +487,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
489 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ 487 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
490 dreq->commit_data = NULL; 488 dreq->commit_data = NULL;
491 489
492 dprintk("NFS: %5u initiated commit call\n", task->tk_pid); 490 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
493 491
494 lock_kernel(); 492 lock_kernel();
495 rpc_execute(&data->task); 493 rpc_execute(&data->task);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f1df2c8d9259..fade02c15e6e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -534,10 +534,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
534 */ 534 */
535static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) 535static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
536{ 536{
537 struct inode * inode = filp->f_mapping->host;
538
539 dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", 537 dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
540 inode->i_sb->s_id, inode->i_ino, 538 filp->f_dentry->d_inode->i_sb->s_id,
539 filp->f_dentry->d_inode->i_ino,
541 fl->fl_type, fl->fl_flags); 540 fl->fl_type, fl->fl_flags);
542 541
543 /* 542 /*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2f7656b911b6..d0b991a92327 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -700,12 +700,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
700 /* 700 /*
701 * Display superblock I/O counters 701 * Display superblock I/O counters
702 */ 702 */
703 for (cpu = 0; cpu < NR_CPUS; cpu++) { 703 for_each_possible_cpu(cpu) {
704 struct nfs_iostats *stats; 704 struct nfs_iostats *stats;
705 705
706 if (!cpu_possible(cpu))
707 continue;
708
709 preempt_disable(); 706 preempt_disable();
710 stats = per_cpu_ptr(nfss->io_stats, cpu); 707 stats = per_cpu_ptr(nfss->io_stats, cpu);
711 708
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 47ece1dd3c67..d86c0db7b1e8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1218,7 +1218,7 @@ out:
1218 return status; 1218 return status;
1219} 1219}
1220 1220
1221static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) 1221static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
1222{ 1222{
1223 struct file *filp; 1223 struct file *filp;
1224 1224
@@ -1227,8 +1227,10 @@ static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, st
1227 struct nfs_open_context *ctx; 1227 struct nfs_open_context *ctx;
1228 ctx = (struct nfs_open_context *)filp->private_data; 1228 ctx = (struct nfs_open_context *)filp->private_data;
1229 ctx->state = state; 1229 ctx->state = state;
1230 } else 1230 return 0;
1231 nfs4_close_state(state, nd->intent.open.flags); 1231 }
1232 nfs4_close_state(state, nd->intent.open.flags);
1233 return PTR_ERR(filp);
1232} 1234}
1233 1235
1234struct dentry * 1236struct dentry *
@@ -1835,7 +1837,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1835 nfs_setattr_update_inode(state->inode, sattr); 1837 nfs_setattr_update_inode(state->inode, sattr);
1836 } 1838 }
1837 if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) 1839 if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
1838 nfs4_intent_set_file(nd, dentry, state); 1840 status = nfs4_intent_set_file(nd, dentry, state);
1839 else 1841 else
1840 nfs4_close_state(state, flags); 1842 nfs4_close_state(state, flags);
1841out: 1843out:
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index cfe9ce881613..6e92b0fe5323 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -14,46 +14,46 @@
14 14
15int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) 15int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
16{ 16{
17 struct svc_cred *cred = &rqstp->rq_cred; 17 struct svc_cred cred = rqstp->rq_cred;
18 int i; 18 int i;
19 int ret; 19 int ret;
20 20
21 if (exp->ex_flags & NFSEXP_ALLSQUASH) { 21 if (exp->ex_flags & NFSEXP_ALLSQUASH) {
22 cred->cr_uid = exp->ex_anon_uid; 22 cred.cr_uid = exp->ex_anon_uid;
23 cred->cr_gid = exp->ex_anon_gid; 23 cred.cr_gid = exp->ex_anon_gid;
24 put_group_info(cred->cr_group_info); 24 cred.cr_group_info = groups_alloc(0);
25 cred->cr_group_info = groups_alloc(0);
26 } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) { 25 } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) {
27 struct group_info *gi; 26 struct group_info *gi;
28 if (!cred->cr_uid) 27 if (!cred.cr_uid)
29 cred->cr_uid = exp->ex_anon_uid; 28 cred.cr_uid = exp->ex_anon_uid;
30 if (!cred->cr_gid) 29 if (!cred.cr_gid)
31 cred->cr_gid = exp->ex_anon_gid; 30 cred.cr_gid = exp->ex_anon_gid;
32 gi = groups_alloc(cred->cr_group_info->ngroups); 31 gi = groups_alloc(cred.cr_group_info->ngroups);
33 if (gi) 32 if (gi)
34 for (i = 0; i < cred->cr_group_info->ngroups; i++) { 33 for (i = 0; i < cred.cr_group_info->ngroups; i++) {
35 if (!GROUP_AT(cred->cr_group_info, i)) 34 if (!GROUP_AT(cred.cr_group_info, i))
36 GROUP_AT(gi, i) = exp->ex_anon_gid; 35 GROUP_AT(gi, i) = exp->ex_anon_gid;
37 else 36 else
38 GROUP_AT(gi, i) = GROUP_AT(cred->cr_group_info, i); 37 GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i);
39 } 38 }
40 put_group_info(cred->cr_group_info); 39 cred.cr_group_info = gi;
41 cred->cr_group_info = gi; 40 } else
42 } 41 get_group_info(cred.cr_group_info);
43 42
44 if (cred->cr_uid != (uid_t) -1) 43 if (cred.cr_uid != (uid_t) -1)
45 current->fsuid = cred->cr_uid; 44 current->fsuid = cred.cr_uid;
46 else 45 else
47 current->fsuid = exp->ex_anon_uid; 46 current->fsuid = exp->ex_anon_uid;
48 if (cred->cr_gid != (gid_t) -1) 47 if (cred.cr_gid != (gid_t) -1)
49 current->fsgid = cred->cr_gid; 48 current->fsgid = cred.cr_gid;
50 else 49 else
51 current->fsgid = exp->ex_anon_gid; 50 current->fsgid = exp->ex_anon_gid;
52 51
53 if (!cred->cr_group_info) 52 if (!cred.cr_group_info)
54 return -ENOMEM; 53 return -ENOMEM;
55 ret = set_current_groups(cred->cr_group_info); 54 ret = set_current_groups(cred.cr_group_info);
56 if ((cred->cr_uid)) { 55 put_group_info(cred.cr_group_info);
56 if ((cred.cr_uid)) {
57 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; 57 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
58 } else { 58 } else {
59 cap_t(current->cap_effective) |= (CAP_NFSD_MASK & 59 cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c340be0a3f59..4e0578121d9a 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -422,7 +422,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
422 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 422 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
423 goto out; 423 goto out;
424 err = path_lookup(buf, 0, &nd); 424 err = path_lookup(buf, 0, &nd);
425 if (err) goto out; 425 if (err) goto out_no_path;
426 426
427 exp.h.flags = 0; 427 exp.h.flags = 0;
428 exp.ex_client = dom; 428 exp.ex_client = dom;
@@ -475,6 +475,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
475 out: 475 out:
476 if (nd.dentry) 476 if (nd.dentry)
477 path_release(&nd); 477 path_release(&nd);
478 out_no_path:
478 if (dom) 479 if (dom)
479 auth_domain_put(dom); 480 auth_domain_put(dom);
480 kfree(buf); 481 kfree(buf);
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 6d2dfed1de08..f61142afea44 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -682,7 +682,7 @@ static struct svc_procedure nfsd_procedures3[22] = {
682 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), 682 PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
683 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), 683 PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1),
684 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), 684 PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
685 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE), 685 PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
686 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), 686 PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4),
687 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 687 PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
688 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), 688 PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 7391f4aabedb..edb107e61b91 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -710,9 +710,9 @@ calculate_posix_ace_count(struct nfs4_acl *n4acl)
710 /* Also, the remaining entries are for named users and 710 /* Also, the remaining entries are for named users and
711 * groups, and come in threes (mask, allow, deny): */ 711 * groups, and come in threes (mask, allow, deny): */
712 if (n4acl->naces < 7) 712 if (n4acl->naces < 7)
713 return -1; 713 return -EINVAL;
714 if ((n4acl->naces - 7) % 3) 714 if ((n4acl->naces - 7) % 3)
715 return -1; 715 return -EINVAL;
716 return 4 + (n4acl->naces - 7)/3; 716 return 4 + (n4acl->naces - 7)/3;
717 } 717 }
718} 718}
@@ -790,7 +790,7 @@ nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
790 continue; 790 continue;
791 791
792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, 792 error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
793 ace->access_mask, ace->whotype, ace->who) == -1; 793 ace->access_mask, ace->whotype, ace->who);
794 if (error < 0) 794 if (error < 0)
795 goto out; 795 goto out;
796 796
@@ -866,7 +866,7 @@ nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
866 struct nfs4_ace *ace; 866 struct nfs4_ace *ace;
867 867
868 if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) 868 if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL)
869 return -1; 869 return -ENOMEM;
870 870
871 ace->type = type; 871 ace->type = type;
872 ace->flag = flag; 872 ace->flag = flag;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c872bd07fc10..dbaf3f93f328 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -441,8 +441,9 @@ nfsd4_probe_callback(struct nfs4_client *clp)
441 goto out_clnt; 441 goto out_clnt;
442 } 442 }
443 443
444 /* the task holds a reference to the nfs4_client struct */
445 cb->cb_client = clnt; 444 cb->cb_client = clnt;
445
446 /* the task holds a reference to the nfs4_client struct */
446 atomic_inc(&clp->cl_count); 447 atomic_inc(&clp->cl_count);
447 448
448 msg.rpc_cred = nfsd4_lookupcred(clp,0); 449 msg.rpc_cred = nfsd4_lookupcred(clp,0);
@@ -460,13 +461,12 @@ nfsd4_probe_callback(struct nfs4_client *clp)
460out_rpciod: 461out_rpciod:
461 atomic_dec(&clp->cl_count); 462 atomic_dec(&clp->cl_count);
462 rpciod_down(); 463 rpciod_down();
464 cb->cb_client = NULL;
463out_clnt: 465out_clnt:
464 rpc_shutdown_client(clnt); 466 rpc_shutdown_client(clnt);
465 goto out_err;
466out_err: 467out_err:
467 dprintk("NFSD: warning: no callback path to client %.*s\n", 468 dprintk("NFSD: warning: no callback path to client %.*s\n",
468 (int)clp->cl_name.len, clp->cl_name.data); 469 (int)clp->cl_name.len, clp->cl_name.data);
469 cb->cb_client = NULL;
470} 470}
471 471
472static void 472static void
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6d63f1d9e5f5..b0e095ea0c03 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -288,8 +288,6 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
288 fh_put(current_fh); 288 fh_put(current_fh);
289 status = exp_pseudoroot(rqstp->rq_client, current_fh, 289 status = exp_pseudoroot(rqstp->rq_client, current_fh,
290 &rqstp->rq_chandle); 290 &rqstp->rq_chandle);
291 if (!status)
292 status = nfserrno(nfsd_setuser(rqstp, current_fh->fh_export));
293 return status; 291 return status;
294} 292}
295 293
@@ -975,7 +973,7 @@ struct nfsd4_voidargs { int dummy; };
975 */ 973 */
976static struct svc_procedure nfsd_procedures4[2] = { 974static struct svc_procedure nfsd_procedures4[2] = {
977 PROC(null, void, void, void, RC_NOCACHE, 1), 975 PROC(null, void, void, void, RC_NOCACHE, 1),
978 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE) 976 PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4)
979}; 977};
980 978
981struct svc_version nfsd_version4 = { 979struct svc_version nfsd_version4 = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47ec112b266c..96c7578cbe1e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -147,6 +147,42 @@ get_nfs4_file(struct nfs4_file *fi)
147 kref_get(&fi->fi_ref); 147 kref_get(&fi->fi_ref);
148} 148}
149 149
150static int num_delegations;
151
152/*
153 * Open owner state (share locks)
154 */
155
156/* hash tables for nfs4_stateowner */
157#define OWNER_HASH_BITS 8
158#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
159#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
160
161#define ownerid_hashval(id) \
162 ((id) & OWNER_HASH_MASK)
163#define ownerstr_hashval(clientid, ownername) \
164 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
165
166static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
167static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
168
169/* hash table for nfs4_file */
170#define FILE_HASH_BITS 8
171#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
172#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
173/* hash table for (open)nfs4_stateid */
174#define STATEID_HASH_BITS 10
175#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
176#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
177
178#define file_hashval(x) \
179 hash_ptr(x, FILE_HASH_BITS)
180#define stateid_hashval(owner_id, file_id) \
181 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
182
183static struct list_head file_hashtbl[FILE_HASH_SIZE];
184static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
185
150static struct nfs4_delegation * 186static struct nfs4_delegation *
151alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) 187alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
152{ 188{
@@ -155,9 +191,12 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
155 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; 191 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
156 192
157 dprintk("NFSD alloc_init_deleg\n"); 193 dprintk("NFSD alloc_init_deleg\n");
194 if (num_delegations > STATEID_HASH_SIZE * 4)
195 return NULL;
158 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); 196 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
159 if (dp == NULL) 197 if (dp == NULL)
160 return dp; 198 return dp;
199 num_delegations++;
161 INIT_LIST_HEAD(&dp->dl_perfile); 200 INIT_LIST_HEAD(&dp->dl_perfile);
162 INIT_LIST_HEAD(&dp->dl_perclnt); 201 INIT_LIST_HEAD(&dp->dl_perclnt);
163 INIT_LIST_HEAD(&dp->dl_recall_lru); 202 INIT_LIST_HEAD(&dp->dl_recall_lru);
@@ -192,6 +231,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
192 dprintk("NFSD: freeing dp %p\n",dp); 231 dprintk("NFSD: freeing dp %p\n",dp);
193 put_nfs4_file(dp->dl_file); 232 put_nfs4_file(dp->dl_file);
194 kmem_cache_free(deleg_slab, dp); 233 kmem_cache_free(deleg_slab, dp);
234 num_delegations--;
195 } 235 }
196} 236}
197 237
@@ -330,22 +370,29 @@ put_nfs4_client(struct nfs4_client *clp)
330} 370}
331 371
332static void 372static void
373shutdown_callback_client(struct nfs4_client *clp)
374{
375 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
376
377 /* shutdown rpc client, ending any outstanding recall rpcs */
378 if (clnt) {
379 clp->cl_callback.cb_client = NULL;
380 rpc_shutdown_client(clnt);
381 rpciod_down();
382 }
383}
384
385static void
333expire_client(struct nfs4_client *clp) 386expire_client(struct nfs4_client *clp)
334{ 387{
335 struct nfs4_stateowner *sop; 388 struct nfs4_stateowner *sop;
336 struct nfs4_delegation *dp; 389 struct nfs4_delegation *dp;
337 struct nfs4_callback *cb = &clp->cl_callback;
338 struct rpc_clnt *clnt = clp->cl_callback.cb_client;
339 struct list_head reaplist; 390 struct list_head reaplist;
340 391
341 dprintk("NFSD: expire_client cl_count %d\n", 392 dprintk("NFSD: expire_client cl_count %d\n",
342 atomic_read(&clp->cl_count)); 393 atomic_read(&clp->cl_count));
343 394
344 /* shutdown rpc client, ending any outstanding recall rpcs */ 395 shutdown_callback_client(clp);
345 if (atomic_read(&cb->cb_set) == 1 && clnt) {
346 rpc_shutdown_client(clnt);
347 clnt = clp->cl_callback.cb_client = NULL;
348 }
349 396
350 INIT_LIST_HEAD(&reaplist); 397 INIT_LIST_HEAD(&reaplist);
351 spin_lock(&recall_lock); 398 spin_lock(&recall_lock);
@@ -936,40 +983,6 @@ out:
936 return status; 983 return status;
937} 984}
938 985
939/*
940 * Open owner state (share locks)
941 */
942
943/* hash tables for nfs4_stateowner */
944#define OWNER_HASH_BITS 8
945#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
946#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
947
948#define ownerid_hashval(id) \
949 ((id) & OWNER_HASH_MASK)
950#define ownerstr_hashval(clientid, ownername) \
951 (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
952
953static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
954static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
955
956/* hash table for nfs4_file */
957#define FILE_HASH_BITS 8
958#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
959#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
960/* hash table for (open)nfs4_stateid */
961#define STATEID_HASH_BITS 10
962#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
963#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
964
965#define file_hashval(x) \
966 hash_ptr(x, FILE_HASH_BITS)
967#define stateid_hashval(owner_id, file_id) \
968 (((owner_id) + (file_id)) & STATEID_HASH_MASK)
969
970static struct list_head file_hashtbl[FILE_HASH_SIZE];
971static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
972
973/* OPEN Share state helper functions */ 986/* OPEN Share state helper functions */
974static inline struct nfs4_file * 987static inline struct nfs4_file *
975alloc_init_file(struct inode *ino) 988alloc_init_file(struct inode *ino)
@@ -1186,8 +1199,7 @@ move_to_close_lru(struct nfs4_stateowner *sop)
1186{ 1199{
1187 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); 1200 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
1188 1201
1189 unhash_stateowner(sop); 1202 list_move_tail(&sop->so_close_lru, &close_lru);
1190 list_add_tail(&sop->so_close_lru, &close_lru);
1191 sop->so_time = get_seconds(); 1203 sop->so_time = get_seconds();
1192} 1204}
1193 1205
@@ -1916,8 +1928,7 @@ nfs4_laundromat(void)
1916 } 1928 }
1917 dprintk("NFSD: purging unused open stateowner (so_id %d)\n", 1929 dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
1918 sop->so_id); 1930 sop->so_id);
1919 list_del(&sop->so_close_lru); 1931 release_stateowner(sop);
1920 nfs4_put_stateowner(sop);
1921 } 1932 }
1922 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) 1933 if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
1923 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; 1934 clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
@@ -2495,36 +2506,27 @@ nfs4_transform_lock_offset(struct file_lock *lock)
2495 lock->fl_end = OFFSET_MAX; 2506 lock->fl_end = OFFSET_MAX;
2496} 2507}
2497 2508
2498static int 2509/* Hack!: For now, we're defining this just so we can use a pointer to it
2499nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) 2510 * as a unique cookie to identify our (NFSv4's) posix locks. */
2500{ 2511static struct lock_manager_operations nfsd_posix_mng_ops = {
2501 struct nfs4_stateowner *local = NULL; 2512};
2502 int status = 0;
2503
2504 if (hashval >= LOCK_HASH_SIZE)
2505 goto out;
2506 list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], so_idhash) {
2507 if (local == sop) {
2508 status = 1;
2509 goto out;
2510 }
2511 }
2512out:
2513 return status;
2514}
2515
2516 2513
2517static inline void 2514static inline void
2518nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) 2515nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
2519{ 2516{
2520 struct nfs4_stateowner *sop = (struct nfs4_stateowner *) fl->fl_owner; 2517 struct nfs4_stateowner *sop;
2521 unsigned int hval = lockownerid_hashval(sop->so_id); 2518 unsigned int hval;
2522 2519
2523 deny->ld_sop = NULL; 2520 if (fl->fl_lmops == &nfsd_posix_mng_ops) {
2524 if (nfs4_verify_lock_stateowner(sop, hval)) { 2521 sop = (struct nfs4_stateowner *) fl->fl_owner;
2522 hval = lockownerid_hashval(sop->so_id);
2525 kref_get(&sop->so_ref); 2523 kref_get(&sop->so_ref);
2526 deny->ld_sop = sop; 2524 deny->ld_sop = sop;
2527 deny->ld_clientid = sop->so_client->cl_clientid; 2525 deny->ld_clientid = sop->so_client->cl_clientid;
2526 } else {
2527 deny->ld_sop = NULL;
2528 deny->ld_clientid.cl_boot = 0;
2529 deny->ld_clientid.cl_id = 0;
2528 } 2530 }
2529 deny->ld_start = fl->fl_start; 2531 deny->ld_start = fl->fl_start;
2530 deny->ld_length = ~(u64)0; 2532 deny->ld_length = ~(u64)0;
@@ -2736,6 +2738,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2736 file_lock.fl_pid = current->tgid; 2738 file_lock.fl_pid = current->tgid;
2737 file_lock.fl_file = filp; 2739 file_lock.fl_file = filp;
2738 file_lock.fl_flags = FL_POSIX; 2740 file_lock.fl_flags = FL_POSIX;
2741 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2739 2742
2740 file_lock.fl_start = lock->lk_offset; 2743 file_lock.fl_start = lock->lk_offset;
2741 if ((lock->lk_length == ~(u64)0) || 2744 if ((lock->lk_length == ~(u64)0) ||
@@ -2841,6 +2844,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2841 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner; 2844 file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
2842 file_lock.fl_pid = current->tgid; 2845 file_lock.fl_pid = current->tgid;
2843 file_lock.fl_flags = FL_POSIX; 2846 file_lock.fl_flags = FL_POSIX;
2847 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2844 2848
2845 file_lock.fl_start = lockt->lt_offset; 2849 file_lock.fl_start = lockt->lt_offset;
2846 if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length)) 2850 if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length))
@@ -2900,6 +2904,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2900 file_lock.fl_pid = current->tgid; 2904 file_lock.fl_pid = current->tgid;
2901 file_lock.fl_file = filp; 2905 file_lock.fl_file = filp;
2902 file_lock.fl_flags = FL_POSIX; 2906 file_lock.fl_flags = FL_POSIX;
2907 file_lock.fl_lmops = &nfsd_posix_mng_ops;
2903 file_lock.fl_start = locku->lu_offset; 2908 file_lock.fl_start = locku->lu_offset;
2904 2909
2905 if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length)) 2910 if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
@@ -3211,15 +3216,8 @@ __nfs4_state_shutdown(void)
3211 int i; 3216 int i;
3212 struct nfs4_client *clp = NULL; 3217 struct nfs4_client *clp = NULL;
3213 struct nfs4_delegation *dp = NULL; 3218 struct nfs4_delegation *dp = NULL;
3214 struct nfs4_stateowner *sop = NULL;
3215 struct list_head *pos, *next, reaplist; 3219 struct list_head *pos, *next, reaplist;
3216 3220
3217 list_for_each_safe(pos, next, &close_lru) {
3218 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
3219 list_del(&sop->so_close_lru);
3220 nfs4_put_stateowner(sop);
3221 }
3222
3223 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3221 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3224 while (!list_empty(&conf_id_hashtbl[i])) { 3222 while (!list_empty(&conf_id_hashtbl[i])) {
3225 clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); 3223 clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
@@ -3244,8 +3242,6 @@ __nfs4_state_shutdown(void)
3244 } 3242 }
3245 3243
3246 cancel_delayed_work(&laundromat_work); 3244 cancel_delayed_work(&laundromat_work);
3247 flush_workqueue(laundry_wq);
3248 destroy_workqueue(laundry_wq);
3249 nfsd4_shutdown_recdir(); 3245 nfsd4_shutdown_recdir();
3250 nfs4_init = 0; 3246 nfs4_init = 0;
3251} 3247}
@@ -3253,6 +3249,8 @@ __nfs4_state_shutdown(void)
3253void 3249void
3254nfs4_state_shutdown(void) 3250nfs4_state_shutdown(void)
3255{ 3251{
3252 cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
3253 destroy_workqueue(laundry_wq);
3256 nfs4_lock_state(); 3254 nfs4_lock_state();
3257 nfs4_release_reclaim(); 3255 nfs4_release_reclaim();
3258 __nfs4_state_shutdown(); 3256 __nfs4_state_shutdown();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 03857fd81126..de3998f15f10 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -299,11 +299,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
299 buf, dummy32, &ace.who); 299 buf, dummy32, &ace.who);
300 if (status) 300 if (status)
301 goto out_nfserr; 301 goto out_nfserr;
302 if (nfs4_acl_add_ace(*acl, ace.type, ace.flag, 302 status = nfs4_acl_add_ace(*acl, ace.type, ace.flag,
303 ace.access_mask, ace.whotype, ace.who) != 0) { 303 ace.access_mask, ace.whotype, ace.who);
304 status = -ENOMEM; 304 if (status)
305 goto out_nfserr; 305 goto out_nfserr;
306 }
307 } 306 }
308 } else 307 } else
309 *acl = NULL; 308 *acl = NULL;
@@ -2085,27 +2084,20 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read
2085 WRITE32(eof); 2084 WRITE32(eof);
2086 WRITE32(maxcount); 2085 WRITE32(maxcount);
2087 ADJUST_ARGS(); 2086 ADJUST_ARGS();
2088 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2087 resp->xbuf->head[0].iov_len = (char*)p
2089 2088 - (char*)resp->xbuf->head[0].iov_base;
2090 resp->xbuf->page_len = maxcount; 2089 resp->xbuf->page_len = maxcount;
2091 2090
2092 /* read zero bytes -> don't set up tail */ 2091 /* Use rest of head for padding and remaining ops: */
2093 if(!maxcount) 2092 resp->rqstp->rq_restailpage = 0;
2094 return 0; 2093 resp->xbuf->tail[0].iov_base = p;
2095
2096 /* set up page for remaining responses */
2097 svc_take_page(resp->rqstp);
2098 resp->xbuf->tail[0].iov_base =
2099 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2100 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2101 resp->xbuf->tail[0].iov_len = 0; 2094 resp->xbuf->tail[0].iov_len = 0;
2102 resp->p = resp->xbuf->tail[0].iov_base;
2103 resp->end = resp->p + PAGE_SIZE/4;
2104
2105 if (maxcount&3) { 2095 if (maxcount&3) {
2106 *(resp->p)++ = 0; 2096 RESERVE_SPACE(4);
2097 WRITE32(0);
2107 resp->xbuf->tail[0].iov_base += maxcount&3; 2098 resp->xbuf->tail[0].iov_base += maxcount&3;
2108 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); 2099 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
2100 ADJUST_ARGS();
2109 } 2101 }
2110 return 0; 2102 return 0;
2111} 2103}
@@ -2142,21 +2134,20 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r
2142 2134
2143 WRITE32(maxcount); 2135 WRITE32(maxcount);
2144 ADJUST_ARGS(); 2136 ADJUST_ARGS();
2145 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2137 resp->xbuf->head[0].iov_len = (char*)p
2138 - (char*)resp->xbuf->head[0].iov_base;
2139 resp->xbuf->page_len = maxcount;
2146 2140
2147 svc_take_page(resp->rqstp); 2141 /* Use rest of head for padding and remaining ops: */
2148 resp->xbuf->tail[0].iov_base = 2142 resp->rqstp->rq_restailpage = 0;
2149 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2143 resp->xbuf->tail[0].iov_base = p;
2150 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2151 resp->xbuf->tail[0].iov_len = 0; 2144 resp->xbuf->tail[0].iov_len = 0;
2152 resp->p = resp->xbuf->tail[0].iov_base;
2153 resp->end = resp->p + PAGE_SIZE/4;
2154
2155 resp->xbuf->page_len = maxcount;
2156 if (maxcount&3) { 2145 if (maxcount&3) {
2157 *(resp->p)++ = 0; 2146 RESERVE_SPACE(4);
2147 WRITE32(0);
2158 resp->xbuf->tail[0].iov_base += maxcount&3; 2148 resp->xbuf->tail[0].iov_base += maxcount&3;
2159 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); 2149 resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
2150 ADJUST_ARGS();
2160 } 2151 }
2161 return 0; 2152 return 0;
2162} 2153}
@@ -2166,7 +2157,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2166{ 2157{
2167 int maxcount; 2158 int maxcount;
2168 loff_t offset; 2159 loff_t offset;
2169 u32 *page, *savep; 2160 u32 *page, *savep, *tailbase;
2170 ENCODE_HEAD; 2161 ENCODE_HEAD;
2171 2162
2172 if (nfserr) 2163 if (nfserr)
@@ -2182,6 +2173,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2182 WRITE32(0); 2173 WRITE32(0);
2183 ADJUST_ARGS(); 2174 ADJUST_ARGS();
2184 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; 2175 resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
2176 tailbase = p;
2185 2177
2186 maxcount = PAGE_SIZE; 2178 maxcount = PAGE_SIZE;
2187 if (maxcount > readdir->rd_maxcount) 2179 if (maxcount > readdir->rd_maxcount)
@@ -2226,14 +2218,12 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re
2226 *p++ = htonl(readdir->common.err == nfserr_eof); 2218 *p++ = htonl(readdir->common.err == nfserr_eof);
2227 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); 2219 resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2228 2220
2229 /* allocate a page for the tail */ 2221 /* Use rest of head for padding and remaining ops: */
2230 svc_take_page(resp->rqstp); 2222 resp->rqstp->rq_restailpage = 0;
2231 resp->xbuf->tail[0].iov_base = 2223 resp->xbuf->tail[0].iov_base = tailbase;
2232 page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
2233 resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
2234 resp->xbuf->tail[0].iov_len = 0; 2224 resp->xbuf->tail[0].iov_len = 0;
2235 resp->p = resp->xbuf->tail[0].iov_base; 2225 resp->p = resp->xbuf->tail[0].iov_base;
2236 resp->end = resp->p + PAGE_SIZE/4; 2226 resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;
2237 2227
2238 return 0; 2228 return 0;
2239err_no_verf: 2229err_no_verf:
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 3e6b75cd90fd..06cd0db0f32b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -553,7 +553,7 @@ static struct svc_procedure nfsd_procedures2[18] = {
553 PROC(none, void, void, none, RC_NOCACHE, ST), 553 PROC(none, void, void, none, RC_NOCACHE, ST),
554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), 554 PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), 555 PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE), 556 PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
557 PROC(none, void, void, none, RC_NOCACHE, ST), 557 PROC(none, void, void, none, RC_NOCACHE, ST),
558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), 558 PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), 559 PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 31018333dc38..6aa92d0e6876 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -371,7 +371,6 @@ out_nfserr:
371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 371static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
372{ 372{
373 ssize_t buflen; 373 ssize_t buflen;
374 int error;
375 374
376 buflen = vfs_getxattr(dentry, key, NULL, 0); 375 buflen = vfs_getxattr(dentry, key, NULL, 0);
377 if (buflen <= 0) 376 if (buflen <= 0)
@@ -381,10 +380,7 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
381 if (!*buf) 380 if (!*buf)
382 return -ENOMEM; 381 return -ENOMEM;
383 382
384 error = vfs_getxattr(dentry, key, *buf, buflen); 383 return vfs_getxattr(dentry, key, *buf, buflen);
385 if (error < 0)
386 return error;
387 return buflen;
388} 384}
389#endif 385#endif
390 386
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d06867..21f38accd039 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
153struct o2hb_bio_wait_ctxt { 153struct o2hb_bio_wait_ctxt {
154 atomic_t wc_num_reqs; 154 atomic_t wc_num_reqs;
155 struct completion wc_io_complete; 155 struct completion wc_io_complete;
156 int wc_error;
156}; 157};
157 158
158static void o2hb_write_timeout(void *arg) 159static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
186{ 187{
187 atomic_set(&wc->wc_num_reqs, num_ios); 188 atomic_set(&wc->wc_num_reqs, num_ios);
188 init_completion(&wc->wc_io_complete); 189 init_completion(&wc->wc_io_complete);
190 wc->wc_error = 0;
189} 191}
190 192
191/* Used in error paths too */ 193/* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *bio,
218{ 220{
219 struct o2hb_bio_wait_ctxt *wc = bio->bi_private; 221 struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
220 222
221 if (error) 223 if (error) {
222 mlog(ML_ERROR, "IO Error %d\n", error); 224 mlog(ML_ERROR, "IO Error %d\n", error);
225 wc->wc_error = error;
226 }
223 227
224 if (bio->bi_size) 228 if (bio->bi_size)
225 return 1; 229 return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
390 394
391bail_and_wait: 395bail_and_wait:
392 o2hb_wait_on_io(reg, &wc); 396 o2hb_wait_on_io(reg, &wc);
397 if (wc.wc_error && !status)
398 status = wc.wc_error;
393 399
394 if (bios) { 400 if (bios) {
395 for(i = 0; i < num_bios; i++) 401 for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned long *nodes,
790 return highest; 796 return highest;
791} 797}
792 798
793static void o2hb_do_disk_heartbeat(struct o2hb_region *reg) 799static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
794{ 800{
795 int i, ret, highest_node, change = 0; 801 int i, ret, highest_node, change = 0;
796 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; 802 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
797 struct bio *write_bio; 803 struct bio *write_bio;
798 struct o2hb_bio_wait_ctxt write_wc; 804 struct o2hb_bio_wait_ctxt write_wc;
799 805
800 if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes))) 806 ret = o2nm_configured_node_map(configured_nodes,
801 return; 807 sizeof(configured_nodes));
808 if (ret) {
809 mlog_errno(ret);
810 return ret;
811 }
802 812
803 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); 813 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
804 if (highest_node >= O2NM_MAX_NODES) { 814 if (highest_node >= O2NM_MAX_NODES) {
805 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); 815 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
806 return; 816 return -EINVAL;
807 } 817 }
808 818
809 /* No sense in reading the slots of nodes that don't exist 819 /* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
813 ret = o2hb_read_slots(reg, highest_node + 1); 823 ret = o2hb_read_slots(reg, highest_node + 1);
814 if (ret < 0) { 824 if (ret < 0) {
815 mlog_errno(ret); 825 mlog_errno(ret);
816 return; 826 return ret;
817 } 827 }
818 828
819 /* With an up to date view of the slots, we can check that no 829 /* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
831 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); 841 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
832 if (ret < 0) { 842 if (ret < 0) {
833 mlog_errno(ret); 843 mlog_errno(ret);
834 return; 844 return ret;
835 } 845 }
836 846
837 i = -1; 847 i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
847 */ 857 */
848 o2hb_wait_on_io(reg, &write_wc); 858 o2hb_wait_on_io(reg, &write_wc);
849 bio_put(write_bio); 859 bio_put(write_bio);
860 if (write_wc.wc_error) {
861 /* Do not re-arm the write timeout on I/O error - we
862 * can't be sure that the new block ever made it to
863 * disk */
864 mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
865 write_wc.wc_error, reg->hr_dev_name);
866 return write_wc.wc_error;
867 }
868
850 o2hb_arm_write_timeout(reg); 869 o2hb_arm_write_timeout(reg);
851 870
852 /* let the person who launched us know when things are steady */ 871 /* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
854 if (atomic_dec_and_test(&reg->hr_steady_iterations)) 873 if (atomic_dec_and_test(&reg->hr_steady_iterations))
855 wake_up(&o2hb_steady_queue); 874 wake_up(&o2hb_steady_queue);
856 } 875 }
876
877 return 0;
857} 878}
858 879
859/* Subtract b from a, storing the result in a. a *must* have a larger 880/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
913 * likely to time itself out. */ 934 * likely to time itself out. */
914 do_gettimeofday(&before_hb); 935 do_gettimeofday(&before_hb);
915 936
916 o2hb_do_disk_heartbeat(reg); 937 i = 0;
938 do {
939 ret = o2hb_do_disk_heartbeat(reg);
940 } while (ret && ++i < 2);
917 941
918 do_gettimeofday(&after_hb); 942 do_gettimeofday(&after_hb);
919 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 943 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4744ee..74ca4e5f9765 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
139 return; 139 return;
140 } 140 }
141 141
142 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
143 "Lockres %s, requested ivmode. flags 0x%x\n",
144 lockres->l_name, lockres->l_flags);
145
142 /* we're downconverting. */ 146 /* we're downconverting. */
143 if (lockres->l_requested < lockres->l_level) { 147 if (lockres->l_requested < lockres->l_level) {
144 if (lockres->l_requested <= 148 if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
229 233
230 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); 234 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
231 235
232 if (status != DLM_NORMAL) 236 if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
233 mlog(ML_ERROR, "Dlm returns status %d\n", status); 237 mlog(ML_ERROR, "Dlm returns status %d\n", status);
234 238
235 spin_lock(&lockres->l_lock); 239 spin_lock(&lockres->l_lock);
236 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) 240 /* The teardown flag gets set early during the unlock process,
241 * so test the cancel flag to make sure that this ast isn't
242 * for a concurrent cancel. */
243 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
244 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
237 lockres->l_level = LKM_IVMODE; 245 lockres->l_level = LKM_IVMODE;
238 else { 246 } else if (status == DLM_CANCELGRANT) {
247 mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
248 lockres->l_name, lockres->l_flags);
249 /* We tried to cancel a convert request, but it was
250 * already granted. Don't clear the busy flag - the
251 * ast should've done this already. */
252 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
253 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
254 goto out_noclear;
255 } else {
256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
257 /* Cancel succeeded, we want to re-queue */
258 mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
259 lockres->l_name, lockres->l_flags);
239 lockres->l_requested = LKM_IVMODE; /* cancel an 260 lockres->l_requested = LKM_IVMODE; /* cancel an
240 * upconvert 261 * upconvert
241 * request. */ 262 * request. */
242 lockres->l_flags &= ~USER_LOCK_IN_CANCEL; 263 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
243 /* we want the unblock thread to look at it again 264 /* we want the unblock thread to look at it again
244 * now. */ 265 * now. */
245 __user_dlm_queue_lockres(lockres); 266 if (lockres->l_flags & USER_LOCK_BLOCKED)
267 __user_dlm_queue_lockres(lockres);
246 } 268 }
247 269
248 lockres->l_flags &= ~USER_LOCK_BUSY; 270 lockres->l_flags &= ~USER_LOCK_BUSY;
271out_noclear:
249 spin_unlock(&lockres->l_lock); 272 spin_unlock(&lockres->l_lock);
250 273
251 wake_up(&lockres->l_event); 274 wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *opaque)
268 291
269 spin_lock(&lockres->l_lock); 292 spin_lock(&lockres->l_lock);
270 293
271 BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); 294 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
272 BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED)); 295 "Lockres %s, flags 0x%x\n",
296 lockres->l_name, lockres->l_flags);
273 297
274 /* notice that we don't clear USER_LOCK_BLOCKED here. That's 298 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
275 * for user_ast to do. */ 299 * set, we want user_ast clear it. */
276 lockres->l_flags &= ~USER_LOCK_QUEUED; 300 lockres->l_flags &= ~USER_LOCK_QUEUED;
277 301
302 /* It's valid to get here and no longer be blocked - if we get
303 * several basts in a row, we might be queued by the first
304 * one, the unblock thread might run and clear the queued
305 * flag, and finally we might get another bast which re-queues
306 * us before our ast for the downconvert is called. */
307 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
308 mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
309 lockres->l_name, lockres->l_flags);
310 spin_unlock(&lockres->l_lock);
311 goto drop_ref;
312 }
313
278 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 314 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
279 mlog(0, "lock is in teardown so we do nothing\n"); 315 mlog(0, "lock is in teardown so we do nothing\n");
280 spin_unlock(&lockres->l_lock); 316 spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *opaque)
282 } 318 }
283 319
284 if (lockres->l_flags & USER_LOCK_BUSY) { 320 if (lockres->l_flags & USER_LOCK_BUSY) {
285 mlog(0, "BUSY flag detected...\n"); 321 mlog(0, "Cancel lock %s, flags 0x%x\n",
322 lockres->l_name, lockres->l_flags);
323
286 if (lockres->l_flags & USER_LOCK_IN_CANCEL) { 324 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
287 spin_unlock(&lockres->l_lock); 325 spin_unlock(&lockres->l_lock);
288 goto drop_ref; 326 goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *opaque)
296 LKM_CANCEL, 334 LKM_CANCEL,
297 user_unlock_ast, 335 user_unlock_ast,
298 lockres); 336 lockres);
299 if (status == DLM_CANCELGRANT) { 337 if (status != DLM_NORMAL)
300 /* If we got this, then the ast was fired
301 * before we could cancel. We cleanup our
302 * state, and restart the function. */
303 spin_lock(&lockres->l_lock);
304 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
305 spin_unlock(&lockres->l_lock);
306 } else if (status != DLM_NORMAL)
307 user_log_dlm_error("dlmunlock", status, lockres); 338 user_log_dlm_error("dlmunlock", status, lockres);
308 goto drop_ref; 339 goto drop_ref;
309 } 340 }
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
581 mlog(0, "asked to destroy %s\n", lockres->l_name); 612 mlog(0, "asked to destroy %s\n", lockres->l_name);
582 613
583 spin_lock(&lockres->l_lock); 614 spin_lock(&lockres->l_lock);
615 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
616 mlog(0, "Lock is already torn down\n");
617 spin_unlock(&lockres->l_lock);
618 return 0;
619 }
620
621 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
622
584 while (lockres->l_flags & USER_LOCK_BUSY) { 623 while (lockres->l_flags & USER_LOCK_BUSY) {
585 spin_unlock(&lockres->l_lock); 624 spin_unlock(&lockres->l_lock);
586 625
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
606 645
607 lockres->l_flags &= ~USER_LOCK_ATTACHED; 646 lockres->l_flags &= ~USER_LOCK_ATTACHED;
608 lockres->l_flags |= USER_LOCK_BUSY; 647 lockres->l_flags |= USER_LOCK_BUSY;
609 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
610 spin_unlock(&lockres->l_lock); 648 spin_unlock(&lockres->l_lock);
611 649
612 mlog(0, "unlocking lockres %s\n", lockres->l_name); 650 mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a6a46b..581eb451a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct inode *inode,
260 if (new_i_size == le64_to_cpu(fe->i_size)) 260 if (new_i_size == le64_to_cpu(fe->i_size))
261 goto bail; 261 goto bail;
262 262
263 /* This forces other nodes to sync and drop their pages. Do
264 * this even if we have a truncate without allocation change -
265 * ocfs2 cluster sizes can be much greater than page size, so
266 * we have to truncate them anyway. */
267 status = ocfs2_data_lock(inode, 1);
268 if (status < 0) {
269 mlog_errno(status);
270 goto bail;
271 }
272 ocfs2_data_unlock(inode, 1);
273
263 if (le32_to_cpu(fe->i_clusters) == 274 if (le32_to_cpu(fe->i_clusters) ==
264 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { 275 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
265 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", 276 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct inode *inode,
272 goto bail; 283 goto bail;
273 } 284 }
274 285
275 /* This forces other nodes to sync and drop their pages */
276 status = ocfs2_data_lock(inode, 1);
277 if (status < 0) {
278 mlog_errno(status);
279 goto bail;
280 }
281 ocfs2_data_unlock(inode, 1);
282
283 /* alright, we're going to need to do a full blown alloc size 286 /* alright, we're going to need to do a full blown alloc size
284 * change. Orphan the inode so that recovery can complete the 287 * change. Orphan the inode so that recovery can complete the
285 * truncate if necessary. This does the task of marking 288 * truncate if necessary. This does the task of marking
diff --git a/fs/open.c b/fs/open.c
index c32c89d6d8db..53ec28c36777 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -331,7 +331,10 @@ out:
331 331
332asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) 332asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length)
333{ 333{
334 return do_sys_ftruncate(fd, length, 1); 334 long ret = do_sys_ftruncate(fd, length, 1);
335 /* avoid REGPARM breakage on x86: */
336 prevent_tail_call(ret);
337 return ret;
335} 338}
336 339
337/* LFS versions of truncate are only needed on 32 bit machines */ 340/* LFS versions of truncate are only needed on 32 bit machines */
@@ -343,7 +346,10 @@ asmlinkage long sys_truncate64(const char __user * path, loff_t length)
343 346
344asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) 347asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length)
345{ 348{
346 return do_sys_ftruncate(fd, length, 0); 349 long ret = do_sys_ftruncate(fd, length, 0);
350 /* avoid REGPARM breakage on x86: */
351 prevent_tail_call(ret);
352 return ret;
347} 353}
348#endif 354#endif
349 355
@@ -1093,20 +1099,30 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
1093 1099
1094asmlinkage long sys_open(const char __user *filename, int flags, int mode) 1100asmlinkage long sys_open(const char __user *filename, int flags, int mode)
1095{ 1101{
1102 long ret;
1103
1096 if (force_o_largefile()) 1104 if (force_o_largefile())
1097 flags |= O_LARGEFILE; 1105 flags |= O_LARGEFILE;
1098 1106
1099 return do_sys_open(AT_FDCWD, filename, flags, mode); 1107 ret = do_sys_open(AT_FDCWD, filename, flags, mode);
1108 /* avoid REGPARM breakage on x86: */
1109 prevent_tail_call(ret);
1110 return ret;
1100} 1111}
1101EXPORT_SYMBOL_GPL(sys_open); 1112EXPORT_SYMBOL_GPL(sys_open);
1102 1113
1103asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, 1114asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
1104 int mode) 1115 int mode)
1105{ 1116{
1117 long ret;
1118
1106 if (force_o_largefile()) 1119 if (force_o_largefile())
1107 flags |= O_LARGEFILE; 1120 flags |= O_LARGEFILE;
1108 1121
1109 return do_sys_open(dfd, filename, flags, mode); 1122 ret = do_sys_open(dfd, filename, flags, mode);
1123 /* avoid REGPARM breakage on x86: */
1124 prevent_tail_call(ret);
1125 return ret;
1110} 1126}
1111EXPORT_SYMBOL_GPL(sys_openat); 1127EXPORT_SYMBOL_GPL(sys_openat);
1112 1128
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index af0cb4b9e784..45ae7dd3c650 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -331,7 +331,9 @@ void delete_partition(struct gendisk *disk, int part)
331 devfs_remove("%s/part%d", disk->devfs_name, part); 331 devfs_remove("%s/part%d", disk->devfs_name, part);
332 if (p->holder_dir) 332 if (p->holder_dir)
333 kobject_unregister(p->holder_dir); 333 kobject_unregister(p->holder_dir);
334 kobject_unregister(&p->kobj); 334 kobject_uevent(&p->kobj, KOBJ_REMOVE);
335 kobject_del(&p->kobj);
336 kobject_put(&p->kobj);
335} 337}
336 338
337void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) 339void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
@@ -357,7 +359,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
357 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%s%d",disk->kobj.name,part); 359 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%s%d",disk->kobj.name,part);
358 p->kobj.parent = &disk->kobj; 360 p->kobj.parent = &disk->kobj;
359 p->kobj.ktype = &ktype_part; 361 p->kobj.ktype = &ktype_part;
360 kobject_register(&p->kobj); 362 kobject_init(&p->kobj);
363 kobject_add(&p->kobj);
364 if (!disk->part_uevent_suppress)
365 kobject_uevent(&p->kobj, KOBJ_ADD);
361 partition_sysfs_add_subdir(p); 366 partition_sysfs_add_subdir(p);
362 disk->part[part-1] = p; 367 disk->part[part-1] = p;
363} 368}
@@ -367,6 +372,7 @@ static char *make_block_name(struct gendisk *disk)
367 char *name; 372 char *name;
368 static char *block_str = "block:"; 373 static char *block_str = "block:";
369 int size; 374 int size;
375 char *s;
370 376
371 size = strlen(block_str) + strlen(disk->disk_name) + 1; 377 size = strlen(block_str) + strlen(disk->disk_name) + 1;
372 name = kmalloc(size, GFP_KERNEL); 378 name = kmalloc(size, GFP_KERNEL);
@@ -374,6 +380,10 @@ static char *make_block_name(struct gendisk *disk)
374 return NULL; 380 return NULL;
375 strcpy(name, block_str); 381 strcpy(name, block_str);
376 strcat(name, disk->disk_name); 382 strcat(name, disk->disk_name);
383 /* ewww... some of these buggers have / in name... */
384 s = strchr(name, '/');
385 if (s)
386 *s = '!';
377 return name; 387 return name;
378} 388}
379 389
@@ -395,6 +405,8 @@ void register_disk(struct gendisk *disk)
395{ 405{
396 struct block_device *bdev; 406 struct block_device *bdev;
397 char *s; 407 char *s;
408 int i;
409 struct hd_struct *p;
398 int err; 410 int err;
399 411
400 strlcpy(disk->kobj.name,disk->disk_name,KOBJ_NAME_LEN); 412 strlcpy(disk->kobj.name,disk->disk_name,KOBJ_NAME_LEN);
@@ -406,13 +418,12 @@ void register_disk(struct gendisk *disk)
406 return; 418 return;
407 disk_sysfs_symlinks(disk); 419 disk_sysfs_symlinks(disk);
408 disk_sysfs_add_subdirs(disk); 420 disk_sysfs_add_subdirs(disk);
409 kobject_uevent(&disk->kobj, KOBJ_ADD);
410 421
411 /* No minors to use for partitions */ 422 /* No minors to use for partitions */
412 if (disk->minors == 1) { 423 if (disk->minors == 1) {
413 if (disk->devfs_name[0] != '\0') 424 if (disk->devfs_name[0] != '\0')
414 devfs_add_disk(disk); 425 devfs_add_disk(disk);
415 return; 426 goto exit;
416 } 427 }
417 428
418 /* always add handle for the whole disk */ 429 /* always add handle for the whole disk */
@@ -420,16 +431,32 @@ void register_disk(struct gendisk *disk)
420 431
421 /* No such device (e.g., media were just removed) */ 432 /* No such device (e.g., media were just removed) */
422 if (!get_capacity(disk)) 433 if (!get_capacity(disk))
423 return; 434 goto exit;
424 435
425 bdev = bdget_disk(disk, 0); 436 bdev = bdget_disk(disk, 0);
426 if (!bdev) 437 if (!bdev)
427 return; 438 goto exit;
428 439
440 /* scan partition table, but suppress uevents */
429 bdev->bd_invalidated = 1; 441 bdev->bd_invalidated = 1;
430 if (blkdev_get(bdev, FMODE_READ, 0) < 0) 442 disk->part_uevent_suppress = 1;
431 return; 443 err = blkdev_get(bdev, FMODE_READ, 0);
444 disk->part_uevent_suppress = 0;
445 if (err < 0)
446 goto exit;
432 blkdev_put(bdev); 447 blkdev_put(bdev);
448
449exit:
450 /* announce disk after possible partitions are already created */
451 kobject_uevent(&disk->kobj, KOBJ_ADD);
452
453 /* announce possible partitions */
454 for (i = 1; i < disk->minors; i++) {
455 p = disk->part[i-1];
456 if (!p || !p->nr_sects)
457 continue;
458 kobject_uevent(&p->kobj, KOBJ_ADD);
459 }
433} 460}
434 461
435int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 462int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
diff --git a/fs/pipe.c b/fs/pipe.c
index 795df987cd38..7fefb10db8d9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -36,7 +36,7 @@
36 */ 36 */
37 37
38/* Drop the inode semaphore and wait for a pipe event, atomically */ 38/* Drop the inode semaphore and wait for a pipe event, atomically */
39void pipe_wait(struct inode * inode) 39void pipe_wait(struct pipe_inode_info *pipe)
40{ 40{
41 DEFINE_WAIT(wait); 41 DEFINE_WAIT(wait);
42 42
@@ -44,11 +44,14 @@ void pipe_wait(struct inode * inode)
44 * Pipes are system-local resources, so sleeping on them 44 * Pipes are system-local resources, so sleeping on them
45 * is considered a noninteractive wait: 45 * is considered a noninteractive wait:
46 */ 46 */
47 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); 47 prepare_to_wait(&pipe->wait, &wait,
48 mutex_unlock(PIPE_MUTEX(*inode)); 48 TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
49 if (pipe->inode)
50 mutex_unlock(&pipe->inode->i_mutex);
49 schedule(); 51 schedule();
50 finish_wait(PIPE_WAIT(*inode), &wait); 52 finish_wait(&pipe->wait, &wait);
51 mutex_lock(PIPE_MUTEX(*inode)); 53 if (pipe->inode)
54 mutex_lock(&pipe->inode->i_mutex);
52} 55}
53 56
54static int 57static int
@@ -91,7 +94,8 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
91 return 0; 94 return 0;
92} 95}
93 96
94static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) 97static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf)
95{ 99{
96 struct page *page = buf->page; 100 struct page *page = buf->page;
97 101
@@ -100,42 +104,46 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff
100 /* 104 /*
101 * If nobody else uses this page, and we don't already have a 105 * If nobody else uses this page, and we don't already have a
102 * temporary page, let's keep track of it as a one-deep 106 * temporary page, let's keep track of it as a one-deep
103 * allocation cache 107 * allocation cache. (Otherwise just release our reference to it)
104 */ 108 */
105 if (page_count(page) == 1 && !info->tmp_page) { 109 if (page_count(page) == 1 && !pipe->tmp_page)
106 info->tmp_page = page; 110 pipe->tmp_page = page;
107 return; 111 else
108 } 112 page_cache_release(page);
109
110 /*
111 * Otherwise just release our reference to it
112 */
113 page_cache_release(page);
114} 113}
115 114
116static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf) 115static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
116 struct pipe_buffer *buf)
117{ 117{
118 return kmap(buf->page); 118 return kmap(buf->page);
119} 119}
120 120
121static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) 121static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
122 struct pipe_buffer *buf)
122{ 123{
123 kunmap(buf->page); 124 kunmap(buf->page);
124} 125}
125 126
126static int anon_pipe_buf_steal(struct pipe_inode_info *info, 127static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
127 struct pipe_buffer *buf) 128 struct pipe_buffer *buf)
128{ 129{
129 buf->flags |= PIPE_BUF_FLAG_STOLEN; 130 buf->flags |= PIPE_BUF_FLAG_STOLEN;
130 return 0; 131 return 0;
131} 132}
132 133
134static void anon_pipe_buf_get(struct pipe_inode_info *info,
135 struct pipe_buffer *buf)
136{
137 page_cache_get(buf->page);
138}
139
133static struct pipe_buf_operations anon_pipe_buf_ops = { 140static struct pipe_buf_operations anon_pipe_buf_ops = {
134 .can_merge = 1, 141 .can_merge = 1,
135 .map = anon_pipe_buf_map, 142 .map = anon_pipe_buf_map,
136 .unmap = anon_pipe_buf_unmap, 143 .unmap = anon_pipe_buf_unmap,
137 .release = anon_pipe_buf_release, 144 .release = anon_pipe_buf_release,
138 .steal = anon_pipe_buf_steal, 145 .steal = anon_pipe_buf_steal,
146 .get = anon_pipe_buf_get,
139}; 147};
140 148
141static ssize_t 149static ssize_t
@@ -143,7 +151,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
143 unsigned long nr_segs, loff_t *ppos) 151 unsigned long nr_segs, loff_t *ppos)
144{ 152{
145 struct inode *inode = filp->f_dentry->d_inode; 153 struct inode *inode = filp->f_dentry->d_inode;
146 struct pipe_inode_info *info; 154 struct pipe_inode_info *pipe;
147 int do_wakeup; 155 int do_wakeup;
148 ssize_t ret; 156 ssize_t ret;
149 struct iovec *iov = (struct iovec *)_iov; 157 struct iovec *iov = (struct iovec *)_iov;
@@ -156,13 +164,13 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
156 164
157 do_wakeup = 0; 165 do_wakeup = 0;
158 ret = 0; 166 ret = 0;
159 mutex_lock(PIPE_MUTEX(*inode)); 167 mutex_lock(&inode->i_mutex);
160 info = inode->i_pipe; 168 pipe = inode->i_pipe;
161 for (;;) { 169 for (;;) {
162 int bufs = info->nrbufs; 170 int bufs = pipe->nrbufs;
163 if (bufs) { 171 if (bufs) {
164 int curbuf = info->curbuf; 172 int curbuf = pipe->curbuf;
165 struct pipe_buffer *buf = info->bufs + curbuf; 173 struct pipe_buffer *buf = pipe->bufs + curbuf;
166 struct pipe_buf_operations *ops = buf->ops; 174 struct pipe_buf_operations *ops = buf->ops;
167 void *addr; 175 void *addr;
168 size_t chars = buf->len; 176 size_t chars = buf->len;
@@ -171,16 +179,17 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
171 if (chars > total_len) 179 if (chars > total_len)
172 chars = total_len; 180 chars = total_len;
173 181
174 addr = ops->map(filp, info, buf); 182 addr = ops->map(filp, pipe, buf);
175 if (IS_ERR(addr)) { 183 if (IS_ERR(addr)) {
176 if (!ret) 184 if (!ret)
177 ret = PTR_ERR(addr); 185 ret = PTR_ERR(addr);
178 break; 186 break;
179 } 187 }
180 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); 188 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
181 ops->unmap(info, buf); 189 ops->unmap(pipe, buf);
182 if (unlikely(error)) { 190 if (unlikely(error)) {
183 if (!ret) ret = -EFAULT; 191 if (!ret)
192 ret = -EFAULT;
184 break; 193 break;
185 } 194 }
186 ret += chars; 195 ret += chars;
@@ -188,10 +197,10 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
188 buf->len -= chars; 197 buf->len -= chars;
189 if (!buf->len) { 198 if (!buf->len) {
190 buf->ops = NULL; 199 buf->ops = NULL;
191 ops->release(info, buf); 200 ops->release(pipe, buf);
192 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1); 201 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
193 info->curbuf = curbuf; 202 pipe->curbuf = curbuf;
194 info->nrbufs = --bufs; 203 pipe->nrbufs = --bufs;
195 do_wakeup = 1; 204 do_wakeup = 1;
196 } 205 }
197 total_len -= chars; 206 total_len -= chars;
@@ -200,9 +209,9 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
200 } 209 }
201 if (bufs) /* More to do? */ 210 if (bufs) /* More to do? */
202 continue; 211 continue;
203 if (!PIPE_WRITERS(*inode)) 212 if (!pipe->writers)
204 break; 213 break;
205 if (!PIPE_WAITING_WRITERS(*inode)) { 214 if (!pipe->waiting_writers) {
206 /* syscall merging: Usually we must not sleep 215 /* syscall merging: Usually we must not sleep
207 * if O_NONBLOCK is set, or if we got some data. 216 * if O_NONBLOCK is set, or if we got some data.
208 * But if a writer sleeps in kernel space, then 217 * But if a writer sleeps in kernel space, then
@@ -216,20 +225,22 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
216 } 225 }
217 } 226 }
218 if (signal_pending(current)) { 227 if (signal_pending(current)) {
219 if (!ret) ret = -ERESTARTSYS; 228 if (!ret)
229 ret = -ERESTARTSYS;
220 break; 230 break;
221 } 231 }
222 if (do_wakeup) { 232 if (do_wakeup) {
223 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 233 wake_up_interruptible_sync(&pipe->wait);
224 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 234 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
225 } 235 }
226 pipe_wait(inode); 236 pipe_wait(pipe);
227 } 237 }
228 mutex_unlock(PIPE_MUTEX(*inode)); 238 mutex_unlock(&inode->i_mutex);
229 /* Signal writers asynchronously that there is more room. */ 239
240 /* Signal writers asynchronously that there is more room. */
230 if (do_wakeup) { 241 if (do_wakeup) {
231 wake_up_interruptible(PIPE_WAIT(*inode)); 242 wake_up_interruptible(&pipe->wait);
232 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 243 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
233 } 244 }
234 if (ret > 0) 245 if (ret > 0)
235 file_accessed(filp); 246 file_accessed(filp);
@@ -240,6 +251,7 @@ static ssize_t
240pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) 251pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
241{ 252{
242 struct iovec iov = { .iov_base = buf, .iov_len = count }; 253 struct iovec iov = { .iov_base = buf, .iov_len = count };
254
243 return pipe_readv(filp, &iov, 1, ppos); 255 return pipe_readv(filp, &iov, 1, ppos);
244} 256}
245 257
@@ -248,7 +260,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
248 unsigned long nr_segs, loff_t *ppos) 260 unsigned long nr_segs, loff_t *ppos)
249{ 261{
250 struct inode *inode = filp->f_dentry->d_inode; 262 struct inode *inode = filp->f_dentry->d_inode;
251 struct pipe_inode_info *info; 263 struct pipe_inode_info *pipe;
252 ssize_t ret; 264 ssize_t ret;
253 int do_wakeup; 265 int do_wakeup;
254 struct iovec *iov = (struct iovec *)_iov; 266 struct iovec *iov = (struct iovec *)_iov;
@@ -262,10 +274,10 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
262 274
263 do_wakeup = 0; 275 do_wakeup = 0;
264 ret = 0; 276 ret = 0;
265 mutex_lock(PIPE_MUTEX(*inode)); 277 mutex_lock(&inode->i_mutex);
266 info = inode->i_pipe; 278 pipe = inode->i_pipe;
267 279
268 if (!PIPE_READERS(*inode)) { 280 if (!pipe->readers) {
269 send_sig(SIGPIPE, current, 0); 281 send_sig(SIGPIPE, current, 0);
270 ret = -EPIPE; 282 ret = -EPIPE;
271 goto out; 283 goto out;
@@ -273,23 +285,25 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
273 285
274 /* We try to merge small writes */ 286 /* We try to merge small writes */
275 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ 287 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
276 if (info->nrbufs && chars != 0) { 288 if (pipe->nrbufs && chars != 0) {
277 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1); 289 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
278 struct pipe_buffer *buf = info->bufs + lastbuf; 290 (PIPE_BUFFERS-1);
291 struct pipe_buffer *buf = pipe->bufs + lastbuf;
279 struct pipe_buf_operations *ops = buf->ops; 292 struct pipe_buf_operations *ops = buf->ops;
280 int offset = buf->offset + buf->len; 293 int offset = buf->offset + buf->len;
294
281 if (ops->can_merge && offset + chars <= PAGE_SIZE) { 295 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
282 void *addr; 296 void *addr;
283 int error; 297 int error;
284 298
285 addr = ops->map(filp, info, buf); 299 addr = ops->map(filp, pipe, buf);
286 if (IS_ERR(addr)) { 300 if (IS_ERR(addr)) {
287 error = PTR_ERR(addr); 301 error = PTR_ERR(addr);
288 goto out; 302 goto out;
289 } 303 }
290 error = pipe_iov_copy_from_user(offset + addr, iov, 304 error = pipe_iov_copy_from_user(offset + addr, iov,
291 chars); 305 chars);
292 ops->unmap(info, buf); 306 ops->unmap(pipe, buf);
293 ret = error; 307 ret = error;
294 do_wakeup = 1; 308 do_wakeup = 1;
295 if (error) 309 if (error)
@@ -304,16 +318,18 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
304 318
305 for (;;) { 319 for (;;) {
306 int bufs; 320 int bufs;
307 if (!PIPE_READERS(*inode)) { 321
322 if (!pipe->readers) {
308 send_sig(SIGPIPE, current, 0); 323 send_sig(SIGPIPE, current, 0);
309 if (!ret) ret = -EPIPE; 324 if (!ret)
325 ret = -EPIPE;
310 break; 326 break;
311 } 327 }
312 bufs = info->nrbufs; 328 bufs = pipe->nrbufs;
313 if (bufs < PIPE_BUFFERS) { 329 if (bufs < PIPE_BUFFERS) {
314 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1); 330 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
315 struct pipe_buffer *buf = info->bufs + newbuf; 331 struct pipe_buffer *buf = pipe->bufs + newbuf;
316 struct page *page = info->tmp_page; 332 struct page *page = pipe->tmp_page;
317 int error; 333 int error;
318 334
319 if (!page) { 335 if (!page) {
@@ -322,9 +338,9 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
322 ret = ret ? : -ENOMEM; 338 ret = ret ? : -ENOMEM;
323 break; 339 break;
324 } 340 }
325 info->tmp_page = page; 341 pipe->tmp_page = page;
326 } 342 }
327 /* Always wakeup, even if the copy fails. Otherwise 343 /* Always wake up, even if the copy fails. Otherwise
328 * we lock up (O_NONBLOCK-)readers that sleep due to 344 * we lock up (O_NONBLOCK-)readers that sleep due to
329 * syscall merging. 345 * syscall merging.
330 * FIXME! Is this really true? 346 * FIXME! Is this really true?
@@ -337,7 +353,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
337 error = pipe_iov_copy_from_user(kmap(page), iov, chars); 353 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
338 kunmap(page); 354 kunmap(page);
339 if (unlikely(error)) { 355 if (unlikely(error)) {
340 if (!ret) ret = -EFAULT; 356 if (!ret)
357 ret = -EFAULT;
341 break; 358 break;
342 } 359 }
343 ret += chars; 360 ret += chars;
@@ -347,8 +364,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
347 buf->ops = &anon_pipe_buf_ops; 364 buf->ops = &anon_pipe_buf_ops;
348 buf->offset = 0; 365 buf->offset = 0;
349 buf->len = chars; 366 buf->len = chars;
350 info->nrbufs = ++bufs; 367 pipe->nrbufs = ++bufs;
351 info->tmp_page = NULL; 368 pipe->tmp_page = NULL;
352 369
353 total_len -= chars; 370 total_len -= chars;
354 if (!total_len) 371 if (!total_len)
@@ -357,27 +374,29 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
357 if (bufs < PIPE_BUFFERS) 374 if (bufs < PIPE_BUFFERS)
358 continue; 375 continue;
359 if (filp->f_flags & O_NONBLOCK) { 376 if (filp->f_flags & O_NONBLOCK) {
360 if (!ret) ret = -EAGAIN; 377 if (!ret)
378 ret = -EAGAIN;
361 break; 379 break;
362 } 380 }
363 if (signal_pending(current)) { 381 if (signal_pending(current)) {
364 if (!ret) ret = -ERESTARTSYS; 382 if (!ret)
383 ret = -ERESTARTSYS;
365 break; 384 break;
366 } 385 }
367 if (do_wakeup) { 386 if (do_wakeup) {
368 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 387 wake_up_interruptible_sync(&pipe->wait);
369 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 388 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
370 do_wakeup = 0; 389 do_wakeup = 0;
371 } 390 }
372 PIPE_WAITING_WRITERS(*inode)++; 391 pipe->waiting_writers++;
373 pipe_wait(inode); 392 pipe_wait(pipe);
374 PIPE_WAITING_WRITERS(*inode)--; 393 pipe->waiting_writers--;
375 } 394 }
376out: 395out:
377 mutex_unlock(PIPE_MUTEX(*inode)); 396 mutex_unlock(&inode->i_mutex);
378 if (do_wakeup) { 397 if (do_wakeup) {
379 wake_up_interruptible(PIPE_WAIT(*inode)); 398 wake_up_interruptible(&pipe->wait);
380 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 399 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
381 } 400 }
382 if (ret > 0) 401 if (ret > 0)
383 file_update_time(filp); 402 file_update_time(filp);
@@ -389,6 +408,7 @@ pipe_write(struct file *filp, const char __user *buf,
389 size_t count, loff_t *ppos) 408 size_t count, loff_t *ppos)
390{ 409{
391 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 410 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
411
392 return pipe_writev(filp, &iov, 1, ppos); 412 return pipe_writev(filp, &iov, 1, ppos);
393} 413}
394 414
@@ -399,7 +419,8 @@ bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
399} 419}
400 420
401static ssize_t 421static ssize_t
402bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) 422bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
423 loff_t *ppos)
403{ 424{
404 return -EBADF; 425 return -EBADF;
405} 426}
@@ -409,21 +430,22 @@ pipe_ioctl(struct inode *pino, struct file *filp,
409 unsigned int cmd, unsigned long arg) 430 unsigned int cmd, unsigned long arg)
410{ 431{
411 struct inode *inode = filp->f_dentry->d_inode; 432 struct inode *inode = filp->f_dentry->d_inode;
412 struct pipe_inode_info *info; 433 struct pipe_inode_info *pipe;
413 int count, buf, nrbufs; 434 int count, buf, nrbufs;
414 435
415 switch (cmd) { 436 switch (cmd) {
416 case FIONREAD: 437 case FIONREAD:
417 mutex_lock(PIPE_MUTEX(*inode)); 438 mutex_lock(&inode->i_mutex);
418 info = inode->i_pipe; 439 pipe = inode->i_pipe;
419 count = 0; 440 count = 0;
420 buf = info->curbuf; 441 buf = pipe->curbuf;
421 nrbufs = info->nrbufs; 442 nrbufs = pipe->nrbufs;
422 while (--nrbufs >= 0) { 443 while (--nrbufs >= 0) {
423 count += info->bufs[buf].len; 444 count += pipe->bufs[buf].len;
424 buf = (buf+1) & (PIPE_BUFFERS-1); 445 buf = (buf+1) & (PIPE_BUFFERS-1);
425 } 446 }
426 mutex_unlock(PIPE_MUTEX(*inode)); 447 mutex_unlock(&inode->i_mutex);
448
427 return put_user(count, (int __user *)arg); 449 return put_user(count, (int __user *)arg);
428 default: 450 default:
429 return -EINVAL; 451 return -EINVAL;
@@ -436,17 +458,17 @@ pipe_poll(struct file *filp, poll_table *wait)
436{ 458{
437 unsigned int mask; 459 unsigned int mask;
438 struct inode *inode = filp->f_dentry->d_inode; 460 struct inode *inode = filp->f_dentry->d_inode;
439 struct pipe_inode_info *info = inode->i_pipe; 461 struct pipe_inode_info *pipe = inode->i_pipe;
440 int nrbufs; 462 int nrbufs;
441 463
442 poll_wait(filp, PIPE_WAIT(*inode), wait); 464 poll_wait(filp, &pipe->wait, wait);
443 465
444 /* Reading only -- no need for acquiring the semaphore. */ 466 /* Reading only -- no need for acquiring the semaphore. */
445 nrbufs = info->nrbufs; 467 nrbufs = pipe->nrbufs;
446 mask = 0; 468 mask = 0;
447 if (filp->f_mode & FMODE_READ) { 469 if (filp->f_mode & FMODE_READ) {
448 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0; 470 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
449 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode)) 471 if (!pipe->writers && filp->f_version != pipe->w_counter)
450 mask |= POLLHUP; 472 mask |= POLLHUP;
451 } 473 }
452 474
@@ -456,7 +478,7 @@ pipe_poll(struct file *filp, poll_table *wait)
456 * Most Unices do not set POLLERR for FIFOs but on Linux they 478 * Most Unices do not set POLLERR for FIFOs but on Linux they
457 * behave exactly like pipes for poll(). 479 * behave exactly like pipes for poll().
458 */ 480 */
459 if (!PIPE_READERS(*inode)) 481 if (!pipe->readers)
460 mask |= POLLERR; 482 mask |= POLLERR;
461 } 483 }
462 484
@@ -466,17 +488,21 @@ pipe_poll(struct file *filp, poll_table *wait)
466static int 488static int
467pipe_release(struct inode *inode, int decr, int decw) 489pipe_release(struct inode *inode, int decr, int decw)
468{ 490{
469 mutex_lock(PIPE_MUTEX(*inode)); 491 struct pipe_inode_info *pipe;
470 PIPE_READERS(*inode) -= decr; 492
471 PIPE_WRITERS(*inode) -= decw; 493 mutex_lock(&inode->i_mutex);
472 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { 494 pipe = inode->i_pipe;
495 pipe->readers -= decr;
496 pipe->writers -= decw;
497
498 if (!pipe->readers && !pipe->writers) {
473 free_pipe_info(inode); 499 free_pipe_info(inode);
474 } else { 500 } else {
475 wake_up_interruptible(PIPE_WAIT(*inode)); 501 wake_up_interruptible(&pipe->wait);
476 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 502 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
477 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 503 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
478 } 504 }
479 mutex_unlock(PIPE_MUTEX(*inode)); 505 mutex_unlock(&inode->i_mutex);
480 506
481 return 0; 507 return 0;
482} 508}
@@ -487,9 +513,9 @@ pipe_read_fasync(int fd, struct file *filp, int on)
487 struct inode *inode = filp->f_dentry->d_inode; 513 struct inode *inode = filp->f_dentry->d_inode;
488 int retval; 514 int retval;
489 515
490 mutex_lock(PIPE_MUTEX(*inode)); 516 mutex_lock(&inode->i_mutex);
491 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 517 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
492 mutex_unlock(PIPE_MUTEX(*inode)); 518 mutex_unlock(&inode->i_mutex);
493 519
494 if (retval < 0) 520 if (retval < 0)
495 return retval; 521 return retval;
@@ -504,9 +530,9 @@ pipe_write_fasync(int fd, struct file *filp, int on)
504 struct inode *inode = filp->f_dentry->d_inode; 530 struct inode *inode = filp->f_dentry->d_inode;
505 int retval; 531 int retval;
506 532
507 mutex_lock(PIPE_MUTEX(*inode)); 533 mutex_lock(&inode->i_mutex);
508 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 534 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
509 mutex_unlock(PIPE_MUTEX(*inode)); 535 mutex_unlock(&inode->i_mutex);
510 536
511 if (retval < 0) 537 if (retval < 0)
512 return retval; 538 return retval;
@@ -519,16 +545,17 @@ static int
519pipe_rdwr_fasync(int fd, struct file *filp, int on) 545pipe_rdwr_fasync(int fd, struct file *filp, int on)
520{ 546{
521 struct inode *inode = filp->f_dentry->d_inode; 547 struct inode *inode = filp->f_dentry->d_inode;
548 struct pipe_inode_info *pipe = inode->i_pipe;
522 int retval; 549 int retval;
523 550
524 mutex_lock(PIPE_MUTEX(*inode)); 551 mutex_lock(&inode->i_mutex);
525 552
526 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); 553 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
527 554
528 if (retval >= 0) 555 if (retval >= 0)
529 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); 556 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
530 557
531 mutex_unlock(PIPE_MUTEX(*inode)); 558 mutex_unlock(&inode->i_mutex);
532 559
533 if (retval < 0) 560 if (retval < 0)
534 return retval; 561 return retval;
@@ -567,9 +594,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
567{ 594{
568 /* We could have perhaps used atomic_t, but this and friends 595 /* We could have perhaps used atomic_t, but this and friends
569 below are the only places. So it doesn't seem worthwhile. */ 596 below are the only places. So it doesn't seem worthwhile. */
570 mutex_lock(PIPE_MUTEX(*inode)); 597 mutex_lock(&inode->i_mutex);
571 PIPE_READERS(*inode)++; 598 inode->i_pipe->readers++;
572 mutex_unlock(PIPE_MUTEX(*inode)); 599 mutex_unlock(&inode->i_mutex);
573 600
574 return 0; 601 return 0;
575} 602}
@@ -577,9 +604,9 @@ pipe_read_open(struct inode *inode, struct file *filp)
577static int 604static int
578pipe_write_open(struct inode *inode, struct file *filp) 605pipe_write_open(struct inode *inode, struct file *filp)
579{ 606{
580 mutex_lock(PIPE_MUTEX(*inode)); 607 mutex_lock(&inode->i_mutex);
581 PIPE_WRITERS(*inode)++; 608 inode->i_pipe->writers++;
582 mutex_unlock(PIPE_MUTEX(*inode)); 609 mutex_unlock(&inode->i_mutex);
583 610
584 return 0; 611 return 0;
585} 612}
@@ -587,12 +614,12 @@ pipe_write_open(struct inode *inode, struct file *filp)
587static int 614static int
588pipe_rdwr_open(struct inode *inode, struct file *filp) 615pipe_rdwr_open(struct inode *inode, struct file *filp)
589{ 616{
590 mutex_lock(PIPE_MUTEX(*inode)); 617 mutex_lock(&inode->i_mutex);
591 if (filp->f_mode & FMODE_READ) 618 if (filp->f_mode & FMODE_READ)
592 PIPE_READERS(*inode)++; 619 inode->i_pipe->readers++;
593 if (filp->f_mode & FMODE_WRITE) 620 if (filp->f_mode & FMODE_WRITE)
594 PIPE_WRITERS(*inode)++; 621 inode->i_pipe->writers++;
595 mutex_unlock(PIPE_MUTEX(*inode)); 622 mutex_unlock(&inode->i_mutex);
596 623
597 return 0; 624 return 0;
598} 625}
@@ -675,37 +702,38 @@ static struct file_operations rdwr_pipe_fops = {
675 .fasync = pipe_rdwr_fasync, 702 .fasync = pipe_rdwr_fasync,
676}; 703};
677 704
678void free_pipe_info(struct inode *inode) 705struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
706{
707 struct pipe_inode_info *pipe;
708
709 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
710 if (pipe) {
711 init_waitqueue_head(&pipe->wait);
712 pipe->r_counter = pipe->w_counter = 1;
713 pipe->inode = inode;
714 }
715
716 return pipe;
717}
718
719void __free_pipe_info(struct pipe_inode_info *pipe)
679{ 720{
680 int i; 721 int i;
681 struct pipe_inode_info *info = inode->i_pipe;
682 722
683 inode->i_pipe = NULL;
684 for (i = 0; i < PIPE_BUFFERS; i++) { 723 for (i = 0; i < PIPE_BUFFERS; i++) {
685 struct pipe_buffer *buf = info->bufs + i; 724 struct pipe_buffer *buf = pipe->bufs + i;
686 if (buf->ops) 725 if (buf->ops)
687 buf->ops->release(info, buf); 726 buf->ops->release(pipe, buf);
688 } 727 }
689 if (info->tmp_page) 728 if (pipe->tmp_page)
690 __free_page(info->tmp_page); 729 __free_page(pipe->tmp_page);
691 kfree(info); 730 kfree(pipe);
692} 731}
693 732
694struct inode* pipe_new(struct inode* inode) 733void free_pipe_info(struct inode *inode)
695{ 734{
696 struct pipe_inode_info *info; 735 __free_pipe_info(inode->i_pipe);
697 736 inode->i_pipe = NULL;
698 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
699 if (!info)
700 goto fail_page;
701 inode->i_pipe = info;
702
703 init_waitqueue_head(PIPE_WAIT(*inode));
704 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
705
706 return inode;
707fail_page:
708 return NULL;
709} 737}
710 738
711static struct vfsmount *pipe_mnt __read_mostly; 739static struct vfsmount *pipe_mnt __read_mostly;
@@ -713,6 +741,7 @@ static int pipefs_delete_dentry(struct dentry *dentry)
713{ 741{
714 return 1; 742 return 1;
715} 743}
744
716static struct dentry_operations pipefs_dentry_operations = { 745static struct dentry_operations pipefs_dentry_operations = {
717 .d_delete = pipefs_delete_dentry, 746 .d_delete = pipefs_delete_dentry,
718}; 747};
@@ -720,13 +749,17 @@ static struct dentry_operations pipefs_dentry_operations = {
720static struct inode * get_pipe_inode(void) 749static struct inode * get_pipe_inode(void)
721{ 750{
722 struct inode *inode = new_inode(pipe_mnt->mnt_sb); 751 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
752 struct pipe_inode_info *pipe;
723 753
724 if (!inode) 754 if (!inode)
725 goto fail_inode; 755 goto fail_inode;
726 756
727 if(!pipe_new(inode)) 757 pipe = alloc_pipe_info(inode);
758 if (!pipe)
728 goto fail_iput; 759 goto fail_iput;
729 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; 760 inode->i_pipe = pipe;
761
762 pipe->readers = pipe->writers = 1;
730 inode->i_fop = &rdwr_pipe_fops; 763 inode->i_fop = &rdwr_pipe_fops;
731 764
732 /* 765 /*
@@ -741,10 +774,12 @@ static struct inode * get_pipe_inode(void)
741 inode->i_gid = current->fsgid; 774 inode->i_gid = current->fsgid;
742 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 775 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
743 inode->i_blksize = PAGE_SIZE; 776 inode->i_blksize = PAGE_SIZE;
777
744 return inode; 778 return inode;
745 779
746fail_iput: 780fail_iput:
747 iput(inode); 781 iput(inode);
782
748fail_inode: 783fail_inode:
749 return NULL; 784 return NULL;
750} 785}
@@ -757,7 +792,7 @@ int do_pipe(int *fd)
757 struct inode * inode; 792 struct inode * inode;
758 struct file *f1, *f2; 793 struct file *f1, *f2;
759 int error; 794 int error;
760 int i,j; 795 int i, j;
761 796
762 error = -ENFILE; 797 error = -ENFILE;
763 f1 = get_empty_filp(); 798 f1 = get_empty_filp();
@@ -790,6 +825,7 @@ int do_pipe(int *fd)
790 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this); 825 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
791 if (!dentry) 826 if (!dentry)
792 goto close_f12_inode_i_j; 827 goto close_f12_inode_i_j;
828
793 dentry->d_op = &pipefs_dentry_operations; 829 dentry->d_op = &pipefs_dentry_operations;
794 d_add(dentry, inode); 830 d_add(dentry, inode);
795 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt)); 831 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
@@ -813,6 +849,7 @@ int do_pipe(int *fd)
813 fd_install(j, f2); 849 fd_install(j, f2);
814 fd[0] = i; 850 fd[0] = i;
815 fd[1] = j; 851 fd[1] = j;
852
816 return 0; 853 return 0;
817 854
818close_f12_inode_i_j: 855close_f12_inode_i_j:
@@ -837,8 +874,9 @@ no_files:
837 * d_name - pipe: will go nicely and kill the special-casing in procfs. 874 * d_name - pipe: will go nicely and kill the special-casing in procfs.
838 */ 875 */
839 876
840static struct super_block *pipefs_get_sb(struct file_system_type *fs_type, 877static struct super_block *
841 int flags, const char *dev_name, void *data) 878pipefs_get_sb(struct file_system_type *fs_type, int flags,
879 const char *dev_name, void *data)
842{ 880{
843 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 881 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
844} 882}
@@ -852,6 +890,7 @@ static struct file_system_type pipe_fs_type = {
852static int __init init_pipe_fs(void) 890static int __init init_pipe_fs(void)
853{ 891{
854 int err = register_filesystem(&pipe_fs_type); 892 int err = register_filesystem(&pipe_fs_type);
893
855 if (!err) { 894 if (!err) {
856 pipe_mnt = kern_mount(&pipe_fs_type); 895 pipe_mnt = kern_mount(&pipe_fs_type);
857 if (IS_ERR(pipe_mnt)) { 896 if (IS_ERR(pipe_mnt)) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a3a3eecef689..6cc77dc3f3ff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -297,16 +297,20 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
297 297
298 files = get_files_struct(task); 298 files = get_files_struct(task);
299 if (files) { 299 if (files) {
300 rcu_read_lock(); 300 /*
301 * We are not taking a ref to the file structure, so we must
302 * hold ->file_lock.
303 */
304 spin_lock(&files->file_lock);
301 file = fcheck_files(files, fd); 305 file = fcheck_files(files, fd);
302 if (file) { 306 if (file) {
303 *mnt = mntget(file->f_vfsmnt); 307 *mnt = mntget(file->f_vfsmnt);
304 *dentry = dget(file->f_dentry); 308 *dentry = dget(file->f_dentry);
305 rcu_read_unlock(); 309 spin_unlock(&files->file_lock);
306 put_files_struct(files); 310 put_files_struct(files);
307 return 0; 311 return 0;
308 } 312 }
309 rcu_read_unlock(); 313 spin_unlock(&files->file_lock);
310 put_files_struct(files); 314 put_files_struct(files);
311 } 315 }
312 return -ENOENT; 316 return -ENOENT;
@@ -1523,7 +1527,12 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1523 if (!files) 1527 if (!files)
1524 goto out_unlock; 1528 goto out_unlock;
1525 inode->i_mode = S_IFLNK; 1529 inode->i_mode = S_IFLNK;
1526 rcu_read_lock(); 1530
1531 /*
1532 * We are not taking a ref to the file structure, so we must
1533 * hold ->file_lock.
1534 */
1535 spin_lock(&files->file_lock);
1527 file = fcheck_files(files, fd); 1536 file = fcheck_files(files, fd);
1528 if (!file) 1537 if (!file)
1529 goto out_unlock2; 1538 goto out_unlock2;
@@ -1531,7 +1540,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1531 inode->i_mode |= S_IRUSR | S_IXUSR; 1540 inode->i_mode |= S_IRUSR | S_IXUSR;
1532 if (file->f_mode & 2) 1541 if (file->f_mode & 2)
1533 inode->i_mode |= S_IWUSR | S_IXUSR; 1542 inode->i_mode |= S_IWUSR | S_IXUSR;
1534 rcu_read_unlock(); 1543 spin_unlock(&files->file_lock);
1535 put_files_struct(files); 1544 put_files_struct(files);
1536 inode->i_op = &proc_pid_link_inode_operations; 1545 inode->i_op = &proc_pid_link_inode_operations;
1537 inode->i_size = 64; 1546 inode->i_size = 64;
@@ -1541,7 +1550,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
1541 return NULL; 1550 return NULL;
1542 1551
1543out_unlock2: 1552out_unlock2:
1544 rcu_read_unlock(); 1553 spin_unlock(&files->file_lock);
1545 put_files_struct(files); 1554 put_files_struct(files);
1546out_unlock: 1555out_unlock:
1547 iput(inode); 1556 iput(inode);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7efa73d44c9a..20d4b2237fce 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -103,8 +103,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
103 size_t buflen, loff_t *fpos) 103 size_t buflen, loff_t *fpos)
104{ 104{
105 ssize_t acc = 0, tmp; 105 ssize_t acc = 0, tmp;
106 size_t tsz, nr_bytes; 106 size_t tsz;
107 u64 start; 107 u64 start, nr_bytes;
108 struct vmcore *curr_m = NULL; 108 struct vmcore *curr_m = NULL;
109 109
110 if (buflen == 0 || *fpos >= vmcore_size) 110 if (buflen == 0 || *fpos >= vmcore_size)
diff --git a/fs/read_write.c b/fs/read_write.c
index 6256ca81a718..5bc0e9234f9d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -202,7 +202,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
202 goto Einval; 202 goto Einval;
203 203
204 inode = file->f_dentry->d_inode; 204 inode = file->f_dentry->d_inode;
205 if (inode->i_flock && MANDATORY_LOCK(inode)) { 205 if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
206 int retval = locks_mandatory_area( 206 int retval = locks_mandatory_area(
207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
208 inode, file, pos, count); 208 inode, file, pos, count);
diff --git a/fs/select.c b/fs/select.c
index 071660fa7b01..a8109baa5e46 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -310,8 +310,9 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
310 fd_set __user *exp, s64 *timeout) 310 fd_set __user *exp, s64 *timeout)
311{ 311{
312 fd_set_bits fds; 312 fd_set_bits fds;
313 char *bits; 313 void *bits;
314 int ret, size, max_fdset; 314 int ret, max_fdset;
315 unsigned int size;
315 struct fdtable *fdt; 316 struct fdtable *fdt;
316 /* Allocate small arguments on the stack to save memory and be faster */ 317 /* Allocate small arguments on the stack to save memory and be faster */
317 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; 318 long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
@@ -333,20 +334,21 @@ static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
333 * since we used fdset we need to allocate memory in units of 334 * since we used fdset we need to allocate memory in units of
334 * long-words. 335 * long-words.
335 */ 336 */
336 ret = -ENOMEM;
337 size = FDS_BYTES(n); 337 size = FDS_BYTES(n);
338 if (6*size < SELECT_STACK_ALLOC) 338 bits = stack_fds;
339 bits = stack_fds; 339 if (size > sizeof(stack_fds) / 6) {
340 else 340 /* Not enough space in on-stack array; must use kmalloc */
341 ret = -ENOMEM;
341 bits = kmalloc(6 * size, GFP_KERNEL); 342 bits = kmalloc(6 * size, GFP_KERNEL);
342 if (!bits) 343 if (!bits)
343 goto out_nofds; 344 goto out_nofds;
344 fds.in = (unsigned long *) bits; 345 }
345 fds.out = (unsigned long *) (bits + size); 346 fds.in = bits;
346 fds.ex = (unsigned long *) (bits + 2*size); 347 fds.out = bits + size;
347 fds.res_in = (unsigned long *) (bits + 3*size); 348 fds.ex = bits + 2*size;
348 fds.res_out = (unsigned long *) (bits + 4*size); 349 fds.res_in = bits + 3*size;
349 fds.res_ex = (unsigned long *) (bits + 5*size); 350 fds.res_out = bits + 4*size;
351 fds.res_ex = bits + 5*size;
350 352
351 if ((ret = get_fd_set(n, inp, fds.in)) || 353 if ((ret = get_fd_set(n, inp, fds.in)) ||
352 (ret = get_fd_set(n, outp, fds.out)) || 354 (ret = get_fd_set(n, outp, fds.out)) ||
diff --git a/fs/splice.c b/fs/splice.c
index bfa42a277bb8..0559e7577a04 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -9,11 +9,12 @@
9 * that transfers data buffers to or from a pipe buffer. 9 * that transfers data buffers to or from a pipe buffer.
10 * 10 *
11 * Named by Larry McVoy, original implementation from Linus, extended by 11 * Named by Larry McVoy, original implementation from Linus, extended by
12 * Jens to support splicing to files and fixing the initial implementation 12 * Jens to support splicing to files, network, direct splicing, etc and
13 * bugs. 13 * fixing lots of bugs.
14 * 14 *
15 * Copyright (C) 2005 Jens Axboe <axboe@suse.de> 15 * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
16 * Copyright (C) 2005 Linus Torvalds <torvalds@osdl.org> 16 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
17 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
17 * 18 *
18 */ 19 */
19#include <linux/fs.h> 20#include <linux/fs.h>
@@ -49,7 +50,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
49 struct page *page = buf->page; 50 struct page *page = buf->page;
50 struct address_space *mapping = page_mapping(page); 51 struct address_space *mapping = page_mapping(page);
51 52
52 WARN_ON(!PageLocked(page)); 53 lock_page(page);
54
53 WARN_ON(!PageUptodate(page)); 55 WARN_ON(!PageUptodate(page));
54 56
55 /* 57 /*
@@ -64,8 +66,10 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
64 if (PagePrivate(page)) 66 if (PagePrivate(page))
65 try_to_release_page(page, mapping_gfp_mask(mapping)); 67 try_to_release_page(page, mapping_gfp_mask(mapping));
66 68
67 if (!remove_mapping(mapping, page)) 69 if (!remove_mapping(mapping, page)) {
70 unlock_page(page);
68 return 1; 71 return 1;
72 }
69 73
70 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 74 buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
71 return 0; 75 return 0;
@@ -84,69 +88,89 @@ static void *page_cache_pipe_buf_map(struct file *file,
84 struct pipe_buffer *buf) 88 struct pipe_buffer *buf)
85{ 89{
86 struct page *page = buf->page; 90 struct page *page = buf->page;
87 91 int err;
88 lock_page(page);
89 92
90 if (!PageUptodate(page)) { 93 if (!PageUptodate(page)) {
91 unlock_page(page); 94 lock_page(page);
92 return ERR_PTR(-EIO);
93 }
94 95
95 if (!page->mapping) { 96 /*
97 * Page got truncated/unhashed. This will cause a 0-byte
98 * splice, if this is the first page.
99 */
100 if (!page->mapping) {
101 err = -ENODATA;
102 goto error;
103 }
104
105 /*
106 * Uh oh, read-error from disk.
107 */
108 if (!PageUptodate(page)) {
109 err = -EIO;
110 goto error;
111 }
112
113 /*
114 * Page is ok after all, fall through to mapping.
115 */
96 unlock_page(page); 116 unlock_page(page);
97 return ERR_PTR(-ENODATA);
98 } 117 }
99 118
100 return kmap(buf->page); 119 return kmap(page);
120error:
121 unlock_page(page);
122 return ERR_PTR(err);
101} 123}
102 124
103static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 125static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
104 struct pipe_buffer *buf) 126 struct pipe_buffer *buf)
105{ 127{
106 unlock_page(buf->page);
107 kunmap(buf->page); 128 kunmap(buf->page);
108} 129}
109 130
131static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
132 struct pipe_buffer *buf)
133{
134 page_cache_get(buf->page);
135}
136
110static struct pipe_buf_operations page_cache_pipe_buf_ops = { 137static struct pipe_buf_operations page_cache_pipe_buf_ops = {
111 .can_merge = 0, 138 .can_merge = 0,
112 .map = page_cache_pipe_buf_map, 139 .map = page_cache_pipe_buf_map,
113 .unmap = page_cache_pipe_buf_unmap, 140 .unmap = page_cache_pipe_buf_unmap,
114 .release = page_cache_pipe_buf_release, 141 .release = page_cache_pipe_buf_release,
115 .steal = page_cache_pipe_buf_steal, 142 .steal = page_cache_pipe_buf_steal,
143 .get = page_cache_pipe_buf_get,
116}; 144};
117 145
118/* 146/*
119 * Pipe output worker. This sets up our pipe format with the page cache 147 * Pipe output worker. This sets up our pipe format with the page cache
120 * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 148 * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
121 */ 149 */
122static ssize_t move_to_pipe(struct inode *inode, struct page **pages, 150static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
123 int nr_pages, unsigned long offset, 151 int nr_pages, unsigned long len,
124 unsigned long len, unsigned int flags) 152 unsigned int offset, unsigned int flags)
125{ 153{
126 struct pipe_inode_info *info;
127 int ret, do_wakeup, i; 154 int ret, do_wakeup, i;
128 155
129 ret = 0; 156 ret = 0;
130 do_wakeup = 0; 157 do_wakeup = 0;
131 i = 0; 158 i = 0;
132 159
133 mutex_lock(PIPE_MUTEX(*inode)); 160 if (pipe->inode)
161 mutex_lock(&pipe->inode->i_mutex);
134 162
135 info = inode->i_pipe;
136 for (;;) { 163 for (;;) {
137 int bufs; 164 if (!pipe->readers) {
138
139 if (!PIPE_READERS(*inode)) {
140 send_sig(SIGPIPE, current, 0); 165 send_sig(SIGPIPE, current, 0);
141 if (!ret) 166 if (!ret)
142 ret = -EPIPE; 167 ret = -EPIPE;
143 break; 168 break;
144 } 169 }
145 170
146 bufs = info->nrbufs; 171 if (pipe->nrbufs < PIPE_BUFFERS) {
147 if (bufs < PIPE_BUFFERS) { 172 int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
148 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1); 173 struct pipe_buffer *buf = pipe->bufs + newbuf;
149 struct pipe_buffer *buf = info->bufs + newbuf;
150 struct page *page = pages[i++]; 174 struct page *page = pages[i++];
151 unsigned long this_len; 175 unsigned long this_len;
152 176
@@ -158,8 +182,9 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
158 buf->offset = offset; 182 buf->offset = offset;
159 buf->len = this_len; 183 buf->len = this_len;
160 buf->ops = &page_cache_pipe_buf_ops; 184 buf->ops = &page_cache_pipe_buf_ops;
161 info->nrbufs = ++bufs; 185 pipe->nrbufs++;
162 do_wakeup = 1; 186 if (pipe->inode)
187 do_wakeup = 1;
163 188
164 ret += this_len; 189 ret += this_len;
165 len -= this_len; 190 len -= this_len;
@@ -168,7 +193,7 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
168 break; 193 break;
169 if (!len) 194 if (!len)
170 break; 195 break;
171 if (bufs < PIPE_BUFFERS) 196 if (pipe->nrbufs < PIPE_BUFFERS)
172 continue; 197 continue;
173 198
174 break; 199 break;
@@ -187,22 +212,26 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
187 } 212 }
188 213
189 if (do_wakeup) { 214 if (do_wakeup) {
190 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 215 smp_mb();
191 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, 216 if (waitqueue_active(&pipe->wait))
192 POLL_IN); 217 wake_up_interruptible_sync(&pipe->wait);
218 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
193 do_wakeup = 0; 219 do_wakeup = 0;
194 } 220 }
195 221
196 PIPE_WAITING_WRITERS(*inode)++; 222 pipe->waiting_writers++;
197 pipe_wait(inode); 223 pipe_wait(pipe);
198 PIPE_WAITING_WRITERS(*inode)--; 224 pipe->waiting_writers--;
199 } 225 }
200 226
201 mutex_unlock(PIPE_MUTEX(*inode)); 227 if (pipe->inode)
228 mutex_unlock(&pipe->inode->i_mutex);
202 229
203 if (do_wakeup) { 230 if (do_wakeup) {
204 wake_up_interruptible(PIPE_WAIT(*inode)); 231 smp_mb();
205 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); 232 if (waitqueue_active(&pipe->wait))
233 wake_up_interruptible(&pipe->wait);
234 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
206 } 235 }
207 236
208 while (i < nr_pages) 237 while (i < nr_pages)
@@ -211,96 +240,155 @@ static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
211 return ret; 240 return ret;
212} 241}
213 242
214static int __generic_file_splice_read(struct file *in, struct inode *pipe, 243static int
215 size_t len, unsigned int flags) 244__generic_file_splice_read(struct file *in, loff_t *ppos,
245 struct pipe_inode_info *pipe, size_t len,
246 unsigned int flags)
216{ 247{
217 struct address_space *mapping = in->f_mapping; 248 struct address_space *mapping = in->f_mapping;
218 unsigned int offset, nr_pages; 249 unsigned int loff, offset, nr_pages;
219 struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS]; 250 struct page *pages[PIPE_BUFFERS];
220 struct page *page; 251 struct page *page;
221 pgoff_t index, pidx; 252 pgoff_t index, end_index;
222 int i, j; 253 loff_t isize;
254 size_t bytes;
255 int i, error;
223 256
224 index = in->f_pos >> PAGE_CACHE_SHIFT; 257 index = *ppos >> PAGE_CACHE_SHIFT;
225 offset = in->f_pos & ~PAGE_CACHE_MASK; 258 loff = offset = *ppos & ~PAGE_CACHE_MASK;
226 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 259 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
227 260
228 if (nr_pages > PIPE_BUFFERS) 261 if (nr_pages > PIPE_BUFFERS)
229 nr_pages = PIPE_BUFFERS; 262 nr_pages = PIPE_BUFFERS;
230 263
231 /* 264 /*
232 * initiate read-ahead on this page range 265 * Initiate read-ahead on this page range. however, don't call into
266 * read-ahead if this is a non-zero offset (we are likely doing small
267 * chunk splice and the page is already there) for a single page.
233 */ 268 */
234 do_page_cache_readahead(mapping, in, index, nr_pages); 269 if (!offset || nr_pages > 1)
270 do_page_cache_readahead(mapping, in, index, nr_pages);
235 271
236 /* 272 /*
237 * Get as many pages from the page cache as possible.. 273 * Now fill in the holes:
238 * Start IO on the page cache entries we create (we
239 * can assume that any pre-existing ones we find have
240 * already had IO started on them).
241 */ 274 */
242 i = find_get_pages(mapping, index, nr_pages, pages); 275 error = 0;
276 bytes = 0;
277 for (i = 0; i < nr_pages; i++, index++) {
278 unsigned int this_len;
243 279
244 /* 280 if (!len)
245 * common case - we found all pages and they are contiguous, 281 break;
246 * kick them off
247 */
248 if (i && (pages[i - 1]->index == index + i - 1))
249 goto splice_them;
250 282
251 /* 283 /*
252 * fill shadow[] with pages at the right locations, so we only 284 * this_len is the max we'll use from this page
253 * have to fill holes 285 */
254 */ 286 this_len = min(len, PAGE_CACHE_SIZE - loff);
255 memset(shadow, 0, nr_pages * sizeof(struct page *)); 287find_page:
256 for (j = 0; j < i; j++) 288 /*
257 shadow[pages[j]->index - index] = pages[j]; 289 * lookup the page for this index
290 */
291 page = find_get_page(mapping, index);
292 if (!page) {
293 /*
294 * page didn't exist, allocate one
295 */
296 page = page_cache_alloc_cold(mapping);
297 if (!page)
298 break;
258 299
259 /* 300 error = add_to_page_cache_lru(page, mapping, index,
260 * now fill in the holes 301 mapping_gfp_mask(mapping));
261 */ 302 if (unlikely(error)) {
262 for (i = 0, pidx = index; i < nr_pages; pidx++, i++) { 303 page_cache_release(page);
263 int error; 304 break;
305 }
264 306
265 if (shadow[i]) 307 goto readpage;
266 continue; 308 }
267 309
268 /* 310 /*
269 * no page there, look one up / create it 311 * If the page isn't uptodate, we may need to start io on it
270 */ 312 */
271 page = find_or_create_page(mapping, pidx, 313 if (!PageUptodate(page)) {
272 mapping_gfp_mask(mapping)); 314 /*
273 if (!page) 315 * If in nonblock mode then don't block on waiting
274 break; 316 * for an in-flight io page
317 */
318 if (flags & SPLICE_F_NONBLOCK)
319 break;
320
321 lock_page(page);
322
323 /*
324 * page was truncated, stop here. if this isn't the
325 * first page, we'll just complete what we already
326 * added
327 */
328 if (!page->mapping) {
329 unlock_page(page);
330 page_cache_release(page);
331 break;
332 }
333 /*
334 * page was already under io and is now done, great
335 */
336 if (PageUptodate(page)) {
337 unlock_page(page);
338 goto fill_it;
339 }
275 340
276 if (PageUptodate(page)) 341readpage:
277 unlock_page(page); 342 /*
278 else { 343 * need to read in the page
344 */
279 error = mapping->a_ops->readpage(in, page); 345 error = mapping->a_ops->readpage(in, page);
280 346
281 if (unlikely(error)) { 347 if (unlikely(error)) {
282 page_cache_release(page); 348 page_cache_release(page);
349 if (error == AOP_TRUNCATED_PAGE)
350 goto find_page;
283 break; 351 break;
284 } 352 }
285 }
286 shadow[i] = page;
287 }
288 353
289 if (!i) { 354 /*
290 for (i = 0; i < nr_pages; i++) { 355 * i_size must be checked after ->readpage().
291 if (shadow[i]) 356 */
292 page_cache_release(shadow[i]); 357 isize = i_size_read(mapping->host);
358 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
359 if (unlikely(!isize || index > end_index)) {
360 page_cache_release(page);
361 break;
362 }
363
364 /*
365 * if this is the last page, see if we need to shrink
366 * the length and stop
367 */
368 if (end_index == index) {
369 loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK);
370 if (bytes + loff > isize) {
371 page_cache_release(page);
372 break;
373 }
374 /*
375 * force quit after adding this page
376 */
377 nr_pages = i;
378 this_len = min(this_len, loff);
379 }
293 } 380 }
294 return 0; 381fill_it:
382 pages[i] = page;
383 bytes += this_len;
384 len -= this_len;
385 loff = 0;
295 } 386 }
296 387
297 memcpy(pages, shadow, i * sizeof(struct page *)); 388 if (i)
389 return move_to_pipe(pipe, pages, i, bytes, offset, flags);
298 390
299 /* 391 return error;
300 * Now we splice them into the pipe..
301 */
302splice_them:
303 return move_to_pipe(pipe, pages, i, offset, len, flags);
304} 392}
305 393
306/** 394/**
@@ -311,30 +399,34 @@ splice_them:
311 * @flags: splice modifier flags 399 * @flags: splice modifier flags
312 * 400 *
313 * Will read pages from given file and fill them into a pipe. 401 * Will read pages from given file and fill them into a pipe.
314 *
315 */ 402 */
316ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, 403ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
317 size_t len, unsigned int flags) 404 struct pipe_inode_info *pipe, size_t len,
405 unsigned int flags)
318{ 406{
319 ssize_t spliced; 407 ssize_t spliced;
320 int ret; 408 int ret;
321 409
322 ret = 0; 410 ret = 0;
323 spliced = 0; 411 spliced = 0;
412
324 while (len) { 413 while (len) {
325 ret = __generic_file_splice_read(in, pipe, len, flags); 414 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
326 415
327 if (ret <= 0) 416 if (ret < 0)
328 break; 417 break;
418 else if (!ret) {
419 if (spliced)
420 break;
421 if (flags & SPLICE_F_NONBLOCK) {
422 ret = -EAGAIN;
423 break;
424 }
425 }
329 426
330 in->f_pos += ret; 427 *ppos += ret;
331 len -= ret; 428 len -= ret;
332 spliced += ret; 429 spliced += ret;
333
334 if (!(flags & SPLICE_F_NONBLOCK))
335 continue;
336 ret = -EAGAIN;
337 break;
338 } 430 }
339 431
340 if (spliced) 432 if (spliced)
@@ -360,10 +452,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
360 int more; 452 int more;
361 453
362 /* 454 /*
363 * sub-optimal, but we are limited by the pipe ->map. we don't 455 * Sub-optimal, but we are limited by the pipe ->map. We don't
364 * need a kmap'ed buffer here, we just want to make sure we 456 * need a kmap'ed buffer here, we just want to make sure we
365 * have the page pinned if the pipe page originates from the 457 * have the page pinned if the pipe page originates from the
366 * page cache 458 * page cache.
367 */ 459 */
368 ptr = buf->ops->map(file, info, buf); 460 ptr = buf->ops->map(file, info, buf);
369 if (IS_ERR(ptr)) 461 if (IS_ERR(ptr))
@@ -414,7 +506,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
414 int ret; 506 int ret;
415 507
416 /* 508 /*
417 * after this, page will be locked and unmapped 509 * make sure the data in this buffer is uptodate
418 */ 510 */
419 src = buf->ops->map(file, info, buf); 511 src = buf->ops->map(file, info, buf);
420 if (IS_ERR(src)) 512 if (IS_ERR(src))
@@ -424,12 +516,13 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
424 offset = sd->pos & ~PAGE_CACHE_MASK; 516 offset = sd->pos & ~PAGE_CACHE_MASK;
425 517
426 /* 518 /*
427 * reuse buf page, if SPLICE_F_MOVE is set 519 * Reuse buf page, if SPLICE_F_MOVE is set.
428 */ 520 */
429 if (sd->flags & SPLICE_F_MOVE) { 521 if (sd->flags & SPLICE_F_MOVE) {
430 /* 522 /*
431 * If steal succeeds, buf->page is now pruned from the vm 523 * If steal succeeds, buf->page is now pruned from the vm
432 * side (LRU and page cache) and we can reuse it. 524 * side (LRU and page cache) and we can reuse it. The page
525 * will also be looked on successful return.
433 */ 526 */
434 if (buf->ops->steal(info, buf)) 527 if (buf->ops->steal(info, buf))
435 goto find_page; 528 goto find_page;
@@ -442,15 +535,27 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
442 lru_cache_add(page); 535 lru_cache_add(page);
443 } else { 536 } else {
444find_page: 537find_page:
445 ret = -ENOMEM; 538 page = find_lock_page(mapping, index);
446 page = find_or_create_page(mapping, index, gfp_mask); 539 if (!page) {
447 if (!page) 540 ret = -ENOMEM;
448 goto out; 541 page = page_cache_alloc_cold(mapping);
542 if (unlikely(!page))
543 goto out_nomem;
544
545 /*
546 * This will also lock the page
547 */
548 ret = add_to_page_cache_lru(page, mapping, index,
549 gfp_mask);
550 if (unlikely(ret))
551 goto out;
552 }
449 553
450 /* 554 /*
451 * If the page is uptodate, it is also locked. If it isn't 555 * We get here with the page locked. If the page is also
452 * uptodate, we can mark it uptodate if we are filling the 556 * uptodate, we don't need to do more. If it isn't, we
453 * full page. Otherwise we need to read it in first... 557 * may need to bring it in if we are not going to overwrite
558 * the full page.
454 */ 559 */
455 if (!PageUptodate(page)) { 560 if (!PageUptodate(page)) {
456 if (sd->len < PAGE_CACHE_SIZE) { 561 if (sd->len < PAGE_CACHE_SIZE) {
@@ -462,7 +567,7 @@ find_page:
462 567
463 if (!PageUptodate(page)) { 568 if (!PageUptodate(page)) {
464 /* 569 /*
465 * page got invalidated, repeat 570 * Page got invalidated, repeat.
466 */ 571 */
467 if (!page->mapping) { 572 if (!page->mapping) {
468 unlock_page(page); 573 unlock_page(page);
@@ -472,10 +577,8 @@ find_page:
472 ret = -EIO; 577 ret = -EIO;
473 goto out; 578 goto out;
474 } 579 }
475 } else { 580 } else
476 WARN_ON(!PageLocked(page));
477 SetPageUptodate(page); 581 SetPageUptodate(page);
478 }
479 } 582 }
480 } 583 }
481 584
@@ -501,12 +604,14 @@ find_page:
501 } else if (ret) 604 } else if (ret)
502 goto out; 605 goto out;
503 606
607 mark_page_accessed(page);
504 balance_dirty_pages_ratelimited(mapping); 608 balance_dirty_pages_ratelimited(mapping);
505out: 609out:
506 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 610 if (!(buf->flags & PIPE_BUF_FLAG_STOLEN))
507 page_cache_release(page); 611 page_cache_release(page);
508 unlock_page(page); 612
509 } 613 unlock_page(page);
614out_nomem:
510 buf->ops->unmap(info, buf); 615 buf->ops->unmap(info, buf);
511 return ret; 616 return ret;
512} 617}
@@ -519,11 +624,10 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
519 * key here is the 'actor' worker passed in that actually moves the data 624 * key here is the 'actor' worker passed in that actually moves the data
520 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 625 * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
521 */ 626 */
522static ssize_t move_from_pipe(struct inode *inode, struct file *out, 627static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
523 size_t len, unsigned int flags, 628 loff_t *ppos, size_t len, unsigned int flags,
524 splice_actor *actor) 629 splice_actor *actor)
525{ 630{
526 struct pipe_inode_info *info;
527 int ret, do_wakeup, err; 631 int ret, do_wakeup, err;
528 struct splice_desc sd; 632 struct splice_desc sd;
529 633
@@ -533,24 +637,21 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
533 sd.total_len = len; 637 sd.total_len = len;
534 sd.flags = flags; 638 sd.flags = flags;
535 sd.file = out; 639 sd.file = out;
536 sd.pos = out->f_pos; 640 sd.pos = *ppos;
537 641
538 mutex_lock(PIPE_MUTEX(*inode)); 642 if (pipe->inode)
643 mutex_lock(&pipe->inode->i_mutex);
539 644
540 info = inode->i_pipe;
541 for (;;) { 645 for (;;) {
542 int bufs = info->nrbufs; 646 if (pipe->nrbufs) {
543 647 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
544 if (bufs) {
545 int curbuf = info->curbuf;
546 struct pipe_buffer *buf = info->bufs + curbuf;
547 struct pipe_buf_operations *ops = buf->ops; 648 struct pipe_buf_operations *ops = buf->ops;
548 649
549 sd.len = buf->len; 650 sd.len = buf->len;
550 if (sd.len > sd.total_len) 651 if (sd.len > sd.total_len)
551 sd.len = sd.total_len; 652 sd.len = sd.total_len;
552 653
553 err = actor(info, buf, &sd); 654 err = actor(pipe, buf, &sd);
554 if (err) { 655 if (err) {
555 if (!ret && err != -ENODATA) 656 if (!ret && err != -ENODATA)
556 ret = err; 657 ret = err;
@@ -561,13 +662,14 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
561 ret += sd.len; 662 ret += sd.len;
562 buf->offset += sd.len; 663 buf->offset += sd.len;
563 buf->len -= sd.len; 664 buf->len -= sd.len;
665
564 if (!buf->len) { 666 if (!buf->len) {
565 buf->ops = NULL; 667 buf->ops = NULL;
566 ops->release(info, buf); 668 ops->release(pipe, buf);
567 curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); 669 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
568 info->curbuf = curbuf; 670 pipe->nrbufs--;
569 info->nrbufs = --bufs; 671 if (pipe->inode)
570 do_wakeup = 1; 672 do_wakeup = 1;
571 } 673 }
572 674
573 sd.pos += sd.len; 675 sd.pos += sd.len;
@@ -576,11 +678,11 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
576 break; 678 break;
577 } 679 }
578 680
579 if (bufs) 681 if (pipe->nrbufs)
580 continue; 682 continue;
581 if (!PIPE_WRITERS(*inode)) 683 if (!pipe->writers)
582 break; 684 break;
583 if (!PIPE_WAITING_WRITERS(*inode)) { 685 if (!pipe->waiting_writers) {
584 if (ret) 686 if (ret)
585 break; 687 break;
586 } 688 }
@@ -598,31 +700,32 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
598 } 700 }
599 701
600 if (do_wakeup) { 702 if (do_wakeup) {
601 wake_up_interruptible_sync(PIPE_WAIT(*inode)); 703 smp_mb();
602 kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT); 704 if (waitqueue_active(&pipe->wait))
705 wake_up_interruptible_sync(&pipe->wait);
706 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
603 do_wakeup = 0; 707 do_wakeup = 0;
604 } 708 }
605 709
606 pipe_wait(inode); 710 pipe_wait(pipe);
607 } 711 }
608 712
609 mutex_unlock(PIPE_MUTEX(*inode)); 713 if (pipe->inode)
714 mutex_unlock(&pipe->inode->i_mutex);
610 715
611 if (do_wakeup) { 716 if (do_wakeup) {
612 wake_up_interruptible(PIPE_WAIT(*inode)); 717 smp_mb();
613 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); 718 if (waitqueue_active(&pipe->wait))
719 wake_up_interruptible(&pipe->wait);
720 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
614 } 721 }
615 722
616 mutex_lock(&out->f_mapping->host->i_mutex);
617 out->f_pos = sd.pos;
618 mutex_unlock(&out->f_mapping->host->i_mutex);
619 return ret; 723 return ret;
620
621} 724}
622 725
623/** 726/**
624 * generic_file_splice_write - splice data from a pipe to a file 727 * generic_file_splice_write - splice data from a pipe to a file
625 * @inode: pipe inode 728 * @pipe: pipe info
626 * @out: file to write to 729 * @out: file to write to
627 * @len: number of bytes to splice 730 * @len: number of bytes to splice
628 * @flags: splice modifier flags 731 * @flags: splice modifier flags
@@ -631,27 +734,34 @@ static ssize_t move_from_pipe(struct inode *inode, struct file *out,
631 * the given pipe inode to the given file. 734 * the given pipe inode to the given file.
632 * 735 *
633 */ 736 */
634ssize_t generic_file_splice_write(struct inode *inode, struct file *out, 737ssize_t
635 size_t len, unsigned int flags) 738generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
739 loff_t *ppos, size_t len, unsigned int flags)
636{ 740{
637 struct address_space *mapping = out->f_mapping; 741 struct address_space *mapping = out->f_mapping;
638 ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); 742 ssize_t ret;
639 743
640 /* 744 ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
641 * if file or inode is SYNC and we actually wrote some data, sync it 745 if (ret > 0) {
642 */
643 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host))
644 && ret > 0) {
645 struct inode *inode = mapping->host; 746 struct inode *inode = mapping->host;
646 int err;
647 747
648 mutex_lock(&inode->i_mutex); 748 *ppos += ret;
649 err = generic_osync_inode(mapping->host, mapping, 749
650 OSYNC_METADATA|OSYNC_DATA); 750 /*
651 mutex_unlock(&inode->i_mutex); 751 * If file or inode is SYNC and we actually wrote some data,
752 * sync it.
753 */
754 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
755 int err;
756
757 mutex_lock(&inode->i_mutex);
758 err = generic_osync_inode(inode, mapping,
759 OSYNC_METADATA|OSYNC_DATA);
760 mutex_unlock(&inode->i_mutex);
652 761
653 if (err) 762 if (err)
654 ret = err; 763 ret = err;
764 }
655 } 765 }
656 766
657 return ret; 767 return ret;
@@ -670,10 +780,10 @@ EXPORT_SYMBOL(generic_file_splice_write);
670 * is involved. 780 * is involved.
671 * 781 *
672 */ 782 */
673ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 783ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
674 size_t len, unsigned int flags) 784 loff_t *ppos, size_t len, unsigned int flags)
675{ 785{
676 return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); 786 return move_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
677} 787}
678 788
679EXPORT_SYMBOL(generic_splice_sendpage); 789EXPORT_SYMBOL(generic_splice_sendpage);
@@ -681,77 +791,228 @@ EXPORT_SYMBOL(generic_splice_sendpage);
681/* 791/*
682 * Attempt to initiate a splice from pipe to file. 792 * Attempt to initiate a splice from pipe to file.
683 */ 793 */
684static long do_splice_from(struct inode *pipe, struct file *out, size_t len, 794static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
685 unsigned int flags) 795 loff_t *ppos, size_t len, unsigned int flags)
686{ 796{
687 loff_t pos;
688 int ret; 797 int ret;
689 798
690 if (!out->f_op || !out->f_op->splice_write) 799 if (unlikely(!out->f_op || !out->f_op->splice_write))
691 return -EINVAL; 800 return -EINVAL;
692 801
693 if (!(out->f_mode & FMODE_WRITE)) 802 if (unlikely(!(out->f_mode & FMODE_WRITE)))
694 return -EBADF; 803 return -EBADF;
695 804
696 pos = out->f_pos; 805 ret = rw_verify_area(WRITE, out, ppos, len);
697 ret = rw_verify_area(WRITE, out, &pos, len);
698 if (unlikely(ret < 0)) 806 if (unlikely(ret < 0))
699 return ret; 807 return ret;
700 808
701 return out->f_op->splice_write(pipe, out, len, flags); 809 return out->f_op->splice_write(pipe, out, ppos, len, flags);
702} 810}
703 811
704/* 812/*
705 * Attempt to initiate a splice from a file to a pipe. 813 * Attempt to initiate a splice from a file to a pipe.
706 */ 814 */
707static long do_splice_to(struct file *in, struct inode *pipe, size_t len, 815static long do_splice_to(struct file *in, loff_t *ppos,
816 struct pipe_inode_info *pipe, size_t len,
708 unsigned int flags) 817 unsigned int flags)
709{ 818{
710 loff_t pos, isize, left; 819 loff_t isize, left;
711 int ret; 820 int ret;
712 821
713 if (!in->f_op || !in->f_op->splice_read) 822 if (unlikely(!in->f_op || !in->f_op->splice_read))
714 return -EINVAL; 823 return -EINVAL;
715 824
716 if (!(in->f_mode & FMODE_READ)) 825 if (unlikely(!(in->f_mode & FMODE_READ)))
717 return -EBADF; 826 return -EBADF;
718 827
719 pos = in->f_pos; 828 ret = rw_verify_area(READ, in, ppos, len);
720 ret = rw_verify_area(READ, in, &pos, len);
721 if (unlikely(ret < 0)) 829 if (unlikely(ret < 0))
722 return ret; 830 return ret;
723 831
724 isize = i_size_read(in->f_mapping->host); 832 isize = i_size_read(in->f_mapping->host);
725 if (unlikely(in->f_pos >= isize)) 833 if (unlikely(*ppos >= isize))
726 return 0; 834 return 0;
727 835
728 left = isize - in->f_pos; 836 left = isize - *ppos;
729 if (left < len) 837 if (unlikely(left < len))
730 len = left; 838 len = left;
731 839
732 return in->f_op->splice_read(in, pipe, len, flags); 840 return in->f_op->splice_read(in, ppos, pipe, len, flags);
841}
842
843long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
844 size_t len, unsigned int flags)
845{
846 struct pipe_inode_info *pipe;
847 long ret, bytes;
848 loff_t out_off;
849 umode_t i_mode;
850 int i;
851
852 /*
853 * We require the input being a regular file, as we don't want to
854 * randomly drop data for eg socket -> socket splicing. Use the
855 * piped splicing for that!
856 */
857 i_mode = in->f_dentry->d_inode->i_mode;
858 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
859 return -EINVAL;
860
861 /*
862 * neither in nor out is a pipe, setup an internal pipe attached to
863 * 'out' and transfer the wanted data from 'in' to 'out' through that
864 */
865 pipe = current->splice_pipe;
866 if (unlikely(!pipe)) {
867 pipe = alloc_pipe_info(NULL);
868 if (!pipe)
869 return -ENOMEM;
870
871 /*
872 * We don't have an immediate reader, but we'll read the stuff
873 * out of the pipe right after the move_to_pipe(). So set
874 * PIPE_READERS appropriately.
875 */
876 pipe->readers = 1;
877
878 current->splice_pipe = pipe;
879 }
880
881 /*
882 * Do the splice.
883 */
884 ret = 0;
885 bytes = 0;
886 out_off = 0;
887
888 while (len) {
889 size_t read_len, max_read_len;
890
891 /*
892 * Do at most PIPE_BUFFERS pages worth of transfer:
893 */
894 max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
895
896 ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
897 if (unlikely(ret < 0))
898 goto out_release;
899
900 read_len = ret;
901
902 /*
903 * NOTE: nonblocking mode only applies to the input. We
904 * must not do the output in nonblocking mode as then we
905 * could get stuck data in the internal pipe:
906 */
907 ret = do_splice_from(pipe, out, &out_off, read_len,
908 flags & ~SPLICE_F_NONBLOCK);
909 if (unlikely(ret < 0))
910 goto out_release;
911
912 bytes += ret;
913 len -= ret;
914
915 /*
916 * In nonblocking mode, if we got back a short read then
917 * that was due to either an IO error or due to the
918 * pagecache entry not being there. In the IO error case
919 * the _next_ splice attempt will produce a clean IO error
920 * return value (not a short read), so in both cases it's
921 * correct to break out of the loop here:
922 */
923 if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
924 break;
925 }
926
927 pipe->nrbufs = pipe->curbuf = 0;
928
929 return bytes;
930
931out_release:
932 /*
933 * If we did an incomplete transfer we must release
934 * the pipe buffers in question:
935 */
936 for (i = 0; i < PIPE_BUFFERS; i++) {
937 struct pipe_buffer *buf = pipe->bufs + i;
938
939 if (buf->ops) {
940 buf->ops->release(pipe, buf);
941 buf->ops = NULL;
942 }
943 }
944 pipe->nrbufs = pipe->curbuf = 0;
945
946 /*
947 * If we transferred some data, return the number of bytes:
948 */
949 if (bytes > 0)
950 return bytes;
951
952 return ret;
733} 953}
734 954
955EXPORT_SYMBOL(do_splice_direct);
956
735/* 957/*
736 * Determine where to splice to/from. 958 * Determine where to splice to/from.
737 */ 959 */
738static long do_splice(struct file *in, struct file *out, size_t len, 960static long do_splice(struct file *in, loff_t __user *off_in,
739 unsigned int flags) 961 struct file *out, loff_t __user *off_out,
962 size_t len, unsigned int flags)
740{ 963{
741 struct inode *pipe; 964 struct pipe_inode_info *pipe;
965 loff_t offset, *off;
966 long ret;
967
968 pipe = in->f_dentry->d_inode->i_pipe;
969 if (pipe) {
970 if (off_in)
971 return -ESPIPE;
972 if (off_out) {
973 if (out->f_op->llseek == no_llseek)
974 return -EINVAL;
975 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
976 return -EFAULT;
977 off = &offset;
978 } else
979 off = &out->f_pos;
980
981 ret = do_splice_from(pipe, out, off, len, flags);
982
983 if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
984 ret = -EFAULT;
742 985
743 pipe = in->f_dentry->d_inode; 986 return ret;
744 if (pipe->i_pipe) 987 }
745 return do_splice_from(pipe, out, len, flags); 988
989 pipe = out->f_dentry->d_inode->i_pipe;
990 if (pipe) {
991 if (off_out)
992 return -ESPIPE;
993 if (off_in) {
994 if (in->f_op->llseek == no_llseek)
995 return -EINVAL;
996 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
997 return -EFAULT;
998 off = &offset;
999 } else
1000 off = &in->f_pos;
746 1001
747 pipe = out->f_dentry->d_inode; 1002 ret = do_splice_to(in, off, pipe, len, flags);
748 if (pipe->i_pipe) 1003
749 return do_splice_to(in, pipe, len, flags); 1004 if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
1005 ret = -EFAULT;
1006
1007 return ret;
1008 }
750 1009
751 return -EINVAL; 1010 return -EINVAL;
752} 1011}
753 1012
754asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags) 1013asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
1014 int fd_out, loff_t __user *off_out,
1015 size_t len, unsigned int flags)
755{ 1016{
756 long error; 1017 long error;
757 struct file *in, *out; 1018 struct file *in, *out;
@@ -761,13 +1022,15 @@ asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags)
761 return 0; 1022 return 0;
762 1023
763 error = -EBADF; 1024 error = -EBADF;
764 in = fget_light(fdin, &fput_in); 1025 in = fget_light(fd_in, &fput_in);
765 if (in) { 1026 if (in) {
766 if (in->f_mode & FMODE_READ) { 1027 if (in->f_mode & FMODE_READ) {
767 out = fget_light(fdout, &fput_out); 1028 out = fget_light(fd_out, &fput_out);
768 if (out) { 1029 if (out) {
769 if (out->f_mode & FMODE_WRITE) 1030 if (out->f_mode & FMODE_WRITE)
770 error = do_splice(in, out, len, flags); 1031 error = do_splice(in, off_in,
1032 out, off_out,
1033 len, flags);
771 fput_light(out, fput_out); 1034 fput_light(out, fput_out);
772 } 1035 }
773 } 1036 }
@@ -777,3 +1040,192 @@ asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags)
777 1040
778 return error; 1041 return error;
779} 1042}
1043
1044/*
1045 * Link contents of ipipe to opipe.
1046 */
1047static int link_pipe(struct pipe_inode_info *ipipe,
1048 struct pipe_inode_info *opipe,
1049 size_t len, unsigned int flags)
1050{
1051 struct pipe_buffer *ibuf, *obuf;
1052 int ret, do_wakeup, i, ipipe_first;
1053
1054 ret = do_wakeup = ipipe_first = 0;
1055
1056 /*
1057 * Potential ABBA deadlock, work around it by ordering lock
1058 * grabbing by inode address. Otherwise two different processes
1059 * could deadlock (one doing tee from A -> B, the other from B -> A).
1060 */
1061 if (ipipe->inode < opipe->inode) {
1062 ipipe_first = 1;
1063 mutex_lock(&ipipe->inode->i_mutex);
1064 mutex_lock(&opipe->inode->i_mutex);
1065 } else {
1066 mutex_lock(&opipe->inode->i_mutex);
1067 mutex_lock(&ipipe->inode->i_mutex);
1068 }
1069
1070 for (i = 0;; i++) {
1071 if (!opipe->readers) {
1072 send_sig(SIGPIPE, current, 0);
1073 if (!ret)
1074 ret = -EPIPE;
1075 break;
1076 }
1077 if (ipipe->nrbufs - i) {
1078 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1079
1080 /*
1081 * If we have room, fill this buffer
1082 */
1083 if (opipe->nrbufs < PIPE_BUFFERS) {
1084 int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
1085
1086 /*
1087 * Get a reference to this pipe buffer,
1088 * so we can copy the contents over.
1089 */
1090 ibuf->ops->get(ipipe, ibuf);
1091
1092 obuf = opipe->bufs + nbuf;
1093 *obuf = *ibuf;
1094
1095 if (obuf->len > len)
1096 obuf->len = len;
1097
1098 opipe->nrbufs++;
1099 do_wakeup = 1;
1100 ret += obuf->len;
1101 len -= obuf->len;
1102
1103 if (!len)
1104 break;
1105 if (opipe->nrbufs < PIPE_BUFFERS)
1106 continue;
1107 }
1108
1109 /*
1110 * We have input available, but no output room.
1111 * If we already copied data, return that. If we
1112 * need to drop the opipe lock, it must be ordered
1113 * last to avoid deadlocks.
1114 */
1115 if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
1116 if (!ret)
1117 ret = -EAGAIN;
1118 break;
1119 }
1120 if (signal_pending(current)) {
1121 if (!ret)
1122 ret = -ERESTARTSYS;
1123 break;
1124 }
1125 if (do_wakeup) {
1126 smp_mb();
1127 if (waitqueue_active(&opipe->wait))
1128 wake_up_interruptible(&opipe->wait);
1129 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1130 do_wakeup = 0;
1131 }
1132
1133 opipe->waiting_writers++;
1134 pipe_wait(opipe);
1135 opipe->waiting_writers--;
1136 continue;
1137 }
1138
1139 /*
1140 * No input buffers, do the usual checks for available
1141 * writers and blocking and wait if necessary
1142 */
1143 if (!ipipe->writers)
1144 break;
1145 if (!ipipe->waiting_writers) {
1146 if (ret)
1147 break;
1148 }
1149 /*
1150 * pipe_wait() drops the ipipe mutex. To avoid deadlocks
1151 * with another process, we can only safely do that if
1152 * the ipipe lock is ordered last.
1153 */
1154 if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
1155 if (!ret)
1156 ret = -EAGAIN;
1157 break;
1158 }
1159 if (signal_pending(current)) {
1160 if (!ret)
1161 ret = -ERESTARTSYS;
1162 break;
1163 }
1164
1165 if (waitqueue_active(&ipipe->wait))
1166 wake_up_interruptible_sync(&ipipe->wait);
1167 kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
1168
1169 pipe_wait(ipipe);
1170 }
1171
1172 mutex_unlock(&ipipe->inode->i_mutex);
1173 mutex_unlock(&opipe->inode->i_mutex);
1174
1175 if (do_wakeup) {
1176 smp_mb();
1177 if (waitqueue_active(&opipe->wait))
1178 wake_up_interruptible(&opipe->wait);
1179 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1180 }
1181
1182 return ret;
1183}
1184
1185/*
1186 * This is a tee(1) implementation that works on pipes. It doesn't copy
1187 * any data, it simply references the 'in' pages on the 'out' pipe.
1188 * The 'flags' used are the SPLICE_F_* variants, currently the only
1189 * applicable one is SPLICE_F_NONBLOCK.
1190 */
1191static long do_tee(struct file *in, struct file *out, size_t len,
1192 unsigned int flags)
1193{
1194 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
1195 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
1196
1197 /*
1198 * Link ipipe to the two output pipes, consuming as we go along.
1199 */
1200 if (ipipe && opipe)
1201 return link_pipe(ipipe, opipe, len, flags);
1202
1203 return -EINVAL;
1204}
1205
1206asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
1207{
1208 struct file *in;
1209 int error, fput_in;
1210
1211 if (unlikely(!len))
1212 return 0;
1213
1214 error = -EBADF;
1215 in = fget_light(fdin, &fput_in);
1216 if (in) {
1217 if (in->f_mode & FMODE_READ) {
1218 int fput_out;
1219 struct file *out = fget_light(fdout, &fput_out);
1220
1221 if (out) {
1222 if (out->f_mode & FMODE_WRITE)
1223 error = do_tee(in, out, len, flags);
1224 fput_light(out, fput_out);
1225 }
1226 }
1227 fput_light(in, fput_in);
1228 }
1229
1230 return error;
1231}
diff --git a/fs/sync.c b/fs/sync.c
index 8616006d2094..aab5ffe77e9f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -61,7 +61,7 @@
61 * will be available after a crash. 61 * will be available after a crash.
62 */ 62 */
63asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, 63asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
64 int flags) 64 unsigned int flags)
65{ 65{
66 int ret; 66 int ret;
67 struct file *file; 67 struct file *file;
@@ -126,7 +126,7 @@ out:
126 * `endbyte' is inclusive 126 * `endbyte' is inclusive
127 */ 127 */
128int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte, 128int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
129 int flags) 129 unsigned int flags)
130{ 130{
131 int ret; 131 int ret;
132 struct address_space *mapping; 132 struct address_space *mapping;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 6cfdc9a87772..610b5bdbe75b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,6 +43,7 @@ static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd,
43 43
44 memset(sd, 0, sizeof(*sd)); 44 memset(sd, 0, sizeof(*sd));
45 atomic_set(&sd->s_count, 1); 45 atomic_set(&sd->s_count, 1);
46 atomic_set(&sd->s_event, 0);
46 INIT_LIST_HEAD(&sd->s_children); 47 INIT_LIST_HEAD(&sd->s_children);
47 list_add(&sd->s_sibling, &parent_sd->s_children); 48 list_add(&sd->s_sibling, &parent_sd->s_children);
48 sd->s_element = element; 49 sd->s_element = element;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f1cb1ddde511..cf3786625bfa 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -6,6 +6,7 @@
6#include <linux/fsnotify.h> 6#include <linux/fsnotify.h>
7#include <linux/kobject.h> 7#include <linux/kobject.h>
8#include <linux/namei.h> 8#include <linux/namei.h>
9#include <linux/poll.h>
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
10#include <asm/semaphore.h> 11#include <asm/semaphore.h>
11 12
@@ -57,6 +58,7 @@ struct sysfs_buffer {
57 struct sysfs_ops * ops; 58 struct sysfs_ops * ops;
58 struct semaphore sem; 59 struct semaphore sem;
59 int needs_read_fill; 60 int needs_read_fill;
61 int event;
60}; 62};
61 63
62 64
@@ -72,6 +74,7 @@ struct sysfs_buffer {
72 */ 74 */
73static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) 75static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
74{ 76{
77 struct sysfs_dirent * sd = dentry->d_fsdata;
75 struct attribute * attr = to_attr(dentry); 78 struct attribute * attr = to_attr(dentry);
76 struct kobject * kobj = to_kobj(dentry->d_parent); 79 struct kobject * kobj = to_kobj(dentry->d_parent);
77 struct sysfs_ops * ops = buffer->ops; 80 struct sysfs_ops * ops = buffer->ops;
@@ -83,6 +86,7 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
83 if (!buffer->page) 86 if (!buffer->page)
84 return -ENOMEM; 87 return -ENOMEM;
85 88
89 buffer->event = atomic_read(&sd->s_event);
86 count = ops->show(kobj,attr,buffer->page); 90 count = ops->show(kobj,attr,buffer->page);
87 buffer->needs_read_fill = 0; 91 buffer->needs_read_fill = 0;
88 BUG_ON(count > (ssize_t)PAGE_SIZE); 92 BUG_ON(count > (ssize_t)PAGE_SIZE);
@@ -348,12 +352,84 @@ static int sysfs_release(struct inode * inode, struct file * filp)
348 return 0; 352 return 0;
349} 353}
350 354
355/* Sysfs attribute files are pollable. The idea is that you read
356 * the content and then you use 'poll' or 'select' to wait for
357 * the content to change. When the content changes (assuming the
358 * manager for the kobject supports notification), poll will
359 * return POLLERR|POLLPRI, and select will return the fd whether
360 * it is waiting for read, write, or exceptions.
361 * Once poll/select indicates that the value has changed, you
362 * need to close and re-open the file, as simply seeking and reading
363 * again will not get new data, or reset the state of 'poll'.
364 * Reminder: this only works for attributes which actively support
365 * it, and it is not possible to test an attribute from userspace
366 * to see if it supports poll (Nether 'poll' or 'select' return
367 * an appropriate error code). When in doubt, set a suitable timeout value.
368 */
369static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
370{
371 struct sysfs_buffer * buffer = filp->private_data;
372 struct kobject * kobj = to_kobj(filp->f_dentry->d_parent);
373 struct sysfs_dirent * sd = filp->f_dentry->d_fsdata;
374 int res = 0;
375
376 poll_wait(filp, &kobj->poll, wait);
377
378 if (buffer->event != atomic_read(&sd->s_event)) {
379 res = POLLERR|POLLPRI;
380 buffer->needs_read_fill = 1;
381 }
382
383 return res;
384}
385
386
387static struct dentry *step_down(struct dentry *dir, const char * name)
388{
389 struct dentry * de;
390
391 if (dir == NULL || dir->d_inode == NULL)
392 return NULL;
393
394 mutex_lock(&dir->d_inode->i_mutex);
395 de = lookup_one_len(name, dir, strlen(name));
396 mutex_unlock(&dir->d_inode->i_mutex);
397 dput(dir);
398 if (IS_ERR(de))
399 return NULL;
400 if (de->d_inode == NULL) {
401 dput(de);
402 return NULL;
403 }
404 return de;
405}
406
407void sysfs_notify(struct kobject * k, char *dir, char *attr)
408{
409 struct dentry *de = k->dentry;
410 if (de)
411 dget(de);
412 if (de && dir)
413 de = step_down(de, dir);
414 if (de && attr)
415 de = step_down(de, attr);
416 if (de) {
417 struct sysfs_dirent * sd = de->d_fsdata;
418 if (sd)
419 atomic_inc(&sd->s_event);
420 wake_up_interruptible(&k->poll);
421 dput(de);
422 }
423}
424EXPORT_SYMBOL_GPL(sysfs_notify);
425
351const struct file_operations sysfs_file_operations = { 426const struct file_operations sysfs_file_operations = {
352 .read = sysfs_read_file, 427 .read = sysfs_read_file,
353 .write = sysfs_write_file, 428 .write = sysfs_write_file,
354 .llseek = generic_file_llseek, 429 .llseek = generic_file_llseek,
355 .open = sysfs_open_file, 430 .open = sysfs_open_file,
356 .release = sysfs_release, 431 .release = sysfs_release,
432 .poll = sysfs_poll,
357}; 433};
358 434
359 435
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 32958a7c50e9..3651ffb5ec09 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -11,6 +11,7 @@ extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
11 11
12extern int sysfs_add_file(struct dentry *, const struct attribute *, int); 12extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
13extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); 13extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
14extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
14 15
15extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); 16extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
16extern void sysfs_remove_subdir(struct dentry *); 17extern void sysfs_remove_subdir(struct dentry *);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 6cbbd165c60d..4d191ef39b67 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -870,12 +870,14 @@ xfs_page_state_convert(
870 pgoff_t end_index, last_index, tlast; 870 pgoff_t end_index, last_index, tlast;
871 ssize_t size, len; 871 ssize_t size, len;
872 int flags, err, iomap_valid = 0, uptodate = 1; 872 int flags, err, iomap_valid = 0, uptodate = 1;
873 int page_dirty, count = 0, trylock_flag = 0; 873 int page_dirty, count = 0;
874 int trylock = 0;
874 int all_bh = unmapped; 875 int all_bh = unmapped;
875 876
876 /* wait for other IO threads? */ 877 if (startio) {
877 if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)) 878 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
878 trylock_flag |= BMAPI_TRYLOCK; 879 trylock |= BMAPI_TRYLOCK;
880 }
879 881
880 /* Is this page beyond the end of the file? */ 882 /* Is this page beyond the end of the file? */
881 offset = i_size_read(inode); 883 offset = i_size_read(inode);
@@ -956,15 +958,13 @@ xfs_page_state_convert(
956 958
957 if (buffer_unwritten(bh)) { 959 if (buffer_unwritten(bh)) {
958 type = IOMAP_UNWRITTEN; 960 type = IOMAP_UNWRITTEN;
959 flags = BMAPI_WRITE|BMAPI_IGNSTATE; 961 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
960 } else if (buffer_delay(bh)) { 962 } else if (buffer_delay(bh)) {
961 type = IOMAP_DELAY; 963 type = IOMAP_DELAY;
962 flags = BMAPI_ALLOCATE; 964 flags = BMAPI_ALLOCATE | trylock;
963 if (!startio)
964 flags |= trylock_flag;
965 } else { 965 } else {
966 type = IOMAP_NEW; 966 type = IOMAP_NEW;
967 flags = BMAPI_WRITE|BMAPI_MMAP; 967 flags = BMAPI_WRITE | BMAPI_MMAP;
968 } 968 }
969 969
970 if (!iomap_valid) { 970 if (!iomap_valid) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9fb0312665ca..26fed0756f01 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -182,7 +182,7 @@ free_address(
182{ 182{
183 a_list_t *aentry; 183 a_list_t *aentry;
184 184
185 aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH); 185 aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT);
186 if (likely(aentry)) { 186 if (likely(aentry)) {
187 spin_lock(&as_lock); 187 spin_lock(&as_lock);
188 aentry->next = as_free_head; 188 aentry->next = as_free_head;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ae4c4754ed31..c847416f6d10 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -252,56 +252,60 @@ xfs_file_sendfile_invis(
252STATIC ssize_t 252STATIC ssize_t
253xfs_file_splice_read( 253xfs_file_splice_read(
254 struct file *infilp, 254 struct file *infilp,
255 struct inode *pipe, 255 loff_t *ppos,
256 struct pipe_inode_info *pipe,
256 size_t len, 257 size_t len,
257 unsigned int flags) 258 unsigned int flags)
258{ 259{
259 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); 260 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode);
260 ssize_t rval; 261 ssize_t rval;
261 262
262 VOP_SPLICE_READ(vp, infilp, pipe, len, flags, 0, NULL, rval); 263 VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, 0, NULL, rval);
263 return rval; 264 return rval;
264} 265}
265 266
266STATIC ssize_t 267STATIC ssize_t
267xfs_file_splice_read_invis( 268xfs_file_splice_read_invis(
268 struct file *infilp, 269 struct file *infilp,
269 struct inode *pipe, 270 loff_t *ppos,
271 struct pipe_inode_info *pipe,
270 size_t len, 272 size_t len,
271 unsigned int flags) 273 unsigned int flags)
272{ 274{
273 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode); 275 vnode_t *vp = vn_from_inode(infilp->f_dentry->d_inode);
274 ssize_t rval; 276 ssize_t rval;
275 277
276 VOP_SPLICE_READ(vp, infilp, pipe, len, flags, IO_INVIS, NULL, rval); 278 VOP_SPLICE_READ(vp, infilp, ppos, pipe, len, flags, IO_INVIS, NULL, rval);
277 return rval; 279 return rval;
278} 280}
279 281
280STATIC ssize_t 282STATIC ssize_t
281xfs_file_splice_write( 283xfs_file_splice_write(
282 struct inode *pipe, 284 struct pipe_inode_info *pipe,
283 struct file *outfilp, 285 struct file *outfilp,
286 loff_t *ppos,
284 size_t len, 287 size_t len,
285 unsigned int flags) 288 unsigned int flags)
286{ 289{
287 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); 290 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode);
288 ssize_t rval; 291 ssize_t rval;
289 292
290 VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, 0, NULL, rval); 293 VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, 0, NULL, rval);
291 return rval; 294 return rval;
292} 295}
293 296
294STATIC ssize_t 297STATIC ssize_t
295xfs_file_splice_write_invis( 298xfs_file_splice_write_invis(
296 struct inode *pipe, 299 struct pipe_inode_info *pipe,
297 struct file *outfilp, 300 struct file *outfilp,
301 loff_t *ppos,
298 size_t len, 302 size_t len,
299 unsigned int flags) 303 unsigned int flags)
300{ 304{
301 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode); 305 vnode_t *vp = vn_from_inode(outfilp->f_dentry->d_inode);
302 ssize_t rval; 306 ssize_t rval;
303 307
304 VOP_SPLICE_WRITE(vp, pipe, outfilp, len, flags, IO_INVIS, NULL, rval); 308 VOP_SPLICE_WRITE(vp, pipe, outfilp, ppos, len, flags, IO_INVIS, NULL, rval);
305 return rval; 309 return rval;
306} 310}
307 311
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 149237304fb6..2e2e275c786f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,8 +673,7 @@ xfs_vn_setattr(
673 if (ia_valid & ATTR_ATIME) { 673 if (ia_valid & ATTR_ATIME) {
674 vattr.va_mask |= XFS_AT_ATIME; 674 vattr.va_mask |= XFS_AT_ATIME;
675 vattr.va_atime = attr->ia_atime; 675 vattr.va_atime = attr->ia_atime;
676 if (ia_valid & ATTR_ATIME_SET) 676 inode->i_atime = attr->ia_atime;
677 inode->i_atime = attr->ia_atime;
678 } 677 }
679 if (ia_valid & ATTR_MTIME) { 678 if (ia_valid & ATTR_MTIME) {
680 vattr.va_mask |= XFS_AT_MTIME; 679 vattr.va_mask |= XFS_AT_MTIME;
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 90cd314acbaa..67efe3308980 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -338,7 +338,8 @@ ssize_t
338xfs_splice_read( 338xfs_splice_read(
339 bhv_desc_t *bdp, 339 bhv_desc_t *bdp,
340 struct file *infilp, 340 struct file *infilp,
341 struct inode *pipe, 341 loff_t *ppos,
342 struct pipe_inode_info *pipe,
342 size_t count, 343 size_t count,
343 int flags, 344 int flags,
344 int ioflags, 345 int ioflags,
@@ -360,7 +361,7 @@ xfs_splice_read(
360 int error; 361 int error;
361 362
362 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), 363 error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
363 infilp->f_pos, count, 364 *ppos, count,
364 FILP_DELAY_FLAG(infilp), &locktype); 365 FILP_DELAY_FLAG(infilp), &locktype);
365 if (error) { 366 if (error) {
366 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 367 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -368,8 +369,8 @@ xfs_splice_read(
368 } 369 }
369 } 370 }
370 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore, 371 xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, &ip->i_iocore,
371 pipe, count, infilp->f_pos, ioflags); 372 pipe, count, *ppos, ioflags);
372 ret = generic_file_splice_read(infilp, pipe, count, flags); 373 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
373 if (ret > 0) 374 if (ret > 0)
374 XFS_STATS_ADD(xs_read_bytes, ret); 375 XFS_STATS_ADD(xs_read_bytes, ret);
375 376
@@ -380,8 +381,9 @@ xfs_splice_read(
380ssize_t 381ssize_t
381xfs_splice_write( 382xfs_splice_write(
382 bhv_desc_t *bdp, 383 bhv_desc_t *bdp,
383 struct inode *pipe, 384 struct pipe_inode_info *pipe,
384 struct file *outfilp, 385 struct file *outfilp,
386 loff_t *ppos,
385 size_t count, 387 size_t count,
386 int flags, 388 int flags,
387 int ioflags, 389 int ioflags,
@@ -403,7 +405,7 @@ xfs_splice_write(
403 int error; 405 int error;
404 406
405 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp), 407 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
406 outfilp->f_pos, count, 408 *ppos, count,
407 FILP_DELAY_FLAG(outfilp), &locktype); 409 FILP_DELAY_FLAG(outfilp), &locktype);
408 if (error) { 410 if (error) {
409 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 411 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -411,8 +413,8 @@ xfs_splice_write(
411 } 413 }
412 } 414 }
413 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore, 415 xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
414 pipe, count, outfilp->f_pos, ioflags); 416 pipe, count, *ppos, ioflags);
415 ret = generic_file_splice_write(pipe, outfilp, count, flags); 417 ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
416 if (ret > 0) 418 if (ret > 0)
417 XFS_STATS_ADD(xs_write_bytes, ret); 419 XFS_STATS_ADD(xs_write_bytes, ret);
418 420
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index eaa5659713fb..8f4539952350 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -93,11 +93,11 @@ extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
93extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, 93extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
94 loff_t *, int, size_t, read_actor_t, 94 loff_t *, int, size_t, read_actor_t,
95 void *, struct cred *); 95 void *, struct cred *);
96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, 96extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
97 struct inode *, size_t, int, int, 97 struct pipe_inode_info *, size_t, int, int,
98 struct cred *); 98 struct cred *);
99extern ssize_t xfs_splice_write(struct bhv_desc *, struct inode *, 99extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *,
100 struct file *, size_t, int, int, 100 struct file *, loff_t *, size_t, int, int,
101 struct cred *); 101 struct cred *);
102 102
103#endif /* __XFS_LRW_H__ */ 103#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 6f1c79a28f8b..2a8e16c22353 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -173,11 +173,11 @@ typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
173typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, 173typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
174 loff_t *, int, size_t, read_actor_t, 174 loff_t *, int, size_t, read_actor_t,
175 void *, struct cred *); 175 void *, struct cred *);
176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, 176typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
177 struct inode *, size_t, int, int, 177 struct pipe_inode_info *, size_t, int, int,
178 struct cred *); 178 struct cred *);
179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct inode *, 179typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
180 struct file *, size_t, int, int, 180 struct file *, loff_t *, size_t, int, int,
181 struct cred *); 181 struct cred *);
182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *, 182typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
183 int, unsigned int, void __user *); 183 int, unsigned int, void __user *);
@@ -284,10 +284,10 @@ typedef struct vnodeops {
284 rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr) 284 rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
285#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \ 285#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \
286 rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr) 286 rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
287#define VOP_SPLICE_READ(vp,f,pipe,cnt,fl,iofl,cr,rv) \ 287#define VOP_SPLICE_READ(vp,f,o,pipe,cnt,fl,iofl,cr,rv) \
288 rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr) 288 rv = _VOP_(vop_splice_read, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
289#define VOP_SPLICE_WRITE(vp,f,pipe,cnt,fl,iofl,cr,rv) \ 289#define VOP_SPLICE_WRITE(vp,f,o,pipe,cnt,fl,iofl,cr,rv) \
290 rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,pipe,cnt,fl,iofl,cr) 290 rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
291#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \ 291#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \
292 rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n) 292 rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
293#define VOP_OPEN(vp, cr, rv) \ 293#define VOP_OPEN(vp, cr, rv) \
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 4eeb856183b1..deddbd03c166 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -158,9 +158,10 @@ xfs_ialloc_ag_alloc(
158 */ 158 */
159 agi = XFS_BUF_TO_AGI(agbp); 159 agi = XFS_BUF_TO_AGI(agbp);
160 newino = be32_to_cpu(agi->agi_newino); 160 newino = be32_to_cpu(agi->agi_newino);
161 if(likely(newino != NULLAGINO)) { 161 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
162 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 162 XFS_IALLOC_BLOCKS(args.mp);
163 XFS_IALLOC_BLOCKS(args.mp); 163 if (likely(newino != NULLAGINO &&
164 (args.agbno < be32_to_cpu(agi->agi_length)))) {
164 args.fsbno = XFS_AGB_TO_FSB(args.mp, 165 args.fsbno = XFS_AGB_TO_FSB(args.mp,
165 be32_to_cpu(agi->agi_seqno), args.agbno); 166 be32_to_cpu(agi->agi_seqno), args.agbno);
166 args.type = XFS_ALLOCTYPE_THIS_BNO; 167 args.type = XFS_ALLOCTYPE_THIS_BNO;
@@ -182,8 +183,8 @@ xfs_ialloc_ag_alloc(
182 * Set the alignment for the allocation. 183 * Set the alignment for the allocation.
183 * If stripe alignment is turned on then align at stripe unit 184 * If stripe alignment is turned on then align at stripe unit
184 * boundary. 185 * boundary.
185 * If the cluster size is smaller than a filesystem block 186 * If the cluster size is smaller than a filesystem block
186 * then we're doing I/O for inodes in filesystem block size 187 * then we're doing I/O for inodes in filesystem block size
187 * pieces, so don't need alignment anyway. 188 * pieces, so don't need alignment anyway.
188 */ 189 */
189 isaligned = 0; 190 isaligned = 0;
@@ -192,7 +193,7 @@ xfs_ialloc_ag_alloc(
192 args.alignment = args.mp->m_dalign; 193 args.alignment = args.mp->m_dalign;
193 isaligned = 1; 194 isaligned = 1;
194 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) && 195 } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
195 args.mp->m_sb.sb_inoalignmt >= 196 args.mp->m_sb.sb_inoalignmt >=
196 XFS_B_TO_FSBT(args.mp, 197 XFS_B_TO_FSBT(args.mp,
197 XFS_INODE_CLUSTER_SIZE(args.mp))) 198 XFS_INODE_CLUSTER_SIZE(args.mp)))
198 args.alignment = args.mp->m_sb.sb_inoalignmt; 199 args.alignment = args.mp->m_sb.sb_inoalignmt;
@@ -220,7 +221,7 @@ xfs_ialloc_ag_alloc(
220 if ((error = xfs_alloc_vextent(&args))) 221 if ((error = xfs_alloc_vextent(&args)))
221 return error; 222 return error;
222 } 223 }
223 224
224 /* 225 /*
225 * If stripe alignment is turned on, then try again with cluster 226 * If stripe alignment is turned on, then try again with cluster
226 * alignment. 227 * alignment.
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index bb33113eef9f..b53854325266 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -421,7 +421,10 @@ finish_inode:
421 ip->i_chash = chlnew; 421 ip->i_chash = chlnew;
422 chlnew->chl_ip = ip; 422 chlnew->chl_ip = ip;
423 chlnew->chl_blkno = ip->i_blkno; 423 chlnew->chl_blkno = ip->i_blkno;
424 if (ch->ch_list)
425 ch->ch_list->chl_prev = chlnew;
424 chlnew->chl_next = ch->ch_list; 426 chlnew->chl_next = ch->ch_list;
427 chlnew->chl_prev = NULL;
425 ch->ch_list = chlnew; 428 ch->ch_list = chlnew;
426 chlnew = NULL; 429 chlnew = NULL;
427 } 430 }
@@ -723,23 +726,15 @@ xfs_iextract(
723 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip); 726 ASSERT(ip->i_cnext == ip && ip->i_cprev == ip);
724 ASSERT(ip->i_chash != NULL); 727 ASSERT(ip->i_chash != NULL);
725 chm=NULL; 728 chm=NULL;
726 for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { 729 chl = ip->i_chash;
727 if (chl->chl_blkno == ip->i_blkno) { 730 if (chl->chl_prev)
728 if (chm == NULL) { 731 chl->chl_prev->chl_next = chl->chl_next;
729 /* first item on the list */ 732 else
730 ch->ch_list = chl->chl_next; 733 ch->ch_list = chl->chl_next;
731 } else { 734 if (chl->chl_next)
732 chm->chl_next = chl->chl_next; 735 chl->chl_next->chl_prev = chl->chl_prev;
733 } 736 kmem_zone_free(xfs_chashlist_zone, chl);
734 kmem_zone_free(xfs_chashlist_zone, chl); 737 } else {
735 break;
736 } else {
737 ASSERT(chl->chl_ip != ip);
738 chm = chl;
739 }
740 }
741 ASSERT_ALWAYS(chl != NULL);
742 } else {
743 /* delete one inode from a non-empty list */ 738 /* delete one inode from a non-empty list */
744 iq = ip->i_cnext; 739 iq = ip->i_cnext;
745 iq->i_cprev = ip->i_cprev; 740 iq->i_cprev = ip->i_cprev;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 48146bdc6bdd..94b60dd03801 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2732,16 +2732,29 @@ xfs_iunpin(
2732 ASSERT(atomic_read(&ip->i_pincount) > 0); 2732 ASSERT(atomic_read(&ip->i_pincount) > 0);
2733 2733
2734 if (atomic_dec_and_test(&ip->i_pincount)) { 2734 if (atomic_dec_and_test(&ip->i_pincount)) {
2735 vnode_t *vp = XFS_ITOV_NULL(ip); 2735 /*
2736 * If the inode is currently being reclaimed, the
2737 * linux inode _and_ the xfs vnode may have been
2738 * freed so we cannot reference either of them safely.
2739 * Hence we should not try to do anything to them
2740 * if the xfs inode is currently in the reclaim
2741 * path.
2742 *
2743 * However, we still need to issue the unpin wakeup
2744 * call as the inode reclaim may be blocked waiting for
2745 * the inode to become unpinned.
2746 */
2747 if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
2748 vnode_t *vp = XFS_ITOV_NULL(ip);
2736 2749
2737 /* make sync come back and flush this inode */ 2750 /* make sync come back and flush this inode */
2738 if (vp) { 2751 if (vp) {
2739 struct inode *inode = vn_to_inode(vp); 2752 struct inode *inode = vn_to_inode(vp);
2740 2753
2741 if (!(inode->i_state & I_NEW)) 2754 if (!(inode->i_state & I_NEW))
2742 mark_inode_dirty_sync(inode); 2755 mark_inode_dirty_sync(inode);
2756 }
2743 } 2757 }
2744
2745 wake_up(&ip->i_ipin_wait); 2758 wake_up(&ip->i_ipin_wait);
2746 } 2759 }
2747} 2760}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 39ef9c36ea55..3b544db1790b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -189,6 +189,7 @@ typedef struct xfs_ihash {
189 */ 189 */
190typedef struct xfs_chashlist { 190typedef struct xfs_chashlist {
191 struct xfs_chashlist *chl_next; 191 struct xfs_chashlist *chl_next;
192 struct xfs_chashlist *chl_prev;
192 struct xfs_inode *chl_ip; 193 struct xfs_inode *chl_ip;
193 xfs_daddr_t chl_blkno; /* starting block number of 194 xfs_daddr_t chl_blkno; /* starting block number of
194 * the cluster */ 195 * the cluster */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 049fabb7f7e0..c0b1c2906880 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -270,7 +270,7 @@ xfs_mount_validate_sb(
270 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 270 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
271 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 271 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
272 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 272 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
273 (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) { 273 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
274 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); 274 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
275 return XFS_ERROR(EFSCORRUPTED); 275 return XFS_ERROR(EFSCORRUPTED);
276 } 276 }