aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/lockd/clntlock.c112
-rw-r--r--fs/lockd/clntproc.c317
-rw-r--r--fs/lockd/host.c12
-rw-r--r--fs/lockd/mon.c11
-rw-r--r--fs/lockd/svc4proc.c157
-rw-r--r--fs/lockd/svclock.c349
-rw-r--r--fs/lockd/svcproc.c151
-rw-r--r--fs/lockd/svcshare.c4
-rw-r--r--fs/lockd/svcsubs.c7
-rw-r--r--fs/lockd/xdr.c17
-rw-r--r--fs/lockd/xdr4.c21
-rw-r--r--fs/locks.c106
-rw-r--r--fs/namespace.c38
-rw-r--r--fs/nfs/callback.c20
-rw-r--r--fs/nfs/callback_xdr.c28
-rw-r--r--fs/nfs/delegation.c19
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c114
-rw-r--r--fs/nfs/direct.c949
-rw-r--r--fs/nfs/file.c49
-rw-r--r--fs/nfs/idmap.c47
-rw-r--r--fs/nfs/inode.c229
-rw-r--r--fs/nfs/iostat.h164
-rw-r--r--fs/nfs/mount_clnt.c17
-rw-r--r--fs/nfs/nfs2xdr.c4
-rw-r--r--fs/nfs/nfs3acl.c16
-rw-r--r--fs/nfs/nfs3proc.c246
-rw-r--r--fs/nfs/nfs3xdr.c6
-rw-r--r--fs/nfs/nfs4proc.c180
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/pagelist.c16
-rw-r--r--fs/nfs/proc.c156
-rw-r--r--fs/nfs/read.c102
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/nfs/write.c288
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/proc/base.c39
-rw-r--r--include/linux/fs.h7
-rw-r--r--include/linux/lockd/lockd.h27
-rw-r--r--include/linux/lockd/share.h2
-rw-r--r--include/linux/lockd/xdr.h1
-rw-r--r--include/linux/nfs_fs.h102
-rw-r--r--include/linux/nfs_fs_i.h8
-rw-r--r--include/linux/nfs_fs_sb.h6
-rw-r--r--include/linux/nfs_xdr.h5
-rw-r--r--include/linux/sunrpc/clnt.h20
-rw-r--r--include/linux/sunrpc/gss_krb5.h2
-rw-r--r--include/linux/sunrpc/metrics.h77
-rw-r--r--include/linux/sunrpc/rpc_pipe_fs.h2
-rw-r--r--include/linux/sunrpc/sched.h9
-rw-r--r--include/linux/sunrpc/xprt.h13
-rw-r--r--net/sunrpc/auth.c16
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c15
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c4
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c17
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c6
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_seal.c5
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_unseal.c4
-rw-r--r--net/sunrpc/clnt.c53
-rw-r--r--net/sunrpc/pmap_clnt.c41
-rw-r--r--net/sunrpc/rpc_pipe.c31
-rw-r--r--net/sunrpc/sched.c12
-rw-r--r--net/sunrpc/stats.c115
-rw-r--r--net/sunrpc/xprt.c29
-rw-r--r--net/sunrpc/xprtsock.c49
69 files changed, 2825 insertions, 1869 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index c8d0a209120c..e207be68d4ca 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1555,6 +1555,7 @@ config RPCSEC_GSS_SPKM3
1555 select CRYPTO 1555 select CRYPTO
1556 select CRYPTO_MD5 1556 select CRYPTO_MD5
1557 select CRYPTO_DES 1557 select CRYPTO_DES
1558 select CRYPTO_CAST5
1558 help 1559 help
1559 Provides for secure RPC calls by means of a gss-api 1560 Provides for secure RPC calls by means of a gss-api
1560 mechanism based on the SPKM3 public-key mechanism. 1561 mechanism based on the SPKM3 public-key mechanism.
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index da6354baa0b8..bce744468708 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -44,32 +44,25 @@ static LIST_HEAD(nlm_blocked);
44/* 44/*
45 * Queue up a lock for blocking so that the GRANTED request can see it 45 * Queue up a lock for blocking so that the GRANTED request can see it
46 */ 46 */
47int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl) 47struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
48{ 48{
49 struct nlm_wait *block; 49 struct nlm_wait *block;
50 50
51 BUG_ON(req->a_block != NULL);
52 block = kmalloc(sizeof(*block), GFP_KERNEL); 51 block = kmalloc(sizeof(*block), GFP_KERNEL);
53 if (block == NULL) 52 if (block != NULL) {
54 return -ENOMEM; 53 block->b_host = host;
55 block->b_host = host; 54 block->b_lock = fl;
56 block->b_lock = fl; 55 init_waitqueue_head(&block->b_wait);
57 init_waitqueue_head(&block->b_wait); 56 block->b_status = NLM_LCK_BLOCKED;
58 block->b_status = NLM_LCK_BLOCKED; 57 list_add(&block->b_list, &nlm_blocked);
59 58 }
60 list_add(&block->b_list, &nlm_blocked); 59 return block;
61 req->a_block = block;
62
63 return 0;
64} 60}
65 61
66void nlmclnt_finish_block(struct nlm_rqst *req) 62void nlmclnt_finish_block(struct nlm_wait *block)
67{ 63{
68 struct nlm_wait *block = req->a_block;
69
70 if (block == NULL) 64 if (block == NULL)
71 return; 65 return;
72 req->a_block = NULL;
73 list_del(&block->b_list); 66 list_del(&block->b_list);
74 kfree(block); 67 kfree(block);
75} 68}
@@ -77,15 +70,14 @@ void nlmclnt_finish_block(struct nlm_rqst *req)
77/* 70/*
78 * Block on a lock 71 * Block on a lock
79 */ 72 */
80long nlmclnt_block(struct nlm_rqst *req, long timeout) 73int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
81{ 74{
82 struct nlm_wait *block = req->a_block;
83 long ret; 75 long ret;
84 76
85 /* A borken server might ask us to block even if we didn't 77 /* A borken server might ask us to block even if we didn't
86 * request it. Just say no! 78 * request it. Just say no!
87 */ 79 */
88 if (!req->a_args.block) 80 if (block == NULL)
89 return -EAGAIN; 81 return -EAGAIN;
90 82
91 /* Go to sleep waiting for GRANT callback. Some servers seem 83 /* Go to sleep waiting for GRANT callback. Some servers seem
@@ -99,13 +91,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
99 ret = wait_event_interruptible_timeout(block->b_wait, 91 ret = wait_event_interruptible_timeout(block->b_wait,
100 block->b_status != NLM_LCK_BLOCKED, 92 block->b_status != NLM_LCK_BLOCKED,
101 timeout); 93 timeout);
102 94 if (ret < 0)
103 if (block->b_status != NLM_LCK_BLOCKED) { 95 return -ERESTARTSYS;
104 req->a_res.status = block->b_status; 96 req->a_res.status = block->b_status;
105 block->b_status = NLM_LCK_BLOCKED; 97 return 0;
106 }
107
108 return ret;
109} 98}
110 99
111/* 100/*
@@ -125,7 +114,15 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
125 list_for_each_entry(block, &nlm_blocked, b_list) { 114 list_for_each_entry(block, &nlm_blocked, b_list) {
126 struct file_lock *fl_blocked = block->b_lock; 115 struct file_lock *fl_blocked = block->b_lock;
127 116
128 if (!nlm_compare_locks(fl_blocked, fl)) 117 if (fl_blocked->fl_start != fl->fl_start)
118 continue;
119 if (fl_blocked->fl_end != fl->fl_end)
120 continue;
121 /*
122 * Careful! The NLM server will return the 32-bit "pid" that
123 * we put on the wire: in this case the lockowner "pid".
124 */
125 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
129 continue; 126 continue;
130 if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) 127 if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
131 continue; 128 continue;
@@ -147,34 +144,6 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
147 */ 144 */
148 145
149/* 146/*
150 * Mark the locks for reclaiming.
151 * FIXME: In 2.5 we don't want to iterate through any global file_lock_list.
152 * Maintain NLM lock reclaiming lists in the nlm_host instead.
153 */
154static
155void nlmclnt_mark_reclaim(struct nlm_host *host)
156{
157 struct file_lock *fl;
158 struct inode *inode;
159 struct list_head *tmp;
160
161 list_for_each(tmp, &file_lock_list) {
162 fl = list_entry(tmp, struct file_lock, fl_link);
163
164 inode = fl->fl_file->f_dentry->d_inode;
165 if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
166 continue;
167 if (fl->fl_u.nfs_fl.owner == NULL)
168 continue;
169 if (fl->fl_u.nfs_fl.owner->host != host)
170 continue;
171 if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED))
172 continue;
173 fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM;
174 }
175}
176
177/*
178 * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number, 147 * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
179 * that we mark locks for reclaiming, and that we bump the pseudo NSM state. 148 * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
180 */ 149 */
@@ -186,7 +155,12 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
186 host->h_state++; 155 host->h_state++;
187 host->h_nextrebind = 0; 156 host->h_nextrebind = 0;
188 nlm_rebind_host(host); 157 nlm_rebind_host(host);
189 nlmclnt_mark_reclaim(host); 158
159 /*
160 * Mark the locks for reclaiming.
161 */
162 list_splice_init(&host->h_granted, &host->h_reclaim);
163
190 dprintk("NLM: reclaiming locks for host %s", host->h_name); 164 dprintk("NLM: reclaiming locks for host %s", host->h_name);
191} 165}
192 166
@@ -215,9 +189,7 @@ reclaimer(void *ptr)
215{ 189{
216 struct nlm_host *host = (struct nlm_host *) ptr; 190 struct nlm_host *host = (struct nlm_host *) ptr;
217 struct nlm_wait *block; 191 struct nlm_wait *block;
218 struct list_head *tmp; 192 struct file_lock *fl, *next;
219 struct file_lock *fl;
220 struct inode *inode;
221 193
222 daemonize("%s-reclaim", host->h_name); 194 daemonize("%s-reclaim", host->h_name);
223 allow_signal(SIGKILL); 195 allow_signal(SIGKILL);
@@ -229,23 +201,13 @@ reclaimer(void *ptr)
229 201
230 /* First, reclaim all locks that have been marked. */ 202 /* First, reclaim all locks that have been marked. */
231restart: 203restart:
232 list_for_each(tmp, &file_lock_list) { 204 list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
233 fl = list_entry(tmp, struct file_lock, fl_link); 205 list_del_init(&fl->fl_u.nfs_fl.list);
234 206
235 inode = fl->fl_file->f_dentry->d_inode;
236 if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
237 continue;
238 if (fl->fl_u.nfs_fl.owner == NULL)
239 continue;
240 if (fl->fl_u.nfs_fl.owner->host != host)
241 continue;
242 if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM))
243 continue;
244
245 fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM;
246 nlmclnt_reclaim(host, fl);
247 if (signalled()) 207 if (signalled())
248 break; 208 continue;
209 if (nlmclnt_reclaim(host, fl) == 0)
210 list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
249 goto restart; 211 goto restart;
250 } 212 }
251 213
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 970b6a6aa337..f96e38155b5c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -132,59 +132,18 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
132 memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh)); 132 memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
133 lock->caller = system_utsname.nodename; 133 lock->caller = system_utsname.nodename;
134 lock->oh.data = req->a_owner; 134 lock->oh.data = req->a_owner;
135 lock->oh.len = sprintf(req->a_owner, "%d@%s", 135 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
136 current->pid, system_utsname.nodename); 136 (unsigned int)fl->fl_u.nfs_fl.owner->pid,
137 locks_copy_lock(&lock->fl, fl); 137 system_utsname.nodename);
138 lock->svid = fl->fl_u.nfs_fl.owner->pid;
139 lock->fl.fl_start = fl->fl_start;
140 lock->fl.fl_end = fl->fl_end;
141 lock->fl.fl_type = fl->fl_type;
138} 142}
139 143
140static void nlmclnt_release_lockargs(struct nlm_rqst *req) 144static void nlmclnt_release_lockargs(struct nlm_rqst *req)
141{ 145{
142 struct file_lock *fl = &req->a_args.lock.fl; 146 BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
143
144 if (fl->fl_ops && fl->fl_ops->fl_release_private)
145 fl->fl_ops->fl_release_private(fl);
146}
147
148/*
149 * Initialize arguments for GRANTED call. The nlm_rqst structure
150 * has been cleared already.
151 */
152int
153nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
154{
155 locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
156 memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
157 call->a_args.lock.caller = system_utsname.nodename;
158 call->a_args.lock.oh.len = lock->oh.len;
159
160 /* set default data area */
161 call->a_args.lock.oh.data = call->a_owner;
162
163 if (lock->oh.len > NLMCLNT_OHSIZE) {
164 void *data = kmalloc(lock->oh.len, GFP_KERNEL);
165 if (!data) {
166 nlmclnt_freegrantargs(call);
167 return 0;
168 }
169 call->a_args.lock.oh.data = (u8 *) data;
170 }
171
172 memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
173 return 1;
174}
175
176void
177nlmclnt_freegrantargs(struct nlm_rqst *call)
178{
179 struct file_lock *fl = &call->a_args.lock.fl;
180 /*
181 * Check whether we allocated memory for the owner.
182 */
183 if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
184 kfree(call->a_args.lock.oh.data);
185 }
186 if (fl->fl_ops && fl->fl_ops->fl_release_private)
187 fl->fl_ops->fl_release_private(fl);
188} 147}
189 148
190/* 149/*
@@ -193,9 +152,8 @@ nlmclnt_freegrantargs(struct nlm_rqst *call)
193int 152int
194nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) 153nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
195{ 154{
196 struct nfs_server *nfssrv = NFS_SERVER(inode);
197 struct nlm_host *host; 155 struct nlm_host *host;
198 struct nlm_rqst reqst, *call = &reqst; 156 struct nlm_rqst *call;
199 sigset_t oldset; 157 sigset_t oldset;
200 unsigned long flags; 158 unsigned long flags;
201 int status, proto, vers; 159 int status, proto, vers;
@@ -209,23 +167,17 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
209 /* Retrieve transport protocol from NFS client */ 167 /* Retrieve transport protocol from NFS client */
210 proto = NFS_CLIENT(inode)->cl_xprt->prot; 168 proto = NFS_CLIENT(inode)->cl_xprt->prot;
211 169
212 if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers))) 170 host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
171 if (host == NULL)
213 return -ENOLCK; 172 return -ENOLCK;
214 173
215 /* Create RPC client handle if not there, and copy soft 174 call = nlm_alloc_call(host);
216 * and intr flags from NFS client. */ 175 if (call == NULL)
217 if (host->h_rpcclnt == NULL) { 176 return -ENOMEM;
218 struct rpc_clnt *clnt;
219 177
220 /* Bind an rpc client to this host handle (does not 178 nlmclnt_locks_init_private(fl, host);
221 * perform a portmapper lookup) */ 179 /* Set up the argument struct */
222 if (!(clnt = nlm_bind_host(host))) { 180 nlmclnt_setlockargs(call, fl);
223 status = -ENOLCK;
224 goto done;
225 }
226 clnt->cl_softrtry = nfssrv->client->cl_softrtry;
227 clnt->cl_intr = nfssrv->client->cl_intr;
228 }
229 181
230 /* Keep the old signal mask */ 182 /* Keep the old signal mask */
231 spin_lock_irqsave(&current->sighand->siglock, flags); 183 spin_lock_irqsave(&current->sighand->siglock, flags);
@@ -238,26 +190,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
238 && (current->flags & PF_EXITING)) { 190 && (current->flags & PF_EXITING)) {
239 sigfillset(&current->blocked); /* Mask all signals */ 191 sigfillset(&current->blocked); /* Mask all signals */
240 recalc_sigpending(); 192 recalc_sigpending();
241 spin_unlock_irqrestore(&current->sighand->siglock, flags);
242 193
243 call = nlmclnt_alloc_call();
244 if (!call) {
245 status = -ENOMEM;
246 goto out_restore;
247 }
248 call->a_flags = RPC_TASK_ASYNC; 194 call->a_flags = RPC_TASK_ASYNC;
249 } else {
250 spin_unlock_irqrestore(&current->sighand->siglock, flags);
251 memset(call, 0, sizeof(*call));
252 locks_init_lock(&call->a_args.lock.fl);
253 locks_init_lock(&call->a_res.lock.fl);
254 } 195 }
255 call->a_host = host; 196 spin_unlock_irqrestore(&current->sighand->siglock, flags);
256
257 nlmclnt_locks_init_private(fl, host);
258
259 /* Set up the argument struct */
260 nlmclnt_setlockargs(call, fl);
261 197
262 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { 198 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
263 if (fl->fl_type != F_UNLCK) { 199 if (fl->fl_type != F_UNLCK) {
@@ -270,41 +206,58 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
270 else 206 else
271 status = -EINVAL; 207 status = -EINVAL;
272 208
273 out_restore: 209 fl->fl_ops->fl_release_private(fl);
210 fl->fl_ops = NULL;
211
274 spin_lock_irqsave(&current->sighand->siglock, flags); 212 spin_lock_irqsave(&current->sighand->siglock, flags);
275 current->blocked = oldset; 213 current->blocked = oldset;
276 recalc_sigpending(); 214 recalc_sigpending();
277 spin_unlock_irqrestore(&current->sighand->siglock, flags); 215 spin_unlock_irqrestore(&current->sighand->siglock, flags);
278 216
279done:
280 dprintk("lockd: clnt proc returns %d\n", status); 217 dprintk("lockd: clnt proc returns %d\n", status);
281 nlm_release_host(host);
282 return status; 218 return status;
283} 219}
284EXPORT_SYMBOL(nlmclnt_proc); 220EXPORT_SYMBOL(nlmclnt_proc);
285 221
286/* 222/*
287 * Allocate an NLM RPC call struct 223 * Allocate an NLM RPC call struct
224 *
225 * Note: the caller must hold a reference to host. In case of failure,
226 * this reference will be released.
288 */ 227 */
289struct nlm_rqst * 228struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
290nlmclnt_alloc_call(void)
291{ 229{
292 struct nlm_rqst *call; 230 struct nlm_rqst *call;
293 231
294 while (!signalled()) { 232 for(;;) {
295 call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL); 233 call = kzalloc(sizeof(*call), GFP_KERNEL);
296 if (call) { 234 if (call != NULL) {
297 memset(call, 0, sizeof(*call));
298 locks_init_lock(&call->a_args.lock.fl); 235 locks_init_lock(&call->a_args.lock.fl);
299 locks_init_lock(&call->a_res.lock.fl); 236 locks_init_lock(&call->a_res.lock.fl);
237 call->a_host = host;
300 return call; 238 return call;
301 } 239 }
302 printk("nlmclnt_alloc_call: failed, waiting for memory\n"); 240 if (signalled())
241 break;
242 printk("nlm_alloc_call: failed, waiting for memory\n");
303 schedule_timeout_interruptible(5*HZ); 243 schedule_timeout_interruptible(5*HZ);
304 } 244 }
245 nlm_release_host(host);
305 return NULL; 246 return NULL;
306} 247}
307 248
249void nlm_release_call(struct nlm_rqst *call)
250{
251 nlm_release_host(call->a_host);
252 nlmclnt_release_lockargs(call);
253 kfree(call);
254}
255
256static void nlmclnt_rpc_release(void *data)
257{
258 return nlm_release_call(data);
259}
260
308static int nlm_wait_on_grace(wait_queue_head_t *queue) 261static int nlm_wait_on_grace(wait_queue_head_t *queue)
309{ 262{
310 DEFINE_WAIT(wait); 263 DEFINE_WAIT(wait);
@@ -401,57 +354,45 @@ in_grace_period:
401/* 354/*
402 * Generic NLM call, async version. 355 * Generic NLM call, async version.
403 */ 356 */
404int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) 357static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
405{ 358{
406 struct nlm_host *host = req->a_host; 359 struct nlm_host *host = req->a_host;
407 struct rpc_clnt *clnt; 360 struct rpc_clnt *clnt;
408 struct rpc_message msg = { 361 int status = -ENOLCK;
409 .rpc_argp = &req->a_args,
410 .rpc_resp = &req->a_res,
411 };
412 int status;
413 362
414 dprintk("lockd: call procedure %d on %s (async)\n", 363 dprintk("lockd: call procedure %d on %s (async)\n",
415 (int)proc, host->h_name); 364 (int)proc, host->h_name);
416 365
417 /* If we have no RPC client yet, create one. */ 366 /* If we have no RPC client yet, create one. */
418 if ((clnt = nlm_bind_host(host)) == NULL) 367 clnt = nlm_bind_host(host);
419 return -ENOLCK; 368 if (clnt == NULL)
420 msg.rpc_proc = &clnt->cl_procinfo[proc]; 369 goto out_err;
370 msg->rpc_proc = &clnt->cl_procinfo[proc];
421 371
422 /* bootstrap and kick off the async RPC call */ 372 /* bootstrap and kick off the async RPC call */
423 status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); 373 status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
424 374 if (status == 0)
375 return 0;
376out_err:
377 nlm_release_call(req);
425 return status; 378 return status;
426} 379}
427 380
428static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) 381int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
429{ 382{
430 struct nlm_host *host = req->a_host;
431 struct rpc_clnt *clnt;
432 struct nlm_args *argp = &req->a_args;
433 struct nlm_res *resp = &req->a_res;
434 struct rpc_message msg = { 383 struct rpc_message msg = {
435 .rpc_argp = argp, 384 .rpc_argp = &req->a_args,
436 .rpc_resp = resp, 385 .rpc_resp = &req->a_res,
437 }; 386 };
438 int status; 387 return __nlm_async_call(req, proc, &msg, tk_ops);
439 388}
440 dprintk("lockd: call procedure %d on %s (async)\n",
441 (int)proc, host->h_name);
442
443 /* If we have no RPC client yet, create one. */
444 if ((clnt = nlm_bind_host(host)) == NULL)
445 return -ENOLCK;
446 msg.rpc_proc = &clnt->cl_procinfo[proc];
447 389
448 /* Increment host refcount */ 390int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
449 nlm_get_host(host); 391{
450 /* bootstrap and kick off the async RPC call */ 392 struct rpc_message msg = {
451 status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); 393 .rpc_argp = &req->a_res,
452 if (status < 0) 394 };
453 nlm_release_host(host); 395 return __nlm_async_call(req, proc, &msg, tk_ops);
454 return status;
455} 396}
456 397
457/* 398/*
@@ -463,36 +404,41 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
463 int status; 404 int status;
464 405
465 status = nlmclnt_call(req, NLMPROC_TEST); 406 status = nlmclnt_call(req, NLMPROC_TEST);
466 nlmclnt_release_lockargs(req);
467 if (status < 0) 407 if (status < 0)
468 return status; 408 goto out;
469 409
470 status = req->a_res.status; 410 switch (req->a_res.status) {
471 if (status == NLM_LCK_GRANTED) { 411 case NLM_LCK_GRANTED:
472 fl->fl_type = F_UNLCK; 412 fl->fl_type = F_UNLCK;
473 } if (status == NLM_LCK_DENIED) { 413 break;
474 /* 414 case NLM_LCK_DENIED:
475 * Report the conflicting lock back to the application. 415 /*
476 */ 416 * Report the conflicting lock back to the application.
477 locks_copy_lock(fl, &req->a_res.lock.fl); 417 */
478 fl->fl_pid = 0; 418 fl->fl_start = req->a_res.lock.fl.fl_start;
479 } else { 419 fl->fl_end = req->a_res.lock.fl.fl_start;
480 return nlm_stat_to_errno(req->a_res.status); 420 fl->fl_type = req->a_res.lock.fl.fl_type;
421 fl->fl_pid = 0;
422 break;
423 default:
424 status = nlm_stat_to_errno(req->a_res.status);
481 } 425 }
482 426out:
483 return 0; 427 nlm_release_call(req);
428 return status;
484} 429}
485 430
486static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) 431static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
487{ 432{
488 memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl)); 433 new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
489 nlm_get_lockowner(new->fl_u.nfs_fl.owner); 434 new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
435 list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
490} 436}
491 437
492static void nlmclnt_locks_release_private(struct file_lock *fl) 438static void nlmclnt_locks_release_private(struct file_lock *fl)
493{ 439{
440 list_del(&fl->fl_u.nfs_fl.list);
494 nlm_put_lockowner(fl->fl_u.nfs_fl.owner); 441 nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
495 fl->fl_ops = NULL;
496} 442}
497 443
498static struct file_lock_operations nlmclnt_lock_ops = { 444static struct file_lock_operations nlmclnt_lock_ops = {
@@ -504,8 +450,8 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
504{ 450{
505 BUG_ON(fl->fl_ops != NULL); 451 BUG_ON(fl->fl_ops != NULL);
506 fl->fl_u.nfs_fl.state = 0; 452 fl->fl_u.nfs_fl.state = 0;
507 fl->fl_u.nfs_fl.flags = 0;
508 fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner); 453 fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
454 INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
509 fl->fl_ops = &nlmclnt_lock_ops; 455 fl->fl_ops = &nlmclnt_lock_ops;
510} 456}
511 457
@@ -552,57 +498,52 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
552{ 498{
553 struct nlm_host *host = req->a_host; 499 struct nlm_host *host = req->a_host;
554 struct nlm_res *resp = &req->a_res; 500 struct nlm_res *resp = &req->a_res;
555 long timeout; 501 struct nlm_wait *block = NULL;
556 int status; 502 int status = -ENOLCK;
557 503
558 if (!host->h_monitored && nsm_monitor(host) < 0) { 504 if (!host->h_monitored && nsm_monitor(host) < 0) {
559 printk(KERN_NOTICE "lockd: failed to monitor %s\n", 505 printk(KERN_NOTICE "lockd: failed to monitor %s\n",
560 host->h_name); 506 host->h_name);
561 status = -ENOLCK;
562 goto out; 507 goto out;
563 } 508 }
564 509
565 if (req->a_args.block) { 510 block = nlmclnt_prepare_block(host, fl);
566 status = nlmclnt_prepare_block(req, host, fl);
567 if (status < 0)
568 goto out;
569 }
570 for(;;) { 511 for(;;) {
571 status = nlmclnt_call(req, NLMPROC_LOCK); 512 status = nlmclnt_call(req, NLMPROC_LOCK);
572 if (status < 0) 513 if (status < 0)
573 goto out_unblock; 514 goto out_unblock;
574 if (resp->status != NLM_LCK_BLOCKED) 515 if (!req->a_args.block)
575 break; 516 break;
576 /* Wait on an NLM blocking lock */
577 timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
578 /* Did a reclaimer thread notify us of a server reboot? */ 517 /* Did a reclaimer thread notify us of a server reboot? */
579 if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) 518 if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD)
580 continue; 519 continue;
581 if (resp->status != NLM_LCK_BLOCKED) 520 if (resp->status != NLM_LCK_BLOCKED)
582 break; 521 break;
583 if (timeout >= 0) 522 /* Wait on an NLM blocking lock */
584 continue; 523 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
585 /* We were interrupted. Send a CANCEL request to the server 524 /* if we were interrupted. Send a CANCEL request to the server
586 * and exit 525 * and exit
587 */ 526 */
588 status = (int)timeout; 527 if (status < 0)
589 goto out_unblock; 528 goto out_unblock;
529 if (resp->status != NLM_LCK_BLOCKED)
530 break;
590 } 531 }
591 532
592 if (resp->status == NLM_LCK_GRANTED) { 533 if (resp->status == NLM_LCK_GRANTED) {
593 fl->fl_u.nfs_fl.state = host->h_state; 534 fl->fl_u.nfs_fl.state = host->h_state;
594 fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED;
595 fl->fl_flags |= FL_SLEEP; 535 fl->fl_flags |= FL_SLEEP;
536 /* Ensure the resulting lock will get added to granted list */
596 do_vfs_lock(fl); 537 do_vfs_lock(fl);
597 } 538 }
598 status = nlm_stat_to_errno(resp->status); 539 status = nlm_stat_to_errno(resp->status);
599out_unblock: 540out_unblock:
600 nlmclnt_finish_block(req); 541 nlmclnt_finish_block(block);
601 /* Cancel the blocked request if it is still pending */ 542 /* Cancel the blocked request if it is still pending */
602 if (resp->status == NLM_LCK_BLOCKED) 543 if (resp->status == NLM_LCK_BLOCKED)
603 nlmclnt_cancel(host, req->a_args.block, fl); 544 nlmclnt_cancel(host, req->a_args.block, fl);
604out: 545out:
605 nlmclnt_release_lockargs(req); 546 nlm_release_call(req);
606 return status; 547 return status;
607} 548}
608 549
@@ -658,10 +599,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
658 struct nlm_res *resp = &req->a_res; 599 struct nlm_res *resp = &req->a_res;
659 int status; 600 int status;
660 601
661 /* Clean the GRANTED flag now so the lock doesn't get
662 * reclaimed while we're stuck in the unlock call. */
663 fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED;
664
665 /* 602 /*
666 * Note: the server is supposed to either grant us the unlock 603 * Note: the server is supposed to either grant us the unlock
667 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either 604 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
@@ -669,32 +606,24 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
669 */ 606 */
670 do_vfs_lock(fl); 607 do_vfs_lock(fl);
671 608
672 if (req->a_flags & RPC_TASK_ASYNC) { 609 if (req->a_flags & RPC_TASK_ASYNC)
673 status = nlmclnt_async_call(req, NLMPROC_UNLOCK, 610 return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
674 &nlmclnt_unlock_ops);
675 /* Hrmf... Do the unlock early since locks_remove_posix()
676 * really expects us to free the lock synchronously */
677 if (status < 0) {
678 nlmclnt_release_lockargs(req);
679 kfree(req);
680 }
681 return status;
682 }
683 611
684 status = nlmclnt_call(req, NLMPROC_UNLOCK); 612 status = nlmclnt_call(req, NLMPROC_UNLOCK);
685 nlmclnt_release_lockargs(req);
686 if (status < 0) 613 if (status < 0)
687 return status; 614 goto out;
688 615
616 status = 0;
689 if (resp->status == NLM_LCK_GRANTED) 617 if (resp->status == NLM_LCK_GRANTED)
690 return 0; 618 goto out;
691 619
692 if (resp->status != NLM_LCK_DENIED_NOLOCKS) 620 if (resp->status != NLM_LCK_DENIED_NOLOCKS)
693 printk("lockd: unexpected unlock status: %d\n", resp->status); 621 printk("lockd: unexpected unlock status: %d\n", resp->status);
694
695 /* What to do now? I'm out of my depth... */ 622 /* What to do now? I'm out of my depth... */
696 623 status = -ENOLCK;
697 return -ENOLCK; 624out:
625 nlm_release_call(req);
626 return status;
698} 627}
699 628
700static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) 629static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
@@ -716,9 +645,6 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
716 if (status != NLM_LCK_GRANTED) 645 if (status != NLM_LCK_GRANTED)
717 printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status); 646 printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status);
718die: 647die:
719 nlm_release_host(req->a_host);
720 nlmclnt_release_lockargs(req);
721 kfree(req);
722 return; 648 return;
723 retry_rebind: 649 retry_rebind:
724 nlm_rebind_host(req->a_host); 650 nlm_rebind_host(req->a_host);
@@ -728,6 +654,7 @@ die:
728 654
729static const struct rpc_call_ops nlmclnt_unlock_ops = { 655static const struct rpc_call_ops nlmclnt_unlock_ops = {
730 .rpc_call_done = nlmclnt_unlock_callback, 656 .rpc_call_done = nlmclnt_unlock_callback,
657 .rpc_release = nlmclnt_rpc_release,
731}; 658};
732 659
733/* 660/*
@@ -749,20 +676,15 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
749 recalc_sigpending(); 676 recalc_sigpending();
750 spin_unlock_irqrestore(&current->sighand->siglock, flags); 677 spin_unlock_irqrestore(&current->sighand->siglock, flags);
751 678
752 req = nlmclnt_alloc_call(); 679 req = nlm_alloc_call(nlm_get_host(host));
753 if (!req) 680 if (!req)
754 return -ENOMEM; 681 return -ENOMEM;
755 req->a_host = host;
756 req->a_flags = RPC_TASK_ASYNC; 682 req->a_flags = RPC_TASK_ASYNC;
757 683
758 nlmclnt_setlockargs(req, fl); 684 nlmclnt_setlockargs(req, fl);
759 req->a_args.block = block; 685 req->a_args.block = block;
760 686
761 status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops); 687 status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
762 if (status < 0) {
763 nlmclnt_release_lockargs(req);
764 kfree(req);
765 }
766 688
767 spin_lock_irqsave(&current->sighand->siglock, flags); 689 spin_lock_irqsave(&current->sighand->siglock, flags);
768 current->blocked = oldset; 690 current->blocked = oldset;
@@ -791,6 +713,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
791 switch (req->a_res.status) { 713 switch (req->a_res.status) {
792 case NLM_LCK_GRANTED: 714 case NLM_LCK_GRANTED:
793 case NLM_LCK_DENIED_GRACE_PERIOD: 715 case NLM_LCK_DENIED_GRACE_PERIOD:
716 case NLM_LCK_DENIED:
794 /* Everything's good */ 717 /* Everything's good */
795 break; 718 break;
796 case NLM_LCK_DENIED_NOLOCKS: 719 case NLM_LCK_DENIED_NOLOCKS:
@@ -802,9 +725,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
802 } 725 }
803 726
804die: 727die:
805 nlm_release_host(req->a_host);
806 nlmclnt_release_lockargs(req);
807 kfree(req);
808 return; 728 return;
809 729
810retry_cancel: 730retry_cancel:
@@ -818,6 +738,7 @@ retry_cancel:
818 738
819static const struct rpc_call_ops nlmclnt_cancel_ops = { 739static const struct rpc_call_ops nlmclnt_cancel_ops = {
820 .rpc_call_done = nlmclnt_cancel_callback, 740 .rpc_call_done = nlmclnt_cancel_callback,
741 .rpc_release = nlmclnt_rpc_release,
821}; 742};
822 743
823/* 744/*
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82f7a0b1d8ae..112ebf8b8dfe 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
123 nlm_hosts[hash] = host; 123 nlm_hosts[hash] = host;
124 INIT_LIST_HEAD(&host->h_lockowners); 124 INIT_LIST_HEAD(&host->h_lockowners);
125 spin_lock_init(&host->h_lock); 125 spin_lock_init(&host->h_lock);
126 INIT_LIST_HEAD(&host->h_granted);
127 INIT_LIST_HEAD(&host->h_reclaim);
126 128
127 if (++nrhosts > NLM_HOST_MAX) 129 if (++nrhosts > NLM_HOST_MAX)
128 next_gc = 0; 130 next_gc = 0;
@@ -191,11 +193,12 @@ nlm_bind_host(struct nlm_host *host)
191 xprt->resvport = 1; /* NLM requires a reserved port */ 193 xprt->resvport = 1; /* NLM requires a reserved port */
192 194
193 /* Existing NLM servers accept AUTH_UNIX only */ 195 /* Existing NLM servers accept AUTH_UNIX only */
194 clnt = rpc_create_client(xprt, host->h_name, &nlm_program, 196 clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
195 host->h_version, RPC_AUTH_UNIX); 197 host->h_version, RPC_AUTH_UNIX);
196 if (IS_ERR(clnt)) 198 if (IS_ERR(clnt))
197 goto forgetit; 199 goto forgetit;
198 clnt->cl_autobind = 1; /* turn on pmap queries */ 200 clnt->cl_autobind = 1; /* turn on pmap queries */
201 clnt->cl_softrtry = 1; /* All queries are soft */
199 202
200 host->h_rpcclnt = clnt; 203 host->h_rpcclnt = clnt;
201 } 204 }
@@ -242,8 +245,12 @@ void nlm_release_host(struct nlm_host *host)
242{ 245{
243 if (host != NULL) { 246 if (host != NULL) {
244 dprintk("lockd: release host %s\n", host->h_name); 247 dprintk("lockd: release host %s\n", host->h_name);
245 atomic_dec(&host->h_count);
246 BUG_ON(atomic_read(&host->h_count) < 0); 248 BUG_ON(atomic_read(&host->h_count) < 0);
249 if (atomic_dec_and_test(&host->h_count)) {
250 BUG_ON(!list_empty(&host->h_lockowners));
251 BUG_ON(!list_empty(&host->h_granted));
252 BUG_ON(!list_empty(&host->h_reclaim));
253 }
247 } 254 }
248} 255}
249 256
@@ -331,7 +338,6 @@ nlm_gc_hosts(void)
331 rpc_destroy_client(host->h_rpcclnt); 338 rpc_destroy_client(host->h_rpcclnt);
332 } 339 }
333 } 340 }
334 BUG_ON(!list_empty(&host->h_lockowners));
335 kfree(host); 341 kfree(host);
336 nrhosts--; 342 nrhosts--;
337 } 343 }
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a89cb8aa2c88..3fc683f46b3e 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
35 struct rpc_clnt *clnt; 35 struct rpc_clnt *clnt;
36 int status; 36 int status;
37 struct nsm_args args; 37 struct nsm_args args;
38 struct rpc_message msg = {
39 .rpc_argp = &args,
40 .rpc_resp = res,
41 };
38 42
39 clnt = nsm_create(); 43 clnt = nsm_create();
40 if (IS_ERR(clnt)) { 44 if (IS_ERR(clnt)) {
@@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
49 args.proc = NLMPROC_NSM_NOTIFY; 53 args.proc = NLMPROC_NSM_NOTIFY;
50 memset(res, 0, sizeof(*res)); 54 memset(res, 0, sizeof(*res));
51 55
52 status = rpc_call(clnt, proc, &args, res, 0); 56 msg.rpc_proc = &clnt->cl_procinfo[proc];
57 status = rpc_call_sync(clnt, &msg, 0);
53 if (status < 0) 58 if (status < 0)
54 printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n", 59 printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
55 status); 60 status);
@@ -214,12 +219,16 @@ static struct rpc_procinfo nsm_procedures[] = {
214 .p_encode = (kxdrproc_t) xdr_encode_mon, 219 .p_encode = (kxdrproc_t) xdr_encode_mon,
215 .p_decode = (kxdrproc_t) xdr_decode_stat_res, 220 .p_decode = (kxdrproc_t) xdr_decode_stat_res,
216 .p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2, 221 .p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2,
222 .p_statidx = SM_MON,
223 .p_name = "MONITOR",
217 }, 224 },
218[SM_UNMON] = { 225[SM_UNMON] = {
219 .p_proc = SM_UNMON, 226 .p_proc = SM_UNMON,
220 .p_encode = (kxdrproc_t) xdr_encode_unmon, 227 .p_encode = (kxdrproc_t) xdr_encode_unmon,
221 .p_decode = (kxdrproc_t) xdr_decode_stat, 228 .p_decode = (kxdrproc_t) xdr_decode_stat,
222 .p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2, 229 .p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2,
230 .p_statidx = SM_UNMON,
231 .p_name = "UNMONITOR",
223 }, 232 },
224}; 233};
225 234
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b10f913aa06a..a2dd9ccb9b32 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -21,10 +21,6 @@
21 21
22#define NLMDBG_FACILITY NLMDBG_CLIENT 22#define NLMDBG_FACILITY NLMDBG_CLIENT
23 23
24static u32 nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
25
26static const struct rpc_call_ops nlm4svc_callback_ops;
27
28/* 24/*
29 * Obtain client and file from arguments 25 * Obtain client and file from arguments
30 */ 26 */
@@ -234,83 +230,89 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
234} 230}
235 231
236/* 232/*
233 * This is the generic lockd callback for async RPC calls
234 */
235static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
236{
237 dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
238 -task->tk_status);
239}
240
241static void nlm4svc_callback_release(void *data)
242{
243 nlm_release_call(data);
244}
245
246static const struct rpc_call_ops nlm4svc_callback_ops = {
247 .rpc_call_done = nlm4svc_callback_exit,
248 .rpc_release = nlm4svc_callback_release,
249};
250
251/*
237 * `Async' versions of the above service routines. They aren't really, 252 * `Async' versions of the above service routines. They aren't really,
238 * because we send the callback before the reply proper. I hope this 253 * because we send the callback before the reply proper. I hope this
239 * doesn't break any clients. 254 * doesn't break any clients.
240 */ 255 */
241static int 256static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
242nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, 257 int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *))
243 void *resp)
244{ 258{
245 struct nlm_res res; 259 struct nlm_host *host;
246 u32 stat; 260 struct nlm_rqst *call;
261 int stat;
247 262
248 dprintk("lockd: TEST_MSG called\n"); 263 host = nlmsvc_lookup_host(rqstp);
249 memset(&res, 0, sizeof(res)); 264 if (host == NULL)
265 return rpc_system_err;
266
267 call = nlm_alloc_call(host);
268 if (call == NULL)
269 return rpc_system_err;
250 270
251 if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0) 271 stat = func(rqstp, argp, &call->a_res);
252 stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res); 272 if (stat != 0) {
253 return stat; 273 nlm_release_call(call);
274 return stat;
275 }
276
277 call->a_flags = RPC_TASK_ASYNC;
278 if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0)
279 return rpc_system_err;
280 return rpc_success;
254} 281}
255 282
256static int 283static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
257nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
258 void *resp) 284 void *resp)
259{ 285{
260 struct nlm_res res; 286 dprintk("lockd: TEST_MSG called\n");
261 u32 stat; 287 return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
288}
262 289
290static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
291 void *resp)
292{
263 dprintk("lockd: LOCK_MSG called\n"); 293 dprintk("lockd: LOCK_MSG called\n");
264 memset(&res, 0, sizeof(res)); 294 return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
265
266 if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0)
267 stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res);
268 return stat;
269} 295}
270 296
271static int 297static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
272nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
273 void *resp) 298 void *resp)
274{ 299{
275 struct nlm_res res;
276 u32 stat;
277
278 dprintk("lockd: CANCEL_MSG called\n"); 300 dprintk("lockd: CANCEL_MSG called\n");
279 memset(&res, 0, sizeof(res)); 301 return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
280
281 if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0)
282 stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
283 return stat;
284} 302}
285 303
286static int 304static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
287nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
288 void *resp) 305 void *resp)
289{ 306{
290 struct nlm_res res;
291 u32 stat;
292
293 dprintk("lockd: UNLOCK_MSG called\n"); 307 dprintk("lockd: UNLOCK_MSG called\n");
294 memset(&res, 0, sizeof(res)); 308 return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
295
296 if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0)
297 stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
298 return stat;
299} 309}
300 310
301static int 311static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
302nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
303 void *resp) 312 void *resp)
304{ 313{
305 struct nlm_res res;
306 u32 stat;
307
308 dprintk("lockd: GRANTED_MSG called\n"); 314 dprintk("lockd: GRANTED_MSG called\n");
309 memset(&res, 0, sizeof(res)); 315 return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
310
311 if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0)
312 stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
313 return stat;
314} 316}
315 317
316/* 318/*
@@ -472,55 +474,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
472 474
473 475
474/* 476/*
475 * This is the generic lockd callback for async RPC calls
476 */
477static u32
478nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
479{
480 struct nlm_host *host;
481 struct nlm_rqst *call;
482
483 if (!(call = nlmclnt_alloc_call()))
484 return rpc_system_err;
485
486 host = nlmclnt_lookup_host(&rqstp->rq_addr,
487 rqstp->rq_prot, rqstp->rq_vers);
488 if (!host) {
489 kfree(call);
490 return rpc_system_err;
491 }
492
493 call->a_flags = RPC_TASK_ASYNC;
494 call->a_host = host;
495 memcpy(&call->a_args, resp, sizeof(*resp));
496
497 if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
498 goto error;
499
500 return rpc_success;
501 error:
502 kfree(call);
503 nlm_release_host(host);
504 return rpc_system_err;
505}
506
507static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
508{
509 struct nlm_rqst *call = data;
510
511 if (task->tk_status < 0) {
512 dprintk("lockd: %4d callback failed (errno = %d)\n",
513 task->tk_pid, -task->tk_status);
514 }
515 nlm_release_host(call->a_host);
516 kfree(call);
517}
518
519static const struct rpc_call_ops nlm4svc_callback_ops = {
520 .rpc_call_done = nlm4svc_callback_exit,
521};
522
523/*
524 * NLM Server procedures. 477 * NLM Server procedures.
525 */ 478 */
526 479
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9cfced65d4a2..d2b66bad7d50 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -39,9 +39,12 @@
39#define nlm_deadlock nlm_lck_denied 39#define nlm_deadlock nlm_lck_denied
40#endif 40#endif
41 41
42static void nlmsvc_release_block(struct nlm_block *block);
42static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); 43static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
43static int nlmsvc_remove_block(struct nlm_block *block); 44static int nlmsvc_remove_block(struct nlm_block *block);
44 45
46static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
47static void nlmsvc_freegrantargs(struct nlm_rqst *call);
45static const struct rpc_call_ops nlmsvc_grant_ops; 48static const struct rpc_call_ops nlmsvc_grant_ops;
46 49
47/* 50/*
@@ -58,6 +61,7 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
58 struct nlm_block **bp, *b; 61 struct nlm_block **bp, *b;
59 62
60 dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when); 63 dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
64 kref_get(&block->b_count);
61 if (block->b_queued) 65 if (block->b_queued)
62 nlmsvc_remove_block(block); 66 nlmsvc_remove_block(block);
63 bp = &nlm_blocked; 67 bp = &nlm_blocked;
@@ -90,6 +94,7 @@ nlmsvc_remove_block(struct nlm_block *block)
90 if (b == block) { 94 if (b == block) {
91 *bp = block->b_next; 95 *bp = block->b_next;
92 block->b_queued = 0; 96 block->b_queued = 0;
97 nlmsvc_release_block(block);
93 return 1; 98 return 1;
94 } 99 }
95 } 100 }
@@ -98,11 +103,10 @@ nlmsvc_remove_block(struct nlm_block *block)
98} 103}
99 104
100/* 105/*
101 * Find a block for a given lock and optionally remove it from 106 * Find a block for a given lock
102 * the list.
103 */ 107 */
104static struct nlm_block * 108static struct nlm_block *
105nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove) 109nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
106{ 110{
107 struct nlm_block **head, *block; 111 struct nlm_block **head, *block;
108 struct file_lock *fl; 112 struct file_lock *fl;
@@ -112,17 +116,14 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
112 (long long)lock->fl.fl_start, 116 (long long)lock->fl.fl_start,
113 (long long)lock->fl.fl_end, lock->fl.fl_type); 117 (long long)lock->fl.fl_end, lock->fl.fl_type);
114 for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) { 118 for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
115 fl = &block->b_call.a_args.lock.fl; 119 fl = &block->b_call->a_args.lock.fl;
116 dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n", 120 dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
117 block->b_file, fl->fl_pid, 121 block->b_file, fl->fl_pid,
118 (long long)fl->fl_start, 122 (long long)fl->fl_start,
119 (long long)fl->fl_end, fl->fl_type, 123 (long long)fl->fl_end, fl->fl_type,
120 nlmdbg_cookie2a(&block->b_call.a_args.cookie)); 124 nlmdbg_cookie2a(&block->b_call->a_args.cookie));
121 if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) { 125 if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
122 if (remove) { 126 kref_get(&block->b_count);
123 *head = block->b_next;
124 block->b_queued = 0;
125 }
126 return block; 127 return block;
127 } 128 }
128 } 129 }
@@ -150,11 +151,13 @@ nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin)
150 for (block = nlm_blocked; block; block = block->b_next) { 151 for (block = nlm_blocked; block; block = block->b_next) {
151 dprintk("cookie: head of blocked queue %p, block %p\n", 152 dprintk("cookie: head of blocked queue %p, block %p\n",
152 nlm_blocked, block); 153 nlm_blocked, block);
153 if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie) 154 if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
154 && nlm_cmp_addr(sin, &block->b_host->h_addr)) 155 && nlm_cmp_addr(sin, &block->b_host->h_addr))
155 break; 156 break;
156 } 157 }
157 158
159 if (block != NULL)
160 kref_get(&block->b_count);
158 return block; 161 return block;
159} 162}
160 163
@@ -174,27 +177,30 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
174{ 177{
175 struct nlm_block *block; 178 struct nlm_block *block;
176 struct nlm_host *host; 179 struct nlm_host *host;
177 struct nlm_rqst *call; 180 struct nlm_rqst *call = NULL;
178 181
179 /* Create host handle for callback */ 182 /* Create host handle for callback */
180 host = nlmclnt_lookup_host(&rqstp->rq_addr, 183 host = nlmsvc_lookup_host(rqstp);
181 rqstp->rq_prot, rqstp->rq_vers);
182 if (host == NULL) 184 if (host == NULL)
183 return NULL; 185 return NULL;
184 186
187 call = nlm_alloc_call(host);
188 if (call == NULL)
189 return NULL;
190
185 /* Allocate memory for block, and initialize arguments */ 191 /* Allocate memory for block, and initialize arguments */
186 if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL))) 192 block = kzalloc(sizeof(*block), GFP_KERNEL);
193 if (block == NULL)
187 goto failed; 194 goto failed;
188 memset(block, 0, sizeof(*block)); 195 kref_init(&block->b_count);
189 locks_init_lock(&block->b_call.a_args.lock.fl);
190 locks_init_lock(&block->b_call.a_res.lock.fl);
191 196
192 if (!nlmclnt_setgrantargs(&block->b_call, lock)) 197 if (!nlmsvc_setgrantargs(call, lock))
193 goto failed_free; 198 goto failed_free;
194 199
195 /* Set notifier function for VFS, and init args */ 200 /* Set notifier function for VFS, and init args */
196 block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; 201 call->a_args.lock.fl.fl_flags |= FL_SLEEP;
197 block->b_call.a_args.cookie = *cookie; /* see above */ 202 call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
203 call->a_args.cookie = *cookie; /* see above */
198 204
199 dprintk("lockd: created block %p...\n", block); 205 dprintk("lockd: created block %p...\n", block);
200 206
@@ -202,22 +208,23 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
202 block->b_daemon = rqstp->rq_server; 208 block->b_daemon = rqstp->rq_server;
203 block->b_host = host; 209 block->b_host = host;
204 block->b_file = file; 210 block->b_file = file;
211 file->f_count++;
205 212
206 /* Add to file's list of blocks */ 213 /* Add to file's list of blocks */
207 block->b_fnext = file->f_blocks; 214 block->b_fnext = file->f_blocks;
208 file->f_blocks = block; 215 file->f_blocks = block;
209 216
210 /* Set up RPC arguments for callback */ 217 /* Set up RPC arguments for callback */
211 call = &block->b_call; 218 block->b_call = call;
212 call->a_host = host;
213 call->a_flags = RPC_TASK_ASYNC; 219 call->a_flags = RPC_TASK_ASYNC;
220 call->a_block = block;
214 221
215 return block; 222 return block;
216 223
217failed_free: 224failed_free:
218 kfree(block); 225 kfree(block);
219failed: 226failed:
220 nlm_release_host(host); 227 nlm_release_call(call);
221 return NULL; 228 return NULL;
222} 229}
223 230
@@ -227,29 +234,26 @@ failed:
227 * It is the caller's responsibility to check whether the file 234 * It is the caller's responsibility to check whether the file
228 * can be closed hereafter. 235 * can be closed hereafter.
229 */ 236 */
230static int 237static int nlmsvc_unlink_block(struct nlm_block *block)
231nlmsvc_delete_block(struct nlm_block *block, int unlock)
232{ 238{
233 struct file_lock *fl = &block->b_call.a_args.lock.fl; 239 int status;
234 struct nlm_file *file = block->b_file; 240 dprintk("lockd: unlinking block %p...\n", block);
235 struct nlm_block **bp;
236 int status = 0;
237
238 dprintk("lockd: deleting block %p...\n", block);
239 241
240 /* Remove block from list */ 242 /* Remove block from list */
243 status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
241 nlmsvc_remove_block(block); 244 nlmsvc_remove_block(block);
242 if (unlock) 245 return status;
243 status = posix_unblock_lock(file->f_file, fl); 246}
244 247
245 /* If the block is in the middle of a GRANT callback, 248static void nlmsvc_free_block(struct kref *kref)
246 * don't kill it yet. */ 249{
247 if (block->b_incall) { 250 struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
248 nlmsvc_insert_block(block, NLM_NEVER); 251 struct nlm_file *file = block->b_file;
249 block->b_done = 1; 252 struct nlm_block **bp;
250 return status;
251 }
252 253
254 dprintk("lockd: freeing block %p...\n", block);
255
256 down(&file->f_sema);
253 /* Remove block from file's list of blocks */ 257 /* Remove block from file's list of blocks */
254 for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) { 258 for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
255 if (*bp == block) { 259 if (*bp == block) {
@@ -257,36 +261,93 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock)
257 break; 261 break;
258 } 262 }
259 } 263 }
264 up(&file->f_sema);
260 265
261 if (block->b_host) 266 nlmsvc_freegrantargs(block->b_call);
262 nlm_release_host(block->b_host); 267 nlm_release_call(block->b_call);
263 nlmclnt_freegrantargs(&block->b_call); 268 nlm_release_file(block->b_file);
264 kfree(block); 269 kfree(block);
265 return status; 270}
271
272static void nlmsvc_release_block(struct nlm_block *block)
273{
274 if (block != NULL)
275 kref_put(&block->b_count, nlmsvc_free_block);
276}
277
278static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file)
279{
280 struct nlm_block *block;
281
282 down(&file->f_sema);
283 for (block = file->f_blocks; block != NULL; block = block->b_fnext)
284 block->b_host->h_inuse = 1;
285 up(&file->f_sema);
286}
287
288static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
289{
290 struct nlm_block *block;
291
292restart:
293 down(&file->f_sema);
294 for (block = file->f_blocks; block != NULL; block = block->b_fnext) {
295 if (host != NULL && host != block->b_host)
296 continue;
297 if (!block->b_queued)
298 continue;
299 kref_get(&block->b_count);
300 up(&file->f_sema);
301 nlmsvc_unlink_block(block);
302 nlmsvc_release_block(block);
303 goto restart;
304 }
305 up(&file->f_sema);
266} 306}
267 307
268/* 308/*
269 * Loop over all blocks and perform the action specified. 309 * Loop over all blocks and perform the action specified.
270 * (NLM_ACT_CHECK handled by nlmsvc_inspect_file). 310 * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
271 */ 311 */
272int 312void
273nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) 313nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
274{ 314{
275 struct nlm_block *block, *next; 315 if (action == NLM_ACT_MARK)
276 /* XXX: Will everything get cleaned up if we don't unlock here? */ 316 nlmsvc_act_mark(host, file);
317 else
318 nlmsvc_act_unlock(host, file);
319}
277 320
278 down(&file->f_sema); 321/*
279 for (block = file->f_blocks; block; block = next) { 322 * Initialize arguments for GRANTED call. The nlm_rqst structure
280 next = block->b_fnext; 323 * has been cleared already.
281 if (action == NLM_ACT_MARK) 324 */
282 block->b_host->h_inuse = 1; 325static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
283 else if (action == NLM_ACT_UNLOCK) { 326{
284 if (host == NULL || host == block->b_host) 327 locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
285 nlmsvc_delete_block(block, 1); 328 memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
286 } 329 call->a_args.lock.caller = system_utsname.nodename;
330 call->a_args.lock.oh.len = lock->oh.len;
331
332 /* set default data area */
333 call->a_args.lock.oh.data = call->a_owner;
334 call->a_args.lock.svid = lock->fl.fl_pid;
335
336 if (lock->oh.len > NLMCLNT_OHSIZE) {
337 void *data = kmalloc(lock->oh.len, GFP_KERNEL);
338 if (!data)
339 return 0;
340 call->a_args.lock.oh.data = (u8 *) data;
287 } 341 }
288 up(&file->f_sema); 342
289 return 0; 343 memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
344 return 1;
345}
346
347static void nlmsvc_freegrantargs(struct nlm_rqst *call)
348{
349 if (call->a_args.lock.oh.data != call->a_owner)
350 kfree(call->a_args.lock.oh.data);
290} 351}
291 352
292/* 353/*
@@ -297,9 +358,9 @@ u32
297nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, 358nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
298 struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) 359 struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
299{ 360{
300 struct file_lock *conflock; 361 struct nlm_block *block, *newblock = NULL;
301 struct nlm_block *block;
302 int error; 362 int error;
363 u32 ret;
303 364
304 dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", 365 dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
305 file->f_file->f_dentry->d_inode->i_sb->s_id, 366 file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -310,69 +371,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
310 wait); 371 wait);
311 372
312 373
313 /* Get existing block (in case client is busy-waiting) */ 374 lock->fl.fl_flags &= ~FL_SLEEP;
314 block = nlmsvc_lookup_block(file, lock, 0);
315
316 lock->fl.fl_flags |= FL_LOCKD;
317
318again: 375again:
319 /* Lock file against concurrent access */ 376 /* Lock file against concurrent access */
320 down(&file->f_sema); 377 down(&file->f_sema);
378 /* Get existing block (in case client is busy-waiting) */
379 block = nlmsvc_lookup_block(file, lock);
380 if (block == NULL) {
381 if (newblock != NULL)
382 lock = &newblock->b_call->a_args.lock;
383 } else
384 lock = &block->b_call->a_args.lock;
321 385
322 if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) { 386 error = posix_lock_file(file->f_file, &lock->fl);
323 error = posix_lock_file(file->f_file, &lock->fl); 387 lock->fl.fl_flags &= ~FL_SLEEP;
324 388
325 if (block) 389 dprintk("lockd: posix_lock_file returned %d\n", error);
326 nlmsvc_delete_block(block, 0);
327 up(&file->f_sema);
328 390
329 dprintk("lockd: posix_lock_file returned %d\n", -error); 391 switch(error) {
330 switch(-error) {
331 case 0: 392 case 0:
332 return nlm_granted; 393 ret = nlm_granted;
333 case EDEADLK: 394 goto out;
334 return nlm_deadlock; 395 case -EAGAIN:
335 case EAGAIN: 396 break;
336 return nlm_lck_denied; 397 case -EDEADLK:
398 ret = nlm_deadlock;
399 goto out;
337 default: /* includes ENOLCK */ 400 default: /* includes ENOLCK */
338 return nlm_lck_denied_nolocks; 401 ret = nlm_lck_denied_nolocks;
339 } 402 goto out;
340 } 403 }
341 404
342 if (!wait) { 405 ret = nlm_lck_denied;
343 up(&file->f_sema); 406 if (!wait)
344 return nlm_lck_denied; 407 goto out;
345 }
346 408
347 if (posix_locks_deadlock(&lock->fl, conflock)) { 409 ret = nlm_lck_blocked;
348 up(&file->f_sema); 410 if (block != NULL)
349 return nlm_deadlock; 411 goto out;
350 }
351 412
352 /* If we don't have a block, create and initialize it. Then 413 /* If we don't have a block, create and initialize it. Then
353 * retry because we may have slept in kmalloc. */ 414 * retry because we may have slept in kmalloc. */
354 /* We have to release f_sema as nlmsvc_create_block may try to 415 /* We have to release f_sema as nlmsvc_create_block may try to
355 * to claim it while doing host garbage collection */ 416 * to claim it while doing host garbage collection */
356 if (block == NULL) { 417 if (newblock == NULL) {
357 up(&file->f_sema); 418 up(&file->f_sema);
358 dprintk("lockd: blocking on this lock (allocating).\n"); 419 dprintk("lockd: blocking on this lock (allocating).\n");
359 if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie))) 420 if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
360 return nlm_lck_denied_nolocks; 421 return nlm_lck_denied_nolocks;
361 goto again; 422 goto again;
362 } 423 }
363 424
364 /* Append to list of blocked */ 425 /* Append to list of blocked */
365 nlmsvc_insert_block(block, NLM_NEVER); 426 nlmsvc_insert_block(newblock, NLM_NEVER);
366 427out:
367 if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
368 /* Now add block to block list of the conflicting lock
369 if we haven't done so. */
370 dprintk("lockd: blocking on this lock.\n");
371 posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
372 }
373
374 up(&file->f_sema); 428 up(&file->f_sema);
375 return nlm_lck_blocked; 429 nlmsvc_release_block(newblock);
430 nlmsvc_release_block(block);
431 dprintk("lockd: nlmsvc_lock returned %u\n", ret);
432 return ret;
376} 433}
377 434
378/* 435/*
@@ -382,8 +439,6 @@ u32
382nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, 439nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
383 struct nlm_lock *conflock) 440 struct nlm_lock *conflock)
384{ 441{
385 struct file_lock *fl;
386
387 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", 442 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
388 file->f_file->f_dentry->d_inode->i_sb->s_id, 443 file->f_file->f_dentry->d_inode->i_sb->s_id,
389 file->f_file->f_dentry->d_inode->i_ino, 444 file->f_file->f_dentry->d_inode->i_ino,
@@ -391,13 +446,14 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
391 (long long)lock->fl.fl_start, 446 (long long)lock->fl.fl_start,
392 (long long)lock->fl.fl_end); 447 (long long)lock->fl.fl_end);
393 448
394 if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) { 449 if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
395 dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", 450 dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
396 fl->fl_type, (long long)fl->fl_start, 451 conflock->fl.fl_type,
397 (long long)fl->fl_end); 452 (long long)conflock->fl.fl_start,
453 (long long)conflock->fl.fl_end);
398 conflock->caller = "somehost"; /* FIXME */ 454 conflock->caller = "somehost"; /* FIXME */
399 conflock->oh.len = 0; /* don't return OH info */ 455 conflock->oh.len = 0; /* don't return OH info */
400 conflock->fl = *fl; 456 conflock->svid = conflock->fl.fl_pid;
401 return nlm_lck_denied; 457 return nlm_lck_denied;
402 } 458 }
403 459
@@ -453,9 +509,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
453 (long long)lock->fl.fl_end); 509 (long long)lock->fl.fl_end);
454 510
455 down(&file->f_sema); 511 down(&file->f_sema);
456 if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) 512 block = nlmsvc_lookup_block(file, lock);
457 status = nlmsvc_delete_block(block, 1);
458 up(&file->f_sema); 513 up(&file->f_sema);
514 if (block != NULL) {
515 status = nlmsvc_unlink_block(block);
516 nlmsvc_release_block(block);
517 }
459 return status ? nlm_lck_denied : nlm_granted; 518 return status ? nlm_lck_denied : nlm_granted;
460} 519}
461 520
@@ -473,7 +532,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
473 532
474 dprintk("lockd: VFS unblock notification for block %p\n", fl); 533 dprintk("lockd: VFS unblock notification for block %p\n", fl);
475 for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) { 534 for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
476 if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) { 535 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
477 nlmsvc_insert_block(block, 0); 536 nlmsvc_insert_block(block, 0);
478 svc_wake_up(block->b_daemon); 537 svc_wake_up(block->b_daemon);
479 return; 538 return;
@@ -508,17 +567,13 @@ static void
508nlmsvc_grant_blocked(struct nlm_block *block) 567nlmsvc_grant_blocked(struct nlm_block *block)
509{ 568{
510 struct nlm_file *file = block->b_file; 569 struct nlm_file *file = block->b_file;
511 struct nlm_lock *lock = &block->b_call.a_args.lock; 570 struct nlm_lock *lock = &block->b_call->a_args.lock;
512 struct file_lock *conflock;
513 int error; 571 int error;
514 572
515 dprintk("lockd: grant blocked lock %p\n", block); 573 dprintk("lockd: grant blocked lock %p\n", block);
516 574
517 /* First thing is lock the file */
518 down(&file->f_sema);
519
520 /* Unlink block request from list */ 575 /* Unlink block request from list */
521 nlmsvc_remove_block(block); 576 nlmsvc_unlink_block(block);
522 577
523 /* If b_granted is true this means we've been here before. 578 /* If b_granted is true this means we've been here before.
524 * Just retry the grant callback, possibly refreshing the RPC 579 * Just retry the grant callback, possibly refreshing the RPC
@@ -529,24 +584,21 @@ nlmsvc_grant_blocked(struct nlm_block *block)
529 } 584 }
530 585
531 /* Try the lock operation again */ 586 /* Try the lock operation again */
532 if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) { 587 lock->fl.fl_flags |= FL_SLEEP;
533 /* Bummer, we blocked again */ 588 error = posix_lock_file(file->f_file, &lock->fl);
589 lock->fl.fl_flags &= ~FL_SLEEP;
590
591 switch (error) {
592 case 0:
593 break;
594 case -EAGAIN:
534 dprintk("lockd: lock still blocked\n"); 595 dprintk("lockd: lock still blocked\n");
535 nlmsvc_insert_block(block, NLM_NEVER); 596 nlmsvc_insert_block(block, NLM_NEVER);
536 posix_block_lock(conflock, &lock->fl);
537 up(&file->f_sema);
538 return; 597 return;
539 } 598 default:
540
541 /* Alright, no conflicting lock. Now lock it for real. If the
542 * following yields an error, this is most probably due to low
543 * memory. Retry the lock in a few seconds.
544 */
545 if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
546 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", 599 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
547 -error, __FUNCTION__); 600 -error, __FUNCTION__);
548 nlmsvc_insert_block(block, 10 * HZ); 601 nlmsvc_insert_block(block, 10 * HZ);
549 up(&file->f_sema);
550 return; 602 return;
551 } 603 }
552 604
@@ -554,17 +606,15 @@ callback:
554 /* Lock was granted by VFS. */ 606 /* Lock was granted by VFS. */
555 dprintk("lockd: GRANTing blocked lock.\n"); 607 dprintk("lockd: GRANTing blocked lock.\n");
556 block->b_granted = 1; 608 block->b_granted = 1;
557 block->b_incall = 1;
558 609
559 /* Schedule next grant callback in 30 seconds */ 610 /* Schedule next grant callback in 30 seconds */
560 nlmsvc_insert_block(block, 30 * HZ); 611 nlmsvc_insert_block(block, 30 * HZ);
561 612
562 /* Call the client */ 613 /* Call the client */
563 nlm_get_host(block->b_call.a_host); 614 kref_get(&block->b_count);
564 if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG, 615 if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
565 &nlmsvc_grant_ops) < 0) 616 &nlmsvc_grant_ops) < 0)
566 nlm_release_host(block->b_call.a_host); 617 nlmsvc_release_block(block);
567 up(&file->f_sema);
568} 618}
569 619
570/* 620/*
@@ -578,20 +628,10 @@ callback:
578static void nlmsvc_grant_callback(struct rpc_task *task, void *data) 628static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
579{ 629{
580 struct nlm_rqst *call = data; 630 struct nlm_rqst *call = data;
581 struct nlm_block *block; 631 struct nlm_block *block = call->a_block;
582 unsigned long timeout; 632 unsigned long timeout;
583 struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client);
584 633
585 dprintk("lockd: GRANT_MSG RPC callback\n"); 634 dprintk("lockd: GRANT_MSG RPC callback\n");
586 dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
587 nlmdbg_cookie2a(&call->a_args.cookie),
588 NIPQUAD(peer_addr->sin_addr.s_addr));
589 if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
590 dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
591 nlmdbg_cookie2a(&call->a_args.cookie),
592 NIPQUAD(peer_addr->sin_addr.s_addr));
593 return;
594 }
595 635
596 /* Technically, we should down the file semaphore here. Since we 636 /* Technically, we should down the file semaphore here. Since we
597 * move the block towards the head of the queue only, no harm 637 * move the block towards the head of the queue only, no harm
@@ -608,13 +648,18 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
608 } 648 }
609 nlmsvc_insert_block(block, timeout); 649 nlmsvc_insert_block(block, timeout);
610 svc_wake_up(block->b_daemon); 650 svc_wake_up(block->b_daemon);
611 block->b_incall = 0; 651}
612 652
613 nlm_release_host(call->a_host); 653void nlmsvc_grant_release(void *data)
654{
655 struct nlm_rqst *call = data;
656
657 nlmsvc_release_block(call->a_block);
614} 658}
615 659
616static const struct rpc_call_ops nlmsvc_grant_ops = { 660static const struct rpc_call_ops nlmsvc_grant_ops = {
617 .rpc_call_done = nlmsvc_grant_callback, 661 .rpc_call_done = nlmsvc_grant_callback,
662 .rpc_release = nlmsvc_grant_release,
618}; 663};
619 664
620/* 665/*
@@ -634,25 +679,17 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
634 return; 679 return;
635 file = block->b_file; 680 file = block->b_file;
636 681
637 file->f_count++;
638 down(&file->f_sema);
639 block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
640 if (block) { 682 if (block) {
641 if (status == NLM_LCK_DENIED_GRACE_PERIOD) { 683 if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
642 /* Try again in a couple of seconds */ 684 /* Try again in a couple of seconds */
643 nlmsvc_insert_block(block, 10 * HZ); 685 nlmsvc_insert_block(block, 10 * HZ);
644 up(&file->f_sema);
645 } else { 686 } else {
646 /* Lock is now held by client, or has been rejected. 687 /* Lock is now held by client, or has been rejected.
647 * In both cases, the block should be removed. */ 688 * In both cases, the block should be removed. */
648 up(&file->f_sema); 689 nlmsvc_unlink_block(block);
649 if (status == NLM_LCK_GRANTED)
650 nlmsvc_delete_block(block, 0);
651 else
652 nlmsvc_delete_block(block, 1);
653 } 690 }
654 } 691 }
655 nlm_release_file(file); 692 nlmsvc_release_block(block);
656} 693}
657 694
658/* 695/*
@@ -675,10 +712,12 @@ nlmsvc_retry_blocked(void)
675 break; 712 break;
676 dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n", 713 dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
677 block, block->b_when, block->b_done); 714 block, block->b_when, block->b_done);
715 kref_get(&block->b_count);
678 if (block->b_done) 716 if (block->b_done)
679 nlmsvc_delete_block(block, 0); 717 nlmsvc_unlink_block(block);
680 else 718 else
681 nlmsvc_grant_blocked(block); 719 nlmsvc_grant_blocked(block);
720 nlmsvc_release_block(block);
682 } 721 }
683 722
684 if ((block = nlm_blocked) && block->b_when != NLM_NEVER) 723 if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 35681d9cf1fc..d210cf304e92 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -22,10 +22,6 @@
22 22
23#define NLMDBG_FACILITY NLMDBG_CLIENT 23#define NLMDBG_FACILITY NLMDBG_CLIENT
24 24
25static u32 nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
26
27static const struct rpc_call_ops nlmsvc_callback_ops;
28
29#ifdef CONFIG_LOCKD_V4 25#ifdef CONFIG_LOCKD_V4
30static u32 26static u32
31cast_to_nlm(u32 status, u32 vers) 27cast_to_nlm(u32 status, u32 vers)
@@ -262,83 +258,91 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
262} 258}
263 259
264/* 260/*
261 * This is the generic lockd callback for async RPC calls
262 */
263static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
264{
265 dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
266 -task->tk_status);
267}
268
269static void nlmsvc_callback_release(void *data)
270{
271 nlm_release_call(data);
272}
273
274static const struct rpc_call_ops nlmsvc_callback_ops = {
275 .rpc_call_done = nlmsvc_callback_exit,
276 .rpc_release = nlmsvc_callback_release,
277};
278
279/*
265 * `Async' versions of the above service routines. They aren't really, 280 * `Async' versions of the above service routines. They aren't really,
266 * because we send the callback before the reply proper. I hope this 281 * because we send the callback before the reply proper. I hope this
267 * doesn't break any clients. 282 * doesn't break any clients.
268 */ 283 */
269static int 284static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
270nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, 285 int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *))
271 void *resp)
272{ 286{
273 struct nlm_res res; 287 struct nlm_host *host;
274 u32 stat; 288 struct nlm_rqst *call;
289 int stat;
275 290
276 dprintk("lockd: TEST_MSG called\n"); 291 host = nlmsvc_lookup_host(rqstp);
277 memset(&res, 0, sizeof(res)); 292 if (host == NULL)
293 return rpc_system_err;
278 294
279 if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0) 295 call = nlm_alloc_call(host);
280 stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res); 296 if (call == NULL)
281 return stat; 297 return rpc_system_err;
298
299 stat = func(rqstp, argp, &call->a_res);
300 if (stat != 0) {
301 nlm_release_call(call);
302 return stat;
303 }
304
305 call->a_flags = RPC_TASK_ASYNC;
306 if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0)
307 return rpc_system_err;
308 return rpc_success;
282} 309}
283 310
284static int 311static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
285nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
286 void *resp) 312 void *resp)
287{ 313{
288 struct nlm_res res; 314 dprintk("lockd: TEST_MSG called\n");
289 u32 stat; 315 return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
316}
290 317
318static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
319 void *resp)
320{
291 dprintk("lockd: LOCK_MSG called\n"); 321 dprintk("lockd: LOCK_MSG called\n");
292 memset(&res, 0, sizeof(res)); 322 return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
293
294 if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0)
295 stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res);
296 return stat;
297} 323}
298 324
299static int 325static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
300nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
301 void *resp) 326 void *resp)
302{ 327{
303 struct nlm_res res;
304 u32 stat;
305
306 dprintk("lockd: CANCEL_MSG called\n"); 328 dprintk("lockd: CANCEL_MSG called\n");
307 memset(&res, 0, sizeof(res)); 329 return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
308
309 if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0)
310 stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
311 return stat;
312} 330}
313 331
314static int 332static int
315nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, 333nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
316 void *resp) 334 void *resp)
317{ 335{
318 struct nlm_res res;
319 u32 stat;
320
321 dprintk("lockd: UNLOCK_MSG called\n"); 336 dprintk("lockd: UNLOCK_MSG called\n");
322 memset(&res, 0, sizeof(res)); 337 return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
323
324 if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0)
325 stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
326 return stat;
327} 338}
328 339
329static int 340static int
330nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, 341nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
331 void *resp) 342 void *resp)
332{ 343{
333 struct nlm_res res;
334 u32 stat;
335
336 dprintk("lockd: GRANTED_MSG called\n"); 344 dprintk("lockd: GRANTED_MSG called\n");
337 memset(&res, 0, sizeof(res)); 345 return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
338
339 if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0)
340 stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
341 return stat;
342} 346}
343 347
344/* 348/*
@@ -497,55 +501,6 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp,
497} 501}
498 502
499/* 503/*
500 * This is the generic lockd callback for async RPC calls
501 */
502static u32
503nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
504{
505 struct nlm_host *host;
506 struct nlm_rqst *call;
507
508 if (!(call = nlmclnt_alloc_call()))
509 return rpc_system_err;
510
511 host = nlmclnt_lookup_host(&rqstp->rq_addr,
512 rqstp->rq_prot, rqstp->rq_vers);
513 if (!host) {
514 kfree(call);
515 return rpc_system_err;
516 }
517
518 call->a_flags = RPC_TASK_ASYNC;
519 call->a_host = host;
520 memcpy(&call->a_args, resp, sizeof(*resp));
521
522 if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
523 goto error;
524
525 return rpc_success;
526 error:
527 nlm_release_host(host);
528 kfree(call);
529 return rpc_system_err;
530}
531
532static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
533{
534 struct nlm_rqst *call = data;
535
536 if (task->tk_status < 0) {
537 dprintk("lockd: %4d callback failed (errno = %d)\n",
538 task->tk_pid, -task->tk_status);
539 }
540 nlm_release_host(call->a_host);
541 kfree(call);
542}
543
544static const struct rpc_call_ops nlmsvc_callback_ops = {
545 .rpc_call_done = nlmsvc_callback_exit,
546};
547
548/*
549 * NLM Server procedures. 504 * NLM Server procedures.
550 */ 505 */
551 506
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 4943fb7836ce..27288c83da96 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -88,7 +88,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
88 * Traverse all shares for a given file (and host). 88 * Traverse all shares for a given file (and host).
89 * NLM_ACT_CHECK is handled by nlmsvc_inspect_file. 89 * NLM_ACT_CHECK is handled by nlmsvc_inspect_file.
90 */ 90 */
91int 91void
92nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action) 92nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
93{ 93{
94 struct nlm_share *share, **shpp; 94 struct nlm_share *share, **shpp;
@@ -106,6 +106,4 @@ nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
106 } 106 }
107 shpp = &share->s_next; 107 shpp = &share->s_next;
108 } 108 }
109
110 return 0;
111} 109}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 62f4a385177f..c7a6e3ae44d6 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action)
182again: 182again:
183 file->f_locks = 0; 183 file->f_locks = 0;
184 for (fl = inode->i_flock; fl; fl = fl->fl_next) { 184 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
185 if (!(fl->fl_flags & FL_LOCKD)) 185 if (fl->fl_lmops != &nlmsvc_lock_operations)
186 continue; 186 continue;
187 187
188 /* update current lock count */ 188 /* update current lock count */
@@ -224,9 +224,8 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
224 if (file->f_count || file->f_blocks || file->f_shares) 224 if (file->f_count || file->f_blocks || file->f_shares)
225 return 1; 225 return 1;
226 } else { 226 } else {
227 if (nlmsvc_traverse_blocks(host, file, action) 227 nlmsvc_traverse_blocks(host, file, action);
228 || nlmsvc_traverse_shares(host, file, action)) 228 nlmsvc_traverse_shares(host, file, action);
229 return 1;
230 } 229 }
231 return nlm_traverse_locks(host, file, action); 230 return nlm_traverse_locks(host, file, action);
232} 231}
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 1d700a4dd0b5..f22a3764461a 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock)
131 || !(p = nlm_decode_fh(p, &lock->fh)) 131 || !(p = nlm_decode_fh(p, &lock->fh))
132 || !(p = nlm_decode_oh(p, &lock->oh))) 132 || !(p = nlm_decode_oh(p, &lock->oh)))
133 return NULL; 133 return NULL;
134 lock->svid = ntohl(*p++);
134 135
135 locks_init_lock(fl); 136 locks_init_lock(fl);
136 fl->fl_owner = current->files; 137 fl->fl_owner = current->files;
137 fl->fl_pid = ntohl(*p++); 138 fl->fl_pid = (pid_t)lock->svid;
138 fl->fl_flags = FL_POSIX; 139 fl->fl_flags = FL_POSIX;
139 fl->fl_type = F_RDLCK; /* as good as anything else */ 140 fl->fl_type = F_RDLCK; /* as good as anything else */
140 start = ntohl(*p++); 141 start = ntohl(*p++);
@@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock)
174 else 175 else
175 len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1); 176 len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
176 177
177 *p++ = htonl(fl->fl_pid); 178 *p++ = htonl(lock->svid);
178 *p++ = htonl(start); 179 *p++ = htonl(start);
179 *p++ = htonl(len); 180 *p++ = htonl(len);
180 181
@@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
197 struct file_lock *fl = &resp->lock.fl; 198 struct file_lock *fl = &resp->lock.fl;
198 199
199 *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; 200 *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
200 *p++ = htonl(fl->fl_pid); 201 *p++ = htonl(resp->lock.svid);
201 202
202 /* Encode owner handle. */ 203 /* Encode owner handle. */
203 if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) 204 if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
298 299
299 memset(lock, 0, sizeof(*lock)); 300 memset(lock, 0, sizeof(*lock));
300 locks_init_lock(&lock->fl); 301 locks_init_lock(&lock->fl);
301 lock->fl.fl_pid = ~(u32) 0; 302 lock->svid = ~(u32) 0;
303 lock->fl.fl_pid = (pid_t)lock->svid;
302 304
303 if (!(p = nlm_decode_cookie(p, &argp->cookie)) 305 if (!(p = nlm_decode_cookie(p, &argp->cookie))
304 || !(p = xdr_decode_string_inplace(p, &lock->caller, 306 || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
415 memset(&resp->lock, 0, sizeof(resp->lock)); 417 memset(&resp->lock, 0, sizeof(resp->lock));
416 locks_init_lock(fl); 418 locks_init_lock(fl);
417 excl = ntohl(*p++); 419 excl = ntohl(*p++);
418 fl->fl_pid = ntohl(*p++); 420 resp->lock.svid = ntohl(*p++);
421 fl->fl_pid = (pid_t)resp->lock.svid;
419 if (!(p = nlm_decode_oh(p, &resp->lock.oh))) 422 if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
420 return -EIO; 423 return -EIO;
421 424
@@ -543,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
543 .p_proc = NLMPROC_##proc, \ 546 .p_proc = NLMPROC_##proc, \
544 .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \ 547 .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
545 .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \ 548 .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
546 .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \ 549 .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \
550 .p_statidx = NLMPROC_##proc, \
551 .p_name = #proc, \
547 } 552 }
548 553
549static struct rpc_procinfo nlm_procedures[] = { 554static struct rpc_procinfo nlm_procedures[] = {
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index fdcf105a5303..36eb175ec335 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
130 || !(p = nlm4_decode_fh(p, &lock->fh)) 130 || !(p = nlm4_decode_fh(p, &lock->fh))
131 || !(p = nlm4_decode_oh(p, &lock->oh))) 131 || !(p = nlm4_decode_oh(p, &lock->oh)))
132 return NULL; 132 return NULL;
133 lock->svid = ntohl(*p++);
133 134
134 locks_init_lock(fl); 135 locks_init_lock(fl);
135 fl->fl_owner = current->files; 136 fl->fl_owner = current->files;
136 fl->fl_pid = ntohl(*p++); 137 fl->fl_pid = (pid_t)lock->svid;
137 fl->fl_flags = FL_POSIX; 138 fl->fl_flags = FL_POSIX;
138 fl->fl_type = F_RDLCK; /* as good as anything else */ 139 fl->fl_type = F_RDLCK; /* as good as anything else */
139 p = xdr_decode_hyper(p, &start); 140 p = xdr_decode_hyper(p, &start);
@@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
167 || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX)) 168 || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
168 return NULL; 169 return NULL;
169 170
170 *p++ = htonl(fl->fl_pid); 171 *p++ = htonl(lock->svid);
171 172
172 start = loff_t_to_s64(fl->fl_start); 173 start = loff_t_to_s64(fl->fl_start);
173 if (fl->fl_end == OFFSET_MAX) 174 if (fl->fl_end == OFFSET_MAX)
@@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
198 struct file_lock *fl = &resp->lock.fl; 199 struct file_lock *fl = &resp->lock.fl;
199 200
200 *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one; 201 *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
201 *p++ = htonl(fl->fl_pid); 202 *p++ = htonl(resp->lock.svid);
202 203
203 /* Encode owner handle. */ 204 /* Encode owner handle. */
204 if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) 205 if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
212 213
213 p = xdr_encode_hyper(p, start); 214 p = xdr_encode_hyper(p, start);
214 p = xdr_encode_hyper(p, len); 215 p = xdr_encode_hyper(p, len);
215 dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n", 216 dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
216 resp->status, fl->fl_pid, fl->fl_type, 217 resp->status, (int)resp->lock.svid, fl->fl_type,
217 (long long)fl->fl_start, (long long)fl->fl_end); 218 (long long)fl->fl_start, (long long)fl->fl_end);
218 } 219 }
219 220
@@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
303 304
304 memset(lock, 0, sizeof(*lock)); 305 memset(lock, 0, sizeof(*lock));
305 locks_init_lock(&lock->fl); 306 locks_init_lock(&lock->fl);
306 lock->fl.fl_pid = ~(u32) 0; 307 lock->svid = ~(u32) 0;
308 lock->fl.fl_pid = (pid_t)lock->svid;
307 309
308 if (!(p = nlm4_decode_cookie(p, &argp->cookie)) 310 if (!(p = nlm4_decode_cookie(p, &argp->cookie))
309 || !(p = xdr_decode_string_inplace(p, &lock->caller, 311 || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
420 memset(&resp->lock, 0, sizeof(resp->lock)); 422 memset(&resp->lock, 0, sizeof(resp->lock));
421 locks_init_lock(fl); 423 locks_init_lock(fl);
422 excl = ntohl(*p++); 424 excl = ntohl(*p++);
423 fl->fl_pid = ntohl(*p++); 425 resp->lock.svid = ntohl(*p++);
426 fl->fl_pid = (pid_t)resp->lock.svid;
424 if (!(p = nlm4_decode_oh(p, &resp->lock.oh))) 427 if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
425 return -EIO; 428 return -EIO;
426 429
@@ -548,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
548 .p_proc = NLMPROC_##proc, \ 551 .p_proc = NLMPROC_##proc, \
549 .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \ 552 .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
550 .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \ 553 .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
551 .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2 \ 554 .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \
555 .p_statidx = NLMPROC_##proc, \
556 .p_name = #proc, \
552 } 557 }
553 558
554static struct rpc_procinfo nlm4_procedures[] = { 559static struct rpc_procinfo nlm4_procedures[] = {
diff --git a/fs/locks.c b/fs/locks.c
index 909eab8fb1d0..56f996e98bbc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,10 +139,7 @@ int lease_break_time = 45;
139#define for_each_lock(inode, lockp) \ 139#define for_each_lock(inode, lockp) \
140 for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next) 140 for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
141 141
142LIST_HEAD(file_lock_list); 142static LIST_HEAD(file_lock_list);
143
144EXPORT_SYMBOL(file_lock_list);
145
146static LIST_HEAD(blocked_list); 143static LIST_HEAD(blocked_list);
147 144
148static kmem_cache_t *filelock_cache; 145static kmem_cache_t *filelock_cache;
@@ -153,6 +150,21 @@ static struct file_lock *locks_alloc_lock(void)
153 return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); 150 return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
154} 151}
155 152
153static void locks_release_private(struct file_lock *fl)
154{
155 if (fl->fl_ops) {
156 if (fl->fl_ops->fl_release_private)
157 fl->fl_ops->fl_release_private(fl);
158 fl->fl_ops = NULL;
159 }
160 if (fl->fl_lmops) {
161 if (fl->fl_lmops->fl_release_private)
162 fl->fl_lmops->fl_release_private(fl);
163 fl->fl_lmops = NULL;
164 }
165
166}
167
156/* Free a lock which is not in use. */ 168/* Free a lock which is not in use. */
157static void locks_free_lock(struct file_lock *fl) 169static void locks_free_lock(struct file_lock *fl)
158{ 170{
@@ -169,18 +181,7 @@ static void locks_free_lock(struct file_lock *fl)
169 if (!list_empty(&fl->fl_link)) 181 if (!list_empty(&fl->fl_link))
170 panic("Attempting to free lock on active lock list"); 182 panic("Attempting to free lock on active lock list");
171 183
172 if (fl->fl_ops) { 184 locks_release_private(fl);
173 if (fl->fl_ops->fl_release_private)
174 fl->fl_ops->fl_release_private(fl);
175 fl->fl_ops = NULL;
176 }
177
178 if (fl->fl_lmops) {
179 if (fl->fl_lmops->fl_release_private)
180 fl->fl_lmops->fl_release_private(fl);
181 fl->fl_lmops = NULL;
182 }
183
184 kmem_cache_free(filelock_cache, fl); 185 kmem_cache_free(filelock_cache, fl);
185} 186}
186 187
@@ -218,24 +219,46 @@ static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
218 locks_init_lock(lock); 219 locks_init_lock(lock);
219} 220}
220 221
222static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
223{
224 if (fl->fl_ops) {
225 if (fl->fl_ops->fl_copy_lock)
226 fl->fl_ops->fl_copy_lock(new, fl);
227 new->fl_ops = fl->fl_ops;
228 }
229 if (fl->fl_lmops) {
230 if (fl->fl_lmops->fl_copy_lock)
231 fl->fl_lmops->fl_copy_lock(new, fl);
232 new->fl_lmops = fl->fl_lmops;
233 }
234}
235
221/* 236/*
222 * Initialize a new lock from an existing file_lock structure. 237 * Initialize a new lock from an existing file_lock structure.
223 */ 238 */
224void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 239static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
225{ 240{
226 new->fl_owner = fl->fl_owner; 241 new->fl_owner = fl->fl_owner;
227 new->fl_pid = fl->fl_pid; 242 new->fl_pid = fl->fl_pid;
228 new->fl_file = fl->fl_file; 243 new->fl_file = NULL;
229 new->fl_flags = fl->fl_flags; 244 new->fl_flags = fl->fl_flags;
230 new->fl_type = fl->fl_type; 245 new->fl_type = fl->fl_type;
231 new->fl_start = fl->fl_start; 246 new->fl_start = fl->fl_start;
232 new->fl_end = fl->fl_end; 247 new->fl_end = fl->fl_end;
248 new->fl_ops = NULL;
249 new->fl_lmops = NULL;
250}
251
252void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
253{
254 locks_release_private(new);
255
256 __locks_copy_lock(new, fl);
257 new->fl_file = fl->fl_file;
233 new->fl_ops = fl->fl_ops; 258 new->fl_ops = fl->fl_ops;
234 new->fl_lmops = fl->fl_lmops; 259 new->fl_lmops = fl->fl_lmops;
235 if (fl->fl_ops && fl->fl_ops->fl_copy_lock) 260
236 fl->fl_ops->fl_copy_lock(new, fl); 261 locks_copy_private(new, fl);
237 if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
238 fl->fl_lmops->fl_copy_lock(new, fl);
239} 262}
240 263
241EXPORT_SYMBOL(locks_copy_lock); 264EXPORT_SYMBOL(locks_copy_lock);
@@ -654,8 +677,9 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
654 return result; 677 return result;
655} 678}
656 679
657struct file_lock * 680int
658posix_test_lock(struct file *filp, struct file_lock *fl) 681posix_test_lock(struct file *filp, struct file_lock *fl,
682 struct file_lock *conflock)
659{ 683{
660 struct file_lock *cfl; 684 struct file_lock *cfl;
661 685
@@ -666,9 +690,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
666 if (posix_locks_conflict(cfl, fl)) 690 if (posix_locks_conflict(cfl, fl))
667 break; 691 break;
668 } 692 }
693 if (cfl) {
694 __locks_copy_lock(conflock, cfl);
695 unlock_kernel();
696 return 1;
697 }
669 unlock_kernel(); 698 unlock_kernel();
670 699 return 0;
671 return (cfl);
672} 700}
673 701
674EXPORT_SYMBOL(posix_test_lock); 702EXPORT_SYMBOL(posix_test_lock);
@@ -904,7 +932,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
904 fl->fl_start = request->fl_start; 932 fl->fl_start = request->fl_start;
905 fl->fl_end = request->fl_end; 933 fl->fl_end = request->fl_end;
906 fl->fl_type = request->fl_type; 934 fl->fl_type = request->fl_type;
907 fl->fl_u = request->fl_u; 935 locks_release_private(fl);
936 locks_copy_private(fl, request);
908 request = fl; 937 request = fl;
909 added = 1; 938 added = 1;
910 } 939 }
@@ -1544,7 +1573,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
1544 */ 1573 */
1545int fcntl_getlk(struct file *filp, struct flock __user *l) 1574int fcntl_getlk(struct file *filp, struct flock __user *l)
1546{ 1575{
1547 struct file_lock *fl, file_lock; 1576 struct file_lock *fl, cfl, file_lock;
1548 struct flock flock; 1577 struct flock flock;
1549 int error; 1578 int error;
1550 1579
@@ -1568,7 +1597,7 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
1568 else 1597 else
1569 fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); 1598 fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
1570 } else { 1599 } else {
1571 fl = posix_test_lock(filp, &file_lock); 1600 fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
1572 } 1601 }
1573 1602
1574 flock.l_type = F_UNLCK; 1603 flock.l_type = F_UNLCK;
@@ -1698,7 +1727,7 @@ out:
1698 */ 1727 */
1699int fcntl_getlk64(struct file *filp, struct flock64 __user *l) 1728int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
1700{ 1729{
1701 struct file_lock *fl, file_lock; 1730 struct file_lock *fl, cfl, file_lock;
1702 struct flock64 flock; 1731 struct flock64 flock;
1703 int error; 1732 int error;
1704 1733
@@ -1722,7 +1751,7 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
1722 else 1751 else
1723 fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); 1752 fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
1724 } else { 1753 } else {
1725 fl = posix_test_lock(filp, &file_lock); 1754 fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
1726 } 1755 }
1727 1756
1728 flock.l_type = F_UNLCK; 1757 flock.l_type = F_UNLCK;
@@ -1936,21 +1965,6 @@ void locks_remove_flock(struct file *filp)
1936} 1965}
1937 1966
1938/** 1967/**
1939 * posix_block_lock - blocks waiting for a file lock
1940 * @blocker: the lock which is blocking
1941 * @waiter: the lock which conflicts and has to wait
1942 *
1943 * lockd needs to block waiting for locks.
1944 */
1945void
1946posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
1947{
1948 locks_insert_block(blocker, waiter);
1949}
1950
1951EXPORT_SYMBOL(posix_block_lock);
1952
1953/**
1954 * posix_unblock_lock - stop waiting for a file lock 1968 * posix_unblock_lock - stop waiting for a file lock
1955 * @filp: how the file was opened 1969 * @filp: how the file was opened
1956 * @waiter: the lock which was waiting 1970 * @waiter: the lock which was waiting
diff --git a/fs/namespace.c b/fs/namespace.c
index 39c81a8d6316..71e75bcf4d28 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -399,6 +399,44 @@ struct seq_operations mounts_op = {
399 .show = show_vfsmnt 399 .show = show_vfsmnt
400}; 400};
401 401
402static int show_vfsstat(struct seq_file *m, void *v)
403{
404 struct vfsmount *mnt = v;
405 int err = 0;
406
407 /* device */
408 if (mnt->mnt_devname) {
409 seq_puts(m, "device ");
410 mangle(m, mnt->mnt_devname);
411 } else
412 seq_puts(m, "no device");
413
414 /* mount point */
415 seq_puts(m, " mounted on ");
416 seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
417 seq_putc(m, ' ');
418
419 /* file system type */
420 seq_puts(m, "with fstype ");
421 mangle(m, mnt->mnt_sb->s_type->name);
422
423 /* optional statistics */
424 if (mnt->mnt_sb->s_op->show_stats) {
425 seq_putc(m, ' ');
426 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
427 }
428
429 seq_putc(m, '\n');
430 return err;
431}
432
433struct seq_operations mountstats_op = {
434 .start = m_start,
435 .next = m_next,
436 .stop = m_stop,
437 .show = show_vfsstat,
438};
439
402/** 440/**
403 * may_umount_tree - check if a mount tree is busy 441 * may_umount_tree - check if a mount tree is busy
404 * @mnt: root of mount tree 442 * @mnt: root of mount tree
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fcd97406a778..99d2cfbce863 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
55 55
56 complete(&nfs_callback_info.started); 56 complete(&nfs_callback_info.started);
57 57
58 while (nfs_callback_info.users != 0 || !signalled()) { 58 for(;;) {
59 if (signalled()) {
60 if (nfs_callback_info.users == 0)
61 break;
62 flush_signals(current);
63 }
59 /* 64 /*
60 * Listen for a request on the socket 65 * Listen for a request on the socket
61 */ 66 */
@@ -73,6 +78,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
73 svc_process(serv, rqstp); 78 svc_process(serv, rqstp);
74 } 79 }
75 80
81 svc_exit_thread(rqstp);
76 nfs_callback_info.pid = 0; 82 nfs_callback_info.pid = 0;
77 complete(&nfs_callback_info.stopped); 83 complete(&nfs_callback_info.stopped);
78 unlock_kernel(); 84 unlock_kernel();
@@ -134,11 +140,13 @@ int nfs_callback_down(void)
134 140
135 lock_kernel(); 141 lock_kernel();
136 down(&nfs_callback_sema); 142 down(&nfs_callback_sema);
137 if (--nfs_callback_info.users || nfs_callback_info.pid == 0) 143 nfs_callback_info.users--;
138 goto out; 144 do {
139 kill_proc(nfs_callback_info.pid, SIGKILL, 1); 145 if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
140 wait_for_completion(&nfs_callback_info.stopped); 146 break;
141out: 147 if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
148 break;
149 } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
142 up(&nfs_callback_sema); 150 up(&nfs_callback_sema);
143 unlock_kernel(); 151 unlock_kernel();
144 return ret; 152 return ret;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 7c33b9a81a94..05c38cf40b69 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
330 330
331static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) 331static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
332{ 332{
333 uint32_t *savep; 333 uint32_t *savep = NULL;
334 unsigned status = res->status; 334 unsigned status = res->status;
335 335
336 if (unlikely(status != 0)) 336 if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp,
358 struct xdr_stream *xdr_in, void *argp, 358 struct xdr_stream *xdr_in, void *argp,
359 struct xdr_stream *xdr_out, void *resp) 359 struct xdr_stream *xdr_out, void *resp)
360{ 360{
361 struct callback_op *op; 361 struct callback_op *op = &callback_ops[0];
362 unsigned int op_nr; 362 unsigned int op_nr = OP_CB_ILLEGAL;
363 unsigned int status = 0; 363 unsigned int status = 0;
364 long maxlen; 364 long maxlen;
365 unsigned res; 365 unsigned res;
366 366
367 dprintk("%s: start\n", __FUNCTION__); 367 dprintk("%s: start\n", __FUNCTION__);
368 status = decode_op_hdr(xdr_in, &op_nr); 368 status = decode_op_hdr(xdr_in, &op_nr);
369 if (unlikely(status != 0)) { 369 if (likely(status == 0)) {
370 op_nr = OP_CB_ILLEGAL; 370 switch (op_nr) {
371 op = &callback_ops[0]; 371 case OP_CB_GETATTR:
372 } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) { 372 case OP_CB_RECALL:
373 op_nr = OP_CB_ILLEGAL; 373 op = &callback_ops[op_nr];
374 op = &callback_ops[0]; 374 break;
375 status = htonl(NFS4ERR_OP_ILLEGAL); 375 default:
376 } else 376 op_nr = OP_CB_ILLEGAL;
377 op = &callback_ops[op_nr]; 377 op = &callback_ops[0];
378 status = htonl(NFS4ERR_OP_ILLEGAL);
379 }
380 }
378 381
379 maxlen = xdr_out->end - xdr_out->p; 382 maxlen = xdr_out->end - xdr_out->p;
380 if (maxlen > 0 && maxlen < PAGE_SIZE) { 383 if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
416 decode_compound_hdr_arg(&xdr_in, &hdr_arg); 419 decode_compound_hdr_arg(&xdr_in, &hdr_arg);
417 hdr_res.taglen = hdr_arg.taglen; 420 hdr_res.taglen = hdr_arg.taglen;
418 hdr_res.tag = hdr_arg.tag; 421 hdr_res.tag = hdr_arg.tag;
422 hdr_res.nops = NULL;
419 encode_compound_hdr_res(&xdr_out, &hdr_res); 423 encode_compound_hdr_res(&xdr_out, &hdr_res);
420 424
421 for (;;) { 425 for (;;) {
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index c6f07c1c71e6..d3be923d4e43 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
421 nfs_free_delegation(delegation); 421 nfs_free_delegation(delegation);
422 } 422 }
423} 423}
424
425int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
426{
427 struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
428 struct nfs_inode *nfsi = NFS_I(inode);
429 struct nfs_delegation *delegation;
430 int res = 0;
431
432 if (nfsi->delegation_state == 0)
433 return 0;
434 spin_lock(&clp->cl_lock);
435 delegation = nfsi->delegation;
436 if (delegation != NULL) {
437 memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
438 res = 1;
439 }
440 spin_unlock(&clp->cl_lock);
441 return res;
442}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 7a0b2bfce771..3858694652fa 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); 41int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
42int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); 42int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
43int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); 43int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
44int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
44 45
45static inline int nfs_have_delegation(struct inode *inode, int flags) 46static inline int nfs_have_delegation(struct inode *inode, int flags)
46{ 47{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554bead692..06c48b385c94 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
34 34
35#include "nfs4_fs.h" 35#include "nfs4_fs.h"
36#include "delegation.h" 36#include "delegation.h"
37#include "iostat.h"
37 38
38#define NFS_PARANOIA 1 39#define NFS_PARANOIA 1
39/* #define NFS_DEBUG_VERBOSE 1 */ 40/* #define NFS_DEBUG_VERBOSE 1 */
@@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
129{ 130{
130 int res = 0; 131 int res = 0;
131 132
133 dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
134 inode->i_sb->s_id, inode->i_ino);
135
132 lock_kernel(); 136 lock_kernel();
133 /* Call generic open code in order to cache credentials */ 137 /* Call generic open code in order to cache credentials */
134 if (!res) 138 if (!res)
@@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
172 unsigned long timestamp; 176 unsigned long timestamp;
173 int error; 177 int error;
174 178
175 dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); 179 dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
180 __FUNCTION__, (long long)desc->entry->cookie,
181 page->index);
176 182
177 again: 183 again:
178 timestamp = jiffies; 184 timestamp = jiffies;
@@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
244 status; 250 status;
245 251
246 while((status = dir_decode(desc)) == 0) { 252 while((status = dir_decode(desc)) == 0) {
247 dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); 253 dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
254 __FUNCTION__, (unsigned long long)entry->cookie);
248 if (entry->prev_cookie == *desc->dir_cookie) 255 if (entry->prev_cookie == *desc->dir_cookie)
249 break; 256 break;
250 if (loop_count++ > 200) { 257 if (loop_count++ > 200) {
@@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
252 schedule(); 259 schedule();
253 } 260 }
254 } 261 }
255 dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
256 return status; 262 return status;
257} 263}
258 264
@@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
276 if (status) 282 if (status)
277 break; 283 break;
278 284
279 dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); 285 dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
286 (unsigned long long)entry->cookie, desc->current_index);
280 287
281 if (desc->file->f_pos == desc->current_index) { 288 if (desc->file->f_pos == desc->current_index) {
282 *desc->dir_cookie = entry->cookie; 289 *desc->dir_cookie = entry->cookie;
@@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
288 schedule(); 295 schedule();
289 } 296 }
290 } 297 }
291 dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
292 return status; 298 return status;
293} 299}
294 300
@@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
303 struct page *page; 309 struct page *page;
304 int status; 310 int status;
305 311
306 dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index); 312 dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
313 __FUNCTION__, desc->page_index,
314 (long long) *desc->dir_cookie);
307 315
308 page = read_cache_page(inode->i_mapping, desc->page_index, 316 page = read_cache_page(inode->i_mapping, desc->page_index,
309 (filler_t *)nfs_readdir_filler, desc); 317 (filler_t *)nfs_readdir_filler, desc);
@@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
324 if (status < 0) 332 if (status < 0)
325 dir_page_release(desc); 333 dir_page_release(desc);
326 out: 334 out:
327 dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status); 335 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
328 return status; 336 return status;
329 read_error: 337 read_error:
330 page_cache_release(page); 338 page_cache_release(page);
@@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
346 354
347 /* Always search-by-index from the beginning of the cache */ 355 /* Always search-by-index from the beginning of the cache */
348 if (*desc->dir_cookie == 0) { 356 if (*desc->dir_cookie == 0) {
349 dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); 357 dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
358 (long long)desc->file->f_pos);
350 desc->page_index = 0; 359 desc->page_index = 0;
351 desc->entry->cookie = desc->entry->prev_cookie = 0; 360 desc->entry->cookie = desc->entry->prev_cookie = 0;
352 desc->entry->eof = 0; 361 desc->entry->eof = 0;
353 desc->current_index = 0; 362 desc->current_index = 0;
354 } else 363 } else
355 dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); 364 dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
365 (unsigned long long)*desc->dir_cookie);
356 366
357 for (;;) { 367 for (;;) {
358 res = find_dirent_page(desc); 368 res = find_dirent_page(desc);
@@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
365 schedule(); 375 schedule();
366 } 376 }
367 } 377 }
368 dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res); 378
379 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
369 return res; 380 return res;
370} 381}
371 382
@@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
390 int loop_count = 0, 401 int loop_count = 0,
391 res; 402 res;
392 403
393 dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); 404 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
405 (unsigned long long)entry->cookie);
394 406
395 for(;;) { 407 for(;;) {
396 unsigned d_type = DT_UNKNOWN; 408 unsigned d_type = DT_UNKNOWN;
@@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
427 dir_page_release(desc); 439 dir_page_release(desc);
428 if (dentry != NULL) 440 if (dentry != NULL)
429 dput(dentry); 441 dput(dentry);
430 dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); 442 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
443 (unsigned long long)*desc->dir_cookie, res);
431 return res; 444 return res;
432} 445}
433 446
@@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
453 struct page *page = NULL; 466 struct page *page = NULL;
454 int status; 467 int status;
455 468
456 dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); 469 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
470 (unsigned long long)*desc->dir_cookie);
457 471
458 page = alloc_page(GFP_HIGHUSER); 472 page = alloc_page(GFP_HIGHUSER);
459 if (!page) { 473 if (!page) {
@@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
485 desc->entry->cookie = desc->entry->prev_cookie = 0; 499 desc->entry->cookie = desc->entry->prev_cookie = 0;
486 desc->entry->eof = 0; 500 desc->entry->eof = 0;
487 out: 501 out:
488 dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status); 502 dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
503 __FUNCTION__, status);
489 return status; 504 return status;
490 out_release: 505 out_release:
491 dir_page_release(desc); 506 dir_page_release(desc);
@@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
507 struct nfs_fattr fattr; 522 struct nfs_fattr fattr;
508 long res; 523 long res;
509 524
525 dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
526 dentry->d_parent->d_name.name, dentry->d_name.name,
527 (long long)filp->f_pos);
528 nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
529
510 lock_kernel(); 530 lock_kernel();
511 531
512 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 532 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
566 } 586 }
567 } 587 }
568 unlock_kernel(); 588 unlock_kernel();
569 if (res < 0) 589 if (res > 0)
570 return res; 590 res = 0;
571 return 0; 591 dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
592 dentry->d_parent->d_name.name, dentry->d_name.name,
593 res);
594 return res;
572} 595}
573 596
574loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) 597loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -599,6 +622,10 @@ out:
599 */ 622 */
600int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) 623int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
601{ 624{
625 dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
626 dentry->d_parent->d_name.name, dentry->d_name.name,
627 datasync);
628
602 return 0; 629 return 0;
603} 630}
604 631
@@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
713 parent = dget_parent(dentry); 740 parent = dget_parent(dentry);
714 lock_kernel(); 741 lock_kernel();
715 dir = parent->d_inode; 742 dir = parent->d_inode;
743 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
716 inode = dentry->d_inode; 744 inode = dentry->d_inode;
717 745
718 if (!inode) { 746 if (!inode) {
@@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
722 } 750 }
723 751
724 if (is_bad_inode(inode)) { 752 if (is_bad_inode(inode)) {
725 dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n", 753 dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
726 dentry->d_parent->d_name.name, dentry->d_name.name); 754 __FUNCTION__, dentry->d_parent->d_name.name,
755 dentry->d_name.name);
727 goto out_bad; 756 goto out_bad;
728 } 757 }
729 758
@@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
755 out_valid: 784 out_valid:
756 unlock_kernel(); 785 unlock_kernel();
757 dput(parent); 786 dput(parent);
787 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
788 __FUNCTION__, dentry->d_parent->d_name.name,
789 dentry->d_name.name);
758 return 1; 790 return 1;
759out_zap_parent: 791out_zap_parent:
760 nfs_zap_caches(dir); 792 nfs_zap_caches(dir);
@@ -771,6 +803,9 @@ out_zap_parent:
771 d_drop(dentry); 803 d_drop(dentry);
772 unlock_kernel(); 804 unlock_kernel();
773 dput(parent); 805 dput(parent);
806 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
807 __FUNCTION__, dentry->d_parent->d_name.name,
808 dentry->d_name.name);
774 return 0; 809 return 0;
775} 810}
776 811
@@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
844 879
845 dfprintk(VFS, "NFS: lookup(%s/%s)\n", 880 dfprintk(VFS, "NFS: lookup(%s/%s)\n",
846 dentry->d_parent->d_name.name, dentry->d_name.name); 881 dentry->d_parent->d_name.name, dentry->d_name.name);
882 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
847 883
848 res = ERR_PTR(-ENAMETOOLONG); 884 res = ERR_PTR(-ENAMETOOLONG);
849 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) 885 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -865,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
865 res = ERR_PTR(error); 901 res = ERR_PTR(error);
866 goto out_unlock; 902 goto out_unlock;
867 } 903 }
868 res = ERR_PTR(-EACCES);
869 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); 904 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
870 if (!inode) 905 res = (struct dentry *)inode;
906 if (IS_ERR(res))
871 goto out_unlock; 907 goto out_unlock;
872no_entry: 908no_entry:
873 res = d_add_unique(dentry, inode); 909 res = d_add_unique(dentry, inode);
@@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
912 struct dentry *res = NULL; 948 struct dentry *res = NULL;
913 int error; 949 int error;
914 950
951 dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
952 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
953
915 /* Check that we are indeed trying to open this file */ 954 /* Check that we are indeed trying to open this file */
916 if (!is_atomic_open(dir, nd)) 955 if (!is_atomic_open(dir, nd))
917 goto no_open; 956 goto no_open;
@@ -1057,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
1057 return NULL; 1096 return NULL;
1058 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 1097 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
1059 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); 1098 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
1060 if (!inode) { 1099 if (IS_ERR(inode)) {
1061 dput(dentry); 1100 dput(dentry);
1062 return NULL; 1101 return NULL;
1063 } 1102 }
@@ -1095,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1095 if (error < 0) 1134 if (error < 0)
1096 goto out_err; 1135 goto out_err;
1097 } 1136 }
1098 error = -ENOMEM;
1099 inode = nfs_fhget(dentry->d_sb, fhandle, fattr); 1137 inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
1100 if (inode == NULL) 1138 error = PTR_ERR(inode);
1139 if (IS_ERR(inode))
1101 goto out_err; 1140 goto out_err;
1102 d_instantiate(dentry, inode); 1141 d_instantiate(dentry, inode);
1103 return 0; 1142 return 0;
@@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1119 int error; 1158 int error;
1120 int open_flags = 0; 1159 int open_flags = 0;
1121 1160
1122 dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, 1161 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1123 dir->i_ino, dentry->d_name.name); 1162 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1124 1163
1125 attr.ia_mode = mode; 1164 attr.ia_mode = mode;
1126 attr.ia_valid = ATTR_MODE; 1165 attr.ia_valid = ATTR_MODE;
@@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1153 struct iattr attr; 1192 struct iattr attr;
1154 int status; 1193 int status;
1155 1194
1156 dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id, 1195 dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
1157 dir->i_ino, dentry->d_name.name); 1196 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1158 1197
1159 if (!new_valid_dev(rdev)) 1198 if (!new_valid_dev(rdev))
1160 return -EINVAL; 1199 return -EINVAL;
@@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1186 struct iattr attr; 1225 struct iattr attr;
1187 int error; 1226 int error;
1188 1227
1189 dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id, 1228 dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
1190 dir->i_ino, dentry->d_name.name); 1229 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1191 1230
1192 attr.ia_valid = ATTR_MODE; 1231 attr.ia_valid = ATTR_MODE;
1193 attr.ia_mode = mode | S_IFDIR; 1232 attr.ia_mode = mode | S_IFDIR;
@@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1212{ 1251{
1213 int error; 1252 int error;
1214 1253
1215 dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id, 1254 dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
1216 dir->i_ino, dentry->d_name.name); 1255 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1217 1256
1218 lock_kernel(); 1257 lock_kernel();
1219 nfs_begin_data_update(dir); 1258 nfs_begin_data_update(dir);
@@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1241 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", 1280 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
1242 dentry->d_parent->d_name.name, dentry->d_name.name, 1281 dentry->d_parent->d_name.name, dentry->d_name.name,
1243 atomic_read(&dentry->d_count)); 1282 atomic_read(&dentry->d_count));
1283 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
1244 1284
1245#ifdef NFS_PARANOIA 1285#ifdef NFS_PARANOIA
1246if (!dentry->d_inode) 1286if (!dentry->d_inode)
@@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
1268 sillycounter++; 1308 sillycounter++;
1269 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); 1309 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
1270 1310
1271 dfprintk(VFS, "trying to rename %s to %s\n", 1311 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
1272 dentry->d_name.name, silly); 1312 dentry->d_name.name, silly);
1273 1313
1274 sdentry = lookup_one_len(silly, dentry->d_parent, slen); 1314 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
1275 /* 1315 /*
@@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1640 struct rpc_cred *cred; 1680 struct rpc_cred *cred;
1641 int res = 0; 1681 int res = 0;
1642 1682
1683 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
1684
1643 if (mask == 0) 1685 if (mask == 0)
1644 goto out; 1686 goto out;
1645 /* Is this sys_access() ? */ 1687 /* Is this sys_access() ? */
@@ -1679,13 +1721,15 @@ force_lookup:
1679 res = PTR_ERR(cred); 1721 res = PTR_ERR(cred);
1680 unlock_kernel(); 1722 unlock_kernel();
1681out: 1723out:
1724 dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
1725 inode->i_sb->s_id, inode->i_ino, mask, res);
1682 return res; 1726 return res;
1683out_notsup: 1727out_notsup:
1684 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 1728 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
1685 if (res == 0) 1729 if (res == 0)
1686 res = generic_permission(inode, mask, NULL); 1730 res = generic_permission(inode, mask, NULL);
1687 unlock_kernel(); 1731 unlock_kernel();
1688 return res; 1732 goto out;
1689} 1733}
1690 1734
1691/* 1735/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4ae2f3b33fef..0f583cb16ddb 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -7,11 +7,11 @@
7 * 7 *
8 * There are important applications whose performance or correctness 8 * There are important applications whose performance or correctness
9 * depends on uncached access to file data. Database clusters 9 * depends on uncached access to file data. Database clusters
10 * (multiple copies of the same instance running on separate hosts) 10 * (multiple copies of the same instance running on separate hosts)
11 * implement their own cache coherency protocol that subsumes file 11 * implement their own cache coherency protocol that subsumes file
12 * system cache protocols. Applications that process datasets 12 * system cache protocols. Applications that process datasets
13 * considerably larger than the client's memory do not always benefit 13 * considerably larger than the client's memory do not always benefit
14 * from a local cache. A streaming video server, for instance, has no 14 * from a local cache. A streaming video server, for instance, has no
15 * need to cache the contents of a file. 15 * need to cache the contents of a file.
16 * 16 *
17 * When an application requests uncached I/O, all read and write requests 17 * When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
34 * 08 Jun 2003 Port to 2.5 APIs --cel 34 * 08 Jun 2003 Port to 2.5 APIs --cel
35 * 31 Mar 2004 Handle direct I/O without VFS support --cel 35 * 31 Mar 2004 Handle direct I/O without VFS support --cel
36 * 15 Sep 2004 Parallel async reads --cel 36 * 15 Sep 2004 Parallel async reads --cel
37 * 04 May 2005 support O_DIRECT with aio --cel
37 * 38 *
38 */ 39 */
39 40
@@ -54,10 +55,10 @@
54#include <asm/uaccess.h> 55#include <asm/uaccess.h>
55#include <asm/atomic.h> 56#include <asm/atomic.h>
56 57
58#include "iostat.h"
59
57#define NFSDBG_FACILITY NFSDBG_VFS 60#define NFSDBG_FACILITY NFSDBG_VFS
58#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
59 61
60static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
61static kmem_cache_t *nfs_direct_cachep; 62static kmem_cache_t *nfs_direct_cachep;
62 63
63/* 64/*
@@ -65,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep;
65 */ 66 */
66struct nfs_direct_req { 67struct nfs_direct_req {
67 struct kref kref; /* release manager */ 68 struct kref kref; /* release manager */
68 struct list_head list; /* nfs_read_data structs */ 69
69 wait_queue_head_t wait; /* wait for i/o completion */ 70 /* I/O parameters */
71 struct list_head list, /* nfs_read/write_data structs */
72 rewrite_list; /* saved nfs_write_data structs */
73 struct nfs_open_context *ctx; /* file open context info */
74 struct kiocb * iocb; /* controlling i/o request */
75 struct inode * inode; /* target file of i/o */
76 unsigned long user_addr; /* location of user's buffer */
77 size_t user_count; /* total bytes to move */
78 loff_t pos; /* starting offset in file */
70 struct page ** pages; /* pages in our buffer */ 79 struct page ** pages; /* pages in our buffer */
71 unsigned int npages; /* count of pages */ 80 unsigned int npages; /* count of pages */
72 atomic_t complete, /* i/os we're waiting for */ 81
73 count, /* bytes actually processed */ 82 /* completion state */
83 spinlock_t lock; /* protect completion state */
84 int outstanding; /* i/os we're waiting for */
85 ssize_t count, /* bytes actually processed */
74 error; /* any reported error */ 86 error; /* any reported error */
87 struct completion completion; /* wait for i/o completion */
88
89 /* commit state */
90 struct nfs_write_data * commit_data; /* special write_data for commits */
91 int flags;
92#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
93#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
94 struct nfs_writeverf verf; /* unstable write verifier */
75}; 95};
76 96
97static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
98static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
77 99
78/** 100/**
79 * nfs_get_user_pages - find and set up pages underlying user's buffer 101 * nfs_direct_IO - NFS address space operation for direct I/O
80 * rw: direction (read or write) 102 * @rw: direction (read or write)
81 * user_addr: starting address of this segment of user's buffer 103 * @iocb: target I/O control block
82 * count: size of this segment 104 * @iov: array of vectors that define I/O buffer
83 * @pages: returned array of page struct pointers underlying user's buffer 105 * @pos: offset in file to begin the operation
106 * @nr_segs: size of iovec array
107 *
108 * The presence of this routine in the address space ops vector means
109 * the NFS client supports direct I/O. However, we shunt off direct
110 * read and write requests before the VFS gets them, so this method
111 * should never be called.
84 */ 112 */
85static inline int 113ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
86nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, 114{
87 struct page ***pages) 115 struct dentry *dentry = iocb->ki_filp->f_dentry;
116
117 dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
118 dentry->d_name.name, (long long) pos, nr_segs);
119
120 return -EINVAL;
121}
122
123static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
124{
125 int i;
126 for (i = 0; i < npages; i++) {
127 struct page *page = pages[i];
128 if (do_dirty && !PageCompound(page))
129 set_page_dirty_lock(page);
130 page_cache_release(page);
131 }
132 kfree(pages);
133}
134
135static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
88{ 136{
89 int result = -ENOMEM; 137 int result = -ENOMEM;
90 unsigned long page_count; 138 unsigned long page_count;
91 size_t array_size; 139 size_t array_size;
92 140
93 /* set an arbitrary limit to prevent type overflow */
94 /* XXX: this can probably be as large as INT_MAX */
95 if (size > MAX_DIRECTIO_SIZE) {
96 *pages = NULL;
97 return -EFBIG;
98 }
99
100 page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; 141 page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
101 page_count -= user_addr >> PAGE_SHIFT; 142 page_count -= user_addr >> PAGE_SHIFT;
102 143
@@ -108,75 +149,117 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
108 page_count, (rw == READ), 0, 149 page_count, (rw == READ), 0,
109 *pages, NULL); 150 *pages, NULL);
110 up_read(&current->mm->mmap_sem); 151 up_read(&current->mm->mmap_sem);
111 /* 152 if (result != page_count) {
112 * If we got fewer pages than expected from get_user_pages(), 153 /*
113 * the user buffer runs off the end of a mapping; return EFAULT. 154 * If we got fewer pages than expected from
114 */ 155 * get_user_pages(), the user buffer runs off the
115 if (result >= 0 && result < page_count) { 156 * end of a mapping; return EFAULT.
116 nfs_free_user_pages(*pages, result, 0); 157 */
158 if (result >= 0) {
159 nfs_free_user_pages(*pages, result, 0);
160 result = -EFAULT;
161 } else
162 kfree(*pages);
117 *pages = NULL; 163 *pages = NULL;
118 result = -EFAULT;
119 } 164 }
120 } 165 }
121 return result; 166 return result;
122} 167}
123 168
124/** 169static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
125 * nfs_free_user_pages - tear down page struct array
126 * @pages: array of page struct pointers underlying target buffer
127 * @npages: number of pages in the array
128 * @do_dirty: dirty the pages as we release them
129 */
130static void
131nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
132{ 170{
133 int i; 171 struct nfs_direct_req *dreq;
134 for (i = 0; i < npages; i++) { 172
135 struct page *page = pages[i]; 173 dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
136 if (do_dirty && !PageCompound(page)) 174 if (!dreq)
137 set_page_dirty_lock(page); 175 return NULL;
138 page_cache_release(page); 176
139 } 177 kref_init(&dreq->kref);
140 kfree(pages); 178 init_completion(&dreq->completion);
179 INIT_LIST_HEAD(&dreq->list);
180 INIT_LIST_HEAD(&dreq->rewrite_list);
181 dreq->iocb = NULL;
182 dreq->ctx = NULL;
183 spin_lock_init(&dreq->lock);
184 dreq->outstanding = 0;
185 dreq->count = 0;
186 dreq->error = 0;
187 dreq->flags = 0;
188
189 return dreq;
141} 190}
142 191
143/**
144 * nfs_direct_req_release - release nfs_direct_req structure for direct read
145 * @kref: kref object embedded in an nfs_direct_req structure
146 *
147 */
148static void nfs_direct_req_release(struct kref *kref) 192static void nfs_direct_req_release(struct kref *kref)
149{ 193{
150 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); 194 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
195
196 if (dreq->ctx != NULL)
197 put_nfs_open_context(dreq->ctx);
151 kmem_cache_free(nfs_direct_cachep, dreq); 198 kmem_cache_free(nfs_direct_cachep, dreq);
152} 199}
153 200
154/** 201/*
155 * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read 202 * Collects and returns the final error value/byte-count.
156 * @count: count of bytes for the read request 203 */
157 * @rsize: local rsize setting 204static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
205{
206 ssize_t result = -EIOCBQUEUED;
207
208 /* Async requests don't wait here */
209 if (dreq->iocb)
210 goto out;
211
212 result = wait_for_completion_interruptible(&dreq->completion);
213
214 if (!result)
215 result = dreq->error;
216 if (!result)
217 result = dreq->count;
218
219out:
220 kref_put(&dreq->kref, nfs_direct_req_release);
221 return (ssize_t) result;
222}
223
224/*
225 * We must hold a reference to all the pages in this direct read request
226 * until the RPCs complete. This could be long *after* we are woken up in
227 * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
158 * 228 *
229 * In addition, synchronous I/O uses a stack-allocated iocb. Thus we
230 * can't trust the iocb is still valid here if this is a synchronous
231 * request. If the waiter is woken prematurely, the iocb is long gone.
232 */
233static void nfs_direct_complete(struct nfs_direct_req *dreq)
234{
235 nfs_free_user_pages(dreq->pages, dreq->npages, 1);
236
237 if (dreq->iocb) {
238 long res = (long) dreq->error;
239 if (!res)
240 res = (long) dreq->count;
241 aio_complete(dreq->iocb, res, 0);
242 }
243 complete_all(&dreq->completion);
244
245 kref_put(&dreq->kref, nfs_direct_req_release);
246}
247
248/*
159 * Note we also set the number of requests we have in the dreq when we are 249 * Note we also set the number of requests we have in the dreq when we are
160 * done. This prevents races with I/O completion so we will always wait 250 * done. This prevents races with I/O completion so we will always wait
161 * until all requests have been dispatched and completed. 251 * until all requests have been dispatched and completed.
162 */ 252 */
163static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize) 253static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
164{ 254{
165 struct list_head *list; 255 struct list_head *list;
166 struct nfs_direct_req *dreq; 256 struct nfs_direct_req *dreq;
167 unsigned int reads = 0;
168 unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 257 unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
169 258
170 dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); 259 dreq = nfs_direct_req_alloc();
171 if (!dreq) 260 if (!dreq)
172 return NULL; 261 return NULL;
173 262
174 kref_init(&dreq->kref);
175 init_waitqueue_head(&dreq->wait);
176 INIT_LIST_HEAD(&dreq->list);
177 atomic_set(&dreq->count, 0);
178 atomic_set(&dreq->error, 0);
179
180 list = &dreq->list; 263 list = &dreq->list;
181 for(;;) { 264 for(;;) {
182 struct nfs_read_data *data = nfs_readdata_alloc(rpages); 265 struct nfs_read_data *data = nfs_readdata_alloc(rpages);
@@ -196,72 +279,70 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
196 list_add(&data->pages, list); 279 list_add(&data->pages, list);
197 280
198 data->req = (struct nfs_page *) dreq; 281 data->req = (struct nfs_page *) dreq;
199 reads++; 282 dreq->outstanding++;
200 if (nbytes <= rsize) 283 if (nbytes <= rsize)
201 break; 284 break;
202 nbytes -= rsize; 285 nbytes -= rsize;
203 } 286 }
204 kref_get(&dreq->kref); 287 kref_get(&dreq->kref);
205 atomic_set(&dreq->complete, reads);
206 return dreq; 288 return dreq;
207} 289}
208 290
209/** 291static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
210 * nfs_direct_read_result - handle a read reply for a direct read request
211 * @data: address of NFS READ operation control block
212 * @status: status of this NFS READ operation
213 *
214 * We must hold a reference to all the pages in this direct read request
215 * until the RPCs complete. This could be long *after* we are woken up in
216 * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
217 */
218static void nfs_direct_read_result(struct nfs_read_data *data, int status)
219{ 292{
293 struct nfs_read_data *data = calldata;
220 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; 294 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
221 295
222 if (likely(status >= 0)) 296 if (nfs_readpage_result(task, data) != 0)
223 atomic_add(data->res.count, &dreq->count); 297 return;
298
299 spin_lock(&dreq->lock);
300
301 if (likely(task->tk_status >= 0))
302 dreq->count += data->res.count;
224 else 303 else
225 atomic_set(&dreq->error, status); 304 dreq->error = task->tk_status;
226 305
227 if (unlikely(atomic_dec_and_test(&dreq->complete))) { 306 if (--dreq->outstanding) {
228 nfs_free_user_pages(dreq->pages, dreq->npages, 1); 307 spin_unlock(&dreq->lock);
229 wake_up(&dreq->wait); 308 return;
230 kref_put(&dreq->kref, nfs_direct_req_release);
231 } 309 }
310
311 spin_unlock(&dreq->lock);
312 nfs_direct_complete(dreq);
232} 313}
233 314
234/** 315static const struct rpc_call_ops nfs_read_direct_ops = {
235 * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read 316 .rpc_call_done = nfs_direct_read_result,
236 * @dreq: address of nfs_direct_req struct for this request 317 .rpc_release = nfs_readdata_release,
237 * @inode: target inode 318};
238 * @ctx: target file open context 319
239 * @user_addr: starting address of this segment of user's buffer 320/*
240 * @count: size of this segment
241 * @file_offset: offset in file to begin the operation
242 *
243 * For each nfs_read_data struct that was allocated on the list, dispatch 321 * For each nfs_read_data struct that was allocated on the list, dispatch
244 * an NFS READ operation 322 * an NFS READ operation
245 */ 323 */
246static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, 324static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
247 struct inode *inode, struct nfs_open_context *ctx,
248 unsigned long user_addr, size_t count, loff_t file_offset)
249{ 325{
326 struct nfs_open_context *ctx = dreq->ctx;
327 struct inode *inode = ctx->dentry->d_inode;
250 struct list_head *list = &dreq->list; 328 struct list_head *list = &dreq->list;
251 struct page **pages = dreq->pages; 329 struct page **pages = dreq->pages;
330 size_t count = dreq->user_count;
331 loff_t pos = dreq->pos;
332 size_t rsize = NFS_SERVER(inode)->rsize;
252 unsigned int curpage, pgbase; 333 unsigned int curpage, pgbase;
253 unsigned int rsize = NFS_SERVER(inode)->rsize;
254 334
255 curpage = 0; 335 curpage = 0;
256 pgbase = user_addr & ~PAGE_MASK; 336 pgbase = dreq->user_addr & ~PAGE_MASK;
257 do { 337 do {
258 struct nfs_read_data *data; 338 struct nfs_read_data *data;
259 unsigned int bytes; 339 size_t bytes;
260 340
261 bytes = rsize; 341 bytes = rsize;
262 if (count < rsize) 342 if (count < rsize)
263 bytes = count; 343 bytes = count;
264 344
345 BUG_ON(list_empty(list));
265 data = list_entry(list->next, struct nfs_read_data, pages); 346 data = list_entry(list->next, struct nfs_read_data, pages);
266 list_del_init(&data->pages); 347 list_del_init(&data->pages);
267 348
@@ -269,7 +350,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
269 data->cred = ctx->cred; 350 data->cred = ctx->cred;
270 data->args.fh = NFS_FH(inode); 351 data->args.fh = NFS_FH(inode);
271 data->args.context = ctx; 352 data->args.context = ctx;
272 data->args.offset = file_offset; 353 data->args.offset = pos;
273 data->args.pgbase = pgbase; 354 data->args.pgbase = pgbase;
274 data->args.pages = &pages[curpage]; 355 data->args.pages = &pages[curpage];
275 data->args.count = bytes; 356 data->args.count = bytes;
@@ -277,77 +358,38 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
277 data->res.eof = 0; 358 data->res.eof = 0;
278 data->res.count = bytes; 359 data->res.count = bytes;
279 360
361 rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
362 &nfs_read_direct_ops, data);
280 NFS_PROTO(inode)->read_setup(data); 363 NFS_PROTO(inode)->read_setup(data);
281 364
282 data->task.tk_cookie = (unsigned long) inode; 365 data->task.tk_cookie = (unsigned long) inode;
283 data->complete = nfs_direct_read_result;
284 366
285 lock_kernel(); 367 lock_kernel();
286 rpc_execute(&data->task); 368 rpc_execute(&data->task);
287 unlock_kernel(); 369 unlock_kernel();
288 370
289 dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n", 371 dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
290 data->task.tk_pid, 372 data->task.tk_pid,
291 inode->i_sb->s_id, 373 inode->i_sb->s_id,
292 (long long)NFS_FILEID(inode), 374 (long long)NFS_FILEID(inode),
293 bytes, 375 bytes,
294 (unsigned long long)data->args.offset); 376 (unsigned long long)data->args.offset);
295 377
296 file_offset += bytes; 378 pos += bytes;
297 pgbase += bytes; 379 pgbase += bytes;
298 curpage += pgbase >> PAGE_SHIFT; 380 curpage += pgbase >> PAGE_SHIFT;
299 pgbase &= ~PAGE_MASK; 381 pgbase &= ~PAGE_MASK;
300 382
301 count -= bytes; 383 count -= bytes;
302 } while (count != 0); 384 } while (count != 0);
385 BUG_ON(!list_empty(list));
303} 386}
304 387
305/** 388static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
306 * nfs_direct_read_wait - wait for I/O completion for direct reads
307 * @dreq: request on which we are to wait
308 * @intr: whether or not this wait can be interrupted
309 *
310 * Collects and returns the final error value/byte-count.
311 */
312static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
313{
314 int result = 0;
315
316 if (intr) {
317 result = wait_event_interruptible(dreq->wait,
318 (atomic_read(&dreq->complete) == 0));
319 } else {
320 wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
321 }
322
323 if (!result)
324 result = atomic_read(&dreq->error);
325 if (!result)
326 result = atomic_read(&dreq->count);
327
328 kref_put(&dreq->kref, nfs_direct_req_release);
329 return (ssize_t) result;
330}
331
332/**
333 * nfs_direct_read_seg - Read in one iov segment. Generate separate
334 * read RPCs for each "rsize" bytes.
335 * @inode: target inode
336 * @ctx: target file open context
337 * @user_addr: starting address of this segment of user's buffer
338 * @count: size of this segment
339 * @file_offset: offset in file to begin the operation
340 * @pages: array of addresses of page structs defining user's buffer
341 * @nr_pages: number of pages in the array
342 *
343 */
344static ssize_t nfs_direct_read_seg(struct inode *inode,
345 struct nfs_open_context *ctx, unsigned long user_addr,
346 size_t count, loff_t file_offset, struct page **pages,
347 unsigned int nr_pages)
348{ 389{
349 ssize_t result; 390 ssize_t result;
350 sigset_t oldset; 391 sigset_t oldset;
392 struct inode *inode = iocb->ki_filp->f_mapping->host;
351 struct rpc_clnt *clnt = NFS_CLIENT(inode); 393 struct rpc_clnt *clnt = NFS_CLIENT(inode);
352 struct nfs_direct_req *dreq; 394 struct nfs_direct_req *dreq;
353 395
@@ -355,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
355 if (!dreq) 397 if (!dreq)
356 return -ENOMEM; 398 return -ENOMEM;
357 399
400 dreq->user_addr = user_addr;
401 dreq->user_count = count;
402 dreq->pos = pos;
358 dreq->pages = pages; 403 dreq->pages = pages;
359 dreq->npages = nr_pages; 404 dreq->npages = nr_pages;
405 dreq->inode = inode;
406 dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
407 if (!is_sync_kiocb(iocb))
408 dreq->iocb = iocb;
360 409
410 nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
361 rpc_clnt_sigmask(clnt, &oldset); 411 rpc_clnt_sigmask(clnt, &oldset);
362 nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, 412 nfs_direct_read_schedule(dreq);
363 file_offset); 413 result = nfs_direct_wait(dreq);
364 result = nfs_direct_read_wait(dreq, clnt->cl_intr);
365 rpc_clnt_sigunmask(clnt, &oldset); 414 rpc_clnt_sigunmask(clnt, &oldset);
366 415
367 return result; 416 return result;
368} 417}
369 418
370/** 419static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
371 * nfs_direct_read - For each iov segment, map the user's buffer
372 * then generate read RPCs.
373 * @inode: target inode
374 * @ctx: target file open context
375 * @iov: array of vectors that define I/O buffer
376 * file_offset: offset in file to begin the operation
377 * nr_segs: size of iovec array
378 *
379 * We've already pushed out any non-direct writes so that this read
380 * will see them when we read from the server.
381 */
382static ssize_t
383nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
384 const struct iovec *iov, loff_t file_offset,
385 unsigned long nr_segs)
386{ 420{
387 ssize_t tot_bytes = 0; 421 list_splice_init(&dreq->rewrite_list, &dreq->list);
388 unsigned long seg = 0; 422 while (!list_empty(&dreq->list)) {
389 423 struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
390 while ((seg < nr_segs) && (tot_bytes >= 0)) { 424 list_del(&data->pages);
391 ssize_t result; 425 nfs_writedata_release(data);
392 int page_count; 426 }
393 struct page **pages; 427}
394 const struct iovec *vec = &iov[seg++];
395 unsigned long user_addr = (unsigned long) vec->iov_base;
396 size_t size = vec->iov_len;
397
398 page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
399 if (page_count < 0) {
400 nfs_free_user_pages(pages, 0, 0);
401 if (tot_bytes > 0)
402 break;
403 return page_count;
404 }
405 428
406 result = nfs_direct_read_seg(inode, ctx, user_addr, size, 429#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
407 file_offset, pages, page_count); 430static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
431{
432 struct list_head *pos;
408 433
409 if (result <= 0) { 434 list_splice_init(&dreq->rewrite_list, &dreq->list);
410 if (tot_bytes > 0) 435 list_for_each(pos, &dreq->list)
411 break; 436 dreq->outstanding++;
412 return result; 437 dreq->count = 0;
413 } 438
414 tot_bytes += result; 439 nfs_direct_write_schedule(dreq, FLUSH_STABLE);
415 file_offset += result; 440}
416 if (result < size) 441
417 break; 442static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
443{
444 struct nfs_write_data *data = calldata;
445 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
446
447 /* Call the NFS version-specific code */
448 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
449 return;
450 if (unlikely(task->tk_status < 0)) {
451 dreq->error = task->tk_status;
452 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
453 }
454 if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
455 dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
456 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
418 } 457 }
419 458
420 return tot_bytes; 459 dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
460 nfs_direct_write_complete(dreq, data->inode);
421} 461}
422 462
423/** 463static const struct rpc_call_ops nfs_commit_direct_ops = {
424 * nfs_direct_write_seg - Write out one iov segment. Generate separate 464 .rpc_call_done = nfs_direct_commit_result,
425 * write RPCs for each "wsize" bytes, then commit. 465 .rpc_release = nfs_commit_release,
426 * @inode: target inode 466};
427 * @ctx: target file open context 467
428 * user_addr: starting address of this segment of user's buffer 468static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
429 * count: size of this segment
430 * file_offset: offset in file to begin the operation
431 * @pages: array of addresses of page structs defining user's buffer
432 * nr_pages: size of pages array
433 */
434static ssize_t nfs_direct_write_seg(struct inode *inode,
435 struct nfs_open_context *ctx, unsigned long user_addr,
436 size_t count, loff_t file_offset, struct page **pages,
437 int nr_pages)
438{ 469{
439 const unsigned int wsize = NFS_SERVER(inode)->wsize; 470 struct nfs_write_data *data = dreq->commit_data;
440 size_t request; 471 struct rpc_task *task = &data->task;
441 int curpage, need_commit;
442 ssize_t result, tot_bytes;
443 struct nfs_writeverf first_verf;
444 struct nfs_write_data *wdata;
445
446 wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
447 if (!wdata)
448 return -ENOMEM;
449 472
450 wdata->inode = inode; 473 data->inode = dreq->inode;
451 wdata->cred = ctx->cred; 474 data->cred = dreq->ctx->cred;
452 wdata->args.fh = NFS_FH(inode);
453 wdata->args.context = ctx;
454 wdata->args.stable = NFS_UNSTABLE;
455 if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
456 wdata->args.stable = NFS_FILE_SYNC;
457 wdata->res.fattr = &wdata->fattr;
458 wdata->res.verf = &wdata->verf;
459 475
460 nfs_begin_data_update(inode); 476 data->args.fh = NFS_FH(data->inode);
461retry: 477 data->args.offset = dreq->pos;
462 need_commit = 0; 478 data->args.count = dreq->user_count;
463 tot_bytes = 0; 479 data->res.count = 0;
464 curpage = 0; 480 data->res.fattr = &data->fattr;
465 request = count; 481 data->res.verf = &data->verf;
466 wdata->args.pgbase = user_addr & ~PAGE_MASK;
467 wdata->args.offset = file_offset;
468 do {
469 wdata->args.count = request;
470 if (wdata->args.count > wsize)
471 wdata->args.count = wsize;
472 wdata->args.pages = &pages[curpage];
473 482
474 dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", 483 rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
475 wdata->args.count, (long long) wdata->args.offset, 484 &nfs_commit_direct_ops, data);
476 user_addr + tot_bytes, wdata->args.pgbase, curpage); 485 NFS_PROTO(data->inode)->commit_setup(data, 0);
477 486
478 lock_kernel(); 487 data->task.tk_priority = RPC_PRIORITY_NORMAL;
479 result = NFS_PROTO(inode)->write(wdata); 488 data->task.tk_cookie = (unsigned long)data->inode;
480 unlock_kernel(); 489 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
490 dreq->commit_data = NULL;
481 491
482 if (result <= 0) { 492 dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
483 if (tot_bytes > 0)
484 break;
485 goto out;
486 }
487 493
488 if (tot_bytes == 0) 494 lock_kernel();
489 memcpy(&first_verf.verifier, &wdata->verf.verifier, 495 rpc_execute(&data->task);
490 sizeof(first_verf.verifier)); 496 unlock_kernel();
491 if (wdata->verf.committed != NFS_FILE_SYNC) { 497}
492 need_commit = 1;
493 if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
494 sizeof(first_verf.verifier)))
495 goto sync_retry;
496 }
497 498
498 tot_bytes += result; 499static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
500{
501 int flags = dreq->flags;
499 502
500 /* in case of a short write: stop now, let the app recover */ 503 dreq->flags = 0;
501 if (result < wdata->args.count) 504 switch (flags) {
505 case NFS_ODIRECT_DO_COMMIT:
506 nfs_direct_commit_schedule(dreq);
502 break; 507 break;
508 case NFS_ODIRECT_RESCHED_WRITES:
509 nfs_direct_write_reschedule(dreq);
510 break;
511 default:
512 nfs_end_data_update(inode);
513 if (dreq->commit_data != NULL)
514 nfs_commit_free(dreq->commit_data);
515 nfs_direct_free_writedata(dreq);
516 nfs_direct_complete(dreq);
517 }
518}
503 519
504 wdata->args.offset += result; 520static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
505 wdata->args.pgbase += result; 521{
506 curpage += wdata->args.pgbase >> PAGE_SHIFT; 522 dreq->commit_data = nfs_commit_alloc(0);
507 wdata->args.pgbase &= ~PAGE_MASK; 523 if (dreq->commit_data != NULL)
508 request -= result; 524 dreq->commit_data->req = (struct nfs_page *) dreq;
509 } while (request != 0); 525}
526#else
527static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
528{
529 dreq->commit_data = NULL;
530}
510 531
511 /* 532static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
512 * Commit data written so far, even in the event of an error 533{
513 */ 534 nfs_end_data_update(inode);
514 if (need_commit) { 535 nfs_direct_free_writedata(dreq);
515 wdata->args.count = tot_bytes; 536 nfs_direct_complete(dreq);
516 wdata->args.offset = file_offset; 537}
538#endif
517 539
518 lock_kernel(); 540static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
519 result = NFS_PROTO(inode)->commit(wdata); 541{
520 unlock_kernel(); 542 struct list_head *list;
543 struct nfs_direct_req *dreq;
544 unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
545
546 dreq = nfs_direct_req_alloc();
547 if (!dreq)
548 return NULL;
549
550 list = &dreq->list;
551 for(;;) {
552 struct nfs_write_data *data = nfs_writedata_alloc(wpages);
521 553
522 if (result < 0 || memcmp(&first_verf.verifier, 554 if (unlikely(!data)) {
523 &wdata->verf.verifier, 555 while (!list_empty(list)) {
524 sizeof(first_verf.verifier)) != 0) 556 data = list_entry(list->next,
525 goto sync_retry; 557 struct nfs_write_data, pages);
558 list_del(&data->pages);
559 nfs_writedata_free(data);
560 }
561 kref_put(&dreq->kref, nfs_direct_req_release);
562 return NULL;
563 }
564
565 INIT_LIST_HEAD(&data->pages);
566 list_add(&data->pages, list);
567
568 data->req = (struct nfs_page *) dreq;
569 dreq->outstanding++;
570 if (nbytes <= wsize)
571 break;
572 nbytes -= wsize;
526 } 573 }
527 result = tot_bytes;
528 574
529out: 575 nfs_alloc_commit_data(dreq);
530 nfs_end_data_update(inode);
531 nfs_writedata_free(wdata);
532 return result;
533 576
534sync_retry: 577 kref_get(&dreq->kref);
535 wdata->args.stable = NFS_FILE_SYNC; 578 return dreq;
536 goto retry;
537} 579}
538 580
539/** 581static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
540 * nfs_direct_write - For each iov segment, map the user's buffer
541 * then generate write and commit RPCs.
542 * @inode: target inode
543 * @ctx: target file open context
544 * @iov: array of vectors that define I/O buffer
545 * file_offset: offset in file to begin the operation
546 * nr_segs: size of iovec array
547 *
548 * Upon return, generic_file_direct_IO invalidates any cached pages
549 * that non-direct readers might access, so they will pick up these
550 * writes immediately.
551 */
552static ssize_t nfs_direct_write(struct inode *inode,
553 struct nfs_open_context *ctx, const struct iovec *iov,
554 loff_t file_offset, unsigned long nr_segs)
555{ 582{
556 ssize_t tot_bytes = 0; 583 struct nfs_write_data *data = calldata;
557 unsigned long seg = 0; 584 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
558 585 int status = task->tk_status;
559 while ((seg < nr_segs) && (tot_bytes >= 0)) { 586
560 ssize_t result; 587 if (nfs_writeback_done(task, data) != 0)
561 int page_count; 588 return;
562 struct page **pages; 589
563 const struct iovec *vec = &iov[seg++]; 590 spin_lock(&dreq->lock);
564 unsigned long user_addr = (unsigned long) vec->iov_base;
565 size_t size = vec->iov_len;
566
567 page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
568 if (page_count < 0) {
569 nfs_free_user_pages(pages, 0, 0);
570 if (tot_bytes > 0)
571 break;
572 return page_count;
573 }
574 591
575 result = nfs_direct_write_seg(inode, ctx, user_addr, size, 592 if (likely(status >= 0))
576 file_offset, pages, page_count); 593 dreq->count += data->res.count;
577 nfs_free_user_pages(pages, page_count, 0); 594 else
595 dreq->error = task->tk_status;
578 596
579 if (result <= 0) { 597 if (data->res.verf->committed != NFS_FILE_SYNC) {
580 if (tot_bytes > 0) 598 switch (dreq->flags) {
599 case 0:
600 memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
601 dreq->flags = NFS_ODIRECT_DO_COMMIT;
581 break; 602 break;
582 return result; 603 case NFS_ODIRECT_DO_COMMIT:
604 if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
605 dprintk("NFS: %5u write verify failed\n", task->tk_pid);
606 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
607 }
583 } 608 }
584 tot_bytes += result;
585 file_offset += result;
586 if (result < size)
587 break;
588 } 609 }
589 return tot_bytes; 610 /* In case we have to resend */
611 data->args.stable = NFS_FILE_SYNC;
612
613 spin_unlock(&dreq->lock);
590} 614}
591 615
592/** 616/*
593 * nfs_direct_IO - NFS address space operation for direct I/O 617 * NB: Return the value of the first error return code. Subsequent
594 * rw: direction (read or write) 618 * errors after the first one are ignored.
595 * @iocb: target I/O control block
596 * @iov: array of vectors that define I/O buffer
597 * file_offset: offset in file to begin the operation
598 * nr_segs: size of iovec array
599 *
600 */ 619 */
601ssize_t 620static void nfs_direct_write_release(void *calldata)
602nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
603 loff_t file_offset, unsigned long nr_segs)
604{ 621{
605 ssize_t result = -EINVAL; 622 struct nfs_write_data *data = calldata;
606 struct file *file = iocb->ki_filp; 623 struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
607 struct nfs_open_context *ctx;
608 struct dentry *dentry = file->f_dentry;
609 struct inode *inode = dentry->d_inode;
610 624
611 /* 625 spin_lock(&dreq->lock);
612 * No support for async yet 626 if (--dreq->outstanding) {
613 */ 627 spin_unlock(&dreq->lock);
614 if (!is_sync_kiocb(iocb)) 628 return;
615 return result;
616
617 ctx = (struct nfs_open_context *)file->private_data;
618 switch (rw) {
619 case READ:
620 dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
621 dentry->d_name.name, file_offset, nr_segs);
622
623 result = nfs_direct_read(inode, ctx, iov,
624 file_offset, nr_segs);
625 break;
626 case WRITE:
627 dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
628 dentry->d_name.name, file_offset, nr_segs);
629
630 result = nfs_direct_write(inode, ctx, iov,
631 file_offset, nr_segs);
632 break;
633 default:
634 break;
635 } 629 }
630 spin_unlock(&dreq->lock);
631
632 nfs_direct_write_complete(dreq, data->inode);
633}
634
635static const struct rpc_call_ops nfs_write_direct_ops = {
636 .rpc_call_done = nfs_direct_write_result,
637 .rpc_release = nfs_direct_write_release,
638};
639
640/*
641 * For each nfs_write_data struct that was allocated on the list, dispatch
642 * an NFS WRITE operation
643 */
644static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
645{
646 struct nfs_open_context *ctx = dreq->ctx;
647 struct inode *inode = ctx->dentry->d_inode;
648 struct list_head *list = &dreq->list;
649 struct page **pages = dreq->pages;
650 size_t count = dreq->user_count;
651 loff_t pos = dreq->pos;
652 size_t wsize = NFS_SERVER(inode)->wsize;
653 unsigned int curpage, pgbase;
654
655 curpage = 0;
656 pgbase = dreq->user_addr & ~PAGE_MASK;
657 do {
658 struct nfs_write_data *data;
659 size_t bytes;
660
661 bytes = wsize;
662 if (count < wsize)
663 bytes = count;
664
665 BUG_ON(list_empty(list));
666 data = list_entry(list->next, struct nfs_write_data, pages);
667 list_move_tail(&data->pages, &dreq->rewrite_list);
668
669 data->inode = inode;
670 data->cred = ctx->cred;
671 data->args.fh = NFS_FH(inode);
672 data->args.context = ctx;
673 data->args.offset = pos;
674 data->args.pgbase = pgbase;
675 data->args.pages = &pages[curpage];
676 data->args.count = bytes;
677 data->res.fattr = &data->fattr;
678 data->res.count = bytes;
679 data->res.verf = &data->verf;
680
681 rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
682 &nfs_write_direct_ops, data);
683 NFS_PROTO(inode)->write_setup(data, sync);
684
685 data->task.tk_priority = RPC_PRIORITY_NORMAL;
686 data->task.tk_cookie = (unsigned long) inode;
687
688 lock_kernel();
689 rpc_execute(&data->task);
690 unlock_kernel();
691
692 dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
693 data->task.tk_pid,
694 inode->i_sb->s_id,
695 (long long)NFS_FILEID(inode),
696 bytes,
697 (unsigned long long)data->args.offset);
698
699 pos += bytes;
700 pgbase += bytes;
701 curpage += pgbase >> PAGE_SHIFT;
702 pgbase &= ~PAGE_MASK;
703
704 count -= bytes;
705 } while (count != 0);
706 BUG_ON(!list_empty(list));
707}
708
709static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
710{
711 ssize_t result;
712 sigset_t oldset;
713 struct inode *inode = iocb->ki_filp->f_mapping->host;
714 struct rpc_clnt *clnt = NFS_CLIENT(inode);
715 struct nfs_direct_req *dreq;
716 size_t wsize = NFS_SERVER(inode)->wsize;
717 int sync = 0;
718
719 dreq = nfs_direct_write_alloc(count, wsize);
720 if (!dreq)
721 return -ENOMEM;
722 if (dreq->commit_data == NULL || count < wsize)
723 sync = FLUSH_STABLE;
724
725 dreq->user_addr = user_addr;
726 dreq->user_count = count;
727 dreq->pos = pos;
728 dreq->pages = pages;
729 dreq->npages = nr_pages;
730 dreq->inode = inode;
731 dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
732 if (!is_sync_kiocb(iocb))
733 dreq->iocb = iocb;
734
735 nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
736
737 nfs_begin_data_update(inode);
738
739 rpc_clnt_sigmask(clnt, &oldset);
740 nfs_direct_write_schedule(dreq, sync);
741 result = nfs_direct_wait(dreq);
742 rpc_clnt_sigunmask(clnt, &oldset);
743
636 return result; 744 return result;
637} 745}
638 746
@@ -640,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
640 * nfs_file_direct_read - file direct read operation for NFS files 748 * nfs_file_direct_read - file direct read operation for NFS files
641 * @iocb: target I/O control block 749 * @iocb: target I/O control block
642 * @buf: user's buffer into which to read data 750 * @buf: user's buffer into which to read data
643 * count: number of bytes to read 751 * @count: number of bytes to read
644 * pos: byte offset in file where reading starts 752 * @pos: byte offset in file where reading starts
645 * 753 *
646 * We use this function for direct reads instead of calling 754 * We use this function for direct reads instead of calling
647 * generic_file_aio_read() in order to avoid gfar's check to see if 755 * generic_file_aio_read() in order to avoid gfar's check to see if
648 * the request starts before the end of the file. For that check 756 * the request starts before the end of the file. For that check
649 * to work, we must generate a GETATTR before each direct read, and 757 * to work, we must generate a GETATTR before each direct read, and
650 * even then there is a window between the GETATTR and the subsequent 758 * even then there is a window between the GETATTR and the subsequent
651 * READ where the file size could change. So our preference is simply 759 * READ where the file size could change. Our preference is simply
652 * to do all reads the application wants, and the server will take 760 * to do all reads the application wants, and the server will take
653 * care of managing the end of file boundary. 761 * care of managing the end of file boundary.
654 * 762 *
655 * This function also eliminates unnecessarily updating the file's 763 * This function also eliminates unnecessarily updating the file's
656 * atime locally, as the NFS server sets the file's atime, and this 764 * atime locally, as the NFS server sets the file's atime, and this
657 * client must read the updated atime from the server back into its 765 * client must read the updated atime from the server back into its
658 * cache. 766 * cache.
659 */ 767 */
660ssize_t 768ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
661nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
662{ 769{
663 ssize_t retval = -EINVAL; 770 ssize_t retval = -EINVAL;
664 loff_t *ppos = &iocb->ki_pos; 771 int page_count;
772 struct page **pages;
665 struct file *file = iocb->ki_filp; 773 struct file *file = iocb->ki_filp;
666 struct nfs_open_context *ctx =
667 (struct nfs_open_context *) file->private_data;
668 struct address_space *mapping = file->f_mapping; 774 struct address_space *mapping = file->f_mapping;
669 struct inode *inode = mapping->host;
670 struct iovec iov = {
671 .iov_base = buf,
672 .iov_len = count,
673 };
674 775
675 dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", 776 dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
676 file->f_dentry->d_parent->d_name.name, 777 file->f_dentry->d_parent->d_name.name,
677 file->f_dentry->d_name.name, 778 file->f_dentry->d_name.name,
678 (unsigned long) count, (long long) pos); 779 (unsigned long) count, (long long) pos);
679 780
680 if (!is_sync_kiocb(iocb))
681 goto out;
682 if (count < 0) 781 if (count < 0)
683 goto out; 782 goto out;
684 retval = -EFAULT; 783 retval = -EFAULT;
685 if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len)) 784 if (!access_ok(VERIFY_WRITE, buf, count))
686 goto out; 785 goto out;
687 retval = 0; 786 retval = 0;
688 if (!count) 787 if (!count)
@@ -692,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
692 if (retval) 791 if (retval)
693 goto out; 792 goto out;
694 793
695 retval = nfs_direct_read(inode, ctx, &iov, pos, 1); 794 retval = nfs_get_user_pages(READ, (unsigned long) buf,
795 count, &pages);
796 if (retval < 0)
797 goto out;
798 page_count = retval;
799
800 retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
801 pages, page_count);
696 if (retval > 0) 802 if (retval > 0)
697 *ppos = pos + retval; 803 iocb->ki_pos = pos + retval;
698 804
699out: 805out:
700 return retval; 806 return retval;
@@ -704,8 +810,8 @@ out:
704 * nfs_file_direct_write - file direct write operation for NFS files 810 * nfs_file_direct_write - file direct write operation for NFS files
705 * @iocb: target I/O control block 811 * @iocb: target I/O control block
706 * @buf: user's buffer from which to write data 812 * @buf: user's buffer from which to write data
707 * count: number of bytes to write 813 * @count: number of bytes to write
708 * pos: byte offset in file where writing starts 814 * @pos: byte offset in file where writing starts
709 * 815 *
710 * We use this function for direct writes instead of calling 816 * We use this function for direct writes instead of calling
711 * generic_file_aio_write() in order to avoid taking the inode 817 * generic_file_aio_write() in order to avoid taking the inode
@@ -725,28 +831,19 @@ out:
725 * Note that O_APPEND is not supported for NFS direct writes, as there 831 * Note that O_APPEND is not supported for NFS direct writes, as there
726 * is no atomic O_APPEND write facility in the NFS protocol. 832 * is no atomic O_APPEND write facility in the NFS protocol.
727 */ 833 */
728ssize_t 834ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
729nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
730{ 835{
731 ssize_t retval; 836 ssize_t retval;
837 int page_count;
838 struct page **pages;
732 struct file *file = iocb->ki_filp; 839 struct file *file = iocb->ki_filp;
733 struct nfs_open_context *ctx =
734 (struct nfs_open_context *) file->private_data;
735 struct address_space *mapping = file->f_mapping; 840 struct address_space *mapping = file->f_mapping;
736 struct inode *inode = mapping->host;
737 struct iovec iov = {
738 .iov_base = (char __user *)buf,
739 };
740 841
741 dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", 842 dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
742 file->f_dentry->d_parent->d_name.name, 843 file->f_dentry->d_parent->d_name.name,
743 file->f_dentry->d_name.name, 844 file->f_dentry->d_name.name,
744 (unsigned long) count, (long long) pos); 845 (unsigned long) count, (long long) pos);
745 846
746 retval = -EINVAL;
747 if (!is_sync_kiocb(iocb))
748 goto out;
749
750 retval = generic_write_checks(file, &pos, &count, 0); 847 retval = generic_write_checks(file, &pos, &count, 0);
751 if (retval) 848 if (retval)
752 goto out; 849 goto out;
@@ -757,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
757 retval = 0; 854 retval = 0;
758 if (!count) 855 if (!count)
759 goto out; 856 goto out;
760 iov.iov_len = count,
761 857
762 retval = -EFAULT; 858 retval = -EFAULT;
763 if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) 859 if (!access_ok(VERIFY_READ, buf, count))
764 goto out; 860 goto out;
765 861
766 retval = nfs_sync_mapping(mapping); 862 retval = nfs_sync_mapping(mapping);
767 if (retval) 863 if (retval)
768 goto out; 864 goto out;
769 865
770 retval = nfs_direct_write(inode, ctx, &iov, pos, 1); 866 retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
867 count, &pages);
868 if (retval < 0)
869 goto out;
870 page_count = retval;
871
872 retval = nfs_direct_write(iocb, (unsigned long) buf, count,
873 pos, pages, page_count);
874
875 /*
876 * XXX: nfs_end_data_update() already ensures this file's
877 * cached data is subsequently invalidated. Do we really
878 * need to call invalidate_inode_pages2() again here?
879 *
880 * For aio writes, this invalidation will almost certainly
881 * occur before the writes complete. Kind of racey.
882 */
771 if (mapping->nrpages) 883 if (mapping->nrpages)
772 invalidate_inode_pages2(mapping); 884 invalidate_inode_pages2(mapping);
885
773 if (retval > 0) 886 if (retval > 0)
774 iocb->ki_pos = pos + retval; 887 iocb->ki_pos = pos + retval;
775 888
@@ -777,6 +890,10 @@ out:
777 return retval; 890 return retval;
778} 891}
779 892
893/**
894 * nfs_init_directcache - create a slab cache for nfs_direct_req structures
895 *
896 */
780int nfs_init_directcache(void) 897int nfs_init_directcache(void)
781{ 898{
782 nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", 899 nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
@@ -790,6 +907,10 @@ int nfs_init_directcache(void)
790 return 0; 907 return 0;
791} 908}
792 909
910/**
911 * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
912 *
913 */
793void nfs_destroy_directcache(void) 914void nfs_destroy_directcache(void)
794{ 915{
795 if (kmem_cache_destroy(nfs_direct_cachep)) 916 if (kmem_cache_destroy(nfs_direct_cachep))
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7a79fbe9f539..5263b2864a44 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
32#include <asm/system.h> 32#include <asm/system.h>
33 33
34#include "delegation.h" 34#include "delegation.h"
35#include "iostat.h"
35 36
36#define NFSDBG_FACILITY NFSDBG_FILE 37#define NFSDBG_FACILITY NFSDBG_FILE
37 38
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
102static int 103static int
103nfs_file_open(struct inode *inode, struct file *filp) 104nfs_file_open(struct inode *inode, struct file *filp)
104{ 105{
105 struct nfs_server *server = NFS_SERVER(inode);
106 int (*open)(struct inode *, struct file *);
107 int res; 106 int res;
108 107
109 res = nfs_check_flags(filp->f_flags); 108 res = nfs_check_flags(filp->f_flags);
110 if (res) 109 if (res)
111 return res; 110 return res;
112 111
112 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
113 lock_kernel(); 113 lock_kernel();
114 /* Do NFSv4 open() call */ 114 res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
115 if ((open = server->rpc_ops->file_open) != NULL)
116 res = open(inode, filp);
117 unlock_kernel(); 115 unlock_kernel();
118 return res; 116 return res;
119} 117}
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
124 /* Ensure that dirty pages are flushed out with the right creds */ 122 /* Ensure that dirty pages are flushed out with the right creds */
125 if (filp->f_mode & FMODE_WRITE) 123 if (filp->f_mode & FMODE_WRITE)
126 filemap_fdatawrite(filp->f_mapping); 124 filemap_fdatawrite(filp->f_mapping);
125 nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
127 return NFS_PROTO(inode)->file_release(inode, filp); 126 return NFS_PROTO(inode)->file_release(inode, filp);
128} 127}
129 128
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
199 198
200 if ((file->f_mode & FMODE_WRITE) == 0) 199 if ((file->f_mode & FMODE_WRITE) == 0)
201 return 0; 200 return 0;
201 nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
202 lock_kernel(); 202 lock_kernel();
203 /* Ensure that data+attribute caches are up to date after close() */ 203 /* Ensure that data+attribute caches are up to date after close() */
204 status = nfs_wb_all(inode); 204 status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
229 (unsigned long) count, (unsigned long) pos); 229 (unsigned long) count, (unsigned long) pos);
230 230
231 result = nfs_revalidate_file(inode, iocb->ki_filp); 231 result = nfs_revalidate_file(inode, iocb->ki_filp);
232 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
232 if (!result) 233 if (!result)
233 result = generic_file_aio_read(iocb, buf, count, pos); 234 result = generic_file_aio_read(iocb, buf, count, pos);
234 return result; 235 return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
282 283
283 dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); 284 dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
284 285
286 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
285 lock_kernel(); 287 lock_kernel();
286 status = nfs_wb_all(inode); 288 status = nfs_wb_all(inode);
287 if (!status) { 289 if (!status) {
@@ -316,6 +318,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
316 return status; 318 return status;
317} 319}
318 320
321static int nfs_invalidate_page(struct page *page, unsigned long offset)
322{
323 /* FIXME: we really should cancel any unstarted writes on this page */
324 return 1;
325}
326
327static int nfs_release_page(struct page *page, gfp_t gfp)
328{
329 return !nfs_wb_page(page->mapping->host, page);
330}
331
319struct address_space_operations nfs_file_aops = { 332struct address_space_operations nfs_file_aops = {
320 .readpage = nfs_readpage, 333 .readpage = nfs_readpage,
321 .readpages = nfs_readpages, 334 .readpages = nfs_readpages,
@@ -324,6 +337,8 @@ struct address_space_operations nfs_file_aops = {
324 .writepages = nfs_writepages, 337 .writepages = nfs_writepages,
325 .prepare_write = nfs_prepare_write, 338 .prepare_write = nfs_prepare_write,
326 .commit_write = nfs_commit_write, 339 .commit_write = nfs_commit_write,
340 .invalidatepage = nfs_invalidate_page,
341 .releasepage = nfs_release_page,
327#ifdef CONFIG_NFS_DIRECTIO 342#ifdef CONFIG_NFS_DIRECTIO
328 .direct_IO = nfs_direct_IO, 343 .direct_IO = nfs_direct_IO,
329#endif 344#endif
@@ -365,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
365 if (!count) 380 if (!count)
366 goto out; 381 goto out;
367 382
383 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
368 result = generic_file_aio_write(iocb, buf, count, pos); 384 result = generic_file_aio_write(iocb, buf, count, pos);
369out: 385out:
370 return result; 386 return result;
@@ -376,15 +392,17 @@ out_swapfile:
376 392
377static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) 393static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
378{ 394{
379 struct file_lock *cfl; 395 struct file_lock cfl;
380 struct inode *inode = filp->f_mapping->host; 396 struct inode *inode = filp->f_mapping->host;
381 int status = 0; 397 int status = 0;
382 398
383 lock_kernel(); 399 lock_kernel();
384 /* Try local locking first */ 400 /* Try local locking first */
385 cfl = posix_test_lock(filp, fl); 401 if (posix_test_lock(filp, fl, &cfl)) {
386 if (cfl != NULL) { 402 fl->fl_start = cfl.fl_start;
387 locks_copy_lock(fl, cfl); 403 fl->fl_end = cfl.fl_end;
404 fl->fl_type = cfl.fl_type;
405 fl->fl_pid = cfl.fl_pid;
388 goto out; 406 goto out;
389 } 407 }
390 408
@@ -425,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
425static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) 443static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
426{ 444{
427 struct inode *inode = filp->f_mapping->host; 445 struct inode *inode = filp->f_mapping->host;
428 sigset_t oldset;
429 int status; 446 int status;
430 447
431 rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
432 /* 448 /*
433 * Flush all pending writes before doing anything 449 * Flush all pending writes before doing anything
434 * with locks.. 450 * with locks..
@@ -446,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
446 else 462 else
447 status = do_vfs_lock(filp, fl); 463 status = do_vfs_lock(filp, fl);
448 unlock_kernel(); 464 unlock_kernel();
449 rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
450 return status; 465 return status;
451} 466}
452 467
453static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) 468static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
454{ 469{
455 struct inode *inode = filp->f_mapping->host; 470 struct inode *inode = filp->f_mapping->host;
456 sigset_t oldset;
457 int status; 471 int status;
458 472
459 rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
460 /* 473 /*
461 * Flush all pending writes before doing anything 474 * Flush all pending writes before doing anything
462 * with locks.. 475 * with locks..
@@ -489,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
489 nfs_sync_mapping(filp->f_mapping); 502 nfs_sync_mapping(filp->f_mapping);
490 nfs_zap_caches(inode); 503 nfs_zap_caches(inode);
491out: 504out:
492 rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
493 return status; 505 return status;
494} 506}
495 507
@@ -504,9 +516,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
504 inode->i_sb->s_id, inode->i_ino, 516 inode->i_sb->s_id, inode->i_ino,
505 fl->fl_type, fl->fl_flags, 517 fl->fl_type, fl->fl_flags,
506 (long long)fl->fl_start, (long long)fl->fl_end); 518 (long long)fl->fl_start, (long long)fl->fl_end);
507 519 nfs_inc_stats(inode, NFSIOS_VFSLOCK);
508 if (!inode)
509 return -EINVAL;
510 520
511 /* No mandatory locks over NFS */ 521 /* No mandatory locks over NFS */
512 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && 522 if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -531,9 +541,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
531 inode->i_sb->s_id, inode->i_ino, 541 inode->i_sb->s_id, inode->i_ino,
532 fl->fl_type, fl->fl_flags); 542 fl->fl_type, fl->fl_flags);
533 543
534 if (!inode)
535 return -EINVAL;
536
537 /* 544 /*
538 * No BSD flocks over NFS allowed. 545 * No BSD flocks over NFS allowed.
539 * Note: we could try to fake a POSIX lock request here by 546 * Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 821edd30333b..3fab5b0cfc5a 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -35,6 +35,7 @@
35 */ 35 */
36 36
37#include <linux/module.h> 37#include <linux/module.h>
38#include <linux/mutex.h>
38#include <linux/init.h> 39#include <linux/init.h>
39#include <linux/types.h> 40#include <linux/types.h>
40#include <linux/slab.h> 41#include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
74 struct dentry *idmap_dentry; 75 struct dentry *idmap_dentry;
75 wait_queue_head_t idmap_wq; 76 wait_queue_head_t idmap_wq;
76 struct idmap_msg idmap_im; 77 struct idmap_msg idmap_im;
77 struct semaphore idmap_lock; /* Serializes upcalls */ 78 struct mutex idmap_lock; /* Serializes upcalls */
78 struct semaphore idmap_im_lock; /* Protects the hashtable */ 79 struct mutex idmap_im_lock; /* Protects the hashtable */
79 struct idmap_hashtable idmap_user_hash; 80 struct idmap_hashtable idmap_user_hash;
80 struct idmap_hashtable idmap_group_hash; 81 struct idmap_hashtable idmap_group_hash;
81}; 82};
@@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp)
101 102
102 if (clp->cl_idmap != NULL) 103 if (clp->cl_idmap != NULL)
103 return; 104 return;
104 if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) 105 if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
105 return; 106 return;
106 107
107 memset(idmap, 0, sizeof(*idmap));
108
109 snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), 108 snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
110 "%s/idmap", clp->cl_rpcclient->cl_pathname); 109 "%s/idmap", clp->cl_rpcclient->cl_pathname);
111 110
@@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
116 return; 115 return;
117 } 116 }
118 117
119 init_MUTEX(&idmap->idmap_lock); 118 mutex_init(&idmap->idmap_lock);
120 init_MUTEX(&idmap->idmap_im_lock); 119 mutex_init(&idmap->idmap_im_lock);
121 init_waitqueue_head(&idmap->idmap_wq); 120 init_waitqueue_head(&idmap->idmap_wq);
122 idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; 121 idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
123 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; 122 idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
132 131
133 if (!idmap) 132 if (!idmap)
134 return; 133 return;
134 dput(idmap->idmap_dentry);
135 idmap->idmap_dentry = NULL;
135 rpc_unlink(idmap->idmap_path); 136 rpc_unlink(idmap->idmap_path);
136 clp->cl_idmap = NULL; 137 clp->cl_idmap = NULL;
137 kfree(idmap); 138 kfree(idmap);
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
232 if (namelen >= IDMAP_NAMESZ) 233 if (namelen >= IDMAP_NAMESZ)
233 return -EINVAL; 234 return -EINVAL;
234 235
235 down(&idmap->idmap_lock); 236 mutex_lock(&idmap->idmap_lock);
236 down(&idmap->idmap_im_lock); 237 mutex_lock(&idmap->idmap_im_lock);
237 238
238 he = idmap_lookup_name(h, name, namelen); 239 he = idmap_lookup_name(h, name, namelen);
239 if (he != NULL) { 240 if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
259 } 260 }
260 261
261 set_current_state(TASK_UNINTERRUPTIBLE); 262 set_current_state(TASK_UNINTERRUPTIBLE);
262 up(&idmap->idmap_im_lock); 263 mutex_unlock(&idmap->idmap_im_lock);
263 schedule(); 264 schedule();
264 current->state = TASK_RUNNING; 265 current->state = TASK_RUNNING;
265 remove_wait_queue(&idmap->idmap_wq, &wq); 266 remove_wait_queue(&idmap->idmap_wq, &wq);
266 down(&idmap->idmap_im_lock); 267 mutex_lock(&idmap->idmap_im_lock);
267 268
268 if (im->im_status & IDMAP_STATUS_SUCCESS) { 269 if (im->im_status & IDMAP_STATUS_SUCCESS) {
269 *id = im->im_id; 270 *id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
272 273
273 out: 274 out:
274 memset(im, 0, sizeof(*im)); 275 memset(im, 0, sizeof(*im));
275 up(&idmap->idmap_im_lock); 276 mutex_unlock(&idmap->idmap_im_lock);
276 up(&idmap->idmap_lock); 277 mutex_unlock(&idmap->idmap_lock);
277 return (ret); 278 return (ret);
278} 279}
279 280
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
293 294
294 im = &idmap->idmap_im; 295 im = &idmap->idmap_im;
295 296
296 down(&idmap->idmap_lock); 297 mutex_lock(&idmap->idmap_lock);
297 down(&idmap->idmap_im_lock); 298 mutex_lock(&idmap->idmap_im_lock);
298 299
299 he = idmap_lookup_id(h, id); 300 he = idmap_lookup_id(h, id);
300 if (he != 0) { 301 if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
320 } 321 }
321 322
322 set_current_state(TASK_UNINTERRUPTIBLE); 323 set_current_state(TASK_UNINTERRUPTIBLE);
323 up(&idmap->idmap_im_lock); 324 mutex_unlock(&idmap->idmap_im_lock);
324 schedule(); 325 schedule();
325 current->state = TASK_RUNNING; 326 current->state = TASK_RUNNING;
326 remove_wait_queue(&idmap->idmap_wq, &wq); 327 remove_wait_queue(&idmap->idmap_wq, &wq);
327 down(&idmap->idmap_im_lock); 328 mutex_lock(&idmap->idmap_im_lock);
328 329
329 if (im->im_status & IDMAP_STATUS_SUCCESS) { 330 if (im->im_status & IDMAP_STATUS_SUCCESS) {
330 if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) 331 if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
335 336
336 out: 337 out:
337 memset(im, 0, sizeof(*im)); 338 memset(im, 0, sizeof(*im));
338 up(&idmap->idmap_im_lock); 339 mutex_unlock(&idmap->idmap_im_lock);
339 up(&idmap->idmap_lock); 340 mutex_unlock(&idmap->idmap_lock);
340 return ret; 341 return ret;
341} 342}
342 343
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
380 if (copy_from_user(&im_in, src, mlen) != 0) 381 if (copy_from_user(&im_in, src, mlen) != 0)
381 return (-EFAULT); 382 return (-EFAULT);
382 383
383 down(&idmap->idmap_im_lock); 384 mutex_lock(&idmap->idmap_im_lock);
384 385
385 ret = mlen; 386 ret = mlen;
386 im->im_status = im_in.im_status; 387 im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
440 idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); 441 idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
441 ret = mlen; 442 ret = mlen;
442out: 443out:
443 up(&idmap->idmap_im_lock); 444 mutex_unlock(&idmap->idmap_im_lock);
444 return ret; 445 return ret;
445} 446}
446 447
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
452 453
453 if (msg->errno >= 0) 454 if (msg->errno >= 0)
454 return; 455 return;
455 down(&idmap->idmap_im_lock); 456 mutex_lock(&idmap->idmap_im_lock);
456 im->im_status = IDMAP_STATUS_LOOKUPFAIL; 457 im->im_status = IDMAP_STATUS_LOOKUPFAIL;
457 wake_up(&idmap->idmap_wq); 458 wake_up(&idmap->idmap_wq);
458 up(&idmap->idmap_im_lock); 459 mutex_unlock(&idmap->idmap_im_lock);
459} 460}
460 461
461/* 462/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3413996f9a86..2f7656b911b6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -26,6 +26,7 @@
26#include <linux/unistd.h> 26#include <linux/unistd.h>
27#include <linux/sunrpc/clnt.h> 27#include <linux/sunrpc/clnt.h>
28#include <linux/sunrpc/stats.h> 28#include <linux/sunrpc/stats.h>
29#include <linux/sunrpc/metrics.h>
29#include <linux/nfs_fs.h> 30#include <linux/nfs_fs.h>
30#include <linux/nfs_mount.h> 31#include <linux/nfs_mount.h>
31#include <linux/nfs4_mount.h> 32#include <linux/nfs4_mount.h>
@@ -42,6 +43,7 @@
42#include "nfs4_fs.h" 43#include "nfs4_fs.h"
43#include "callback.h" 44#include "callback.h"
44#include "delegation.h" 45#include "delegation.h"
46#include "iostat.h"
45 47
46#define NFSDBG_FACILITY NFSDBG_VFS 48#define NFSDBG_FACILITY NFSDBG_VFS
47#define NFS_PARANOIA 1 49#define NFS_PARANOIA 1
@@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode *);
65static void nfs_umount_begin(struct super_block *); 67static void nfs_umount_begin(struct super_block *);
66static int nfs_statfs(struct super_block *, struct kstatfs *); 68static int nfs_statfs(struct super_block *, struct kstatfs *);
67static int nfs_show_options(struct seq_file *, struct vfsmount *); 69static int nfs_show_options(struct seq_file *, struct vfsmount *);
70static int nfs_show_stats(struct seq_file *, struct vfsmount *);
68static void nfs_zap_acl_cache(struct inode *); 71static void nfs_zap_acl_cache(struct inode *);
69 72
70static struct rpc_program nfs_program; 73static struct rpc_program nfs_program;
@@ -78,6 +81,7 @@ static struct super_operations nfs_sops = {
78 .clear_inode = nfs_clear_inode, 81 .clear_inode = nfs_clear_inode,
79 .umount_begin = nfs_umount_begin, 82 .umount_begin = nfs_umount_begin,
80 .show_options = nfs_show_options, 83 .show_options = nfs_show_options,
84 .show_stats = nfs_show_stats,
81}; 85};
82 86
83/* 87/*
@@ -133,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
133static int 137static int
134nfs_write_inode(struct inode *inode, int sync) 138nfs_write_inode(struct inode *inode, int sync)
135{ 139{
136 int flags = sync ? FLUSH_WAIT : 0; 140 int flags = sync ? FLUSH_SYNC : 0;
137 int ret; 141 int ret;
138 142
139 ret = nfs_commit_inode(inode, flags); 143 ret = nfs_commit_inode(inode, flags);
@@ -237,7 +241,6 @@ static struct inode *
237nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) 241nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
238{ 242{
239 struct nfs_server *server = NFS_SB(sb); 243 struct nfs_server *server = NFS_SB(sb);
240 struct inode *rooti;
241 int error; 244 int error;
242 245
243 error = server->rpc_ops->getroot(server, rootfh, fsinfo); 246 error = server->rpc_ops->getroot(server, rootfh, fsinfo);
@@ -246,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
246 return ERR_PTR(error); 249 return ERR_PTR(error);
247 } 250 }
248 251
249 rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); 252 return nfs_fhget(sb, rootfh, fsinfo->fattr);
250 if (!rooti)
251 return ERR_PTR(-ENOMEM);
252 return rooti;
253} 253}
254 254
255/* 255/*
@@ -277,6 +277,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
277 277
278 sb->s_magic = NFS_SUPER_MAGIC; 278 sb->s_magic = NFS_SUPER_MAGIC;
279 279
280 server->io_stats = nfs_alloc_iostats();
281 if (server->io_stats == NULL)
282 return -ENOMEM;
283
280 root_inode = nfs_get_root(sb, &server->fh, &fsinfo); 284 root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
281 /* Did getting the root inode fail? */ 285 /* Did getting the root inode fail? */
282 if (IS_ERR(root_inode)) { 286 if (IS_ERR(root_inode)) {
@@ -290,6 +294,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
290 } 294 }
291 sb->s_root->d_op = server->rpc_ops->dentry_ops; 295 sb->s_root->d_op = server->rpc_ops->dentry_ops;
292 296
297 /* mount time stamp, in jiffies */
298 server->mount_time = jiffies;
299
293 /* Get some general file system info */ 300 /* Get some general file system info */
294 if (server->namelen == 0 && 301 if (server->namelen == 0 &&
295 server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) 302 server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -396,6 +403,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
396 403
397 nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans); 404 nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
398 405
406 server->retrans_timeo = timeparms.to_initval;
407 server->retrans_count = timeparms.to_retries;
408
399 /* create transport and client */ 409 /* create transport and client */
400 xprt = xprt_create_proto(proto, &server->addr, &timeparms); 410 xprt = xprt_create_proto(proto, &server->addr, &timeparms);
401 if (IS_ERR(xprt)) { 411 if (IS_ERR(xprt)) {
@@ -579,7 +589,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
579 589
580} 590}
581 591
582static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) 592static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
583{ 593{
584 static struct proc_nfs_info { 594 static struct proc_nfs_info {
585 int flag; 595 int flag;
@@ -588,28 +598,26 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
588 } nfs_info[] = { 598 } nfs_info[] = {
589 { NFS_MOUNT_SOFT, ",soft", ",hard" }, 599 { NFS_MOUNT_SOFT, ",soft", ",hard" },
590 { NFS_MOUNT_INTR, ",intr", "" }, 600 { NFS_MOUNT_INTR, ",intr", "" },
591 { NFS_MOUNT_POSIX, ",posix", "" },
592 { NFS_MOUNT_NOCTO, ",nocto", "" }, 601 { NFS_MOUNT_NOCTO, ",nocto", "" },
593 { NFS_MOUNT_NOAC, ",noac", "" }, 602 { NFS_MOUNT_NOAC, ",noac", "" },
594 { NFS_MOUNT_NONLM, ",nolock", ",lock" }, 603 { NFS_MOUNT_NONLM, ",nolock", "" },
595 { NFS_MOUNT_NOACL, ",noacl", "" }, 604 { NFS_MOUNT_NOACL, ",noacl", "" },
596 { 0, NULL, NULL } 605 { 0, NULL, NULL }
597 }; 606 };
598 struct proc_nfs_info *nfs_infop; 607 struct proc_nfs_info *nfs_infop;
599 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
600 char buf[12]; 608 char buf[12];
601 char *proto; 609 char *proto;
602 610
603 seq_printf(m, ",v%d", nfss->rpc_ops->version); 611 seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
604 seq_printf(m, ",rsize=%d", nfss->rsize); 612 seq_printf(m, ",rsize=%d", nfss->rsize);
605 seq_printf(m, ",wsize=%d", nfss->wsize); 613 seq_printf(m, ",wsize=%d", nfss->wsize);
606 if (nfss->acregmin != 3*HZ) 614 if (nfss->acregmin != 3*HZ || showdefaults)
607 seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); 615 seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
608 if (nfss->acregmax != 60*HZ) 616 if (nfss->acregmax != 60*HZ || showdefaults)
609 seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); 617 seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
610 if (nfss->acdirmin != 30*HZ) 618 if (nfss->acdirmin != 30*HZ || showdefaults)
611 seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); 619 seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
612 if (nfss->acdirmax != 60*HZ) 620 if (nfss->acdirmax != 60*HZ || showdefaults)
613 seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); 621 seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
614 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { 622 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
615 if (nfss->flags & nfs_infop->flag) 623 if (nfss->flags & nfs_infop->flag)
@@ -629,8 +637,96 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
629 proto = buf; 637 proto = buf;
630 } 638 }
631 seq_printf(m, ",proto=%s", proto); 639 seq_printf(m, ",proto=%s", proto);
640 seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
641 seq_printf(m, ",retrans=%u", nfss->retrans_count);
642}
643
644static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
645{
646 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
647
648 nfs_show_mount_options(m, nfss, 0);
649
632 seq_puts(m, ",addr="); 650 seq_puts(m, ",addr=");
633 seq_escape(m, nfss->hostname, " \t\n\\"); 651 seq_escape(m, nfss->hostname, " \t\n\\");
652
653 return 0;
654}
655
656static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
657{
658 int i, cpu;
659 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
660 struct rpc_auth *auth = nfss->client->cl_auth;
661 struct nfs_iostats totals = { };
662
663 seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
664
665 /*
666 * Display all mount option settings
667 */
668 seq_printf(m, "\n\topts:\t");
669 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
670 seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
671 seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
672 seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
673 nfs_show_mount_options(m, nfss, 1);
674
675 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
676
677 seq_printf(m, "\n\tcaps:\t");
678 seq_printf(m, "caps=0x%x", nfss->caps);
679 seq_printf(m, ",wtmult=%d", nfss->wtmult);
680 seq_printf(m, ",dtsize=%d", nfss->dtsize);
681 seq_printf(m, ",bsize=%d", nfss->bsize);
682 seq_printf(m, ",namelen=%d", nfss->namelen);
683
684#ifdef CONFIG_NFS_V4
685 if (nfss->rpc_ops->version == 4) {
686 seq_printf(m, "\n\tnfsv4:\t");
687 seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
688 seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
689 seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
690 }
691#endif
692
693 /*
694 * Display security flavor in effect for this mount
695 */
696 seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
697 if (auth->au_flavor)
698 seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
699
700 /*
701 * Display superblock I/O counters
702 */
703 for (cpu = 0; cpu < NR_CPUS; cpu++) {
704 struct nfs_iostats *stats;
705
706 if (!cpu_possible(cpu))
707 continue;
708
709 preempt_disable();
710 stats = per_cpu_ptr(nfss->io_stats, cpu);
711
712 for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
713 totals.events[i] += stats->events[i];
714 for (i = 0; i < __NFSIOS_BYTESMAX; i++)
715 totals.bytes[i] += stats->bytes[i];
716
717 preempt_enable();
718 }
719
720 seq_printf(m, "\n\tevents:\t");
721 for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
722 seq_printf(m, "%lu ", totals.events[i]);
723 seq_printf(m, "\n\tbytes:\t");
724 for (i = 0; i < __NFSIOS_BYTESMAX; i++)
725 seq_printf(m, "%Lu ", totals.bytes[i]);
726 seq_printf(m, "\n");
727
728 rpc_print_iostats(m, nfss->client);
729
634 return 0; 730 return 0;
635} 731}
636 732
@@ -660,6 +756,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
660 struct nfs_inode *nfsi = NFS_I(inode); 756 struct nfs_inode *nfsi = NFS_I(inode);
661 int mode = inode->i_mode; 757 int mode = inode->i_mode;
662 758
759 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
760
663 NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); 761 NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
664 NFS_ATTRTIMEO_UPDATE(inode) = jiffies; 762 NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
665 763
@@ -751,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
751 .fh = fh, 849 .fh = fh,
752 .fattr = fattr 850 .fattr = fattr
753 }; 851 };
754 struct inode *inode = NULL; 852 struct inode *inode = ERR_PTR(-ENOENT);
755 unsigned long hash; 853 unsigned long hash;
756 854
757 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 855 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -764,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
764 862
765 hash = nfs_fattr_to_ino_t(fattr); 863 hash = nfs_fattr_to_ino_t(fattr);
766 864
767 if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) 865 inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
866 if (inode == NULL) {
867 inode = ERR_PTR(-ENOMEM);
768 goto out_no_inode; 868 goto out_no_inode;
869 }
769 870
770 if (inode->i_state & I_NEW) { 871 if (inode->i_state & I_NEW) {
771 struct nfs_inode *nfsi = NFS_I(inode); 872 struct nfs_inode *nfsi = NFS_I(inode);
@@ -834,7 +935,7 @@ out:
834 return inode; 935 return inode;
835 936
836out_no_inode: 937out_no_inode:
837 printk("nfs_fhget: iget failed\n"); 938 dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
838 goto out; 939 goto out;
839} 940}
840 941
@@ -847,6 +948,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
847 struct nfs_fattr fattr; 948 struct nfs_fattr fattr;
848 int error; 949 int error;
849 950
951 nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
952
850 if (attr->ia_valid & ATTR_SIZE) { 953 if (attr->ia_valid & ATTR_SIZE) {
851 if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) 954 if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
852 attr->ia_valid &= ~ATTR_SIZE; 955 attr->ia_valid &= ~ATTR_SIZE;
@@ -859,11 +962,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
859 962
860 lock_kernel(); 963 lock_kernel();
861 nfs_begin_data_update(inode); 964 nfs_begin_data_update(inode);
862 /* Write all dirty data if we're changing file permissions or size */ 965 /* Write all dirty data */
863 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { 966 filemap_write_and_wait(inode->i_mapping);
864 filemap_write_and_wait(inode->i_mapping); 967 nfs_wb_all(inode);
865 nfs_wb_all(inode);
866 }
867 /* 968 /*
868 * Return any delegations if we're going to change ACLs 969 * Return any delegations if we're going to change ACLs
869 */ 970 */
@@ -902,6 +1003,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
902 spin_unlock(&inode->i_lock); 1003 spin_unlock(&inode->i_lock);
903 } 1004 }
904 if ((attr->ia_valid & ATTR_SIZE) != 0) { 1005 if ((attr->ia_valid & ATTR_SIZE) != 0) {
1006 nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
905 inode->i_size = attr->ia_size; 1007 inode->i_size = attr->ia_size;
906 vmtruncate(inode, attr->ia_size); 1008 vmtruncate(inode, attr->ia_size);
907 } 1009 }
@@ -949,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
949 int err; 1051 int err;
950 1052
951 /* Flush out writes to the server in order to update c/mtime */ 1053 /* Flush out writes to the server in order to update c/mtime */
952 nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); 1054 nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
953 1055
954 /* 1056 /*
955 * We may force a getattr if the user cares about atime. 1057 * We may force a getattr if the user cares about atime.
@@ -973,7 +1075,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
973 return err; 1075 return err;
974} 1076}
975 1077
976struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) 1078static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
977{ 1079{
978 struct nfs_open_context *ctx; 1080 struct nfs_open_context *ctx;
979 1081
@@ -981,6 +1083,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
981 if (ctx != NULL) { 1083 if (ctx != NULL) {
982 atomic_set(&ctx->count, 1); 1084 atomic_set(&ctx->count, 1);
983 ctx->dentry = dget(dentry); 1085 ctx->dentry = dget(dentry);
1086 ctx->vfsmnt = mntget(mnt);
984 ctx->cred = get_rpccred(cred); 1087 ctx->cred = get_rpccred(cred);
985 ctx->state = NULL; 1088 ctx->state = NULL;
986 ctx->lockowner = current->files; 1089 ctx->lockowner = current->files;
@@ -1011,6 +1114,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
1011 if (ctx->cred != NULL) 1114 if (ctx->cred != NULL)
1012 put_rpccred(ctx->cred); 1115 put_rpccred(ctx->cred);
1013 dput(ctx->dentry); 1116 dput(ctx->dentry);
1117 mntput(ctx->vfsmnt);
1014 kfree(ctx); 1118 kfree(ctx);
1015 } 1119 }
1016} 1120}
@@ -1019,7 +1123,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
1019 * Ensure that mmap has a recent RPC credential for use when writing out 1123 * Ensure that mmap has a recent RPC credential for use when writing out
1020 * shared pages 1124 * shared pages
1021 */ 1125 */
1022void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) 1126static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
1023{ 1127{
1024 struct inode *inode = filp->f_dentry->d_inode; 1128 struct inode *inode = filp->f_dentry->d_inode;
1025 struct nfs_inode *nfsi = NFS_I(inode); 1129 struct nfs_inode *nfsi = NFS_I(inode);
@@ -1051,7 +1155,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
1051 return ctx; 1155 return ctx;
1052} 1156}
1053 1157
1054void nfs_file_clear_open_context(struct file *filp) 1158static void nfs_file_clear_open_context(struct file *filp)
1055{ 1159{
1056 struct inode *inode = filp->f_dentry->d_inode; 1160 struct inode *inode = filp->f_dentry->d_inode;
1057 struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; 1161 struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
@@ -1076,7 +1180,7 @@ int nfs_open(struct inode *inode, struct file *filp)
1076 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); 1180 cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
1077 if (IS_ERR(cred)) 1181 if (IS_ERR(cred))
1078 return PTR_ERR(cred); 1182 return PTR_ERR(cred);
1079 ctx = alloc_nfs_open_context(filp->f_dentry, cred); 1183 ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
1080 put_rpccred(cred); 1184 put_rpccred(cred);
1081 if (ctx == NULL) 1185 if (ctx == NULL)
1082 return -ENOMEM; 1186 return -ENOMEM;
@@ -1185,6 +1289,7 @@ int nfs_attribute_timeout(struct inode *inode)
1185 */ 1289 */
1186int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 1290int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1187{ 1291{
1292 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
1188 if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) 1293 if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
1189 && !nfs_attribute_timeout(inode)) 1294 && !nfs_attribute_timeout(inode))
1190 return NFS_STALE(inode) ? -ESTALE : 0; 1295 return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1201,6 +1306,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
1201 struct nfs_inode *nfsi = NFS_I(inode); 1306 struct nfs_inode *nfsi = NFS_I(inode);
1202 1307
1203 if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { 1308 if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
1309 nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
1204 if (S_ISREG(inode->i_mode)) 1310 if (S_ISREG(inode->i_mode))
1205 nfs_sync_mapping(mapping); 1311 nfs_sync_mapping(mapping);
1206 invalidate_inode_pages2(mapping); 1312 invalidate_inode_pages2(mapping);
@@ -1299,39 +1405,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1299 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 1405 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1300 return 0; 1406 return 0;
1301 1407
1408 /* Has the inode gone and changed behind our back? */
1409 if (nfsi->fileid != fattr->fileid
1410 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1411 return -EIO;
1412 }
1413
1302 /* Are we in the process of updating data on the server? */ 1414 /* Are we in the process of updating data on the server? */
1303 data_unstable = nfs_caches_unstable(inode); 1415 data_unstable = nfs_caches_unstable(inode);
1304 1416
1305 /* Do atomic weak cache consistency updates */ 1417 /* Do atomic weak cache consistency updates */
1306 nfs_wcc_update_inode(inode, fattr); 1418 nfs_wcc_update_inode(inode, fattr);
1307 1419
1308 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && 1420 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
1309 nfsi->change_attr != fattr->change_attr) { 1421 if (nfsi->change_attr == fattr->change_attr)
1422 goto out;
1310 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 1423 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1311 if (!data_unstable) 1424 if (!data_unstable)
1312 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; 1425 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
1313 } 1426 }
1314 1427
1315 /* Has the inode gone and changed behind our back? */
1316 if (nfsi->fileid != fattr->fileid
1317 || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
1318 return -EIO;
1319 }
1320
1321 cur_size = i_size_read(inode);
1322 new_isize = nfs_size_to_loff_t(fattr->size);
1323
1324 /* Verify a few of the more important attributes */ 1428 /* Verify a few of the more important attributes */
1325 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { 1429 if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
1326 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 1430 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1327 if (!data_unstable) 1431 if (!data_unstable)
1328 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; 1432 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
1329 } 1433 }
1330 if (cur_size != new_isize) { 1434
1331 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 1435 cur_size = i_size_read(inode);
1332 if (nfsi->npages == 0) 1436 new_isize = nfs_size_to_loff_t(fattr->size);
1333 nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; 1437 if (cur_size != new_isize && nfsi->npages == 0)
1334 } 1438 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1335 1439
1336 /* Have any file permissions changed? */ 1440 /* Have any file permissions changed? */
1337 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) 1441 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1343,6 +1447,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1343 if (inode->i_nlink != fattr->nlink) 1447 if (inode->i_nlink != fattr->nlink)
1344 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 1448 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
1345 1449
1450out:
1346 if (!timespec_equal(&inode->i_atime, &fattr->atime)) 1451 if (!timespec_equal(&inode->i_atime, &fattr->atime))
1347 nfsi->cache_validity |= NFS_INO_INVALID_ATIME; 1452 nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
1348 1453
@@ -1481,15 +1586,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1481 nfsi->cache_change_attribute = jiffies; 1586 nfsi->cache_change_attribute = jiffies;
1482 } 1587 }
1483 1588
1484 if ((fattr->valid & NFS_ATTR_FATTR_V4)
1485 && nfsi->change_attr != fattr->change_attr) {
1486 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1487 inode->i_sb->s_id, inode->i_ino);
1488 nfsi->change_attr = fattr->change_attr;
1489 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1490 nfsi->cache_change_attribute = jiffies;
1491 }
1492
1493 /* If ctime has changed we should definitely clear access+acl caches */ 1589 /* If ctime has changed we should definitely clear access+acl caches */
1494 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { 1590 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
1495 invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1591 invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,8 +1615,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1519 inode->i_blksize = fattr->du.nfs2.blocksize; 1615 inode->i_blksize = fattr->du.nfs2.blocksize;
1520 } 1616 }
1521 1617
1618 if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
1619 if (nfsi->change_attr != fattr->change_attr) {
1620 dprintk("NFS: change_attr change on server for file %s/%ld\n",
1621 inode->i_sb->s_id, inode->i_ino);
1622 nfsi->change_attr = fattr->change_attr;
1623 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1624 nfsi->cache_change_attribute = jiffies;
1625 } else
1626 invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
1627 }
1628
1522 /* Update attrtimeo value if we're out of the unstable period */ 1629 /* Update attrtimeo value if we're out of the unstable period */
1523 if (invalid & NFS_INO_INVALID_ATTR) { 1630 if (invalid & NFS_INO_INVALID_ATTR) {
1631 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
1524 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 1632 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
1525 nfsi->attrtimeo_timestamp = jiffies; 1633 nfsi->attrtimeo_timestamp = jiffies;
1526 } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { 1634 } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
@@ -1637,10 +1745,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
1637#endif /* CONFIG_NFS_V3 */ 1745#endif /* CONFIG_NFS_V3 */
1638 1746
1639 s = ERR_PTR(-ENOMEM); 1747 s = ERR_PTR(-ENOMEM);
1640 server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); 1748 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
1641 if (!server) 1749 if (!server)
1642 goto out_err; 1750 goto out_err;
1643 memset(server, 0, sizeof(struct nfs_server));
1644 /* Zero out the NFS state stuff */ 1751 /* Zero out the NFS state stuff */
1645 init_nfsv4_state(server); 1752 init_nfsv4_state(server);
1646 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); 1753 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1712,6 +1819,7 @@ static void nfs_kill_super(struct super_block *s)
1712 1819
1713 rpciod_down(); /* release rpciod */ 1820 rpciod_down(); /* release rpciod */
1714 1821
1822 nfs_free_iostats(server->io_stats);
1715 kfree(server->hostname); 1823 kfree(server->hostname);
1716 kfree(server); 1824 kfree(server);
1717} 1825}
@@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops = {
1738 .clear_inode = nfs4_clear_inode, 1846 .clear_inode = nfs4_clear_inode,
1739 .umount_begin = nfs_umount_begin, 1847 .umount_begin = nfs_umount_begin,
1740 .show_options = nfs_show_options, 1848 .show_options = nfs_show_options,
1849 .show_stats = nfs_show_stats,
1741}; 1850};
1742 1851
1743/* 1852/*
@@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
1800 1909
1801 nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans); 1910 nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
1802 1911
1912 server->retrans_timeo = timeparms.to_initval;
1913 server->retrans_count = timeparms.to_retries;
1914
1803 clp = nfs4_get_client(&server->addr.sin_addr); 1915 clp = nfs4_get_client(&server->addr.sin_addr);
1804 if (!clp) { 1916 if (!clp) {
1805 dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); 1917 dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
@@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
1941 return ERR_PTR(-EINVAL); 2053 return ERR_PTR(-EINVAL);
1942 } 2054 }
1943 2055
1944 server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); 2056 server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
1945 if (!server) 2057 if (!server)
1946 return ERR_PTR(-ENOMEM); 2058 return ERR_PTR(-ENOMEM);
1947 memset(server, 0, sizeof(struct nfs_server));
1948 /* Zero out the NFS state stuff */ 2059 /* Zero out the NFS state stuff */
1949 init_nfsv4_state(server); 2060 init_nfsv4_state(server);
1950 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); 2061 server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2024,10 +2135,12 @@ static void nfs4_kill_super(struct super_block *sb)
2024 2135
2025 if (server->client != NULL && !IS_ERR(server->client)) 2136 if (server->client != NULL && !IS_ERR(server->client))
2026 rpc_shutdown_client(server->client); 2137 rpc_shutdown_client(server->client);
2027 rpciod_down(); /* release rpciod */
2028 2138
2029 destroy_nfsv4_state(server); 2139 destroy_nfsv4_state(server);
2030 2140
2141 rpciod_down();
2142
2143 nfs_free_iostats(server->io_stats);
2031 kfree(server->hostname); 2144 kfree(server->hostname);
2032 kfree(server); 2145 kfree(server);
2033} 2146}
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644
index 000000000000..6350ecbde589
--- /dev/null
+++ b/fs/nfs/iostat.h
@@ -0,0 +1,164 @@
1/*
2 * linux/fs/nfs/iostat.h
3 *
4 * Declarations for NFS client per-mount statistics
5 *
6 * Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
7 *
8 * NFS client per-mount statistics provide information about the health of
9 * the NFS client and the health of each NFS mount point. Generally these
10 * are not for detailed problem diagnosis, but simply to indicate that there
11 * is a problem.
12 *
13 * These counters are not meant to be human-readable, but are meant to be
14 * integrated into system monitoring tools such as "sar" and "iostat". As
15 * such, the counters are sampled by the tools over time, and are never
16 * zeroed after a file system is mounted. Moving averages can be computed
17 * by the tools by taking the difference between two instantaneous samples
18 * and dividing that by the time between the samples.
19 */
20
21#ifndef _NFS_IOSTAT
22#define _NFS_IOSTAT
23
24#define NFS_IOSTAT_VERS "1.0"
25
26/*
27 * NFS byte counters
28 *
29 * 1. SERVER - the number of payload bytes read from or written to the
30 * server by the NFS client via an NFS READ or WRITE request.
31 *
32 * 2. NORMAL - the number of bytes read or written by applications via
33 * the read(2) and write(2) system call interfaces.
34 *
35 * 3. DIRECT - the number of bytes read or written from files opened
36 * with the O_DIRECT flag.
37 *
38 * These counters give a view of the data throughput into and out of the NFS
39 * client. Comparing the number of bytes requested by an application with the
40 * number of bytes the client requests from the server can provide an
41 * indication of client efficiency (per-op, cache hits, etc).
42 *
43 * These counters can also help characterize which access methods are in
44 * use. DIRECT by itself shows whether there is any O_DIRECT traffic.
45 * NORMAL + DIRECT shows how much data is going through the system call
46 * interface. A large amount of SERVER traffic without much NORMAL or
47 * DIRECT traffic shows that applications are using mapped files.
48 *
49 * NFS page counters
50 *
51 * These count the number of pages read or written via nfs_readpage(),
52 * nfs_readpages(), or their write equivalents.
53 */
54enum nfs_stat_bytecounters {
55 NFSIOS_NORMALREADBYTES = 0,
56 NFSIOS_NORMALWRITTENBYTES,
57 NFSIOS_DIRECTREADBYTES,
58 NFSIOS_DIRECTWRITTENBYTES,
59 NFSIOS_SERVERREADBYTES,
60 NFSIOS_SERVERWRITTENBYTES,
61 NFSIOS_READPAGES,
62 NFSIOS_WRITEPAGES,
63 __NFSIOS_BYTESMAX,
64};
65
66/*
67 * NFS event counters
68 *
69 * These counters provide a low-overhead way of monitoring client activity
70 * without enabling NFS trace debugging. The counters show the rate at
71 * which VFS requests are made, and how often the client invalidates its
72 * data and attribute caches. This allows system administrators to monitor
73 * such things as how close-to-open is working, and answer questions such
74 * as "why are there so many GETATTR requests on the wire?"
75 *
76 * They also count anomalous events such as short reads and writes, silly
77 * renames due to close-after-delete, and operations that change the size
78 * of a file (such operations can often be the source of data corruption
79 * if applications aren't using file locking properly).
80 */
81enum nfs_stat_eventcounters {
82 NFSIOS_INODEREVALIDATE = 0,
83 NFSIOS_DENTRYREVALIDATE,
84 NFSIOS_DATAINVALIDATE,
85 NFSIOS_ATTRINVALIDATE,
86 NFSIOS_VFSOPEN,
87 NFSIOS_VFSLOOKUP,
88 NFSIOS_VFSACCESS,
89 NFSIOS_VFSUPDATEPAGE,
90 NFSIOS_VFSREADPAGE,
91 NFSIOS_VFSREADPAGES,
92 NFSIOS_VFSWRITEPAGE,
93 NFSIOS_VFSWRITEPAGES,
94 NFSIOS_VFSGETDENTS,
95 NFSIOS_VFSSETATTR,
96 NFSIOS_VFSFLUSH,
97 NFSIOS_VFSFSYNC,
98 NFSIOS_VFSLOCK,
99 NFSIOS_VFSRELEASE,
100 NFSIOS_CONGESTIONWAIT,
101 NFSIOS_SETATTRTRUNC,
102 NFSIOS_EXTENDWRITE,
103 NFSIOS_SILLYRENAME,
104 NFSIOS_SHORTREAD,
105 NFSIOS_SHORTWRITE,
106 NFSIOS_DELAY,
107 __NFSIOS_COUNTSMAX,
108};
109
110#ifdef __KERNEL__
111
112#include <linux/percpu.h>
113#include <linux/cache.h>
114
115struct nfs_iostats {
116 unsigned long long bytes[__NFSIOS_BYTESMAX];
117 unsigned long events[__NFSIOS_COUNTSMAX];
118} ____cacheline_aligned;
119
120static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
121{
122 struct nfs_iostats *iostats;
123 int cpu;
124
125 cpu = get_cpu();
126 iostats = per_cpu_ptr(server->io_stats, cpu);
127 iostats->events[stat] ++;
128 put_cpu_no_resched();
129}
130
131static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
132{
133 nfs_inc_server_stats(NFS_SERVER(inode), stat);
134}
135
136static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
137{
138 struct nfs_iostats *iostats;
139 int cpu;
140
141 cpu = get_cpu();
142 iostats = per_cpu_ptr(server->io_stats, cpu);
143 iostats->bytes[stat] += addend;
144 put_cpu_no_resched();
145}
146
147static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
148{
149 nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
150}
151
152static inline struct nfs_iostats *nfs_alloc_iostats(void)
153{
154 return alloc_percpu(struct nfs_iostats);
155}
156
157static inline void nfs_free_iostats(struct nfs_iostats *stats)
158{
159 if (stats != NULL)
160 free_percpu(stats);
161}
162
163#endif
164#endif
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0b9a78353d6e..445abb4d4214 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
49 struct mnt_fhstatus result = { 49 struct mnt_fhstatus result = {
50 .fh = fh 50 .fh = fh
51 }; 51 };
52 struct rpc_message msg = {
53 .rpc_argp = path,
54 .rpc_resp = &result,
55 };
52 char hostname[32]; 56 char hostname[32];
53 int status; 57 int status;
54 int call;
55 58
56 dprintk("NFS: nfs_mount(%08x:%s)\n", 59 dprintk("NFS: nfs_mount(%08x:%s)\n",
57 (unsigned)ntohl(addr->sin_addr.s_addr), path); 60 (unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
61 if (IS_ERR(mnt_clnt)) 64 if (IS_ERR(mnt_clnt))
62 return PTR_ERR(mnt_clnt); 65 return PTR_ERR(mnt_clnt);
63 66
64 call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT; 67 if (version == NFS_MNT3_VERSION)
65 status = rpc_call(mnt_clnt, call, path, &result, 0); 68 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
69 else
70 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
71
72 status = rpc_call_sync(mnt_clnt, &msg, 0);
66 return status < 0? status : (result.status? -EACCES : 0); 73 return status < 0? status : (result.status? -EACCES : 0);
67} 74}
68 75
@@ -137,6 +144,8 @@ static struct rpc_procinfo mnt_procedures[] = {
137 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 144 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
138 .p_decode = (kxdrproc_t) xdr_decode_fhstatus, 145 .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
139 .p_bufsiz = MNT_dirpath_sz << 2, 146 .p_bufsiz = MNT_dirpath_sz << 2,
147 .p_statidx = MNTPROC_MNT,
148 .p_name = "MOUNT",
140 }, 149 },
141}; 150};
142 151
@@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
146 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 155 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
147 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, 156 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
148 .p_bufsiz = MNT_dirpath_sz << 2, 157 .p_bufsiz = MNT_dirpath_sz << 2,
158 .p_statidx = MOUNTPROC3_MNT,
159 .p_name = "MOUNT",
149 }, 160 },
150}; 161};
151 162
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 6548a65de944..f0015fa876e1 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
682 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ 682 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
683 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ 683 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \
684 .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ 684 .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
685 .p_timer = timer \ 685 .p_timer = timer, \
686 .p_statidx = NFSPROC_##proc, \
687 .p_name = #proc, \
686 } 688 }
687struct rpc_procinfo nfs_procedures[] = { 689struct rpc_procinfo nfs_procedures[] = {
688 PROC(GETATTR, fhandle, attrstat, 1), 690 PROC(GETATTR, fhandle, attrstat, 1),
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6a5bbc0ae941..33287879bd23 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
190 struct nfs3_getaclres res = { 190 struct nfs3_getaclres res = {
191 .fattr = &fattr, 191 .fattr = &fattr,
192 }; 192 };
193 struct rpc_message msg = {
194 .rpc_argp = &args,
195 .rpc_resp = &res,
196 };
193 struct posix_acl *acl; 197 struct posix_acl *acl;
194 int status, count; 198 int status, count;
195 199
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
218 return NULL; 222 return NULL;
219 223
220 dprintk("NFS call getacl\n"); 224 dprintk("NFS call getacl\n");
221 status = rpc_call(server->client_acl, ACLPROC3_GETACL, 225 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
222 &args, &res, 0); 226 status = rpc_call_sync(server->client_acl, &msg, 0);
223 dprintk("NFS reply getacl: %d\n", status); 227 dprintk("NFS reply getacl: %d\n", status);
224 228
225 /* pages may have been allocated at the xdr layer. */ 229 /* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
286 .acl_access = acl, 290 .acl_access = acl,
287 .pages = pages, 291 .pages = pages,
288 }; 292 };
293 struct rpc_message msg = {
294 .rpc_argp = &args,
295 .rpc_resp = &fattr,
296 };
289 int status, count; 297 int status, count;
290 298
291 status = -EOPNOTSUPP; 299 status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
306 314
307 dprintk("NFS call setacl\n"); 315 dprintk("NFS call setacl\n");
308 nfs_begin_data_update(inode); 316 nfs_begin_data_update(inode);
309 status = rpc_call(server->client_acl, ACLPROC3_SETACL, 317 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
310 &args, &fattr, 0); 318 status = rpc_call_sync(server->client_acl, &msg, 0);
311 spin_lock(&inode->i_lock); 319 spin_lock(&inode->i_lock);
312 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; 320 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
313 spin_unlock(&inode->i_lock); 321 spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ed67567f0556..cf186f0d2b3b 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -19,6 +19,8 @@
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/nfs_mount.h> 20#include <linux/nfs_mount.h>
21 21
22#include "iostat.h"
23
22#define NFSDBG_FACILITY NFSDBG_PROC 24#define NFSDBG_FACILITY NFSDBG_PROC
23 25
24extern struct rpc_procinfo nfs3_procedures[]; 26extern struct rpc_procinfo nfs3_procedures[];
@@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
41 return res; 43 return res;
42} 44}
43 45
44static inline int 46#define rpc_call_sync(clnt, msg, flags) nfs3_rpc_wrapper(clnt, msg, flags)
45nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
46{
47 struct rpc_message msg = {
48 .rpc_proc = &clnt->cl_procinfo[proc],
49 .rpc_argp = argp,
50 .rpc_resp = resp,
51 };
52 return nfs3_rpc_wrapper(clnt, &msg, flags);
53}
54
55#define rpc_call(clnt, proc, argp, resp, flags) \
56 nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
57#define rpc_call_sync(clnt, msg, flags) \
58 nfs3_rpc_wrapper(clnt, msg, flags)
59 47
60static int 48static int
61nfs3_async_handle_jukebox(struct rpc_task *task) 49nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
62{ 50{
63 if (task->tk_status != -EJUKEBOX) 51 if (task->tk_status != -EJUKEBOX)
64 return 0; 52 return 0;
53 nfs_inc_stats(inode, NFSIOS_DELAY);
65 task->tk_status = 0; 54 task->tk_status = 0;
66 rpc_restart_call(task); 55 rpc_restart_call(task);
67 rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); 56 rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -72,14 +61,21 @@ static int
72do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, 61do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
73 struct nfs_fsinfo *info) 62 struct nfs_fsinfo *info)
74{ 63{
64 struct rpc_message msg = {
65 .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
66 .rpc_argp = fhandle,
67 .rpc_resp = info,
68 };
75 int status; 69 int status;
76 70
77 dprintk("%s: call fsinfo\n", __FUNCTION__); 71 dprintk("%s: call fsinfo\n", __FUNCTION__);
78 nfs_fattr_init(info->fattr); 72 nfs_fattr_init(info->fattr);
79 status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0); 73 status = rpc_call_sync(client, &msg, 0);
80 dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status); 74 dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
81 if (!(info->fattr->valid & NFS_ATTR_FATTR)) { 75 if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
82 status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0); 76 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
77 msg.rpc_resp = info->fattr;
78 status = rpc_call_sync(client, &msg, 0);
83 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); 79 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
84 } 80 }
85 return status; 81 return status;
@@ -107,12 +103,16 @@ static int
107nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 103nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
108 struct nfs_fattr *fattr) 104 struct nfs_fattr *fattr)
109{ 105{
106 struct rpc_message msg = {
107 .rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR],
108 .rpc_argp = fhandle,
109 .rpc_resp = fattr,
110 };
110 int status; 111 int status;
111 112
112 dprintk("NFS call getattr\n"); 113 dprintk("NFS call getattr\n");
113 nfs_fattr_init(fattr); 114 nfs_fattr_init(fattr);
114 status = rpc_call(server->client, NFS3PROC_GETATTR, 115 status = rpc_call_sync(server->client, &msg, 0);
115 fhandle, fattr, 0);
116 dprintk("NFS reply getattr: %d\n", status); 116 dprintk("NFS reply getattr: %d\n", status);
117 return status; 117 return status;
118} 118}
@@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
126 .fh = NFS_FH(inode), 126 .fh = NFS_FH(inode),
127 .sattr = sattr, 127 .sattr = sattr,
128 }; 128 };
129 struct rpc_message msg = {
130 .rpc_proc = &nfs3_procedures[NFS3PROC_SETATTR],
131 .rpc_argp = &arg,
132 .rpc_resp = fattr,
133 };
129 int status; 134 int status;
130 135
131 dprintk("NFS call setattr\n"); 136 dprintk("NFS call setattr\n");
132 nfs_fattr_init(fattr); 137 nfs_fattr_init(fattr);
133 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); 138 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
134 if (status == 0) 139 if (status == 0)
135 nfs_setattr_update_inode(inode, sattr); 140 nfs_setattr_update_inode(inode, sattr);
136 dprintk("NFS reply setattr: %d\n", status); 141 dprintk("NFS reply setattr: %d\n", status);
@@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
152 .fh = fhandle, 157 .fh = fhandle,
153 .fattr = fattr 158 .fattr = fattr
154 }; 159 };
160 struct rpc_message msg = {
161 .rpc_proc = &nfs3_procedures[NFS3PROC_LOOKUP],
162 .rpc_argp = &arg,
163 .rpc_resp = &res,
164 };
155 int status; 165 int status;
156 166
157 dprintk("NFS call lookup %s\n", name->name); 167 dprintk("NFS call lookup %s\n", name->name);
158 nfs_fattr_init(&dir_attr); 168 nfs_fattr_init(&dir_attr);
159 nfs_fattr_init(fattr); 169 nfs_fattr_init(fattr);
160 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0); 170 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
161 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) 171 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
162 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR, 172 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
163 fhandle, fattr, 0); 173 msg.rpc_argp = fhandle;
174 msg.rpc_resp = fattr;
175 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
176 }
164 dprintk("NFS reply lookup: %d\n", status); 177 dprintk("NFS reply lookup: %d\n", status);
165 if (status >= 0) 178 if (status >= 0)
166 status = nfs_refresh_inode(dir, &dir_attr); 179 status = nfs_refresh_inode(dir, &dir_attr);
@@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
180 .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], 193 .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
181 .rpc_argp = &arg, 194 .rpc_argp = &arg,
182 .rpc_resp = &res, 195 .rpc_resp = &res,
183 .rpc_cred = entry->cred 196 .rpc_cred = entry->cred,
184 }; 197 };
185 int mode = entry->mask; 198 int mode = entry->mask;
186 int status; 199 int status;
@@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
226 .pglen = pglen, 239 .pglen = pglen,
227 .pages = &page 240 .pages = &page
228 }; 241 };
242 struct rpc_message msg = {
243 .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK],
244 .rpc_argp = &args,
245 .rpc_resp = &fattr,
246 };
229 int status; 247 int status;
230 248
231 dprintk("NFS call readlink\n"); 249 dprintk("NFS call readlink\n");
232 nfs_fattr_init(&fattr); 250 nfs_fattr_init(&fattr);
233 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK, 251 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
234 &args, &fattr, 0);
235 nfs_refresh_inode(inode, &fattr); 252 nfs_refresh_inode(inode, &fattr);
236 dprintk("NFS reply readlink: %d\n", status); 253 dprintk("NFS reply readlink: %d\n", status);
237 return status; 254 return status;
@@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
327 .fh = &fhandle, 344 .fh = &fhandle,
328 .fattr = &fattr 345 .fattr = &fattr
329 }; 346 };
347 struct rpc_message msg = {
348 .rpc_proc = &nfs3_procedures[NFS3PROC_CREATE],
349 .rpc_argp = &arg,
350 .rpc_resp = &res,
351 };
330 mode_t mode = sattr->ia_mode; 352 mode_t mode = sattr->ia_mode;
331 int status; 353 int status;
332 354
@@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
343again: 365again:
344 nfs_fattr_init(&dir_attr); 366 nfs_fattr_init(&dir_attr);
345 nfs_fattr_init(&fattr); 367 nfs_fattr_init(&fattr);
346 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0); 368 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
347 nfs_post_op_update_inode(dir, &dir_attr); 369 nfs_refresh_inode(dir, &dir_attr);
348 370
349 /* If the server doesn't support the exclusive creation semantics, 371 /* If the server doesn't support the exclusive creation semantics,
350 * try again with simple 'guarded' mode. */ 372 * try again with simple 'guarded' mode. */
@@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
447 struct rpc_message *msg = &task->tk_msg; 469 struct rpc_message *msg = &task->tk_msg;
448 struct nfs_fattr *dir_attr; 470 struct nfs_fattr *dir_attr;
449 471
450 if (nfs3_async_handle_jukebox(task)) 472 if (nfs3_async_handle_jukebox(task, dir->d_inode))
451 return 1; 473 return 1;
452 if (msg->rpc_argp) { 474 if (msg->rpc_argp) {
453 dir_attr = (struct nfs_fattr*)msg->rpc_resp; 475 dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
474 .fromattr = &old_dir_attr, 496 .fromattr = &old_dir_attr,
475 .toattr = &new_dir_attr 497 .toattr = &new_dir_attr
476 }; 498 };
499 struct rpc_message msg = {
500 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
501 .rpc_argp = &arg,
502 .rpc_resp = &res,
503 };
477 int status; 504 int status;
478 505
479 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 506 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
480 nfs_fattr_init(&old_dir_attr); 507 nfs_fattr_init(&old_dir_attr);
481 nfs_fattr_init(&new_dir_attr); 508 nfs_fattr_init(&new_dir_attr);
482 status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0); 509 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
483 nfs_post_op_update_inode(old_dir, &old_dir_attr); 510 nfs_post_op_update_inode(old_dir, &old_dir_attr);
484 nfs_post_op_update_inode(new_dir, &new_dir_attr); 511 nfs_post_op_update_inode(new_dir, &new_dir_attr);
485 dprintk("NFS reply rename: %d\n", status); 512 dprintk("NFS reply rename: %d\n", status);
@@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
500 .dir_attr = &dir_attr, 527 .dir_attr = &dir_attr,
501 .fattr = &fattr 528 .fattr = &fattr
502 }; 529 };
530 struct rpc_message msg = {
531 .rpc_proc = &nfs3_procedures[NFS3PROC_LINK],
532 .rpc_argp = &arg,
533 .rpc_resp = &res,
534 };
503 int status; 535 int status;
504 536
505 dprintk("NFS call link %s\n", name->name); 537 dprintk("NFS call link %s\n", name->name);
506 nfs_fattr_init(&dir_attr); 538 nfs_fattr_init(&dir_attr);
507 nfs_fattr_init(&fattr); 539 nfs_fattr_init(&fattr);
508 status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0); 540 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
509 nfs_post_op_update_inode(dir, &dir_attr); 541 nfs_post_op_update_inode(dir, &dir_attr);
510 nfs_post_op_update_inode(inode, &fattr); 542 nfs_post_op_update_inode(inode, &fattr);
511 dprintk("NFS reply link: %d\n", status); 543 dprintk("NFS reply link: %d\n", status);
@@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
531 .fh = fhandle, 563 .fh = fhandle,
532 .fattr = fattr 564 .fattr = fattr
533 }; 565 };
566 struct rpc_message msg = {
567 .rpc_proc = &nfs3_procedures[NFS3PROC_SYMLINK],
568 .rpc_argp = &arg,
569 .rpc_resp = &res,
570 };
534 int status; 571 int status;
535 572
536 if (path->len > NFS3_MAXPATHLEN) 573 if (path->len > NFS3_MAXPATHLEN)
@@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
538 dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 575 dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
539 nfs_fattr_init(&dir_attr); 576 nfs_fattr_init(&dir_attr);
540 nfs_fattr_init(fattr); 577 nfs_fattr_init(fattr);
541 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0); 578 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
542 nfs_post_op_update_inode(dir, &dir_attr); 579 nfs_post_op_update_inode(dir, &dir_attr);
543 dprintk("NFS reply symlink: %d\n", status); 580 dprintk("NFS reply symlink: %d\n", status);
544 return status; 581 return status;
@@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
560 .fh = &fhandle, 597 .fh = &fhandle,
561 .fattr = &fattr 598 .fattr = &fattr
562 }; 599 };
600 struct rpc_message msg = {
601 .rpc_proc = &nfs3_procedures[NFS3PROC_MKDIR],
602 .rpc_argp = &arg,
603 .rpc_resp = &res,
604 };
563 int mode = sattr->ia_mode; 605 int mode = sattr->ia_mode;
564 int status; 606 int status;
565 607
@@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
569 611
570 nfs_fattr_init(&dir_attr); 612 nfs_fattr_init(&dir_attr);
571 nfs_fattr_init(&fattr); 613 nfs_fattr_init(&fattr);
572 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); 614 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
573 nfs_post_op_update_inode(dir, &dir_attr); 615 nfs_post_op_update_inode(dir, &dir_attr);
574 if (status != 0) 616 if (status != 0)
575 goto out; 617 goto out;
@@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
591 .name = name->name, 633 .name = name->name,
592 .len = name->len 634 .len = name->len
593 }; 635 };
636 struct rpc_message msg = {
637 .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR],
638 .rpc_argp = &arg,
639 .rpc_resp = &dir_attr,
640 };
594 int status; 641 int status;
595 642
596 dprintk("NFS call rmdir %s\n", name->name); 643 dprintk("NFS call rmdir %s\n", name->name);
597 nfs_fattr_init(&dir_attr); 644 nfs_fattr_init(&dir_attr);
598 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0); 645 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
599 nfs_post_op_update_inode(dir, &dir_attr); 646 nfs_post_op_update_inode(dir, &dir_attr);
600 dprintk("NFS reply rmdir: %d\n", status); 647 dprintk("NFS reply rmdir: %d\n", status);
601 return status; 648 return status;
@@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
672 .fh = &fh, 719 .fh = &fh,
673 .fattr = &fattr 720 .fattr = &fattr
674 }; 721 };
722 struct rpc_message msg = {
723 .rpc_proc = &nfs3_procedures[NFS3PROC_MKNOD],
724 .rpc_argp = &arg,
725 .rpc_resp = &res,
726 };
675 mode_t mode = sattr->ia_mode; 727 mode_t mode = sattr->ia_mode;
676 int status; 728 int status;
677 729
@@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
690 742
691 nfs_fattr_init(&dir_attr); 743 nfs_fattr_init(&dir_attr);
692 nfs_fattr_init(&fattr); 744 nfs_fattr_init(&fattr);
693 status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); 745 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
694 nfs_post_op_update_inode(dir, &dir_attr); 746 nfs_post_op_update_inode(dir, &dir_attr);
695 if (status != 0) 747 if (status != 0)
696 goto out; 748 goto out;
@@ -707,11 +759,16 @@ static int
707nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, 759nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
708 struct nfs_fsstat *stat) 760 struct nfs_fsstat *stat)
709{ 761{
762 struct rpc_message msg = {
763 .rpc_proc = &nfs3_procedures[NFS3PROC_FSSTAT],
764 .rpc_argp = fhandle,
765 .rpc_resp = stat,
766 };
710 int status; 767 int status;
711 768
712 dprintk("NFS call fsstat\n"); 769 dprintk("NFS call fsstat\n");
713 nfs_fattr_init(stat->fattr); 770 nfs_fattr_init(stat->fattr);
714 status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0); 771 status = rpc_call_sync(server->client, &msg, 0);
715 dprintk("NFS reply statfs: %d\n", status); 772 dprintk("NFS reply statfs: %d\n", status);
716 return status; 773 return status;
717} 774}
@@ -720,11 +777,16 @@ static int
720nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, 777nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
721 struct nfs_fsinfo *info) 778 struct nfs_fsinfo *info)
722{ 779{
780 struct rpc_message msg = {
781 .rpc_proc = &nfs3_procedures[NFS3PROC_FSINFO],
782 .rpc_argp = fhandle,
783 .rpc_resp = info,
784 };
723 int status; 785 int status;
724 786
725 dprintk("NFS call fsinfo\n"); 787 dprintk("NFS call fsinfo\n");
726 nfs_fattr_init(info->fattr); 788 nfs_fattr_init(info->fattr);
727 status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); 789 status = rpc_call_sync(server->client_sys, &msg, 0);
728 dprintk("NFS reply fsinfo: %d\n", status); 790 dprintk("NFS reply fsinfo: %d\n", status);
729 return status; 791 return status;
730} 792}
@@ -733,40 +795,34 @@ static int
733nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, 795nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
734 struct nfs_pathconf *info) 796 struct nfs_pathconf *info)
735{ 797{
798 struct rpc_message msg = {
799 .rpc_proc = &nfs3_procedures[NFS3PROC_PATHCONF],
800 .rpc_argp = fhandle,
801 .rpc_resp = info,
802 };
736 int status; 803 int status;
737 804
738 dprintk("NFS call pathconf\n"); 805 dprintk("NFS call pathconf\n");
739 nfs_fattr_init(info->fattr); 806 nfs_fattr_init(info->fattr);
740 status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0); 807 status = rpc_call_sync(server->client, &msg, 0);
741 dprintk("NFS reply pathconf: %d\n", status); 808 dprintk("NFS reply pathconf: %d\n", status);
742 return status; 809 return status;
743} 810}
744 811
745extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); 812extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
746 813
747static void nfs3_read_done(struct rpc_task *task, void *calldata) 814static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
748{ 815{
749 struct nfs_read_data *data = calldata; 816 if (nfs3_async_handle_jukebox(task, data->inode))
750 817 return -EAGAIN;
751 if (nfs3_async_handle_jukebox(task))
752 return;
753 /* Call back common NFS readpage processing */ 818 /* Call back common NFS readpage processing */
754 if (task->tk_status >= 0) 819 if (task->tk_status >= 0)
755 nfs_refresh_inode(data->inode, &data->fattr); 820 nfs_refresh_inode(data->inode, &data->fattr);
756 nfs_readpage_result(task, calldata); 821 return 0;
757} 822}
758 823
759static const struct rpc_call_ops nfs3_read_ops = { 824static void nfs3_proc_read_setup(struct nfs_read_data *data)
760 .rpc_call_done = nfs3_read_done,
761 .rpc_release = nfs_readdata_release,
762};
763
764static void
765nfs3_proc_read_setup(struct nfs_read_data *data)
766{ 825{
767 struct rpc_task *task = &data->task;
768 struct inode *inode = data->inode;
769 int flags;
770 struct rpc_message msg = { 826 struct rpc_message msg = {
771 .rpc_proc = &nfs3_procedures[NFS3PROC_READ], 827 .rpc_proc = &nfs3_procedures[NFS3PROC_READ],
772 .rpc_argp = &data->args, 828 .rpc_argp = &data->args,
@@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
774 .rpc_cred = data->cred, 830 .rpc_cred = data->cred,
775 }; 831 };
776 832
777 /* N.B. Do we need to test? Never called for swapfile inode */ 833 rpc_call_setup(&data->task, &msg, 0);
778 flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
779
780 /* Finalize the task. */
781 rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
782 rpc_call_setup(task, &msg, 0);
783} 834}
784 835
785static void nfs3_write_done(struct rpc_task *task, void *calldata) 836static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
786{ 837{
787 struct nfs_write_data *data = calldata; 838 if (nfs3_async_handle_jukebox(task, data->inode))
788 839 return -EAGAIN;
789 if (nfs3_async_handle_jukebox(task))
790 return;
791 if (task->tk_status >= 0) 840 if (task->tk_status >= 0)
792 nfs_post_op_update_inode(data->inode, data->res.fattr); 841 nfs_post_op_update_inode(data->inode, data->res.fattr);
793 nfs_writeback_done(task, calldata); 842 return 0;
794} 843}
795 844
796static const struct rpc_call_ops nfs3_write_ops = { 845static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
797 .rpc_call_done = nfs3_write_done,
798 .rpc_release = nfs_writedata_release,
799};
800
801static void
802nfs3_proc_write_setup(struct nfs_write_data *data, int how)
803{ 846{
804 struct rpc_task *task = &data->task;
805 struct inode *inode = data->inode;
806 int stable;
807 int flags;
808 struct rpc_message msg = { 847 struct rpc_message msg = {
809 .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], 848 .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
810 .rpc_argp = &data->args, 849 .rpc_argp = &data->args,
@@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
812 .rpc_cred = data->cred, 851 .rpc_cred = data->cred,
813 }; 852 };
814 853
854 data->args.stable = NFS_UNSTABLE;
815 if (how & FLUSH_STABLE) { 855 if (how & FLUSH_STABLE) {
816 if (!NFS_I(inode)->ncommit) 856 data->args.stable = NFS_FILE_SYNC;
817 stable = NFS_FILE_SYNC; 857 if (NFS_I(data->inode)->ncommit)
818 else 858 data->args.stable = NFS_DATA_SYNC;
819 stable = NFS_DATA_SYNC; 859 }
820 } else
821 stable = NFS_UNSTABLE;
822 data->args.stable = stable;
823
824 /* Set the initial flags for the task. */
825 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
826 860
827 /* Finalize the task. */ 861 /* Finalize the task. */
828 rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data); 862 rpc_call_setup(&data->task, &msg, 0);
829 rpc_call_setup(task, &msg, 0);
830} 863}
831 864
832static void nfs3_commit_done(struct rpc_task *task, void *calldata) 865static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
833{ 866{
834 struct nfs_write_data *data = calldata; 867 if (nfs3_async_handle_jukebox(task, data->inode))
835 868 return -EAGAIN;
836 if (nfs3_async_handle_jukebox(task))
837 return;
838 if (task->tk_status >= 0) 869 if (task->tk_status >= 0)
839 nfs_post_op_update_inode(data->inode, data->res.fattr); 870 nfs_post_op_update_inode(data->inode, data->res.fattr);
840 nfs_commit_done(task, calldata); 871 return 0;
841} 872}
842 873
843static const struct rpc_call_ops nfs3_commit_ops = { 874static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
844 .rpc_call_done = nfs3_commit_done,
845 .rpc_release = nfs_commit_release,
846};
847
848static void
849nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
850{ 875{
851 struct rpc_task *task = &data->task;
852 struct inode *inode = data->inode;
853 int flags;
854 struct rpc_message msg = { 876 struct rpc_message msg = {
855 .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], 877 .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
856 .rpc_argp = &data->args, 878 .rpc_argp = &data->args,
@@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
858 .rpc_cred = data->cred, 880 .rpc_cred = data->cred,
859 }; 881 };
860 882
861 /* Set the initial flags for the task. */ 883 rpc_call_setup(&data->task, &msg, 0);
862 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
863
864 /* Finalize the task. */
865 rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
866 rpc_call_setup(task, &msg, 0);
867} 884}
868 885
869static int 886static int
@@ -902,8 +919,11 @@ struct nfs_rpc_ops nfs_v3_clientops = {
902 .pathconf = nfs3_proc_pathconf, 919 .pathconf = nfs3_proc_pathconf,
903 .decode_dirent = nfs3_decode_dirent, 920 .decode_dirent = nfs3_decode_dirent,
904 .read_setup = nfs3_proc_read_setup, 921 .read_setup = nfs3_proc_read_setup,
922 .read_done = nfs3_read_done,
905 .write_setup = nfs3_proc_write_setup, 923 .write_setup = nfs3_proc_write_setup,
924 .write_done = nfs3_write_done,
906 .commit_setup = nfs3_proc_commit_setup, 925 .commit_setup = nfs3_proc_commit_setup,
926 .commit_done = nfs3_commit_done,
907 .file_open = nfs_open, 927 .file_open = nfs_open,
908 .file_release = nfs_release, 928 .file_release = nfs_release,
909 .lock = nfs3_proc_lock, 929 .lock = nfs3_proc_lock,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 5224a191efb6..ec233619687e 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
1109 .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ 1109 .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
1110 .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ 1110 .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
1111 .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ 1111 .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \
1112 .p_timer = timer \ 1112 .p_timer = timer, \
1113 .p_statidx = NFS3PROC_##proc, \
1114 .p_name = #proc, \
1113 } 1115 }
1114 1116
1115struct rpc_procinfo nfs3_procedures[] = { 1117struct rpc_procinfo nfs3_procedures[] = {
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
1150 .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, 1152 .p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
1151 .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, 1153 .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
1152 .p_timer = 1, 1154 .p_timer = 1,
1155 .p_name = "GETACL",
1153 }, 1156 },
1154 [ACLPROC3_SETACL] = { 1157 [ACLPROC3_SETACL] = {
1155 .p_proc = ACLPROC3_SETACL, 1158 .p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
1157 .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, 1160 .p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
1158 .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, 1161 .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
1159 .p_timer = 0, 1162 .p_timer = 0,
1163 .p_name = "SETACL",
1160 }, 1164 },
1161}; 1165};
1162 1166
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f8c0066e02e1..47ece1dd3c67 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
51 51
52#include "nfs4_fs.h" 52#include "nfs4_fs.h"
53#include "delegation.h" 53#include "delegation.h"
54#include "iostat.h"
54 55
55#define NFSDBG_FACILITY NFSDBG_PROC 56#define NFSDBG_FACILITY NFSDBG_PROC
56 57
@@ -335,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
335 if (!(data->f_attr.valid & NFS_ATTR_FATTR)) 336 if (!(data->f_attr.valid & NFS_ATTR_FATTR))
336 goto out; 337 goto out;
337 inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr); 338 inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
338 if (inode == NULL) 339 if (IS_ERR(inode))
339 goto out; 340 goto out;
340 state = nfs4_get_open_state(inode, data->owner); 341 state = nfs4_get_open_state(inode, data->owner);
341 if (state == NULL) 342 if (state == NULL)
@@ -604,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
604 int status; 605 int status;
605 606
606 atomic_inc(&data->count); 607 atomic_inc(&data->count);
608 /*
609 * If rpc_run_task() ends up calling ->rpc_release(), we
610 * want to ensure that it takes the 'error' code path.
611 */
612 data->rpc_status = -ENOMEM;
607 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); 613 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
608 if (IS_ERR(task)) { 614 if (IS_ERR(task))
609 nfs4_opendata_free(data);
610 return PTR_ERR(task); 615 return PTR_ERR(task);
611 }
612 status = nfs4_wait_for_completion_rpc_task(task); 616 status = nfs4_wait_for_completion_rpc_task(task);
613 if (status != 0) { 617 if (status != 0) {
614 data->cancelled = 1; 618 data->cancelled = 1;
@@ -707,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
707 int status; 711 int status;
708 712
709 atomic_inc(&data->count); 713 atomic_inc(&data->count);
714 /*
715 * If rpc_run_task() ends up calling ->rpc_release(), we
716 * want to ensure that it takes the 'error' code path.
717 */
718 data->rpc_status = -ENOMEM;
710 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data); 719 task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
711 if (IS_ERR(task)) { 720 if (IS_ERR(task))
712 nfs4_opendata_free(data);
713 return PTR_ERR(task); 721 return PTR_ERR(task);
714 }
715 status = nfs4_wait_for_completion_rpc_task(task); 722 status = nfs4_wait_for_completion_rpc_task(task);
716 if (status != 0) { 723 if (status != 0) {
717 data->cancelled = 1; 724 data->cancelled = 1;
@@ -908,7 +915,7 @@ out_put_state_owner:
908static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) 915static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
909{ 916{
910 struct nfs4_exception exception = { }; 917 struct nfs4_exception exception = { };
911 struct nfs4_state *res; 918 struct nfs4_state *res = ERR_PTR(-EIO);
912 int err; 919 int err;
913 920
914 do { 921 do {
@@ -1017,12 +1024,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
1017 return res; 1024 return res;
1018} 1025}
1019 1026
1020static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, 1027static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
1021 struct nfs_fh *fhandle, struct iattr *sattr, 1028 struct iattr *sattr, struct nfs4_state *state)
1022 struct nfs4_state *state)
1023{ 1029{
1030 struct nfs_server *server = NFS_SERVER(inode);
1024 struct nfs_setattrargs arg = { 1031 struct nfs_setattrargs arg = {
1025 .fh = fhandle, 1032 .fh = NFS_FH(inode),
1026 .iap = sattr, 1033 .iap = sattr,
1027 .server = server, 1034 .server = server,
1028 .bitmask = server->attr_bitmask, 1035 .bitmask = server->attr_bitmask,
@@ -1041,7 +1048,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
1041 1048
1042 nfs_fattr_init(fattr); 1049 nfs_fattr_init(fattr);
1043 1050
1044 if (state != NULL) { 1051 if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
1052 /* Use that stateid */
1053 } else if (state != NULL) {
1045 msg.rpc_cred = state->owner->so_cred; 1054 msg.rpc_cred = state->owner->so_cred;
1046 nfs4_copy_stateid(&arg.stateid, state, current->files); 1055 nfs4_copy_stateid(&arg.stateid, state, current->files);
1047 } else 1056 } else
@@ -1053,16 +1062,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
1053 return status; 1062 return status;
1054} 1063}
1055 1064
1056static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, 1065static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
1057 struct nfs_fh *fhandle, struct iattr *sattr, 1066 struct iattr *sattr, struct nfs4_state *state)
1058 struct nfs4_state *state)
1059{ 1067{
1068 struct nfs_server *server = NFS_SERVER(inode);
1060 struct nfs4_exception exception = { }; 1069 struct nfs4_exception exception = { };
1061 int err; 1070 int err;
1062 do { 1071 do {
1063 err = nfs4_handle_exception(server, 1072 err = nfs4_handle_exception(server,
1064 _nfs4_do_setattr(server, fattr, fhandle, sattr, 1073 _nfs4_do_setattr(inode, fattr, sattr, state),
1065 state),
1066 &exception); 1074 &exception);
1067 } while (exception.retry); 1075 } while (exception.retry);
1068 return err; 1076 return err;
@@ -1503,8 +1511,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
1503 if (ctx != NULL) 1511 if (ctx != NULL)
1504 state = ctx->state; 1512 state = ctx->state;
1505 1513
1506 status = nfs4_do_setattr(NFS_SERVER(inode), fattr, 1514 status = nfs4_do_setattr(inode, fattr, sattr, state);
1507 NFS_FH(inode), sattr, state);
1508 if (status == 0) 1515 if (status == 0)
1509 nfs_setattr_update_inode(inode, sattr); 1516 nfs_setattr_update_inode(inode, sattr);
1510 if (ctx != NULL) 1517 if (ctx != NULL)
@@ -1823,8 +1830,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1823 d_instantiate(dentry, igrab(state->inode)); 1830 d_instantiate(dentry, igrab(state->inode));
1824 if (flags & O_EXCL) { 1831 if (flags & O_EXCL) {
1825 struct nfs_fattr fattr; 1832 struct nfs_fattr fattr;
1826 status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, 1833 status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
1827 NFS_FH(state->inode), sattr, state);
1828 if (status == 0) 1834 if (status == 0)
1829 nfs_setattr_update_inode(state->inode, sattr); 1835 nfs_setattr_update_inode(state->inode, sattr);
1830 } 1836 }
@@ -2344,75 +2350,50 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
2344 return err; 2350 return err;
2345} 2351}
2346 2352
2347static void nfs4_read_done(struct rpc_task *task, void *calldata) 2353static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
2348{ 2354{
2349 struct nfs_read_data *data = calldata; 2355 struct nfs_server *server = NFS_SERVER(data->inode);
2350 struct inode *inode = data->inode;
2351 2356
2352 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { 2357 if (nfs4_async_handle_error(task, server) == -EAGAIN) {
2353 rpc_restart_call(task); 2358 rpc_restart_call(task);
2354 return; 2359 return -EAGAIN;
2355 } 2360 }
2356 if (task->tk_status > 0) 2361 if (task->tk_status > 0)
2357 renew_lease(NFS_SERVER(inode), data->timestamp); 2362 renew_lease(server, data->timestamp);
2358 /* Call back common NFS readpage processing */ 2363 return 0;
2359 nfs_readpage_result(task, calldata);
2360} 2364}
2361 2365
2362static const struct rpc_call_ops nfs4_read_ops = { 2366static void nfs4_proc_read_setup(struct nfs_read_data *data)
2363 .rpc_call_done = nfs4_read_done,
2364 .rpc_release = nfs_readdata_release,
2365};
2366
2367static void
2368nfs4_proc_read_setup(struct nfs_read_data *data)
2369{ 2367{
2370 struct rpc_task *task = &data->task;
2371 struct rpc_message msg = { 2368 struct rpc_message msg = {
2372 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], 2369 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
2373 .rpc_argp = &data->args, 2370 .rpc_argp = &data->args,
2374 .rpc_resp = &data->res, 2371 .rpc_resp = &data->res,
2375 .rpc_cred = data->cred, 2372 .rpc_cred = data->cred,
2376 }; 2373 };
2377 struct inode *inode = data->inode;
2378 int flags;
2379 2374
2380 data->timestamp = jiffies; 2375 data->timestamp = jiffies;
2381 2376
2382 /* N.B. Do we need to test? Never called for swapfile inode */ 2377 rpc_call_setup(&data->task, &msg, 0);
2383 flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
2384
2385 /* Finalize the task. */
2386 rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
2387 rpc_call_setup(task, &msg, 0);
2388} 2378}
2389 2379
2390static void nfs4_write_done(struct rpc_task *task, void *calldata) 2380static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
2391{ 2381{
2392 struct nfs_write_data *data = calldata;
2393 struct inode *inode = data->inode; 2382 struct inode *inode = data->inode;
2394 2383
2395 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { 2384 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
2396 rpc_restart_call(task); 2385 rpc_restart_call(task);
2397 return; 2386 return -EAGAIN;
2398 } 2387 }
2399 if (task->tk_status >= 0) { 2388 if (task->tk_status >= 0) {
2400 renew_lease(NFS_SERVER(inode), data->timestamp); 2389 renew_lease(NFS_SERVER(inode), data->timestamp);
2401 nfs_post_op_update_inode(inode, data->res.fattr); 2390 nfs_post_op_update_inode(inode, data->res.fattr);
2402 } 2391 }
2403 /* Call back common NFS writeback processing */ 2392 return 0;
2404 nfs_writeback_done(task, calldata);
2405} 2393}
2406 2394
2407static const struct rpc_call_ops nfs4_write_ops = { 2395static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
2408 .rpc_call_done = nfs4_write_done,
2409 .rpc_release = nfs_writedata_release,
2410};
2411
2412static void
2413nfs4_proc_write_setup(struct nfs_write_data *data, int how)
2414{ 2396{
2415 struct rpc_task *task = &data->task;
2416 struct rpc_message msg = { 2397 struct rpc_message msg = {
2417 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], 2398 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
2418 .rpc_argp = &data->args, 2399 .rpc_argp = &data->args,
@@ -2422,7 +2403,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
2422 struct inode *inode = data->inode; 2403 struct inode *inode = data->inode;
2423 struct nfs_server *server = NFS_SERVER(inode); 2404 struct nfs_server *server = NFS_SERVER(inode);
2424 int stable; 2405 int stable;
2425 int flags;
2426 2406
2427 if (how & FLUSH_STABLE) { 2407 if (how & FLUSH_STABLE) {
2428 if (!NFS_I(inode)->ncommit) 2408 if (!NFS_I(inode)->ncommit)
@@ -2437,57 +2417,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
2437 2417
2438 data->timestamp = jiffies; 2418 data->timestamp = jiffies;
2439 2419
2440 /* Set the initial flags for the task. */
2441 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
2442
2443 /* Finalize the task. */ 2420 /* Finalize the task. */
2444 rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data); 2421 rpc_call_setup(&data->task, &msg, 0);
2445 rpc_call_setup(task, &msg, 0);
2446} 2422}
2447 2423
2448static void nfs4_commit_done(struct rpc_task *task, void *calldata) 2424static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
2449{ 2425{
2450 struct nfs_write_data *data = calldata;
2451 struct inode *inode = data->inode; 2426 struct inode *inode = data->inode;
2452 2427
2453 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { 2428 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
2454 rpc_restart_call(task); 2429 rpc_restart_call(task);
2455 return; 2430 return -EAGAIN;
2456 } 2431 }
2457 if (task->tk_status >= 0) 2432 if (task->tk_status >= 0)
2458 nfs_post_op_update_inode(inode, data->res.fattr); 2433 nfs_post_op_update_inode(inode, data->res.fattr);
2459 /* Call back common NFS writeback processing */ 2434 return 0;
2460 nfs_commit_done(task, calldata);
2461} 2435}
2462 2436
2463static const struct rpc_call_ops nfs4_commit_ops = { 2437static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
2464 .rpc_call_done = nfs4_commit_done,
2465 .rpc_release = nfs_commit_release,
2466};
2467
2468static void
2469nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
2470{ 2438{
2471 struct rpc_task *task = &data->task;
2472 struct rpc_message msg = { 2439 struct rpc_message msg = {
2473 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], 2440 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
2474 .rpc_argp = &data->args, 2441 .rpc_argp = &data->args,
2475 .rpc_resp = &data->res, 2442 .rpc_resp = &data->res,
2476 .rpc_cred = data->cred, 2443 .rpc_cred = data->cred,
2477 }; 2444 };
2478 struct inode *inode = data->inode; 2445 struct nfs_server *server = NFS_SERVER(data->inode);
2479 struct nfs_server *server = NFS_SERVER(inode);
2480 int flags;
2481 2446
2482 data->args.bitmask = server->attr_bitmask; 2447 data->args.bitmask = server->attr_bitmask;
2483 data->res.server = server; 2448 data->res.server = server;
2484 2449
2485 /* Set the initial flags for the task. */ 2450 rpc_call_setup(&data->task, &msg, 0);
2486 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
2487
2488 /* Finalize the task. */
2489 rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
2490 rpc_call_setup(task, &msg, 0);
2491} 2451}
2492 2452
2493/* 2453/*
@@ -2755,8 +2715,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
2755 rpc_wake_up_task(task); 2715 rpc_wake_up_task(task);
2756 task->tk_status = 0; 2716 task->tk_status = 0;
2757 return -EAGAIN; 2717 return -EAGAIN;
2758 case -NFS4ERR_GRACE:
2759 case -NFS4ERR_DELAY: 2718 case -NFS4ERR_DELAY:
2719 nfs_inc_server_stats((struct nfs_server *) server,
2720 NFSIOS_DELAY);
2721 case -NFS4ERR_GRACE:
2760 rpc_delay(task, NFS4_POLL_RETRY_MAX); 2722 rpc_delay(task, NFS4_POLL_RETRY_MAX);
2761 task->tk_status = 0; 2723 task->tk_status = 0;
2762 return -EAGAIN; 2724 return -EAGAIN;
@@ -2893,8 +2855,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
2893 return status; 2855 return status;
2894} 2856}
2895 2857
2896int 2858static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
2897nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
2898{ 2859{
2899 struct nfs_fsinfo fsinfo; 2860 struct nfs_fsinfo fsinfo;
2900 struct rpc_message msg = { 2861 struct rpc_message msg = {
@@ -2918,6 +2879,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
2918 return status; 2879 return status;
2919} 2880}
2920 2881
2882int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
2883{
2884 long timeout;
2885 int err;
2886 do {
2887 err = _nfs4_proc_setclientid_confirm(clp, cred);
2888 switch (err) {
2889 case 0:
2890 return err;
2891 case -NFS4ERR_RESOURCE:
2892 /* The IBM lawyers misread another document! */
2893 case -NFS4ERR_DELAY:
2894 err = nfs4_delay(clp->cl_rpcclient, &timeout);
2895 }
2896 } while (err == 0);
2897 return err;
2898}
2899
2921struct nfs4_delegreturndata { 2900struct nfs4_delegreturndata {
2922 struct nfs4_delegreturnargs args; 2901 struct nfs4_delegreturnargs args;
2923 struct nfs4_delegreturnres res; 2902 struct nfs4_delegreturnres res;
@@ -2958,7 +2937,7 @@ static void nfs4_delegreturn_release(void *calldata)
2958 kfree(calldata); 2937 kfree(calldata);
2959} 2938}
2960 2939
2961const static struct rpc_call_ops nfs4_delegreturn_ops = { 2940static const struct rpc_call_ops nfs4_delegreturn_ops = {
2962 .rpc_call_prepare = nfs4_delegreturn_prepare, 2941 .rpc_call_prepare = nfs4_delegreturn_prepare,
2963 .rpc_call_done = nfs4_delegreturn_done, 2942 .rpc_call_done = nfs4_delegreturn_done,
2964 .rpc_release = nfs4_delegreturn_release, 2943 .rpc_release = nfs4_delegreturn_release,
@@ -2986,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
2986 data->rpc_status = 0; 2965 data->rpc_status = 0;
2987 2966
2988 task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data); 2967 task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
2989 if (IS_ERR(task)) { 2968 if (IS_ERR(task))
2990 nfs4_delegreturn_release(data);
2991 return PTR_ERR(task); 2969 return PTR_ERR(task);
2992 }
2993 status = nfs4_wait_for_completion_rpc_task(task); 2970 status = nfs4_wait_for_completion_rpc_task(task);
2994 if (status == 0) { 2971 if (status == 0) {
2995 status = data->rpc_status; 2972 status = data->rpc_status;
@@ -3209,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3209 struct nfs_seqid *seqid) 3186 struct nfs_seqid *seqid)
3210{ 3187{
3211 struct nfs4_unlockdata *data; 3188 struct nfs4_unlockdata *data;
3212 struct rpc_task *task;
3213 3189
3214 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); 3190 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
3215 if (data == NULL) { 3191 if (data == NULL) {
@@ -3219,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3219 3195
3220 /* Unlock _before_ we do the RPC call */ 3196 /* Unlock _before_ we do the RPC call */
3221 do_vfs_lock(fl->fl_file, fl); 3197 do_vfs_lock(fl->fl_file, fl);
3222 task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); 3198 return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
3223 if (IS_ERR(task))
3224 nfs4_locku_release_calldata(data);
3225 return task;
3226} 3199}
3227 3200
3228static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 3201static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3403,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
3403 data->arg.reclaim = 1; 3376 data->arg.reclaim = 1;
3404 task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC, 3377 task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
3405 &nfs4_lock_ops, data); 3378 &nfs4_lock_ops, data);
3406 if (IS_ERR(task)) { 3379 if (IS_ERR(task))
3407 nfs4_lock_release(data);
3408 return PTR_ERR(task); 3380 return PTR_ERR(task);
3409 }
3410 ret = nfs4_wait_for_completion_rpc_task(task); 3381 ret = nfs4_wait_for_completion_rpc_task(task);
3411 if (ret == 0) { 3382 if (ret == 0) {
3412 ret = data->rpc_status; 3383 ret = data->rpc_status;
@@ -3588,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
3588{ 3559{
3589 size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; 3560 size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
3590 3561
3562 if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
3563 return 0;
3591 if (buf && buflen < len) 3564 if (buf && buflen < len)
3592 return -ERANGE; 3565 return -ERANGE;
3593 if (buf) 3566 if (buf)
@@ -3644,8 +3617,11 @@ struct nfs_rpc_ops nfs_v4_clientops = {
3644 .pathconf = nfs4_proc_pathconf, 3617 .pathconf = nfs4_proc_pathconf,
3645 .decode_dirent = nfs4_decode_dirent, 3618 .decode_dirent = nfs4_decode_dirent,
3646 .read_setup = nfs4_proc_read_setup, 3619 .read_setup = nfs4_proc_read_setup,
3620 .read_done = nfs4_read_done,
3647 .write_setup = nfs4_proc_write_setup, 3621 .write_setup = nfs4_proc_write_setup,
3622 .write_done = nfs4_write_done,
3648 .commit_setup = nfs4_proc_commit_setup, 3623 .commit_setup = nfs4_proc_commit_setup,
3624 .commit_done = nfs4_commit_done,
3649 .file_open = nfs_open, 3625 .file_open = nfs_open,
3650 .file_release = nfs_release, 3626 .file_release = nfs_release,
3651 .lock = nfs4_proc_lock, 3627 .lock = nfs4_proc_lock,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afad0255e7db..96e5b82c153b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -977,6 +977,7 @@ out:
977out_error: 977out_error:
978 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n", 978 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
979 NIPQUAD(clp->cl_addr.s_addr), -status); 979 NIPQUAD(clp->cl_addr.s_addr), -status);
980 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
980 goto out; 981 goto out;
981} 982}
982 983
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0a1bd36a4837..7c5d70efe720 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
4344 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 4344 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
4345 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 4345 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
4346 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ 4346 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
4347 .p_statidx = NFSPROC4_CLNT_##proc, \
4348 .p_name = #proc, \
4347 } 4349 }
4348 4350
4349struct rpc_procinfo nfs4_procedures[] = { 4351struct rpc_procinfo nfs4_procedures[] = {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d53857b148e2..106aca388ebc 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,6 +85,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
85 atomic_set(&req->wb_complete, 0); 85 atomic_set(&req->wb_complete, 0);
86 req->wb_index = page->index; 86 req->wb_index = page->index;
87 page_cache_get(page); 87 page_cache_get(page);
88 BUG_ON(PagePrivate(page));
89 BUG_ON(!PageLocked(page));
90 BUG_ON(page->mapping->host != inode);
88 req->wb_offset = offset; 91 req->wb_offset = offset;
89 req->wb_pgbase = offset; 92 req->wb_pgbase = offset;
90 req->wb_bytes = count; 93 req->wb_bytes = count;
@@ -132,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req)
132{ 135{
133 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); 136 struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
134 137
135 spin_lock(&nfsi->req_lock); 138 if (req->wb_page != NULL) {
136 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); 139 spin_lock(&nfsi->req_lock);
137 spin_unlock(&nfsi->req_lock); 140 radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
141 spin_unlock(&nfsi->req_lock);
142 }
138 nfs_unlock_request(req); 143 nfs_unlock_request(req);
139} 144}
140 145
@@ -147,8 +152,9 @@ void nfs_clear_page_writeback(struct nfs_page *req)
147 */ 152 */
148void nfs_clear_request(struct nfs_page *req) 153void nfs_clear_request(struct nfs_page *req)
149{ 154{
150 if (req->wb_page) { 155 struct page *page = req->wb_page;
151 page_cache_release(req->wb_page); 156 if (page != NULL) {
157 page_cache_release(page);
152 req->wb_page = NULL; 158 req->wb_page = NULL;
153 } 159 }
154} 160}
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f5150d71c03d..9dd85cac2df0 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
58{ 58{
59 struct nfs_fattr *fattr = info->fattr; 59 struct nfs_fattr *fattr = info->fattr;
60 struct nfs2_fsstat fsinfo; 60 struct nfs2_fsstat fsinfo;
61 struct rpc_message msg = {
62 .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
63 .rpc_argp = fhandle,
64 .rpc_resp = fattr,
65 };
61 int status; 66 int status;
62 67
63 dprintk("%s: call getattr\n", __FUNCTION__); 68 dprintk("%s: call getattr\n", __FUNCTION__);
64 nfs_fattr_init(fattr); 69 nfs_fattr_init(fattr);
65 status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); 70 status = rpc_call_sync(server->client_sys, &msg, 0);
66 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status); 71 dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
67 if (status) 72 if (status)
68 return status; 73 return status;
69 dprintk("%s: call statfs\n", __FUNCTION__); 74 dprintk("%s: call statfs\n", __FUNCTION__);
70 status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); 75 msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
76 msg.rpc_resp = &fsinfo;
77 status = rpc_call_sync(server->client_sys, &msg, 0);
71 dprintk("%s: reply statfs: %d\n", __FUNCTION__, status); 78 dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
72 if (status) 79 if (status)
73 return status; 80 return status;
@@ -90,12 +97,16 @@ static int
90nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, 97nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
91 struct nfs_fattr *fattr) 98 struct nfs_fattr *fattr)
92{ 99{
100 struct rpc_message msg = {
101 .rpc_proc = &nfs_procedures[NFSPROC_GETATTR],
102 .rpc_argp = fhandle,
103 .rpc_resp = fattr,
104 };
93 int status; 105 int status;
94 106
95 dprintk("NFS call getattr\n"); 107 dprintk("NFS call getattr\n");
96 nfs_fattr_init(fattr); 108 nfs_fattr_init(fattr);
97 status = rpc_call(server->client, NFSPROC_GETATTR, 109 status = rpc_call_sync(server->client, &msg, 0);
98 fhandle, fattr, 0);
99 dprintk("NFS reply getattr: %d\n", status); 110 dprintk("NFS reply getattr: %d\n", status);
100 return status; 111 return status;
101} 112}
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
109 .fh = NFS_FH(inode), 120 .fh = NFS_FH(inode),
110 .sattr = sattr 121 .sattr = sattr
111 }; 122 };
123 struct rpc_message msg = {
124 .rpc_proc = &nfs_procedures[NFSPROC_SETATTR],
125 .rpc_argp = &arg,
126 .rpc_resp = fattr,
127 };
112 int status; 128 int status;
113 129
114 /* Mask out the non-modebit related stuff from attr->ia_mode */ 130 /* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
116 132
117 dprintk("NFS call setattr\n"); 133 dprintk("NFS call setattr\n");
118 nfs_fattr_init(fattr); 134 nfs_fattr_init(fattr);
119 status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); 135 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
120 if (status == 0) 136 if (status == 0)
121 nfs_setattr_update_inode(inode, sattr); 137 nfs_setattr_update_inode(inode, sattr);
122 dprintk("NFS reply setattr: %d\n", status); 138 dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
136 .fh = fhandle, 152 .fh = fhandle,
137 .fattr = fattr 153 .fattr = fattr
138 }; 154 };
155 struct rpc_message msg = {
156 .rpc_proc = &nfs_procedures[NFSPROC_LOOKUP],
157 .rpc_argp = &arg,
158 .rpc_resp = &res,
159 };
139 int status; 160 int status;
140 161
141 dprintk("NFS call lookup %s\n", name->name); 162 dprintk("NFS call lookup %s\n", name->name);
142 nfs_fattr_init(fattr); 163 nfs_fattr_init(fattr);
143 status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0); 164 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
144 dprintk("NFS reply lookup: %d\n", status); 165 dprintk("NFS reply lookup: %d\n", status);
145 return status; 166 return status;
146} 167}
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
154 .pglen = pglen, 175 .pglen = pglen,
155 .pages = &page 176 .pages = &page
156 }; 177 };
178 struct rpc_message msg = {
179 .rpc_proc = &nfs_procedures[NFSPROC_READLINK],
180 .rpc_argp = &args,
181 };
157 int status; 182 int status;
158 183
159 dprintk("NFS call readlink\n"); 184 dprintk("NFS call readlink\n");
160 status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0); 185 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
161 dprintk("NFS reply readlink: %d\n", status); 186 dprintk("NFS reply readlink: %d\n", status);
162 return status; 187 return status;
163} 188}
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
233 .fh = &fhandle, 258 .fh = &fhandle,
234 .fattr = &fattr 259 .fattr = &fattr
235 }; 260 };
261 struct rpc_message msg = {
262 .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
263 .rpc_argp = &arg,
264 .rpc_resp = &res,
265 };
236 int status; 266 int status;
237 267
238 nfs_fattr_init(&fattr); 268 nfs_fattr_init(&fattr);
239 dprintk("NFS call create %s\n", dentry->d_name.name); 269 dprintk("NFS call create %s\n", dentry->d_name.name);
240 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); 270 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
241 if (status == 0) 271 if (status == 0)
242 status = nfs_instantiate(dentry, &fhandle, &fattr); 272 status = nfs_instantiate(dentry, &fhandle, &fattr);
243 dprintk("NFS reply create: %d\n", status); 273 dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
263 .fh = &fhandle, 293 .fh = &fhandle,
264 .fattr = &fattr 294 .fattr = &fattr
265 }; 295 };
296 struct rpc_message msg = {
297 .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
298 .rpc_argp = &arg,
299 .rpc_resp = &res,
300 };
266 int status, mode; 301 int status, mode;
267 302
268 dprintk("NFS call mknod %s\n", dentry->d_name.name); 303 dprintk("NFS call mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
277 } 312 }
278 313
279 nfs_fattr_init(&fattr); 314 nfs_fattr_init(&fattr);
280 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); 315 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
281 nfs_mark_for_revalidate(dir); 316 nfs_mark_for_revalidate(dir);
282 317
283 if (status == -EINVAL && S_ISFIFO(mode)) { 318 if (status == -EINVAL && S_ISFIFO(mode)) {
284 sattr->ia_mode = mode; 319 sattr->ia_mode = mode;
285 nfs_fattr_init(&fattr); 320 nfs_fattr_init(&fattr);
286 status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); 321 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
287 } 322 }
288 if (status == 0) 323 if (status == 0)
289 status = nfs_instantiate(dentry, &fhandle, &fattr); 324 status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
302 struct rpc_message msg = { 337 struct rpc_message msg = {
303 .rpc_proc = &nfs_procedures[NFSPROC_REMOVE], 338 .rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
304 .rpc_argp = &arg, 339 .rpc_argp = &arg,
305 .rpc_resp = NULL,
306 .rpc_cred = NULL
307 }; 340 };
308 int status; 341 int status;
309 342
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
355 .toname = new_name->name, 388 .toname = new_name->name,
356 .tolen = new_name->len 389 .tolen = new_name->len
357 }; 390 };
391 struct rpc_message msg = {
392 .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
393 .rpc_argp = &arg,
394 };
358 int status; 395 int status;
359 396
360 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 397 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
361 status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0); 398 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
362 nfs_mark_for_revalidate(old_dir); 399 nfs_mark_for_revalidate(old_dir);
363 nfs_mark_for_revalidate(new_dir); 400 nfs_mark_for_revalidate(new_dir);
364 dprintk("NFS reply rename: %d\n", status); 401 dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
374 .toname = name->name, 411 .toname = name->name,
375 .tolen = name->len 412 .tolen = name->len
376 }; 413 };
414 struct rpc_message msg = {
415 .rpc_proc = &nfs_procedures[NFSPROC_LINK],
416 .rpc_argp = &arg,
417 };
377 int status; 418 int status;
378 419
379 dprintk("NFS call link %s\n", name->name); 420 dprintk("NFS call link %s\n", name->name);
380 status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0); 421 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
381 nfs_mark_for_revalidate(inode); 422 nfs_mark_for_revalidate(inode);
382 nfs_mark_for_revalidate(dir); 423 nfs_mark_for_revalidate(dir);
383 dprintk("NFS reply link: %d\n", status); 424 dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
397 .tolen = path->len, 438 .tolen = path->len,
398 .sattr = sattr 439 .sattr = sattr
399 }; 440 };
441 struct rpc_message msg = {
442 .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK],
443 .rpc_argp = &arg,
444 };
400 int status; 445 int status;
401 446
402 if (path->len > NFS2_MAXPATHLEN) 447 if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
404 dprintk("NFS call symlink %s -> %s\n", name->name, path->name); 449 dprintk("NFS call symlink %s -> %s\n", name->name, path->name);
405 nfs_fattr_init(fattr); 450 nfs_fattr_init(fattr);
406 fhandle->size = 0; 451 fhandle->size = 0;
407 status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0); 452 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
408 nfs_mark_for_revalidate(dir); 453 nfs_mark_for_revalidate(dir);
409 dprintk("NFS reply symlink: %d\n", status); 454 dprintk("NFS reply symlink: %d\n", status);
410 return status; 455 return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
425 .fh = &fhandle, 470 .fh = &fhandle,
426 .fattr = &fattr 471 .fattr = &fattr
427 }; 472 };
473 struct rpc_message msg = {
474 .rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
475 .rpc_argp = &arg,
476 .rpc_resp = &res,
477 };
428 int status; 478 int status;
429 479
430 dprintk("NFS call mkdir %s\n", dentry->d_name.name); 480 dprintk("NFS call mkdir %s\n", dentry->d_name.name);
431 nfs_fattr_init(&fattr); 481 nfs_fattr_init(&fattr);
432 status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); 482 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
433 nfs_mark_for_revalidate(dir); 483 nfs_mark_for_revalidate(dir);
434 if (status == 0) 484 if (status == 0)
435 status = nfs_instantiate(dentry, &fhandle, &fattr); 485 status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
445 .name = name->name, 495 .name = name->name,
446 .len = name->len 496 .len = name->len
447 }; 497 };
498 struct rpc_message msg = {
499 .rpc_proc = &nfs_procedures[NFSPROC_RMDIR],
500 .rpc_argp = &arg,
501 };
448 int status; 502 int status;
449 503
450 dprintk("NFS call rmdir %s\n", name->name); 504 dprintk("NFS call rmdir %s\n", name->name);
451 status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0); 505 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
452 nfs_mark_for_revalidate(dir); 506 nfs_mark_for_revalidate(dir);
453 dprintk("NFS reply rmdir: %d\n", status); 507 dprintk("NFS reply rmdir: %d\n", status);
454 return status; 508 return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
470 .fh = NFS_FH(dir), 524 .fh = NFS_FH(dir),
471 .cookie = cookie, 525 .cookie = cookie,
472 .count = count, 526 .count = count,
473 .pages = &page 527 .pages = &page,
474 }; 528 };
475 struct rpc_message msg = { 529 struct rpc_message msg = {
476 .rpc_proc = &nfs_procedures[NFSPROC_READDIR], 530 .rpc_proc = &nfs_procedures[NFSPROC_READDIR],
477 .rpc_argp = &arg, 531 .rpc_argp = &arg,
478 .rpc_resp = NULL, 532 .rpc_cred = cred,
479 .rpc_cred = cred
480 }; 533 };
481 int status; 534 int status;
482 535
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
495 struct nfs_fsstat *stat) 548 struct nfs_fsstat *stat)
496{ 549{
497 struct nfs2_fsstat fsinfo; 550 struct nfs2_fsstat fsinfo;
551 struct rpc_message msg = {
552 .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
553 .rpc_argp = fhandle,
554 .rpc_resp = &fsinfo,
555 };
498 int status; 556 int status;
499 557
500 dprintk("NFS call statfs\n"); 558 dprintk("NFS call statfs\n");
501 nfs_fattr_init(stat->fattr); 559 nfs_fattr_init(stat->fattr);
502 status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); 560 status = rpc_call_sync(server->client, &msg, 0);
503 dprintk("NFS reply statfs: %d\n", status); 561 dprintk("NFS reply statfs: %d\n", status);
504 if (status) 562 if (status)
505 goto out; 563 goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
518 struct nfs_fsinfo *info) 576 struct nfs_fsinfo *info)
519{ 577{
520 struct nfs2_fsstat fsinfo; 578 struct nfs2_fsstat fsinfo;
579 struct rpc_message msg = {
580 .rpc_proc = &nfs_procedures[NFSPROC_STATFS],
581 .rpc_argp = fhandle,
582 .rpc_resp = &fsinfo,
583 };
521 int status; 584 int status;
522 585
523 dprintk("NFS call fsinfo\n"); 586 dprintk("NFS call fsinfo\n");
524 nfs_fattr_init(info->fattr); 587 nfs_fattr_init(info->fattr);
525 status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0); 588 status = rpc_call_sync(server->client, &msg, 0);
526 dprintk("NFS reply fsinfo: %d\n", status); 589 dprintk("NFS reply fsinfo: %d\n", status);
527 if (status) 590 if (status)
528 goto out; 591 goto out;
@@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
550 613
551extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); 614extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
552 615
553static void nfs_read_done(struct rpc_task *task, void *calldata) 616static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
554{ 617{
555 struct nfs_read_data *data = calldata;
556
557 if (task->tk_status >= 0) { 618 if (task->tk_status >= 0) {
558 nfs_refresh_inode(data->inode, data->res.fattr); 619 nfs_refresh_inode(data->inode, data->res.fattr);
559 /* Emulate the eof flag, which isn't normally needed in NFSv2 620 /* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata)
562 if (data->args.offset + data->args.count >= data->res.fattr->size) 623 if (data->args.offset + data->args.count >= data->res.fattr->size)
563 data->res.eof = 1; 624 data->res.eof = 1;
564 } 625 }
565 nfs_readpage_result(task, calldata); 626 return 0;
566} 627}
567 628
568static const struct rpc_call_ops nfs_read_ops = { 629static void nfs_proc_read_setup(struct nfs_read_data *data)
569 .rpc_call_done = nfs_read_done,
570 .rpc_release = nfs_readdata_release,
571};
572
573static void
574nfs_proc_read_setup(struct nfs_read_data *data)
575{ 630{
576 struct rpc_task *task = &data->task;
577 struct inode *inode = data->inode;
578 int flags;
579 struct rpc_message msg = { 631 struct rpc_message msg = {
580 .rpc_proc = &nfs_procedures[NFSPROC_READ], 632 .rpc_proc = &nfs_procedures[NFSPROC_READ],
581 .rpc_argp = &data->args, 633 .rpc_argp = &data->args,
@@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data *data)
583 .rpc_cred = data->cred, 635 .rpc_cred = data->cred,
584 }; 636 };
585 637
586 /* N.B. Do we need to test? Never called for swapfile inode */ 638 rpc_call_setup(&data->task, &msg, 0);
587 flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
588
589 /* Finalize the task. */
590 rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
591 rpc_call_setup(task, &msg, 0);
592} 639}
593 640
594static void nfs_write_done(struct rpc_task *task, void *calldata) 641static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
595{ 642{
596 struct nfs_write_data *data = calldata;
597
598 if (task->tk_status >= 0) 643 if (task->tk_status >= 0)
599 nfs_post_op_update_inode(data->inode, data->res.fattr); 644 nfs_post_op_update_inode(data->inode, data->res.fattr);
600 nfs_writeback_done(task, calldata); 645 return 0;
601} 646}
602 647
603static const struct rpc_call_ops nfs_write_ops = { 648static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
604 .rpc_call_done = nfs_write_done,
605 .rpc_release = nfs_writedata_release,
606};
607
608static void
609nfs_proc_write_setup(struct nfs_write_data *data, int how)
610{ 649{
611 struct rpc_task *task = &data->task;
612 struct inode *inode = data->inode;
613 int flags;
614 struct rpc_message msg = { 650 struct rpc_message msg = {
615 .rpc_proc = &nfs_procedures[NFSPROC_WRITE], 651 .rpc_proc = &nfs_procedures[NFSPROC_WRITE],
616 .rpc_argp = &data->args, 652 .rpc_argp = &data->args,
@@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
621 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ 657 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
622 data->args.stable = NFS_FILE_SYNC; 658 data->args.stable = NFS_FILE_SYNC;
623 659
624 /* Set the initial flags for the task. */
625 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
626
627 /* Finalize the task. */ 660 /* Finalize the task. */
628 rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data); 661 rpc_call_setup(&data->task, &msg, 0);
629 rpc_call_setup(task, &msg, 0);
630} 662}
631 663
632static void 664static void
@@ -672,7 +704,9 @@ struct nfs_rpc_ops nfs_v2_clientops = {
672 .pathconf = nfs_proc_pathconf, 704 .pathconf = nfs_proc_pathconf,
673 .decode_dirent = nfs_decode_dirent, 705 .decode_dirent = nfs_decode_dirent,
674 .read_setup = nfs_proc_read_setup, 706 .read_setup = nfs_proc_read_setup,
707 .read_done = nfs_read_done,
675 .write_setup = nfs_proc_write_setup, 708 .write_setup = nfs_proc_write_setup,
709 .write_done = nfs_write_done,
676 .commit_setup = nfs_proc_commit_setup, 710 .commit_setup = nfs_proc_commit_setup,
677 .file_open = nfs_open, 711 .file_open = nfs_open,
678 .file_release = nfs_release, 712 .file_release = nfs_release,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43fadf8e..3961524fd4ab 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,17 +31,49 @@
31 31
32#include <asm/system.h> 32#include <asm/system.h>
33 33
34#include "iostat.h"
35
34#define NFSDBG_FACILITY NFSDBG_PAGECACHE 36#define NFSDBG_FACILITY NFSDBG_PAGECACHE
35 37
36static int nfs_pagein_one(struct list_head *, struct inode *); 38static int nfs_pagein_one(struct list_head *, struct inode *);
37static void nfs_readpage_result_partial(struct nfs_read_data *, int); 39static const struct rpc_call_ops nfs_read_partial_ops;
38static void nfs_readpage_result_full(struct nfs_read_data *, int); 40static const struct rpc_call_ops nfs_read_full_ops;
39 41
40static kmem_cache_t *nfs_rdata_cachep; 42static kmem_cache_t *nfs_rdata_cachep;
41mempool_t *nfs_rdata_mempool; 43static mempool_t *nfs_rdata_mempool;
42 44
43#define MIN_POOL_READ (32) 45#define MIN_POOL_READ (32)
44 46
47struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
48{
49 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
50
51 if (p) {
52 memset(p, 0, sizeof(*p));
53 INIT_LIST_HEAD(&p->pages);
54 if (pagecount < NFS_PAGEVEC_SIZE)
55 p->pagevec = &p->page_array[0];
56 else {
57 size_t size = ++pagecount * sizeof(struct page *);
58 p->pagevec = kmalloc(size, GFP_NOFS);
59 if (p->pagevec) {
60 memset(p->pagevec, 0, size);
61 } else {
62 mempool_free(p, nfs_rdata_mempool);
63 p = NULL;
64 }
65 }
66 }
67 return p;
68}
69
70void nfs_readdata_free(struct nfs_read_data *p)
71{
72 if (p && (p->pagevec != &p->page_array[0]))
73 kfree(p->pagevec);
74 mempool_free(p, nfs_rdata_mempool);
75}
76
45void nfs_readdata_release(void *data) 77void nfs_readdata_release(void *data)
46{ 78{
47 nfs_readdata_free(data); 79 nfs_readdata_free(data);
@@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
133 } 165 }
134 count -= result; 166 count -= result;
135 rdata->args.pgbase += result; 167 rdata->args.pgbase += result;
168 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
169
136 /* Note: result == 0 should only happen if we're caching 170 /* Note: result == 0 should only happen if we're caching
137 * a write that extends the file and punches a hole. 171 * a write that extends the file and punches a hole.
138 */ 172 */
@@ -196,9 +230,11 @@ static void nfs_readpage_release(struct nfs_page *req)
196 * Set up the NFS read request struct 230 * Set up the NFS read request struct
197 */ 231 */
198static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 232static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
233 const struct rpc_call_ops *call_ops,
199 unsigned int count, unsigned int offset) 234 unsigned int count, unsigned int offset)
200{ 235{
201 struct inode *inode; 236 struct inode *inode;
237 int flags;
202 238
203 data->req = req; 239 data->req = req;
204 data->inode = inode = req->wb_context->dentry->d_inode; 240 data->inode = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
216 data->res.eof = 0; 252 data->res.eof = 0;
217 nfs_fattr_init(&data->fattr); 253 nfs_fattr_init(&data->fattr);
218 254
255 /* Set up the initial task struct. */
256 flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
257 rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
219 NFS_PROTO(inode)->read_setup(data); 258 NFS_PROTO(inode)->read_setup(data);
220 259
221 data->task.tk_cookie = (unsigned long)inode; 260 data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
303 list_del_init(&data->pages); 342 list_del_init(&data->pages);
304 343
305 data->pagevec[0] = page; 344 data->pagevec[0] = page;
306 data->complete = nfs_readpage_result_partial;
307 345
308 if (nbytes > rsize) { 346 if (nbytes > rsize) {
309 nfs_read_rpcsetup(req, data, rsize, offset); 347 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
348 rsize, offset);
310 offset += rsize; 349 offset += rsize;
311 nbytes -= rsize; 350 nbytes -= rsize;
312 } else { 351 } else {
313 nfs_read_rpcsetup(req, data, nbytes, offset); 352 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
353 nbytes, offset);
314 nbytes = 0; 354 nbytes = 0;
315 } 355 }
316 nfs_execute_read(data); 356 nfs_execute_read(data);
@@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
356 } 396 }
357 req = nfs_list_entry(data->pages.next); 397 req = nfs_list_entry(data->pages.next);
358 398
359 data->complete = nfs_readpage_result_full; 399 nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
360 nfs_read_rpcsetup(req, data, count, 0);
361 400
362 nfs_execute_read(data); 401 nfs_execute_read(data);
363 return 0; 402 return 0;
@@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
391/* 430/*
392 * Handle a read reply that fills part of a page. 431 * Handle a read reply that fills part of a page.
393 */ 432 */
394static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) 433static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
395{ 434{
435 struct nfs_read_data *data = calldata;
396 struct nfs_page *req = data->req; 436 struct nfs_page *req = data->req;
397 struct page *page = req->wb_page; 437 struct page *page = req->wb_page;
398 438
399 if (status >= 0) { 439 if (nfs_readpage_result(task, data) != 0)
440 return;
441 if (task->tk_status >= 0) {
400 unsigned int request = data->args.count; 442 unsigned int request = data->args.count;
401 unsigned int result = data->res.count; 443 unsigned int result = data->res.count;
402 444
@@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
415 } 457 }
416} 458}
417 459
460static const struct rpc_call_ops nfs_read_partial_ops = {
461 .rpc_call_done = nfs_readpage_result_partial,
462 .rpc_release = nfs_readdata_release,
463};
464
418/* 465/*
419 * This is the callback from RPC telling us whether a reply was 466 * This is the callback from RPC telling us whether a reply was
420 * received or some error occurred (timeout or socket shutdown). 467 * received or some error occurred (timeout or socket shutdown).
421 */ 468 */
422static void nfs_readpage_result_full(struct nfs_read_data *data, int status) 469static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
423{ 470{
471 struct nfs_read_data *data = calldata;
424 unsigned int count = data->res.count; 472 unsigned int count = data->res.count;
425 473
474 if (nfs_readpage_result(task, data) != 0)
475 return;
426 while (!list_empty(&data->pages)) { 476 while (!list_empty(&data->pages)) {
427 struct nfs_page *req = nfs_list_entry(data->pages.next); 477 struct nfs_page *req = nfs_list_entry(data->pages.next);
428 struct page *page = req->wb_page; 478 struct page *page = req->wb_page;
429 nfs_list_remove_request(req); 479 nfs_list_remove_request(req);
430 480
431 if (status >= 0) { 481 if (task->tk_status >= 0) {
432 if (count < PAGE_CACHE_SIZE) { 482 if (count < PAGE_CACHE_SIZE) {
433 if (count < req->wb_bytes) 483 if (count < req->wb_bytes)
434 memclear_highpage_flush(page, 484 memclear_highpage_flush(page,
@@ -444,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
444 } 494 }
445} 495}
446 496
497static const struct rpc_call_ops nfs_read_full_ops = {
498 .rpc_call_done = nfs_readpage_result_full,
499 .rpc_release = nfs_readdata_release,
500};
501
447/* 502/*
448 * This is the callback from RPC telling us whether a reply was 503 * This is the callback from RPC telling us whether a reply was
449 * received or some error occurred (timeout or socket shutdown). 504 * received or some error occurred (timeout or socket shutdown).
450 */ 505 */
451void nfs_readpage_result(struct rpc_task *task, void *calldata) 506int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
452{ 507{
453 struct nfs_read_data *data = calldata;
454 struct nfs_readargs *argp = &data->args; 508 struct nfs_readargs *argp = &data->args;
455 struct nfs_readres *resp = &data->res; 509 struct nfs_readres *resp = &data->res;
456 int status = task->tk_status; 510 int status;
457 511
458 dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", 512 dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
459 task->tk_pid, status); 513 task->tk_pid, task->tk_status);
514
515 status = NFS_PROTO(data->inode)->read_done(task, data);
516 if (status != 0)
517 return status;
518
519 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
460 520
461 /* Is this a short read? */ 521 /* Is this a short read? */
462 if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { 522 if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
523 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
463 /* Has the server at least made some progress? */ 524 /* Has the server at least made some progress? */
464 if (resp->count != 0) { 525 if (resp->count != 0) {
465 /* Yes, so retry the read at the end of the data */ 526 /* Yes, so retry the read at the end of the data */
@@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
467 argp->pgbase += resp->count; 528 argp->pgbase += resp->count;
468 argp->count -= resp->count; 529 argp->count -= resp->count;
469 rpc_restart_call(task); 530 rpc_restart_call(task);
470 return; 531 return -EAGAIN;
471 } 532 }
472 task->tk_status = -EIO; 533 task->tk_status = -EIO;
473 } 534 }
474 spin_lock(&data->inode->i_lock); 535 spin_lock(&data->inode->i_lock);
475 NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; 536 NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
476 spin_unlock(&data->inode->i_lock); 537 spin_unlock(&data->inode->i_lock);
477 data->complete(data, status); 538 return 0;
478} 539}
479 540
480/* 541/*
@@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, struct page *page)
491 552
492 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", 553 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
493 page, PAGE_CACHE_SIZE, page->index); 554 page, PAGE_CACHE_SIZE, page->index);
555 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
556 nfs_add_stats(inode, NFSIOS_READPAGES, 1);
557
494 /* 558 /*
495 * Try to flush any pending writes to the file.. 559 * Try to flush any pending writes to the file..
496 * 560 *
@@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
570 inode->i_sb->s_id, 634 inode->i_sb->s_id,
571 (long long)NFS_FILEID(inode), 635 (long long)NFS_FILEID(inode),
572 nr_pages); 636 nr_pages);
637 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
573 638
574 if (filp == NULL) { 639 if (filp == NULL) {
575 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 640 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
582 if (!list_empty(&head)) { 647 if (!list_empty(&head)) {
583 int err = nfs_pagein_list(&head, server->rpages); 648 int err = nfs_pagein_list(&head, server->rpages);
584 if (!ret) 649 if (!ret)
650 nfs_add_stats(inode, NFSIOS_READPAGES, err);
585 ret = err; 651 ret = err;
586 } 652 }
587 put_nfs_open_context(desc.ctx); 653 put_nfs_open_context(desc.ctx);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a65c7b53d558..0e28189c2151 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
163 struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode); 163 struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
164 int status = -ENOMEM; 164 int status = -ENOMEM;
165 165
166 data = kmalloc(sizeof(*data), GFP_KERNEL); 166 data = kzalloc(sizeof(*data), GFP_KERNEL);
167 if (!data) 167 if (!data)
168 goto out; 168 goto out;
169 memset(data, 0, sizeof(*data));
170 169
171 data->cred = rpcauth_lookupcred(clnt->cl_auth, 0); 170 data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
172 if (IS_ERR(data->cred)) { 171 if (IS_ERR(data->cred)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9449b6835509..3f5225404c97 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
63#include <linux/smp_lock.h> 63#include <linux/smp_lock.h>
64 64
65#include "delegation.h" 65#include "delegation.h"
66#include "iostat.h"
66 67
67#define NFSDBG_FACILITY NFSDBG_PAGECACHE 68#define NFSDBG_FACILITY NFSDBG_PAGECACHE
68 69
@@ -76,20 +77,21 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
76 struct inode *, 77 struct inode *,
77 struct page *, 78 struct page *,
78 unsigned int, unsigned int); 79 unsigned int, unsigned int);
79static void nfs_writeback_done_partial(struct nfs_write_data *, int);
80static void nfs_writeback_done_full(struct nfs_write_data *, int);
81static int nfs_wait_on_write_congestion(struct address_space *, int); 80static int nfs_wait_on_write_congestion(struct address_space *, int);
82static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); 81static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
83static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, 82static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
84 unsigned int npages, int how); 83 unsigned int npages, int how);
84static const struct rpc_call_ops nfs_write_partial_ops;
85static const struct rpc_call_ops nfs_write_full_ops;
86static const struct rpc_call_ops nfs_commit_ops;
85 87
86static kmem_cache_t *nfs_wdata_cachep; 88static kmem_cache_t *nfs_wdata_cachep;
87mempool_t *nfs_wdata_mempool; 89static mempool_t *nfs_wdata_mempool;
88static mempool_t *nfs_commit_mempool; 90static mempool_t *nfs_commit_mempool;
89 91
90static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); 92static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
91 93
92static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) 94struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
93{ 95{
94 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); 96 struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
95 97
@@ -100,11 +102,39 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
100 p->pagevec = &p->page_array[0]; 102 p->pagevec = &p->page_array[0];
101 else { 103 else {
102 size_t size = ++pagecount * sizeof(struct page *); 104 size_t size = ++pagecount * sizeof(struct page *);
105 p->pagevec = kzalloc(size, GFP_NOFS);
106 if (!p->pagevec) {
107 mempool_free(p, nfs_commit_mempool);
108 p = NULL;
109 }
110 }
111 }
112 return p;
113}
114
115void nfs_commit_free(struct nfs_write_data *p)
116{
117 if (p && (p->pagevec != &p->page_array[0]))
118 kfree(p->pagevec);
119 mempool_free(p, nfs_commit_mempool);
120}
121
122struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
123{
124 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
125
126 if (p) {
127 memset(p, 0, sizeof(*p));
128 INIT_LIST_HEAD(&p->pages);
129 if (pagecount < NFS_PAGEVEC_SIZE)
130 p->pagevec = &p->page_array[0];
131 else {
132 size_t size = ++pagecount * sizeof(struct page *);
103 p->pagevec = kmalloc(size, GFP_NOFS); 133 p->pagevec = kmalloc(size, GFP_NOFS);
104 if (p->pagevec) { 134 if (p->pagevec) {
105 memset(p->pagevec, 0, size); 135 memset(p->pagevec, 0, size);
106 } else { 136 } else {
107 mempool_free(p, nfs_commit_mempool); 137 mempool_free(p, nfs_wdata_mempool);
108 p = NULL; 138 p = NULL;
109 } 139 }
110 } 140 }
@@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
112 return p; 142 return p;
113} 143}
114 144
115static inline void nfs_commit_free(struct nfs_write_data *p) 145void nfs_writedata_free(struct nfs_write_data *p)
116{ 146{
117 if (p && (p->pagevec != &p->page_array[0])) 147 if (p && (p->pagevec != &p->page_array[0]))
118 kfree(p->pagevec); 148 kfree(p->pagevec);
119 mempool_free(p, nfs_commit_mempool); 149 mempool_free(p, nfs_wdata_mempool);
120} 150}
121 151
122void nfs_writedata_release(void *wdata) 152void nfs_writedata_release(void *wdata)
@@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
136 end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); 166 end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
137 if (i_size >= end) 167 if (i_size >= end)
138 return; 168 return;
169 nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
139 i_size_write(inode, end); 170 i_size_write(inode, end);
140} 171}
141 172
@@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
225 wdata->args.pgbase += result; 256 wdata->args.pgbase += result;
226 written += result; 257 written += result;
227 count -= result; 258 count -= result;
259 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
228 } while (count); 260 } while (count);
229 /* Update file length */ 261 /* Update file length */
230 nfs_grow_file(page, offset, written); 262 nfs_grow_file(page, offset, written);
@@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
281 int priority = wb_priority(wbc); 313 int priority = wb_priority(wbc);
282 int err; 314 int err;
283 315
316 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
317 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
318
284 /* 319 /*
285 * Note: We need to ensure that we have a reference to the inode 320 * Note: We need to ensure that we have a reference to the inode
286 * if we are to do asynchronous writes. If not, waiting 321 * if we are to do asynchronous writes. If not, waiting
@@ -345,6 +380,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
345 struct inode *inode = mapping->host; 380 struct inode *inode = mapping->host;
346 int err; 381 int err;
347 382
383 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
384
348 err = generic_writepages(mapping, wbc); 385 err = generic_writepages(mapping, wbc);
349 if (err) 386 if (err)
350 return err; 387 return err;
@@ -356,6 +393,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
356 err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc)); 393 err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
357 if (err < 0) 394 if (err < 0)
358 goto out; 395 goto out;
396 nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
359 wbc->nr_to_write -= err; 397 wbc->nr_to_write -= err;
360 if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) { 398 if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
361 err = nfs_wait_on_requests(inode, 0, 0); 399 err = nfs_wait_on_requests(inode, 0, 0);
@@ -391,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
391 if (nfs_have_delegation(inode, FMODE_WRITE)) 429 if (nfs_have_delegation(inode, FMODE_WRITE))
392 nfsi->change_attr++; 430 nfsi->change_attr++;
393 } 431 }
432 SetPagePrivate(req->wb_page);
394 nfsi->npages++; 433 nfsi->npages++;
395 atomic_inc(&req->wb_count); 434 atomic_inc(&req->wb_count);
396 return 0; 435 return 0;
@@ -407,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
407 BUG_ON (!NFS_WBACK_BUSY(req)); 446 BUG_ON (!NFS_WBACK_BUSY(req));
408 447
409 spin_lock(&nfsi->req_lock); 448 spin_lock(&nfsi->req_lock);
449 ClearPagePrivate(req->wb_page);
410 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 450 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
411 nfsi->npages--; 451 nfsi->npages--;
412 if (!nfsi->npages) { 452 if (!nfsi->npages) {
@@ -499,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
499 * 539 *
500 * Interruptible by signals only if mounted with intr flag. 540 * Interruptible by signals only if mounted with intr flag.
501 */ 541 */
502static int 542static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
503nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
504{ 543{
505 struct nfs_inode *nfsi = NFS_I(inode); 544 struct nfs_inode *nfsi = NFS_I(inode);
506 struct nfs_page *req; 545 struct nfs_page *req;
@@ -513,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
513 else 552 else
514 idx_end = idx_start + npages - 1; 553 idx_end = idx_start + npages - 1;
515 554
516 spin_lock(&nfsi->req_lock);
517 next = idx_start; 555 next = idx_start;
518 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { 556 while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
519 if (req->wb_index > idx_end) 557 if (req->wb_index > idx_end)
@@ -526,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
526 spin_unlock(&nfsi->req_lock); 564 spin_unlock(&nfsi->req_lock);
527 error = nfs_wait_on_request(req); 565 error = nfs_wait_on_request(req);
528 nfs_release_request(req); 566 nfs_release_request(req);
567 spin_lock(&nfsi->req_lock);
529 if (error < 0) 568 if (error < 0)
530 return error; 569 return error;
531 spin_lock(&nfsi->req_lock);
532 res++; 570 res++;
533 } 571 }
534 spin_unlock(&nfsi->req_lock);
535 return res; 572 return res;
536} 573}
537 574
575static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
576{
577 struct nfs_inode *nfsi = NFS_I(inode);
578 int ret;
579
580 spin_lock(&nfsi->req_lock);
581 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
582 spin_unlock(&nfsi->req_lock);
583 return ret;
584}
585
538/* 586/*
539 * nfs_scan_dirty - Scan an inode for dirty requests 587 * nfs_scan_dirty - Scan an inode for dirty requests
540 * @inode: NFS inode to scan 588 * @inode: NFS inode to scan
@@ -586,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
586 } 634 }
587 return res; 635 return res;
588} 636}
637#else
638static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
639{
640 return 0;
641}
589#endif 642#endif
590 643
591static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) 644static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -598,6 +651,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
598 651
599 if (!bdi_write_congested(bdi)) 652 if (!bdi_write_congested(bdi))
600 return 0; 653 return 0;
654
655 nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
656
601 if (intr) { 657 if (intr) {
602 struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); 658 struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
603 sigset_t oldset; 659 sigset_t oldset;
@@ -653,8 +709,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
653 spin_unlock(&nfsi->req_lock); 709 spin_unlock(&nfsi->req_lock);
654 error = nfs_wait_on_request(req); 710 error = nfs_wait_on_request(req);
655 nfs_release_request(req); 711 nfs_release_request(req);
656 if (error < 0) 712 if (error < 0) {
713 if (new)
714 nfs_release_request(new);
657 return ERR_PTR(error); 715 return ERR_PTR(error);
716 }
658 continue; 717 continue;
659 } 718 }
660 spin_unlock(&nfsi->req_lock); 719 spin_unlock(&nfsi->req_lock);
@@ -748,6 +807,8 @@ int nfs_updatepage(struct file *file, struct page *page,
748 struct nfs_page *req; 807 struct nfs_page *req;
749 int status = 0; 808 int status = 0;
750 809
810 nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
811
751 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", 812 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
752 file->f_dentry->d_parent->d_name.name, 813 file->f_dentry->d_parent->d_name.name,
753 file->f_dentry->d_name.name, count, 814 file->f_dentry->d_name.name, count,
@@ -857,10 +918,12 @@ static inline int flush_task_priority(int how)
857 */ 918 */
858static void nfs_write_rpcsetup(struct nfs_page *req, 919static void nfs_write_rpcsetup(struct nfs_page *req,
859 struct nfs_write_data *data, 920 struct nfs_write_data *data,
921 const struct rpc_call_ops *call_ops,
860 unsigned int count, unsigned int offset, 922 unsigned int count, unsigned int offset,
861 int how) 923 int how)
862{ 924{
863 struct inode *inode; 925 struct inode *inode;
926 int flags;
864 927
865 /* Set up the RPC argument and reply structs 928 /* Set up the RPC argument and reply structs
866 * NB: take care not to mess about with data->commit et al. */ 929 * NB: take care not to mess about with data->commit et al. */
@@ -881,6 +944,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
881 data->res.verf = &data->verf; 944 data->res.verf = &data->verf;
882 nfs_fattr_init(&data->fattr); 945 nfs_fattr_init(&data->fattr);
883 946
947 /* Set up the initial task struct. */
948 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
949 rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
884 NFS_PROTO(inode)->write_setup(data, how); 950 NFS_PROTO(inode)->write_setup(data, how);
885 951
886 data->task.tk_priority = flush_task_priority(how); 952 data->task.tk_priority = flush_task_priority(how);
@@ -910,7 +976,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
910 * Generate multiple small requests to write out a single 976 * Generate multiple small requests to write out a single
911 * contiguous dirty area on one page. 977 * contiguous dirty area on one page.
912 */ 978 */
913static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) 979static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
914{ 980{
915 struct nfs_page *req = nfs_list_entry(head->next); 981 struct nfs_page *req = nfs_list_entry(head->next);
916 struct page *page = req->wb_page; 982 struct page *page = req->wb_page;
@@ -944,14 +1010,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
944 list_del_init(&data->pages); 1010 list_del_init(&data->pages);
945 1011
946 data->pagevec[0] = page; 1012 data->pagevec[0] = page;
947 data->complete = nfs_writeback_done_partial;
948 1013
949 if (nbytes > wsize) { 1014 if (nbytes > wsize) {
950 nfs_write_rpcsetup(req, data, wsize, offset, how); 1015 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
1016 wsize, offset, how);
951 offset += wsize; 1017 offset += wsize;
952 nbytes -= wsize; 1018 nbytes -= wsize;
953 } else { 1019 } else {
954 nfs_write_rpcsetup(req, data, nbytes, offset, how); 1020 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
1021 nbytes, offset, how);
955 nbytes = 0; 1022 nbytes = 0;
956 } 1023 }
957 nfs_execute_write(data); 1024 nfs_execute_write(data);
@@ -978,16 +1045,13 @@ out_bad:
978 * This is the case if nfs_updatepage detects a conflicting request 1045 * This is the case if nfs_updatepage detects a conflicting request
979 * that has been written but not committed. 1046 * that has been written but not committed.
980 */ 1047 */
981static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) 1048static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
982{ 1049{
983 struct nfs_page *req; 1050 struct nfs_page *req;
984 struct page **pages; 1051 struct page **pages;
985 struct nfs_write_data *data; 1052 struct nfs_write_data *data;
986 unsigned int count; 1053 unsigned int count;
987 1054
988 if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
989 return nfs_flush_multi(head, inode, how);
990
991 data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); 1055 data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
992 if (!data) 1056 if (!data)
993 goto out_bad; 1057 goto out_bad;
@@ -1005,9 +1069,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
1005 } 1069 }
1006 req = nfs_list_entry(data->pages.next); 1070 req = nfs_list_entry(data->pages.next);
1007 1071
1008 data->complete = nfs_writeback_done_full;
1009 /* Set up the argument struct */ 1072 /* Set up the argument struct */
1010 nfs_write_rpcsetup(req, data, count, 0, how); 1073 nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
1011 1074
1012 nfs_execute_write(data); 1075 nfs_execute_write(data);
1013 return 0; 1076 return 0;
@@ -1021,24 +1084,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
1021 return -ENOMEM; 1084 return -ENOMEM;
1022} 1085}
1023 1086
1024static int 1087static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
1025nfs_flush_list(struct list_head *head, int wpages, int how)
1026{ 1088{
1027 LIST_HEAD(one_request); 1089 LIST_HEAD(one_request);
1028 struct nfs_page *req; 1090 int (*flush_one)(struct inode *, struct list_head *, int);
1029 int error = 0; 1091 struct nfs_page *req;
1030 unsigned int pages = 0; 1092 int wpages = NFS_SERVER(inode)->wpages;
1093 int wsize = NFS_SERVER(inode)->wsize;
1094 int error;
1031 1095
1032 while (!list_empty(head)) { 1096 flush_one = nfs_flush_one;
1033 pages += nfs_coalesce_requests(head, &one_request, wpages); 1097 if (wsize < PAGE_CACHE_SIZE)
1098 flush_one = nfs_flush_multi;
1099 /* For single writes, FLUSH_STABLE is more efficient */
1100 if (npages <= wpages && npages == NFS_I(inode)->npages
1101 && nfs_list_entry(head->next)->wb_bytes <= wsize)
1102 how |= FLUSH_STABLE;
1103
1104 do {
1105 nfs_coalesce_requests(head, &one_request, wpages);
1034 req = nfs_list_entry(one_request.next); 1106 req = nfs_list_entry(one_request.next);
1035 error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how); 1107 error = flush_one(inode, &one_request, how);
1036 if (error < 0) 1108 if (error < 0)
1037 break; 1109 goto out_err;
1038 } 1110 } while (!list_empty(head));
1039 if (error >= 0) 1111 return 0;
1040 return pages; 1112out_err:
1041
1042 while (!list_empty(head)) { 1113 while (!list_empty(head)) {
1043 req = nfs_list_entry(head->next); 1114 req = nfs_list_entry(head->next);
1044 nfs_list_remove_request(req); 1115 nfs_list_remove_request(req);
@@ -1051,8 +1122,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
1051/* 1122/*
1052 * Handle a write reply that flushed part of a page. 1123 * Handle a write reply that flushed part of a page.
1053 */ 1124 */
1054static void nfs_writeback_done_partial(struct nfs_write_data *data, int status) 1125static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1055{ 1126{
1127 struct nfs_write_data *data = calldata;
1056 struct nfs_page *req = data->req; 1128 struct nfs_page *req = data->req;
1057 struct page *page = req->wb_page; 1129 struct page *page = req->wb_page;
1058 1130
@@ -1062,11 +1134,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
1062 req->wb_bytes, 1134 req->wb_bytes,
1063 (long long)req_offset(req)); 1135 (long long)req_offset(req));
1064 1136
1065 if (status < 0) { 1137 if (nfs_writeback_done(task, data) != 0)
1138 return;
1139
1140 if (task->tk_status < 0) {
1066 ClearPageUptodate(page); 1141 ClearPageUptodate(page);
1067 SetPageError(page); 1142 SetPageError(page);
1068 req->wb_context->error = status; 1143 req->wb_context->error = task->tk_status;
1069 dprintk(", error = %d\n", status); 1144 dprintk(", error = %d\n", task->tk_status);
1070 } else { 1145 } else {
1071#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1146#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1072 if (data->verf.committed < NFS_FILE_SYNC) { 1147 if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1087,6 +1162,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
1087 nfs_writepage_release(req); 1162 nfs_writepage_release(req);
1088} 1163}
1089 1164
1165static const struct rpc_call_ops nfs_write_partial_ops = {
1166 .rpc_call_done = nfs_writeback_done_partial,
1167 .rpc_release = nfs_writedata_release,
1168};
1169
1090/* 1170/*
1091 * Handle a write reply that flushes a whole page. 1171 * Handle a write reply that flushes a whole page.
1092 * 1172 *
@@ -1094,11 +1174,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
1094 * writebacks since the page->count is kept > 1 for as long 1174 * writebacks since the page->count is kept > 1 for as long
1095 * as the page has a write request pending. 1175 * as the page has a write request pending.
1096 */ 1176 */
1097static void nfs_writeback_done_full(struct nfs_write_data *data, int status) 1177static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
1098{ 1178{
1179 struct nfs_write_data *data = calldata;
1099 struct nfs_page *req; 1180 struct nfs_page *req;
1100 struct page *page; 1181 struct page *page;
1101 1182
1183 if (nfs_writeback_done(task, data) != 0)
1184 return;
1185
1102 /* Update attributes as result of writeback. */ 1186 /* Update attributes as result of writeback. */
1103 while (!list_empty(&data->pages)) { 1187 while (!list_empty(&data->pages)) {
1104 req = nfs_list_entry(data->pages.next); 1188 req = nfs_list_entry(data->pages.next);
@@ -1111,13 +1195,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
1111 req->wb_bytes, 1195 req->wb_bytes,
1112 (long long)req_offset(req)); 1196 (long long)req_offset(req));
1113 1197
1114 if (status < 0) { 1198 if (task->tk_status < 0) {
1115 ClearPageUptodate(page); 1199 ClearPageUptodate(page);
1116 SetPageError(page); 1200 SetPageError(page);
1117 req->wb_context->error = status; 1201 req->wb_context->error = task->tk_status;
1118 end_page_writeback(page); 1202 end_page_writeback(page);
1119 nfs_inode_remove_request(req); 1203 nfs_inode_remove_request(req);
1120 dprintk(", error = %d\n", status); 1204 dprintk(", error = %d\n", task->tk_status);
1121 goto next; 1205 goto next;
1122 } 1206 }
1123 end_page_writeback(page); 1207 end_page_writeback(page);
@@ -1139,18 +1223,30 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
1139 } 1223 }
1140} 1224}
1141 1225
1226static const struct rpc_call_ops nfs_write_full_ops = {
1227 .rpc_call_done = nfs_writeback_done_full,
1228 .rpc_release = nfs_writedata_release,
1229};
1230
1231
1142/* 1232/*
1143 * This function is called when the WRITE call is complete. 1233 * This function is called when the WRITE call is complete.
1144 */ 1234 */
1145void nfs_writeback_done(struct rpc_task *task, void *calldata) 1235int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1146{ 1236{
1147 struct nfs_write_data *data = calldata;
1148 struct nfs_writeargs *argp = &data->args; 1237 struct nfs_writeargs *argp = &data->args;
1149 struct nfs_writeres *resp = &data->res; 1238 struct nfs_writeres *resp = &data->res;
1239 int status;
1150 1240
1151 dprintk("NFS: %4d nfs_writeback_done (status %d)\n", 1241 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1152 task->tk_pid, task->tk_status); 1242 task->tk_pid, task->tk_status);
1153 1243
1244 /* Call the NFS version-specific code */
1245 status = NFS_PROTO(data->inode)->write_done(task, data);
1246 if (status != 0)
1247 return status;
1248 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1249
1154#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1250#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1155 if (resp->verf->committed < argp->stable && task->tk_status >= 0) { 1251 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
1156 /* We tried a write call, but the server did not 1252 /* We tried a write call, but the server did not
@@ -1176,6 +1272,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
1176 if (task->tk_status >= 0 && resp->count < argp->count) { 1272 if (task->tk_status >= 0 && resp->count < argp->count) {
1177 static unsigned long complain; 1273 static unsigned long complain;
1178 1274
1275 nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
1276
1179 /* Has the server at least made some progress? */ 1277 /* Has the server at least made some progress? */
1180 if (resp->count != 0) { 1278 if (resp->count != 0) {
1181 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1279 /* Was this an NFSv2 write or an NFSv3 stable write? */
@@ -1191,7 +1289,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
1191 argp->stable = NFS_FILE_SYNC; 1289 argp->stable = NFS_FILE_SYNC;
1192 } 1290 }
1193 rpc_restart_call(task); 1291 rpc_restart_call(task);
1194 return; 1292 return -EAGAIN;
1195 } 1293 }
1196 if (time_before(complain, jiffies)) { 1294 if (time_before(complain, jiffies)) {
1197 printk(KERN_WARNING 1295 printk(KERN_WARNING
@@ -1202,11 +1300,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
1202 /* Can't do anything about it except throw an error. */ 1300 /* Can't do anything about it except throw an error. */
1203 task->tk_status = -EIO; 1301 task->tk_status = -EIO;
1204 } 1302 }
1205 1303 return 0;
1206 /*
1207 * Process the nfs_page list
1208 */
1209 data->complete(data, task->tk_status);
1210} 1304}
1211 1305
1212 1306
@@ -1220,10 +1314,12 @@ void nfs_commit_release(void *wdata)
1220 * Set up the argument/result storage required for the RPC call. 1314 * Set up the argument/result storage required for the RPC call.
1221 */ 1315 */
1222static void nfs_commit_rpcsetup(struct list_head *head, 1316static void nfs_commit_rpcsetup(struct list_head *head,
1223 struct nfs_write_data *data, int how) 1317 struct nfs_write_data *data,
1318 int how)
1224{ 1319{
1225 struct nfs_page *first; 1320 struct nfs_page *first;
1226 struct inode *inode; 1321 struct inode *inode;
1322 int flags;
1227 1323
1228 /* Set up the RPC argument and reply structs 1324 /* Set up the RPC argument and reply structs
1229 * NB: take care not to mess about with data->commit et al. */ 1325 * NB: take care not to mess about with data->commit et al. */
@@ -1243,7 +1339,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
1243 data->res.fattr = &data->fattr; 1339 data->res.fattr = &data->fattr;
1244 data->res.verf = &data->verf; 1340 data->res.verf = &data->verf;
1245 nfs_fattr_init(&data->fattr); 1341 nfs_fattr_init(&data->fattr);
1246 1342
1343 /* Set up the initial task struct. */
1344 flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
1345 rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
1247 NFS_PROTO(inode)->commit_setup(data, how); 1346 NFS_PROTO(inode)->commit_setup(data, how);
1248 1347
1249 data->task.tk_priority = flush_task_priority(how); 1348 data->task.tk_priority = flush_task_priority(how);
@@ -1284,7 +1383,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1284/* 1383/*
1285 * COMMIT call returned 1384 * COMMIT call returned
1286 */ 1385 */
1287void nfs_commit_done(struct rpc_task *task, void *calldata) 1386static void nfs_commit_done(struct rpc_task *task, void *calldata)
1288{ 1387{
1289 struct nfs_write_data *data = calldata; 1388 struct nfs_write_data *data = calldata;
1290 struct nfs_page *req; 1389 struct nfs_page *req;
@@ -1293,6 +1392,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
1293 dprintk("NFS: %4d nfs_commit_done (status %d)\n", 1392 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1294 task->tk_pid, task->tk_status); 1393 task->tk_pid, task->tk_status);
1295 1394
1395 /* Call the NFS version-specific code */
1396 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
1397 return;
1398
1296 while (!list_empty(&data->pages)) { 1399 while (!list_empty(&data->pages)) {
1297 req = nfs_list_entry(data->pages.next); 1400 req = nfs_list_entry(data->pages.next);
1298 nfs_list_remove_request(req); 1401 nfs_list_remove_request(req);
@@ -1326,6 +1429,16 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
1326 } 1429 }
1327 sub_page_state(nr_unstable,res); 1430 sub_page_state(nr_unstable,res);
1328} 1431}
1432
1433static const struct rpc_call_ops nfs_commit_ops = {
1434 .rpc_call_done = nfs_commit_done,
1435 .rpc_release = nfs_commit_release,
1436};
1437#else
1438static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1439{
1440 return 0;
1441}
1329#endif 1442#endif
1330 1443
1331static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, 1444static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1333,24 +1446,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
1333{ 1446{
1334 struct nfs_inode *nfsi = NFS_I(inode); 1447 struct nfs_inode *nfsi = NFS_I(inode);
1335 LIST_HEAD(head); 1448 LIST_HEAD(head);
1336 int res, 1449 int res;
1337 error = 0;
1338 1450
1339 spin_lock(&nfsi->req_lock); 1451 spin_lock(&nfsi->req_lock);
1340 res = nfs_scan_dirty(inode, &head, idx_start, npages); 1452 res = nfs_scan_dirty(inode, &head, idx_start, npages);
1341 spin_unlock(&nfsi->req_lock); 1453 spin_unlock(&nfsi->req_lock);
1342 if (res) { 1454 if (res) {
1343 struct nfs_server *server = NFS_SERVER(inode); 1455 int error = nfs_flush_list(inode, &head, res, how);
1344 1456 if (error < 0)
1345 /* For single writes, FLUSH_STABLE is more efficient */ 1457 return error;
1346 if (res == nfsi->npages && nfsi->npages <= server->wpages) {
1347 if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
1348 how |= FLUSH_STABLE;
1349 }
1350 error = nfs_flush_list(&head, server->wpages, how);
1351 } 1458 }
1352 if (error < 0)
1353 return error;
1354 return res; 1459 return res;
1355} 1460}
1356 1461
@@ -1359,14 +1464,13 @@ int nfs_commit_inode(struct inode *inode, int how)
1359{ 1464{
1360 struct nfs_inode *nfsi = NFS_I(inode); 1465 struct nfs_inode *nfsi = NFS_I(inode);
1361 LIST_HEAD(head); 1466 LIST_HEAD(head);
1362 int res, 1467 int res;
1363 error = 0;
1364 1468
1365 spin_lock(&nfsi->req_lock); 1469 spin_lock(&nfsi->req_lock);
1366 res = nfs_scan_commit(inode, &head, 0, 0); 1470 res = nfs_scan_commit(inode, &head, 0, 0);
1367 spin_unlock(&nfsi->req_lock); 1471 spin_unlock(&nfsi->req_lock);
1368 if (res) { 1472 if (res) {
1369 error = nfs_commit_list(inode, &head, how); 1473 int error = nfs_commit_list(inode, &head, how);
1370 if (error < 0) 1474 if (error < 0)
1371 return error; 1475 return error;
1372 } 1476 }
@@ -1374,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
1374} 1478}
1375#endif 1479#endif
1376 1480
1377int nfs_sync_inode(struct inode *inode, unsigned long idx_start, 1481int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
1378 unsigned int npages, int how) 1482 unsigned int npages, int how)
1379{ 1483{
1484 struct nfs_inode *nfsi = NFS_I(inode);
1485 LIST_HEAD(head);
1380 int nocommit = how & FLUSH_NOCOMMIT; 1486 int nocommit = how & FLUSH_NOCOMMIT;
1381 int wait = how & FLUSH_WAIT; 1487 int pages, ret;
1382 int error;
1383
1384 how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
1385 1488
1489 how &= ~FLUSH_NOCOMMIT;
1490 spin_lock(&nfsi->req_lock);
1386 do { 1491 do {
1387 if (wait) { 1492 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1388 error = nfs_wait_on_requests(inode, idx_start, npages); 1493 if (ret != 0)
1389 if (error != 0)
1390 continue;
1391 }
1392 error = nfs_flush_inode(inode, idx_start, npages, how);
1393 if (error != 0)
1394 continue; 1494 continue;
1395 if (!nocommit) 1495 pages = nfs_scan_dirty(inode, &head, idx_start, npages);
1396 error = nfs_commit_inode(inode, how); 1496 if (pages != 0) {
1397 } while (error > 0); 1497 spin_unlock(&nfsi->req_lock);
1398 return error; 1498 ret = nfs_flush_list(inode, &head, pages, how);
1499 spin_lock(&nfsi->req_lock);
1500 continue;
1501 }
1502 if (nocommit)
1503 break;
1504 pages = nfs_scan_commit(inode, &head, 0, 0);
1505 if (pages == 0)
1506 break;
1507 spin_unlock(&nfsi->req_lock);
1508 ret = nfs_commit_list(inode, &head, how);
1509 spin_lock(&nfsi->req_lock);
1510 } while (ret >= 0);
1511 spin_unlock(&nfsi->req_lock);
1512 return ret;
1399} 1513}
1400 1514
1401int nfs_init_writepagecache(void) 1515int nfs_init_writepagecache(void)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 8d3d23c8a4d2..c872bd07fc10 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -326,6 +326,8 @@ out:
326 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 326 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
327 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 327 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
328 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ 328 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
329 .p_statidx = NFSPROC4_CB_##call, \
330 .p_name = #proc, \
329} 331}
330 332
331static struct rpc_procinfo nfs4_cb_procedures[] = { 333static struct rpc_procinfo nfs4_cb_procedures[] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1143cfb64549..f6ab762bea99 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2639 struct nfs4_stateid *lock_stp; 2639 struct nfs4_stateid *lock_stp;
2640 struct file *filp; 2640 struct file *filp;
2641 struct file_lock file_lock; 2641 struct file_lock file_lock;
2642 struct file_lock *conflock; 2642 struct file_lock conflock;
2643 int status = 0; 2643 int status = 0;
2644 unsigned int strhashval; 2644 unsigned int strhashval;
2645 2645
@@ -2775,11 +2775,11 @@ conflicting_lock:
2775 /* XXX There is a race here. Future patch needed to provide 2775 /* XXX There is a race here. Future patch needed to provide
2776 * an atomic posix_lock_and_test_file 2776 * an atomic posix_lock_and_test_file
2777 */ 2777 */
2778 if (!(conflock = posix_test_lock(filp, &file_lock))) { 2778 if (!posix_test_lock(filp, &file_lock, &conflock)) {
2779 status = nfserr_serverfault; 2779 status = nfserr_serverfault;
2780 goto out; 2780 goto out;
2781 } 2781 }
2782 nfs4_set_lock_denied(conflock, &lock->lk_denied); 2782 nfs4_set_lock_denied(&conflock, &lock->lk_denied);
2783out: 2783out:
2784 if (status && lock->lk_is_new && lock_sop) 2784 if (status && lock->lk_is_new && lock_sop)
2785 release_stateowner(lock_sop); 2785 release_stateowner(lock_sop);
@@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2800 struct inode *inode; 2800 struct inode *inode;
2801 struct file file; 2801 struct file file;
2802 struct file_lock file_lock; 2802 struct file_lock file_lock;
2803 struct file_lock *conflicting_lock; 2803 struct file_lock conflock;
2804 int status; 2804 int status;
2805 2805
2806 if (nfs4_in_grace()) 2806 if (nfs4_in_grace())
@@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2864 file.f_dentry = current_fh->fh_dentry; 2864 file.f_dentry = current_fh->fh_dentry;
2865 2865
2866 status = nfs_ok; 2866 status = nfs_ok;
2867 conflicting_lock = posix_test_lock(&file, &file_lock); 2867 if (posix_test_lock(&file, &file_lock, &conflock)) {
2868 if (conflicting_lock) {
2869 status = nfserr_denied; 2868 status = nfserr_denied;
2870 nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied); 2869 nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
2871 } 2870 }
2872out: 2871out:
2873 nfs4_unlock_state(); 2872 nfs4_unlock_state();
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 20feb7568deb..8f1f49ceebec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -104,6 +104,7 @@ enum pid_directory_inos {
104 PROC_TGID_MAPS, 104 PROC_TGID_MAPS,
105 PROC_TGID_NUMA_MAPS, 105 PROC_TGID_NUMA_MAPS,
106 PROC_TGID_MOUNTS, 106 PROC_TGID_MOUNTS,
107 PROC_TGID_MOUNTSTATS,
107 PROC_TGID_WCHAN, 108 PROC_TGID_WCHAN,
108#ifdef CONFIG_MMU 109#ifdef CONFIG_MMU
109 PROC_TGID_SMAPS, 110 PROC_TGID_SMAPS,
@@ -144,6 +145,7 @@ enum pid_directory_inos {
144 PROC_TID_MAPS, 145 PROC_TID_MAPS,
145 PROC_TID_NUMA_MAPS, 146 PROC_TID_NUMA_MAPS,
146 PROC_TID_MOUNTS, 147 PROC_TID_MOUNTS,
148 PROC_TID_MOUNTSTATS,
147 PROC_TID_WCHAN, 149 PROC_TID_WCHAN,
148#ifdef CONFIG_MMU 150#ifdef CONFIG_MMU
149 PROC_TID_SMAPS, 151 PROC_TID_SMAPS,
@@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[] = {
201 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 203 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
202 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 204 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
203 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 205 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
206 E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
204#ifdef CONFIG_MMU 207#ifdef CONFIG_MMU
205 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), 208 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO),
206#endif 209#endif
@@ -732,6 +735,38 @@ static struct file_operations proc_mounts_operations = {
732 .poll = mounts_poll, 735 .poll = mounts_poll,
733}; 736};
734 737
738extern struct seq_operations mountstats_op;
739static int mountstats_open(struct inode *inode, struct file *file)
740{
741 struct task_struct *task = proc_task(inode);
742 int ret = seq_open(file, &mountstats_op);
743
744 if (!ret) {
745 struct seq_file *m = file->private_data;
746 struct namespace *namespace;
747 task_lock(task);
748 namespace = task->namespace;
749 if (namespace)
750 get_namespace(namespace);
751 task_unlock(task);
752
753 if (namespace)
754 m->private = namespace;
755 else {
756 seq_release(inode, file);
757 ret = -EINVAL;
758 }
759 }
760 return ret;
761}
762
763static struct file_operations proc_mountstats_operations = {
764 .open = mountstats_open,
765 .read = seq_read,
766 .llseek = seq_lseek,
767 .release = mounts_release,
768};
769
735#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 770#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
736 771
737static ssize_t proc_info_read(struct file * file, char __user * buf, 772static ssize_t proc_info_read(struct file * file, char __user * buf,
@@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1730 inode->i_fop = &proc_smaps_operations; 1765 inode->i_fop = &proc_smaps_operations;
1731 break; 1766 break;
1732#endif 1767#endif
1768 case PROC_TID_MOUNTSTATS:
1769 case PROC_TGID_MOUNTSTATS:
1770 inode->i_fop = &proc_mountstats_operations;
1771 break;
1733#ifdef CONFIG_SECURITY 1772#ifdef CONFIG_SECURITY
1734 case PROC_TID_ATTR: 1773 case PROC_TID_ATTR:
1735 inode->i_nlink = 2; 1774 inode->i_nlink = 2;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 21e8cf795c38..5adf32b90f36 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -678,7 +678,6 @@ extern spinlock_t files_lock;
678#define FL_POSIX 1 678#define FL_POSIX 1
679#define FL_FLOCK 2 679#define FL_FLOCK 2
680#define FL_ACCESS 8 /* not trying to lock, just looking */ 680#define FL_ACCESS 8 /* not trying to lock, just looking */
681#define FL_LOCKD 16 /* lock held by rpc.lockd */
682#define FL_LEASE 32 /* lease held on this file */ 681#define FL_LEASE 32 /* lease held on this file */
683#define FL_SLEEP 128 /* A blocking lock */ 682#define FL_SLEEP 128 /* A blocking lock */
684 683
@@ -742,8 +741,6 @@ struct file_lock {
742#define OFFT_OFFSET_MAX INT_LIMIT(off_t) 741#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
743#endif 742#endif
744 743
745extern struct list_head file_lock_list;
746
747#include <linux/fcntl.h> 744#include <linux/fcntl.h>
748 745
749extern int fcntl_getlk(struct file *, struct flock __user *); 746extern int fcntl_getlk(struct file *, struct flock __user *);
@@ -765,10 +762,9 @@ extern void locks_init_lock(struct file_lock *);
765extern void locks_copy_lock(struct file_lock *, struct file_lock *); 762extern void locks_copy_lock(struct file_lock *, struct file_lock *);
766extern void locks_remove_posix(struct file *, fl_owner_t); 763extern void locks_remove_posix(struct file *, fl_owner_t);
767extern void locks_remove_flock(struct file *); 764extern void locks_remove_flock(struct file *);
768extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); 765extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *);
769extern int posix_lock_file(struct file *, struct file_lock *); 766extern int posix_lock_file(struct file *, struct file_lock *);
770extern int posix_lock_file_wait(struct file *, struct file_lock *); 767extern int posix_lock_file_wait(struct file *, struct file_lock *);
771extern void posix_block_lock(struct file_lock *, struct file_lock *);
772extern int posix_unblock_lock(struct file *, struct file_lock *); 768extern int posix_unblock_lock(struct file *, struct file_lock *);
773extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); 769extern int posix_locks_deadlock(struct file_lock *, struct file_lock *);
774extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); 770extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
@@ -1097,6 +1093,7 @@ struct super_operations {
1097 void (*umount_begin) (struct super_block *); 1093 void (*umount_begin) (struct super_block *);
1098 1094
1099 int (*show_options)(struct seq_file *, struct vfsmount *); 1095 int (*show_options)(struct seq_file *, struct vfsmount *);
1096 int (*show_stats)(struct seq_file *, struct vfsmount *);
1100 1097
1101 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 1098 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
1102 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1099 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ef21ed296039..995f89dc8c04 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -14,6 +14,7 @@
14#include <linux/config.h> 14#include <linux/config.h>
15#include <linux/in.h> 15#include <linux/in.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/kref.h>
17#include <linux/utsname.h> 18#include <linux/utsname.h>
18#include <linux/nfsd/nfsfh.h> 19#include <linux/nfsd/nfsfh.h>
19#include <linux/lockd/bind.h> 20#include <linux/lockd/bind.h>
@@ -58,6 +59,8 @@ struct nlm_host {
58 unsigned long h_expires; /* eligible for GC */ 59 unsigned long h_expires; /* eligible for GC */
59 struct list_head h_lockowners; /* Lockowners for the client */ 60 struct list_head h_lockowners; /* Lockowners for the client */
60 spinlock_t h_lock; 61 spinlock_t h_lock;
62 struct list_head h_granted; /* Locks in GRANTED state */
63 struct list_head h_reclaim; /* Locks in RECLAIM state */
61}; 64};
62 65
63/* 66/*
@@ -83,9 +86,9 @@ struct nlm_rqst {
83 struct nlm_host * a_host; /* host handle */ 86 struct nlm_host * a_host; /* host handle */
84 struct nlm_args a_args; /* arguments */ 87 struct nlm_args a_args; /* arguments */
85 struct nlm_res a_res; /* result */ 88 struct nlm_res a_res; /* result */
86 struct nlm_wait * a_block; 89 struct nlm_block * a_block;
87 unsigned int a_retries; /* Retry count */ 90 unsigned int a_retries; /* Retry count */
88 char a_owner[NLMCLNT_OHSIZE]; 91 u8 a_owner[NLMCLNT_OHSIZE];
89}; 92};
90 93
91/* 94/*
@@ -110,16 +113,16 @@ struct nlm_file {
110 */ 113 */
111#define NLM_NEVER (~(unsigned long) 0) 114#define NLM_NEVER (~(unsigned long) 0)
112struct nlm_block { 115struct nlm_block {
116 struct kref b_count; /* Reference count */
113 struct nlm_block * b_next; /* linked list (all blocks) */ 117 struct nlm_block * b_next; /* linked list (all blocks) */
114 struct nlm_block * b_fnext; /* linked list (per file) */ 118 struct nlm_block * b_fnext; /* linked list (per file) */
115 struct nlm_rqst b_call; /* RPC args & callback info */ 119 struct nlm_rqst * b_call; /* RPC args & callback info */
116 struct svc_serv * b_daemon; /* NLM service */ 120 struct svc_serv * b_daemon; /* NLM service */
117 struct nlm_host * b_host; /* host handle for RPC clnt */ 121 struct nlm_host * b_host; /* host handle for RPC clnt */
118 unsigned long b_when; /* next re-xmit */ 122 unsigned long b_when; /* next re-xmit */
119 unsigned int b_id; /* block id */ 123 unsigned int b_id; /* block id */
120 unsigned char b_queued; /* re-queued */ 124 unsigned char b_queued; /* re-queued */
121 unsigned char b_granted; /* VFS granted lock */ 125 unsigned char b_granted; /* VFS granted lock */
122 unsigned char b_incall; /* doing callback */
123 unsigned char b_done; /* callback complete */ 126 unsigned char b_done; /* callback complete */
124 struct nlm_file * b_file; /* file in question */ 127 struct nlm_file * b_file; /* file in question */
125}; 128};
@@ -145,15 +148,16 @@ extern unsigned long nlmsvc_timeout;
145/* 148/*
146 * Lockd client functions 149 * Lockd client functions
147 */ 150 */
148struct nlm_rqst * nlmclnt_alloc_call(void); 151struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
149int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl); 152void nlm_release_call(struct nlm_rqst *);
150void nlmclnt_finish_block(struct nlm_rqst *req); 153int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
151long nlmclnt_block(struct nlm_rqst *req, long timeout); 154int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
155struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
156void nlmclnt_finish_block(struct nlm_wait *block);
157int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
152u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); 158u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *);
153void nlmclnt_recovery(struct nlm_host *, u32); 159void nlmclnt_recovery(struct nlm_host *, u32);
154int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); 160int nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
155int nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *);
156void nlmclnt_freegrantargs(struct nlm_rqst *);
157 161
158/* 162/*
159 * Host cache 163 * Host cache
@@ -172,7 +176,6 @@ extern struct nlm_host *nlm_find_client(void);
172/* 176/*
173 * Server-side lock handling 177 * Server-side lock handling
174 */ 178 */
175int nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
176u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, 179u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
177 struct nlm_lock *, int, struct nlm_cookie *); 180 struct nlm_lock *, int, struct nlm_cookie *);
178u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); 181u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
@@ -180,7 +183,7 @@ u32 nlmsvc_testlock(struct nlm_file *, struct nlm_lock *,
180 struct nlm_lock *); 183 struct nlm_lock *);
181u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); 184u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
182unsigned long nlmsvc_retry_blocked(void); 185unsigned long nlmsvc_retry_blocked(void);
183int nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, 186void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
184 int action); 187 int action);
185void nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32); 188void nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32);
186 189
diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h
index 5d8aa325f140..c75a424ebe4c 100644
--- a/include/linux/lockd/share.h
+++ b/include/linux/lockd/share.h
@@ -25,6 +25,6 @@ u32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *,
25 struct nlm_args *); 25 struct nlm_args *);
26u32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *, 26u32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *,
27 struct nlm_args *); 27 struct nlm_args *);
28int nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int); 28void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int);
29 29
30#endif /* LINUX_LOCKD_SHARE_H */ 30#endif /* LINUX_LOCKD_SHARE_H */
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d7a5cc4cfa97..bb0a0f1caa91 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -28,6 +28,7 @@ struct nlm_lock {
28 int len; /* length of "caller" */ 28 int len; /* length of "caller" */
29 struct nfs_fh fh; 29 struct nfs_fh fh;
30 struct xdr_netobj oh; 30 struct xdr_netobj oh;
31 u32 svid;
31 struct file_lock fl; 32 struct file_lock fl;
32}; 33};
33 34
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b4dc6e2e10c9..cbebd7d1b9e8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -56,9 +56,7 @@
56 * When flushing a cluster of dirty pages, there can be different 56 * When flushing a cluster of dirty pages, there can be different
57 * strategies: 57 * strategies:
58 */ 58 */
59#define FLUSH_AGING 0 /* only flush old buffers */
60#define FLUSH_SYNC 1 /* file being synced, or contention */ 59#define FLUSH_SYNC 1 /* file being synced, or contention */
61#define FLUSH_WAIT 2 /* wait for completion */
62#define FLUSH_STABLE 4 /* commit to stable storage */ 60#define FLUSH_STABLE 4 /* commit to stable storage */
63#define FLUSH_LOWPRI 8 /* low priority background flush */ 61#define FLUSH_LOWPRI 8 /* low priority background flush */
64#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ 62#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
@@ -78,6 +76,7 @@ struct nfs_access_entry {
78struct nfs4_state; 76struct nfs4_state;
79struct nfs_open_context { 77struct nfs_open_context {
80 atomic_t count; 78 atomic_t count;
79 struct vfsmount *vfsmnt;
81 struct dentry *dentry; 80 struct dentry *dentry;
82 struct rpc_cred *cred; 81 struct rpc_cred *cred;
83 struct nfs4_state *state; 82 struct nfs4_state *state;
@@ -118,8 +117,7 @@ struct nfs_inode {
118 unsigned long cache_validity; /* bit mask */ 117 unsigned long cache_validity; /* bit mask */
119 118
120 /* 119 /*
121 * read_cache_jiffies is when we started read-caching this inode, 120 * read_cache_jiffies is when we started read-caching this inode.
122 * and read_cache_mtime is the mtime of the inode at that time.
123 * attrtimeo is for how long the cached information is assumed 121 * attrtimeo is for how long the cached information is assumed
124 * to be valid. A successful attribute revalidation doubles 122 * to be valid. A successful attribute revalidation doubles
125 * attrtimeo (up to acregmax/acdirmax), a failure resets it to 123 * attrtimeo (up to acregmax/acdirmax), a failure resets it to
@@ -128,11 +126,6 @@ struct nfs_inode {
128 * We need to revalidate the cached attrs for this inode if 126 * We need to revalidate the cached attrs for this inode if
129 * 127 *
130 * jiffies - read_cache_jiffies > attrtimeo 128 * jiffies - read_cache_jiffies > attrtimeo
131 *
132 * and invalidate any cached data/flush out any dirty pages if
133 * we find that
134 *
135 * mtime != read_cache_mtime
136 */ 129 */
137 unsigned long read_cache_jiffies; 130 unsigned long read_cache_jiffies;
138 unsigned long attrtimeo; 131 unsigned long attrtimeo;
@@ -311,12 +304,9 @@ extern void nfs_begin_attr_update(struct inode *);
311extern void nfs_end_attr_update(struct inode *); 304extern void nfs_end_attr_update(struct inode *);
312extern void nfs_begin_data_update(struct inode *); 305extern void nfs_begin_data_update(struct inode *);
313extern void nfs_end_data_update(struct inode *); 306extern void nfs_end_data_update(struct inode *);
314extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred);
315extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); 307extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
316extern void put_nfs_open_context(struct nfs_open_context *ctx); 308extern void put_nfs_open_context(struct nfs_open_context *ctx);
317extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
318extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); 309extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
319extern void nfs_file_clear_open_context(struct file *filp);
320 310
321/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ 311/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
322extern u32 root_nfs_parse_addr(char *name); /*__init*/ 312extern u32 root_nfs_parse_addr(char *name); /*__init*/
@@ -415,21 +405,22 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc);
415extern int nfs_writepages(struct address_space *, struct writeback_control *); 405extern int nfs_writepages(struct address_space *, struct writeback_control *);
416extern int nfs_flush_incompatible(struct file *file, struct page *page); 406extern int nfs_flush_incompatible(struct file *file, struct page *page);
417extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); 407extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
418extern void nfs_writeback_done(struct rpc_task *task, void *data); 408extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
419extern void nfs_writedata_release(void *data); 409extern void nfs_writedata_release(void *);
420 410
421#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 411#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
422extern void nfs_commit_done(struct rpc_task *, void *data); 412struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
423extern void nfs_commit_release(void *data); 413void nfs_commit_free(struct nfs_write_data *p);
424#endif 414#endif
425 415
426/* 416/*
427 * Try to write back everything synchronously (but check the 417 * Try to write back everything synchronously (but check the
428 * return value!) 418 * return value!)
429 */ 419 */
430extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); 420extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
431#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 421#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
432extern int nfs_commit_inode(struct inode *, int); 422extern int nfs_commit_inode(struct inode *, int);
423extern void nfs_commit_release(void *wdata);
433#else 424#else
434static inline int 425static inline int
435nfs_commit_inode(struct inode *inode, int how) 426nfs_commit_inode(struct inode *inode, int how)
@@ -447,7 +438,7 @@ nfs_have_writebacks(struct inode *inode)
447static inline int 438static inline int
448nfs_wb_all(struct inode *inode) 439nfs_wb_all(struct inode *inode)
449{ 440{
450 int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT); 441 int error = nfs_sync_inode_wait(inode, 0, 0, 0);
451 return (error < 0) ? error : 0; 442 return (error < 0) ? error : 0;
452} 443}
453 444
@@ -456,8 +447,8 @@ nfs_wb_all(struct inode *inode)
456 */ 447 */
457static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how) 448static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how)
458{ 449{
459 int error = nfs_sync_inode(inode, page->index, 1, 450 int error = nfs_sync_inode_wait(inode, page->index, 1,
460 how | FLUSH_WAIT | FLUSH_STABLE); 451 how | FLUSH_STABLE);
461 return (error < 0) ? error : 0; 452 return (error < 0) ? error : 0;
462} 453}
463 454
@@ -469,37 +460,8 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
469/* 460/*
470 * Allocate and free nfs_write_data structures 461 * Allocate and free nfs_write_data structures
471 */ 462 */
472extern mempool_t *nfs_wdata_mempool; 463extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
473 464extern void nfs_writedata_free(struct nfs_write_data *p);
474static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
475{
476 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
477
478 if (p) {
479 memset(p, 0, sizeof(*p));
480 INIT_LIST_HEAD(&p->pages);
481 if (pagecount < NFS_PAGEVEC_SIZE)
482 p->pagevec = &p->page_array[0];
483 else {
484 size_t size = ++pagecount * sizeof(struct page *);
485 p->pagevec = kmalloc(size, GFP_NOFS);
486 if (p->pagevec) {
487 memset(p->pagevec, 0, size);
488 } else {
489 mempool_free(p, nfs_wdata_mempool);
490 p = NULL;
491 }
492 }
493 }
494 return p;
495}
496
497static inline void nfs_writedata_free(struct nfs_write_data *p)
498{
499 if (p && (p->pagevec != &p->page_array[0]))
500 kfree(p->pagevec);
501 mempool_free(p, nfs_wdata_mempool);
502}
503 465
504/* 466/*
505 * linux/fs/nfs/read.c 467 * linux/fs/nfs/read.c
@@ -507,44 +469,14 @@ static inline void nfs_writedata_free(struct nfs_write_data *p)
507extern int nfs_readpage(struct file *, struct page *); 469extern int nfs_readpage(struct file *, struct page *);
508extern int nfs_readpages(struct file *, struct address_space *, 470extern int nfs_readpages(struct file *, struct address_space *,
509 struct list_head *, unsigned); 471 struct list_head *, unsigned);
510extern void nfs_readpage_result(struct rpc_task *, void *); 472extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
511extern void nfs_readdata_release(void *data); 473extern void nfs_readdata_release(void *data);
512
513 474
514/* 475/*
515 * Allocate and free nfs_read_data structures 476 * Allocate and free nfs_read_data structures
516 */ 477 */
517extern mempool_t *nfs_rdata_mempool; 478extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
518 479extern void nfs_readdata_free(struct nfs_read_data *p);
519static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
520{
521 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
522
523 if (p) {
524 memset(p, 0, sizeof(*p));
525 INIT_LIST_HEAD(&p->pages);
526 if (pagecount < NFS_PAGEVEC_SIZE)
527 p->pagevec = &p->page_array[0];
528 else {
529 size_t size = ++pagecount * sizeof(struct page *);
530 p->pagevec = kmalloc(size, GFP_NOFS);
531 if (p->pagevec) {
532 memset(p->pagevec, 0, size);
533 } else {
534 mempool_free(p, nfs_rdata_mempool);
535 p = NULL;
536 }
537 }
538 }
539 return p;
540}
541
542static inline void nfs_readdata_free(struct nfs_read_data *p)
543{
544 if (p && (p->pagevec != &p->page_array[0]))
545 kfree(p->pagevec);
546 mempool_free(p, nfs_rdata_mempool);
547}
548 480
549/* 481/*
550 * linux/fs/nfs3proc.c 482 * linux/fs/nfs3proc.c
diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h
index e2c18dabff86..861730275ba0 100644
--- a/include/linux/nfs_fs_i.h
+++ b/include/linux/nfs_fs_i.h
@@ -12,8 +12,8 @@ struct nlm_lockowner;
12 */ 12 */
13struct nfs_lock_info { 13struct nfs_lock_info {
14 u32 state; 14 u32 state;
15 u32 flags;
16 struct nlm_lockowner *owner; 15 struct nlm_lockowner *owner;
16 struct list_head list;
17}; 17};
18 18
19struct nfs4_lock_state; 19struct nfs4_lock_state;
@@ -21,10 +21,4 @@ struct nfs4_lock_info {
21 struct nfs4_lock_state *owner; 21 struct nfs4_lock_state *owner;
22}; 22};
23 23
24/*
25 * Lock flag values
26 */
27#define NFS_LCK_GRANTED 0x0001 /* lock has been granted */
28#define NFS_LCK_RECLAIM 0x0002 /* lock marked for reclaiming */
29
30#endif 24#endif
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3d3a305488cf..65dec21af774 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -4,6 +4,8 @@
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/backing-dev.h> 5#include <linux/backing-dev.h>
6 6
7struct nfs_iostats;
8
7/* 9/*
8 * NFS client parameters stored in the superblock. 10 * NFS client parameters stored in the superblock.
9 */ 11 */
@@ -12,6 +14,7 @@ struct nfs_server {
12 struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */ 14 struct rpc_clnt * client_sys; /* 2nd handle for FSINFO */
13 struct rpc_clnt * client_acl; /* ACL RPC client handle */ 15 struct rpc_clnt * client_acl; /* ACL RPC client handle */
14 struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ 16 struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */
17 struct nfs_iostats * io_stats; /* I/O statistics */
15 struct backing_dev_info backing_dev_info; 18 struct backing_dev_info backing_dev_info;
16 int flags; /* various flags */ 19 int flags; /* various flags */
17 unsigned int caps; /* server capabilities */ 20 unsigned int caps; /* server capabilities */
@@ -26,10 +29,13 @@ struct nfs_server {
26 unsigned int acregmax; 29 unsigned int acregmax;
27 unsigned int acdirmin; 30 unsigned int acdirmin;
28 unsigned int acdirmax; 31 unsigned int acdirmax;
32 unsigned long retrans_timeo; /* retransmit timeout */
33 unsigned int retrans_count; /* number of retransmit tries */
29 unsigned int namelen; 34 unsigned int namelen;
30 char * hostname; /* remote hostname */ 35 char * hostname; /* remote hostname */
31 struct nfs_fh fh; 36 struct nfs_fh fh;
32 struct sockaddr_in addr; 37 struct sockaddr_in addr;
38 unsigned long mount_time; /* when this fs was mounted */
33#ifdef CONFIG_NFS_V4 39#ifdef CONFIG_NFS_V4
34 /* Our own IP address, as a null-terminated string. 40 /* Our own IP address, as a null-terminated string.
35 * This is used to generate the clientid, and the callback address. 41 * This is used to generate the clientid, and the callback address.
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6d6f69ec5675..7fafc4c546b7 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -695,7 +695,6 @@ struct nfs_read_data {
695#ifdef CONFIG_NFS_V4 695#ifdef CONFIG_NFS_V4
696 unsigned long timestamp; /* For lease renewal */ 696 unsigned long timestamp; /* For lease renewal */
697#endif 697#endif
698 void (*complete) (struct nfs_read_data *, int);
699 struct page *page_array[NFS_PAGEVEC_SIZE + 1]; 698 struct page *page_array[NFS_PAGEVEC_SIZE + 1];
700}; 699};
701 700
@@ -714,7 +713,6 @@ struct nfs_write_data {
714#ifdef CONFIG_NFS_V4 713#ifdef CONFIG_NFS_V4
715 unsigned long timestamp; /* For lease renewal */ 714 unsigned long timestamp; /* For lease renewal */
716#endif 715#endif
717 void (*complete) (struct nfs_write_data *, int);
718 struct page *page_array[NFS_PAGEVEC_SIZE + 1]; 716 struct page *page_array[NFS_PAGEVEC_SIZE + 1];
719}; 717};
720 718
@@ -769,8 +767,11 @@ struct nfs_rpc_ops {
769 struct nfs_pathconf *); 767 struct nfs_pathconf *);
770 u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); 768 u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus);
771 void (*read_setup) (struct nfs_read_data *); 769 void (*read_setup) (struct nfs_read_data *);
770 int (*read_done) (struct rpc_task *, struct nfs_read_data *);
772 void (*write_setup) (struct nfs_write_data *, int how); 771 void (*write_setup) (struct nfs_write_data *, int how);
772 int (*write_done) (struct rpc_task *, struct nfs_write_data *);
773 void (*commit_setup) (struct nfs_write_data *, int how); 773 void (*commit_setup) (struct nfs_write_data *, int how);
774 int (*commit_done) (struct rpc_task *, struct nfs_write_data *);
774 int (*file_open) (struct inode *, struct file *); 775 int (*file_open) (struct inode *, struct file *);
775 int (*file_release) (struct inode *, struct file *); 776 int (*file_release) (struct inode *, struct file *);
776 int (*lock)(struct file *, int, struct file_lock *); 777 int (*lock)(struct file *, int, struct file_lock *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f147e6b84332..8fe9f35eba31 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -45,7 +45,8 @@ struct rpc_clnt {
45 char * cl_server; /* server machine name */ 45 char * cl_server; /* server machine name */
46 char * cl_protname; /* protocol name */ 46 char * cl_protname; /* protocol name */
47 struct rpc_auth * cl_auth; /* authenticator */ 47 struct rpc_auth * cl_auth; /* authenticator */
48 struct rpc_stat * cl_stats; /* statistics */ 48 struct rpc_stat * cl_stats; /* per-program statistics */
49 struct rpc_iostats * cl_metrics; /* per-client statistics */
49 50
50 unsigned int cl_softrtry : 1,/* soft timeouts */ 51 unsigned int cl_softrtry : 1,/* soft timeouts */
51 cl_intr : 1,/* interruptible */ 52 cl_intr : 1,/* interruptible */
@@ -59,6 +60,7 @@ struct rpc_clnt {
59 int cl_nodelen; /* nodename length */ 60 int cl_nodelen; /* nodename length */
60 char cl_nodename[UNX_MAXNODENAME]; 61 char cl_nodename[UNX_MAXNODENAME];
61 char cl_pathname[30];/* Path in rpc_pipe_fs */ 62 char cl_pathname[30];/* Path in rpc_pipe_fs */
63 struct vfsmount * cl_vfsmnt;
62 struct dentry * cl_dentry; /* inode */ 64 struct dentry * cl_dentry; /* inode */
63 struct rpc_clnt * cl_parent; /* Points to parent of clones */ 65 struct rpc_clnt * cl_parent; /* Points to parent of clones */
64 struct rpc_rtt cl_rtt_default; 66 struct rpc_rtt cl_rtt_default;
@@ -100,6 +102,8 @@ struct rpc_procinfo {
100 unsigned int p_bufsiz; /* req. buffer size */ 102 unsigned int p_bufsiz; /* req. buffer size */
101 unsigned int p_count; /* call count */ 103 unsigned int p_count; /* call count */
102 unsigned int p_timer; /* Which RTT timer to use */ 104 unsigned int p_timer; /* Which RTT timer to use */
105 u32 p_statidx; /* Which procedure to account */
106 char * p_name; /* name of procedure */
103}; 107};
104 108
105#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) 109#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt))
@@ -137,20 +141,6 @@ size_t rpc_max_payload(struct rpc_clnt *);
137void rpc_force_rebind(struct rpc_clnt *); 141void rpc_force_rebind(struct rpc_clnt *);
138int rpc_ping(struct rpc_clnt *clnt, int flags); 142int rpc_ping(struct rpc_clnt *clnt, int flags);
139 143
140static __inline__
141int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
142{
143 struct rpc_message msg = {
144 .rpc_proc = &clnt->cl_procinfo[proc],
145 .rpc_argp = argp,
146 .rpc_resp = resp,
147 .rpc_cred = NULL
148 };
149 return rpc_call_sync(clnt, &msg, flags);
150}
151
152extern void rpciod_wake_up(void);
153
154/* 144/*
155 * Helper function for NFSroot support 145 * Helper function for NFSroot support
156 */ 146 */
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 2c3601d31045..1279280d7196 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -53,6 +53,8 @@ struct krb5_ctx {
53 struct xdr_netobj mech_used; 53 struct xdr_netobj mech_used;
54}; 54};
55 55
56extern spinlock_t krb5_seq_lock;
57
56#define KG_TOK_MIC_MSG 0x0101 58#define KG_TOK_MIC_MSG 0x0101
57#define KG_TOK_WRAP_MSG 0x0201 59#define KG_TOK_WRAP_MSG 0x0201
58 60
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
new file mode 100644
index 000000000000..8f96e9dc369a
--- /dev/null
+++ b/include/linux/sunrpc/metrics.h
@@ -0,0 +1,77 @@
1/*
2 * linux/include/linux/sunrpc/metrics.h
3 *
4 * Declarations for RPC client per-operation metrics
5 *
6 * Copyright (C) 2005 Chuck Lever <cel@netapp.com>
7 *
8 * RPC client per-operation statistics provide latency and retry
9 * information about each type of RPC procedure in a given RPC program.
10 * These statistics are not for detailed problem diagnosis, but simply
11 * to indicate whether the problem is local or remote.
12 *
13 * These counters are not meant to be human-readable, but are meant to be
14 * integrated into system monitoring tools such as "sar" and "iostat". As
15 * such, the counters are sampled by the tools over time, and are never
16 * zeroed after a file system is mounted. Moving averages can be computed
17 * by the tools by taking the difference between two instantaneous samples
18 * and dividing that by the time between the samples.
19 *
20 * The counters are maintained in a single array per RPC client, indexed
21 * by procedure number. There is no need to maintain separate counter
22 * arrays per-CPU because these counters are always modified behind locks.
23 */
24
25#ifndef _LINUX_SUNRPC_METRICS_H
26#define _LINUX_SUNRPC_METRICS_H
27
28#include <linux/seq_file.h>
29
30#define RPC_IOSTATS_VERS "1.0"
31
32struct rpc_iostats {
33 /*
34 * These counters give an idea about how many request
35 * transmissions are required, on average, to complete that
36 * particular procedure. Some procedures may require more
37 * than one transmission because the server is unresponsive,
38 * the client is retransmitting too aggressively, or the
39 * requests are large and the network is congested.
40 */
41 unsigned long om_ops, /* count of operations */
42 om_ntrans, /* count of RPC transmissions */
43 om_timeouts; /* count of major timeouts */
44
45 /*
46 * These count how many bytes are sent and received for a
47 * given RPC procedure type. This indicates how much load a
48 * particular procedure is putting on the network. These
49 * counts include the RPC and ULP headers, and the request
50 * payload.
51 */
52 unsigned long long om_bytes_sent, /* count of bytes out */
53 om_bytes_recv; /* count of bytes in */
54
55 /*
56 * The length of time an RPC request waits in queue before
57 * transmission, the network + server latency of the request,
58 * and the total time the request spent from init to release
59 * are measured.
60 */
61 unsigned long long om_queue, /* jiffies queued for xmit */
62 om_rtt, /* jiffies for RPC RTT */
63 om_execute; /* jiffies for RPC execution */
64} ____cacheline_aligned;
65
66struct rpc_task;
67struct rpc_clnt;
68
69/*
70 * EXPORTed functions for managing rpc_iostats structures
71 */
72struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *);
73void rpc_count_iostats(struct rpc_task *);
74void rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
75void rpc_free_iostats(struct rpc_iostats *);
76
77#endif /* _LINUX_SUNRPC_METRICS_H */
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 63929349571f..2c2189cb30aa 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -45,6 +45,8 @@ extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
45extern int rpc_rmdir(char *); 45extern int rpc_rmdir(char *);
46extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); 46extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
47extern int rpc_unlink(char *); 47extern int rpc_unlink(char *);
48extern struct vfsmount *rpc_get_mount(void);
49extern void rpc_put_mount(void);
48 50
49#endif 51#endif
50#endif 52#endif
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8b25629accd8..82a91bb22362 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -86,6 +86,12 @@ struct rpc_task {
86 struct work_struct tk_work; /* Async task work queue */ 86 struct work_struct tk_work; /* Async task work queue */
87 struct rpc_wait tk_wait; /* RPC wait */ 87 struct rpc_wait tk_wait; /* RPC wait */
88 } u; 88 } u;
89
90 unsigned short tk_timeouts; /* maj timeouts */
91 size_t tk_bytes_sent; /* total bytes sent */
92 unsigned long tk_start; /* RPC task init timestamp */
93 long tk_rtt; /* round-trip time (jiffies) */
94
89#ifdef RPC_DEBUG 95#ifdef RPC_DEBUG
90 unsigned short tk_pid; /* debugging aid */ 96 unsigned short tk_pid; /* debugging aid */
91#endif 97#endif
@@ -203,6 +209,7 @@ struct rpc_wait_queue {
203 unsigned char priority; /* current priority */ 209 unsigned char priority; /* current priority */
204 unsigned char count; /* # task groups remaining serviced so far */ 210 unsigned char count; /* # task groups remaining serviced so far */
205 unsigned char nr; /* # tasks remaining for cookie */ 211 unsigned char nr; /* # tasks remaining for cookie */
212 unsigned short qlen; /* total # tasks waiting in queue */
206#ifdef RPC_DEBUG 213#ifdef RPC_DEBUG
207 const char * name; 214 const char * name;
208#endif 215#endif
@@ -269,13 +276,13 @@ void * rpc_malloc(struct rpc_task *, size_t);
269void rpc_free(struct rpc_task *); 276void rpc_free(struct rpc_task *);
270int rpciod_up(void); 277int rpciod_up(void);
271void rpciod_down(void); 278void rpciod_down(void);
272void rpciod_wake_up(void);
273int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); 279int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
274#ifdef RPC_DEBUG 280#ifdef RPC_DEBUG
275void rpc_show_tasks(void); 281void rpc_show_tasks(void);
276#endif 282#endif
277int rpc_init_mempool(void); 283int rpc_init_mempool(void);
278void rpc_destroy_mempool(void); 284void rpc_destroy_mempool(void);
285extern struct workqueue_struct *rpciod_workqueue;
279 286
280static inline void rpc_exit(struct rpc_task *task, int status) 287static inline void rpc_exit(struct rpc_task *task, int status)
281{ 288{
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 6ef99b14ff09..7eebbab7160b 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -114,6 +114,7 @@ struct rpc_xprt_ops {
114 void (*release_request)(struct rpc_task *task); 114 void (*release_request)(struct rpc_task *task);
115 void (*close)(struct rpc_xprt *xprt); 115 void (*close)(struct rpc_xprt *xprt);
116 void (*destroy)(struct rpc_xprt *xprt); 116 void (*destroy)(struct rpc_xprt *xprt);
117 void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
117}; 118};
118 119
119struct rpc_xprt { 120struct rpc_xprt {
@@ -187,6 +188,18 @@ struct rpc_xprt {
187 188
188 struct list_head recv; 189 struct list_head recv;
189 190
191 struct {
192 unsigned long bind_count, /* total number of binds */
193 connect_count, /* total number of connects */
194 connect_start, /* connect start timestamp */
195 connect_time, /* jiffies waiting for connect */
196 sends, /* how many complete requests */
197 recvs, /* how many complete requests */
198 bad_xids; /* lookup_rqst didn't find XID */
199
200 unsigned long long req_u, /* average requests on the wire */
201 bklog_u; /* backlog queue utilization */
202 } stat;
190 203
191 void (*old_data_ready)(struct sock *, int); 204 void (*old_data_ready)(struct sock *, int);
192 void (*old_state_change)(struct sock *); 205 void (*old_state_change)(struct sock *);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8d6f1a176b15..55163af3dcaf 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -64,14 +64,26 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
64 struct rpc_authops *ops; 64 struct rpc_authops *ops;
65 u32 flavor = pseudoflavor_to_flavor(pseudoflavor); 65 u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
66 66
67 if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor])) 67 auth = ERR_PTR(-EINVAL);
68 return ERR_PTR(-EINVAL); 68 if (flavor >= RPC_AUTH_MAXFLAVOR)
69 goto out;
70
71 /* FIXME - auth_flavors[] really needs an rw lock,
72 * and module refcounting. */
73#ifdef CONFIG_KMOD
74 if ((ops = auth_flavors[flavor]) == NULL)
75 request_module("rpc-auth-%u", flavor);
76#endif
77 if ((ops = auth_flavors[flavor]) == NULL)
78 goto out;
69 auth = ops->create(clnt, pseudoflavor); 79 auth = ops->create(clnt, pseudoflavor);
70 if (IS_ERR(auth)) 80 if (IS_ERR(auth))
71 return auth; 81 return auth;
72 if (clnt->cl_auth) 82 if (clnt->cl_auth)
73 rpcauth_destroy(clnt->cl_auth); 83 rpcauth_destroy(clnt->cl_auth);
74 clnt->cl_auth = auth; 84 clnt->cl_auth = auth;
85
86out:
75 return auth; 87 return auth;
76} 88}
77 89
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index bb46efd92e57..900ef31f5a0e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -721,6 +721,8 @@ gss_destroy(struct rpc_auth *auth)
721 721
722 gss_auth = container_of(auth, struct gss_auth, rpc_auth); 722 gss_auth = container_of(auth, struct gss_auth, rpc_auth);
723 rpc_unlink(gss_auth->path); 723 rpc_unlink(gss_auth->path);
724 dput(gss_auth->dentry);
725 gss_auth->dentry = NULL;
724 gss_mech_put(gss_auth->mech); 726 gss_mech_put(gss_auth->mech);
725 727
726 rpcauth_free_credcache(auth); 728 rpcauth_free_credcache(auth);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index d0dfdfd5e79e..f43311221a72 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,15 +70,19 @@
70# define RPCDBG_FACILITY RPCDBG_AUTH 70# define RPCDBG_FACILITY RPCDBG_AUTH
71#endif 71#endif
72 72
73spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED;
74
73u32 75u32
74gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, 76gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
75 struct xdr_netobj *token) 77 struct xdr_netobj *token)
76{ 78{
77 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; 79 struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
78 s32 checksum_type; 80 s32 checksum_type;
79 struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; 81 char cksumdata[16];
82 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
80 unsigned char *ptr, *krb5_hdr, *msg_start; 83 unsigned char *ptr, *krb5_hdr, *msg_start;
81 s32 now; 84 s32 now;
85 u32 seq_send;
82 86
83 dprintk("RPC: gss_krb5_seal\n"); 87 dprintk("RPC: gss_krb5_seal\n");
84 88
@@ -133,16 +137,15 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
133 BUG(); 137 BUG();
134 } 138 }
135 139
136 kfree(md5cksum.data); 140 spin_lock(&krb5_seq_lock);
141 seq_send = ctx->seq_send++;
142 spin_unlock(&krb5_seq_lock);
137 143
138 if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, 144 if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
139 ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) 145 seq_send, krb5_hdr + 16, krb5_hdr + 8)))
140 goto out_err; 146 goto out_err;
141 147
142 ctx->seq_send++;
143
144 return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); 148 return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
145out_err: 149out_err:
146 kfree(md5cksum.data);
147 return GSS_S_FAILURE; 150 return GSS_S_FAILURE;
148} 151}
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index db055fd7d778..0828cf64100f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -79,7 +79,8 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
79 int signalg; 79 int signalg;
80 int sealalg; 80 int sealalg;
81 s32 checksum_type; 81 s32 checksum_type;
82 struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; 82 char cksumdata[16];
83 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
83 s32 now; 84 s32 now;
84 int direction; 85 int direction;
85 s32 seqnum; 86 s32 seqnum;
@@ -176,6 +177,5 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
176 177
177 ret = GSS_S_COMPLETE; 178 ret = GSS_S_COMPLETE;
178out: 179out:
179 kfree(md5cksum.data);
180 return ret; 180 return ret;
181} 181}
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index af777cf9f251..89d1f3e14128 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -121,12 +121,14 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
121{ 121{
122 struct krb5_ctx *kctx = ctx->internal_ctx_id; 122 struct krb5_ctx *kctx = ctx->internal_ctx_id;
123 s32 checksum_type; 123 s32 checksum_type;
124 struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; 124 char cksumdata[16];
125 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
125 int blocksize = 0, plainlen; 126 int blocksize = 0, plainlen;
126 unsigned char *ptr, *krb5_hdr, *msg_start; 127 unsigned char *ptr, *krb5_hdr, *msg_start;
127 s32 now; 128 s32 now;
128 int headlen; 129 int headlen;
129 struct page **tmp_pages; 130 struct page **tmp_pages;
131 u32 seq_send;
130 132
131 dprintk("RPC: gss_wrap_kerberos\n"); 133 dprintk("RPC: gss_wrap_kerberos\n");
132 134
@@ -205,23 +207,22 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
205 BUG(); 207 BUG();
206 } 208 }
207 209
208 kfree(md5cksum.data); 210 spin_lock(&krb5_seq_lock);
211 seq_send = kctx->seq_send++;
212 spin_unlock(&krb5_seq_lock);
209 213
210 /* XXX would probably be more efficient to compute checksum 214 /* XXX would probably be more efficient to compute checksum
211 * and encrypt at the same time: */ 215 * and encrypt at the same time: */
212 if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, 216 if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
213 kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) 217 seq_send, krb5_hdr + 16, krb5_hdr + 8)))
214 goto out_err; 218 goto out_err;
215 219
216 if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, 220 if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
217 pages)) 221 pages))
218 goto out_err; 222 goto out_err;
219 223
220 kctx->seq_send++;
221
222 return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); 224 return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
223out_err: 225out_err:
224 if (md5cksum.data) kfree(md5cksum.data);
225 return GSS_S_FAILURE; 226 return GSS_S_FAILURE;
226} 227}
227 228
@@ -232,7 +233,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
232 int signalg; 233 int signalg;
233 int sealalg; 234 int sealalg;
234 s32 checksum_type; 235 s32 checksum_type;
235 struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; 236 char cksumdata[16];
237 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
236 s32 now; 238 s32 now;
237 int direction; 239 int direction;
238 s32 seqnum; 240 s32 seqnum;
@@ -358,6 +360,5 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
358 360
359 ret = GSS_S_COMPLETE; 361 ret = GSS_S_COMPLETE;
360out: 362out:
361 if (md5cksum.data) kfree(md5cksum.data);
362 return ret; 363 return ret;
363} 364}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 58400807d4df..5bf11ccba7cd 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -102,6 +102,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
102 alg_mode = CRYPTO_TFM_MODE_CBC; 102 alg_mode = CRYPTO_TFM_MODE_CBC;
103 setkey = 1; 103 setkey = 1;
104 break; 104 break;
105 case NID_cast5_cbc:
106 /* XXXX here in name only, not used */
107 alg_name = "cast5";
108 alg_mode = CRYPTO_TFM_MODE_CBC;
109 setkey = 0; /* XXX will need to set to 1 */
110 break;
105 case NID_md5: 111 case NID_md5:
106 if (key.len == 0) { 112 if (key.len == 0) {
107 dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); 113 dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 86fbf7c3e39c..18c7862bc234 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -57,7 +57,8 @@ spkm3_make_token(struct spkm3_ctx *ctx,
57{ 57{
58 s32 checksum_type; 58 s32 checksum_type;
59 char tokhdrbuf[25]; 59 char tokhdrbuf[25];
60 struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; 60 char cksumdata[16];
61 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
61 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; 62 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
62 int tokenlen = 0; 63 int tokenlen = 0;
63 unsigned char *ptr; 64 unsigned char *ptr;
@@ -115,13 +116,11 @@ spkm3_make_token(struct spkm3_ctx *ctx,
115 dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n"); 116 dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n");
116 goto out_err; 117 goto out_err;
117 } 118 }
118 kfree(md5cksum.data);
119 119
120 /* XXX need to implement sequence numbers, and ctx->expired */ 120 /* XXX need to implement sequence numbers, and ctx->expired */
121 121
122 return GSS_S_COMPLETE; 122 return GSS_S_COMPLETE;
123out_err: 123out_err:
124 kfree(md5cksum.data);
125 token->data = NULL; 124 token->data = NULL;
126 token->len = 0; 125 token->len = 0;
127 return GSS_S_FAILURE; 126 return GSS_S_FAILURE;
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 96851b0ba1ba..8537f581ef9b 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -56,7 +56,8 @@ spkm3_read_token(struct spkm3_ctx *ctx,
56{ 56{
57 s32 code; 57 s32 code;
58 struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; 58 struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
59 struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; 59 char cksumdata[16];
60 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
60 unsigned char *ptr = (unsigned char *)read_token->data; 61 unsigned char *ptr = (unsigned char *)read_token->data;
61 unsigned char *cksum; 62 unsigned char *cksum;
62 int bodysize, md5elen; 63 int bodysize, md5elen;
@@ -120,7 +121,6 @@ spkm3_read_token(struct spkm3_ctx *ctx,
120 /* XXX: need to add expiration and sequencing */ 121 /* XXX: need to add expiration and sequencing */
121 ret = GSS_S_COMPLETE; 122 ret = GSS_S_COMPLETE;
122out: 123out:
123 kfree(md5cksum.data);
124 kfree(wire_cksum.data); 124 kfree(wire_cksum.data);
125 return ret; 125 return ret;
126} 126}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d78479782045..aa8965e9d307 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -28,12 +28,11 @@
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/utsname.h> 30#include <linux/utsname.h>
31#include <linux/workqueue.h>
31 32
32#include <linux/sunrpc/clnt.h> 33#include <linux/sunrpc/clnt.h>
33#include <linux/workqueue.h>
34#include <linux/sunrpc/rpc_pipe_fs.h> 34#include <linux/sunrpc/rpc_pipe_fs.h>
35 35#include <linux/sunrpc/metrics.h>
36#include <linux/nfs.h>
37 36
38 37
39#define RPC_SLACK_SPACE (1024) /* total overkill */ 38#define RPC_SLACK_SPACE (1024) /* total overkill */
@@ -71,8 +70,15 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
71 static uint32_t clntid; 70 static uint32_t clntid;
72 int error; 71 int error;
73 72
73 clnt->cl_vfsmnt = ERR_PTR(-ENOENT);
74 clnt->cl_dentry = ERR_PTR(-ENOENT);
74 if (dir_name == NULL) 75 if (dir_name == NULL)
75 return 0; 76 return 0;
77
78 clnt->cl_vfsmnt = rpc_get_mount();
79 if (IS_ERR(clnt->cl_vfsmnt))
80 return PTR_ERR(clnt->cl_vfsmnt);
81
76 for (;;) { 82 for (;;) {
77 snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), 83 snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
78 "%s/clnt%x", dir_name, 84 "%s/clnt%x", dir_name,
@@ -85,6 +91,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
85 if (error != -EEXIST) { 91 if (error != -EEXIST) {
86 printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", 92 printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
87 clnt->cl_pathname, error); 93 clnt->cl_pathname, error);
94 rpc_put_mount();
88 return error; 95 return error;
89 } 96 }
90 } 97 }
@@ -147,6 +154,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
147 clnt->cl_vers = version->number; 154 clnt->cl_vers = version->number;
148 clnt->cl_prot = xprt->prot; 155 clnt->cl_prot = xprt->prot;
149 clnt->cl_stats = program->stats; 156 clnt->cl_stats = program->stats;
157 clnt->cl_metrics = rpc_alloc_iostats(clnt);
150 rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); 158 rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
151 159
152 if (!clnt->cl_port) 160 if (!clnt->cl_port)
@@ -175,7 +183,11 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
175 return clnt; 183 return clnt;
176 184
177out_no_auth: 185out_no_auth:
178 rpc_rmdir(clnt->cl_pathname); 186 if (!IS_ERR(clnt->cl_dentry)) {
187 rpc_rmdir(clnt->cl_pathname);
188 dput(clnt->cl_dentry);
189 rpc_put_mount();
190 }
179out_no_path: 191out_no_path:
180 if (clnt->cl_server != clnt->cl_inline_name) 192 if (clnt->cl_server != clnt->cl_inline_name)
181 kfree(clnt->cl_server); 193 kfree(clnt->cl_server);
@@ -240,11 +252,15 @@ rpc_clone_client(struct rpc_clnt *clnt)
240 new->cl_autobind = 0; 252 new->cl_autobind = 0;
241 new->cl_oneshot = 0; 253 new->cl_oneshot = 0;
242 new->cl_dead = 0; 254 new->cl_dead = 0;
255 if (!IS_ERR(new->cl_dentry)) {
256 dget(new->cl_dentry);
257 rpc_get_mount();
258 }
243 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); 259 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
244 if (new->cl_auth) 260 if (new->cl_auth)
245 atomic_inc(&new->cl_auth->au_count); 261 atomic_inc(&new->cl_auth->au_count);
246 new->cl_pmap = &new->cl_pmap_default; 262 new->cl_pmap = &new->cl_pmap_default;
247 rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); 263 new->cl_metrics = rpc_alloc_iostats(clnt);
248 return new; 264 return new;
249out_no_clnt: 265out_no_clnt:
250 printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); 266 printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -314,6 +330,12 @@ rpc_destroy_client(struct rpc_clnt *clnt)
314 if (clnt->cl_server != clnt->cl_inline_name) 330 if (clnt->cl_server != clnt->cl_inline_name)
315 kfree(clnt->cl_server); 331 kfree(clnt->cl_server);
316out_free: 332out_free:
333 rpc_free_iostats(clnt->cl_metrics);
334 clnt->cl_metrics = NULL;
335 if (!IS_ERR(clnt->cl_dentry)) {
336 dput(clnt->cl_dentry);
337 rpc_put_mount();
338 }
317 kfree(clnt); 339 kfree(clnt);
318 return 0; 340 return 0;
319} 341}
@@ -473,15 +495,16 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
473 int status; 495 int status;
474 496
475 /* If this client is slain all further I/O fails */ 497 /* If this client is slain all further I/O fails */
498 status = -EIO;
476 if (clnt->cl_dead) 499 if (clnt->cl_dead)
477 return -EIO; 500 goto out_release;
478 501
479 flags |= RPC_TASK_ASYNC; 502 flags |= RPC_TASK_ASYNC;
480 503
481 /* Create/initialize a new RPC task */ 504 /* Create/initialize a new RPC task */
482 status = -ENOMEM; 505 status = -ENOMEM;
483 if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) 506 if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
484 goto out; 507 goto out_release;
485 508
486 /* Mask signals on GSS_AUTH upcalls */ 509 /* Mask signals on GSS_AUTH upcalls */
487 rpc_task_sigmask(task, &oldset); 510 rpc_task_sigmask(task, &oldset);
@@ -496,7 +519,10 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
496 rpc_release_task(task); 519 rpc_release_task(task);
497 520
498 rpc_restore_sigmask(&oldset); 521 rpc_restore_sigmask(&oldset);
499out: 522 return status;
523out_release:
524 if (tk_ops->rpc_release != NULL)
525 tk_ops->rpc_release(data);
500 return status; 526 return status;
501} 527}
502 528
@@ -993,6 +1019,8 @@ call_timeout(struct rpc_task *task)
993 } 1019 }
994 1020
995 dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); 1021 dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
1022 task->tk_timeouts++;
1023
996 if (RPC_IS_SOFT(task)) { 1024 if (RPC_IS_SOFT(task)) {
997 printk(KERN_NOTICE "%s: server %s not responding, timed out\n", 1025 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
998 clnt->cl_protname, clnt->cl_server); 1026 clnt->cl_protname, clnt->cl_server);
@@ -1045,6 +1073,11 @@ call_decode(struct rpc_task *task)
1045 return; 1073 return;
1046 } 1074 }
1047 1075
1076 /*
1077 * Ensure that we see all writes made by xprt_complete_rqst()
1078 * before it changed req->rq_received.
1079 */
1080 smp_rmb();
1048 req->rq_rcv_buf.len = req->rq_private_buf.len; 1081 req->rq_rcv_buf.len = req->rq_private_buf.len;
1049 1082
1050 /* Check that the softirq receive buffer is valid */ 1083 /* Check that the softirq receive buffer is valid */
@@ -1194,8 +1227,8 @@ call_verify(struct rpc_task *task)
1194 task->tk_action = call_bind; 1227 task->tk_action = call_bind;
1195 goto out_retry; 1228 goto out_retry;
1196 case RPC_AUTH_TOOWEAK: 1229 case RPC_AUTH_TOOWEAK:
1197 printk(KERN_NOTICE "call_verify: server requires stronger " 1230 printk(KERN_NOTICE "call_verify: server %s requires stronger "
1198 "authentication.\n"); 1231 "authentication.\n", task->tk_client->cl_server);
1199 break; 1232 break;
1200 default: 1233 default:
1201 printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n); 1234 printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 8139ce68e915..d25b054ec921 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -82,6 +82,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
82 rpc_call_setup(child, &msg, 0); 82 rpc_call_setup(child, &msg, 0);
83 83
84 /* ... and run the child task */ 84 /* ... and run the child task */
85 task->tk_xprt->stat.bind_count++;
85 rpc_run_child(task, child, pmap_getport_done); 86 rpc_run_child(task, child, pmap_getport_done);
86 return; 87 return;
87 88
@@ -103,6 +104,11 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
103 .pm_prot = prot, 104 .pm_prot = prot,
104 .pm_port = 0 105 .pm_port = 0
105 }; 106 };
107 struct rpc_message msg = {
108 .rpc_proc = &pmap_procedures[PMAP_GETPORT],
109 .rpc_argp = &map,
110 .rpc_resp = &map.pm_port,
111 };
106 struct rpc_clnt *pmap_clnt; 112 struct rpc_clnt *pmap_clnt;
107 char hostname[32]; 113 char hostname[32];
108 int status; 114 int status;
@@ -116,7 +122,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
116 return PTR_ERR(pmap_clnt); 122 return PTR_ERR(pmap_clnt);
117 123
118 /* Setup the call info struct */ 124 /* Setup the call info struct */
119 status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0); 125 status = rpc_call_sync(pmap_clnt, &msg, 0);
120 126
121 if (status >= 0) { 127 if (status >= 0) {
122 if (map.pm_port != 0) 128 if (map.pm_port != 0)
@@ -161,16 +167,27 @@ pmap_getport_done(struct rpc_task *task)
161int 167int
162rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) 168rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
163{ 169{
164 struct sockaddr_in sin; 170 struct sockaddr_in sin = {
165 struct rpc_portmap map; 171 .sin_family = AF_INET,
172 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
173 };
174 struct rpc_portmap map = {
175 .pm_prog = prog,
176 .pm_vers = vers,
177 .pm_prot = prot,
178 .pm_port = port,
179 };
180 struct rpc_message msg = {
181 .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET],
182 .rpc_argp = &map,
183 .rpc_resp = okay,
184 };
166 struct rpc_clnt *pmap_clnt; 185 struct rpc_clnt *pmap_clnt;
167 int error = 0; 186 int error = 0;
168 187
169 dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", 188 dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
170 prog, vers, prot, port); 189 prog, vers, prot, port);
171 190
172 sin.sin_family = AF_INET;
173 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
174 pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); 191 pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
175 if (IS_ERR(pmap_clnt)) { 192 if (IS_ERR(pmap_clnt)) {
176 error = PTR_ERR(pmap_clnt); 193 error = PTR_ERR(pmap_clnt);
@@ -178,13 +195,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
178 return error; 195 return error;
179 } 196 }
180 197
181 map.pm_prog = prog; 198 error = rpc_call_sync(pmap_clnt, &msg, 0);
182 map.pm_vers = vers;
183 map.pm_prot = prot;
184 map.pm_port = port;
185
186 error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET,
187 &map, okay, 0);
188 199
189 if (error < 0) { 200 if (error < 0) {
190 printk(KERN_WARNING 201 printk(KERN_WARNING
@@ -260,6 +271,8 @@ static struct rpc_procinfo pmap_procedures[] = {
260 .p_decode = (kxdrproc_t) xdr_decode_bool, 271 .p_decode = (kxdrproc_t) xdr_decode_bool,
261 .p_bufsiz = 4, 272 .p_bufsiz = 4,
262 .p_count = 1, 273 .p_count = 1,
274 .p_statidx = PMAP_SET,
275 .p_name = "SET",
263 }, 276 },
264[PMAP_UNSET] = { 277[PMAP_UNSET] = {
265 .p_proc = PMAP_UNSET, 278 .p_proc = PMAP_UNSET,
@@ -267,6 +280,8 @@ static struct rpc_procinfo pmap_procedures[] = {
267 .p_decode = (kxdrproc_t) xdr_decode_bool, 280 .p_decode = (kxdrproc_t) xdr_decode_bool,
268 .p_bufsiz = 4, 281 .p_bufsiz = 4,
269 .p_count = 1, 282 .p_count = 1,
283 .p_statidx = PMAP_UNSET,
284 .p_name = "UNSET",
270 }, 285 },
271[PMAP_GETPORT] = { 286[PMAP_GETPORT] = {
272 .p_proc = PMAP_GETPORT, 287 .p_proc = PMAP_GETPORT,
@@ -274,6 +289,8 @@ static struct rpc_procinfo pmap_procedures[] = {
274 .p_decode = (kxdrproc_t) xdr_decode_port, 289 .p_decode = (kxdrproc_t) xdr_decode_port,
275 .p_bufsiz = 4, 290 .p_bufsiz = 4,
276 .p_count = 1, 291 .p_count = 1,
292 .p_statidx = PMAP_GETPORT,
293 .p_name = "GETPORT",
277 }, 294 },
278}; 295};
279 296
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ad9d9fc4e734..aa4158be9900 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -91,7 +91,8 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
91 res = 0; 91 res = 0;
92 } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { 92 } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
93 if (list_empty(&rpci->pipe)) 93 if (list_empty(&rpci->pipe))
94 schedule_delayed_work(&rpci->queue_timeout, 94 queue_delayed_work(rpciod_workqueue,
95 &rpci->queue_timeout,
95 RPC_UPCALL_TIMEOUT); 96 RPC_UPCALL_TIMEOUT);
96 list_add_tail(&msg->list, &rpci->pipe); 97 list_add_tail(&msg->list, &rpci->pipe);
97 rpci->pipelen += msg->len; 98 rpci->pipelen += msg->len;
@@ -132,7 +133,7 @@ rpc_close_pipes(struct inode *inode)
132 if (ops->release_pipe) 133 if (ops->release_pipe)
133 ops->release_pipe(inode); 134 ops->release_pipe(inode);
134 cancel_delayed_work(&rpci->queue_timeout); 135 cancel_delayed_work(&rpci->queue_timeout);
135 flush_scheduled_work(); 136 flush_workqueue(rpciod_workqueue);
136 } 137 }
137 rpc_inode_setowner(inode, NULL); 138 rpc_inode_setowner(inode, NULL);
138 mutex_unlock(&inode->i_mutex); 139 mutex_unlock(&inode->i_mutex);
@@ -434,14 +435,17 @@ static struct rpc_filelist authfiles[] = {
434 }, 435 },
435}; 436};
436 437
437static int 438struct vfsmount *rpc_get_mount(void)
438rpc_get_mount(void)
439{ 439{
440 return simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count); 440 int err;
441
442 err = simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
443 if (err != 0)
444 return ERR_PTR(err);
445 return rpc_mount;
441} 446}
442 447
443static void 448void rpc_put_mount(void)
444rpc_put_mount(void)
445{ 449{
446 simple_release_fs(&rpc_mount, &rpc_mount_count); 450 simple_release_fs(&rpc_mount, &rpc_mount_count);
447} 451}
@@ -451,12 +455,13 @@ rpc_lookup_parent(char *path, struct nameidata *nd)
451{ 455{
452 if (path[0] == '\0') 456 if (path[0] == '\0')
453 return -ENOENT; 457 return -ENOENT;
454 if (rpc_get_mount()) { 458 nd->mnt = rpc_get_mount();
459 if (IS_ERR(nd->mnt)) {
455 printk(KERN_WARNING "%s: %s failed to mount " 460 printk(KERN_WARNING "%s: %s failed to mount "
456 "pseudofilesystem \n", __FILE__, __FUNCTION__); 461 "pseudofilesystem \n", __FILE__, __FUNCTION__);
457 return -ENODEV; 462 return PTR_ERR(nd->mnt);
458 } 463 }
459 nd->mnt = mntget(rpc_mount); 464 mntget(nd->mnt);
460 nd->dentry = dget(rpc_mount->mnt_root); 465 nd->dentry = dget(rpc_mount->mnt_root);
461 nd->last_type = LAST_ROOT; 466 nd->last_type = LAST_ROOT;
462 nd->flags = LOOKUP_PARENT; 467 nd->flags = LOOKUP_PARENT;
@@ -593,7 +598,6 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry)
593 d_instantiate(dentry, inode); 598 d_instantiate(dentry, inode);
594 dir->i_nlink++; 599 dir->i_nlink++;
595 inode_dir_notify(dir, DN_CREATE); 600 inode_dir_notify(dir, DN_CREATE);
596 rpc_get_mount();
597 return 0; 601 return 0;
598out_err: 602out_err:
599 printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", 603 printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
@@ -614,7 +618,6 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
614 if (!error) { 618 if (!error) {
615 inode_dir_notify(dir, DN_DELETE); 619 inode_dir_notify(dir, DN_DELETE);
616 d_drop(dentry); 620 d_drop(dentry);
617 rpc_put_mount();
618 } 621 }
619 return 0; 622 return 0;
620} 623}
@@ -668,7 +671,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
668out: 671out:
669 mutex_unlock(&dir->i_mutex); 672 mutex_unlock(&dir->i_mutex);
670 rpc_release_path(&nd); 673 rpc_release_path(&nd);
671 return dentry; 674 return dget(dentry);
672err_depopulate: 675err_depopulate:
673 rpc_depopulate(dentry); 676 rpc_depopulate(dentry);
674 __rpc_rmdir(dir, dentry); 677 __rpc_rmdir(dir, dentry);
@@ -732,7 +735,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
732out: 735out:
733 mutex_unlock(&dir->i_mutex); 736 mutex_unlock(&dir->i_mutex);
734 rpc_release_path(&nd); 737 rpc_release_path(&nd);
735 return dentry; 738 return dget(dentry);
736err_dput: 739err_dput:
737 dput(dentry); 740 dput(dentry);
738 dentry = ERR_PTR(-ENOMEM); 741 dentry = ERR_PTR(-ENOMEM);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index dff07795bd16..b9969b91a9f7 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -65,7 +65,7 @@ static LIST_HEAD(all_tasks);
65 */ 65 */
66static DEFINE_MUTEX(rpciod_mutex); 66static DEFINE_MUTEX(rpciod_mutex);
67static unsigned int rpciod_users; 67static unsigned int rpciod_users;
68static struct workqueue_struct *rpciod_workqueue; 68struct workqueue_struct *rpciod_workqueue;
69 69
70/* 70/*
71 * Spinlock for other critical sections of code. 71 * Spinlock for other critical sections of code.
@@ -182,6 +182,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *
182 else 182 else
183 list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); 183 list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
184 task->u.tk_wait.rpc_waitq = queue; 184 task->u.tk_wait.rpc_waitq = queue;
185 queue->qlen++;
185 rpc_set_queued(task); 186 rpc_set_queued(task);
186 187
187 dprintk("RPC: %4d added to queue %p \"%s\"\n", 188 dprintk("RPC: %4d added to queue %p \"%s\"\n",
@@ -216,6 +217,7 @@ static void __rpc_remove_wait_queue(struct rpc_task *task)
216 __rpc_remove_wait_queue_priority(task); 217 __rpc_remove_wait_queue_priority(task);
217 else 218 else
218 list_del(&task->u.tk_wait.list); 219 list_del(&task->u.tk_wait.list);
220 queue->qlen--;
219 dprintk("RPC: %4d removed from queue %p \"%s\"\n", 221 dprintk("RPC: %4d removed from queue %p \"%s\"\n",
220 task->tk_pid, queue, rpc_qname(queue)); 222 task->tk_pid, queue, rpc_qname(queue));
221} 223}
@@ -816,6 +818,9 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
816 818
817 BUG_ON(task->tk_ops == NULL); 819 BUG_ON(task->tk_ops == NULL);
818 820
821 /* starting timestamp */
822 task->tk_start = jiffies;
823
819 dprintk("RPC: %4d new task procpid %d\n", task->tk_pid, 824 dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
820 current->pid); 825 current->pid);
821} 826}
@@ -917,8 +922,11 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
917{ 922{
918 struct rpc_task *task; 923 struct rpc_task *task;
919 task = rpc_new_task(clnt, flags, ops, data); 924 task = rpc_new_task(clnt, flags, ops, data);
920 if (task == NULL) 925 if (task == NULL) {
926 if (ops->rpc_release != NULL)
927 ops->rpc_release(data);
921 return ERR_PTR(-ENOMEM); 928 return ERR_PTR(-ENOMEM);
929 }
922 atomic_inc(&task->tk_count); 930 atomic_inc(&task->tk_count);
923 rpc_execute(task); 931 rpc_execute(task);
924 return task; 932 return task;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 4979f226e285..790941e8af4d 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -21,6 +21,7 @@
21#include <linux/seq_file.h> 21#include <linux/seq_file.h>
22#include <linux/sunrpc/clnt.h> 22#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/svcsock.h> 23#include <linux/sunrpc/svcsock.h>
24#include <linux/sunrpc/metrics.h>
24 25
25#define RPCDBG_FACILITY RPCDBG_MISC 26#define RPCDBG_FACILITY RPCDBG_MISC
26 27
@@ -106,6 +107,120 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
106 } 107 }
107} 108}
108 109
110/**
111 * rpc_alloc_iostats - allocate an rpc_iostats structure
112 * @clnt: RPC program, version, and xprt
113 *
114 */
115struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
116{
117 unsigned int ops = clnt->cl_maxproc;
118 size_t size = ops * sizeof(struct rpc_iostats);
119 struct rpc_iostats *new;
120
121 new = kmalloc(size, GFP_KERNEL);
122 if (new)
123 memset(new, 0 , size);
124 return new;
125}
126EXPORT_SYMBOL(rpc_alloc_iostats);
127
128/**
129 * rpc_free_iostats - release an rpc_iostats structure
130 * @stats: doomed rpc_iostats structure
131 *
132 */
133void rpc_free_iostats(struct rpc_iostats *stats)
134{
135 kfree(stats);
136}
137EXPORT_SYMBOL(rpc_free_iostats);
138
139/**
140 * rpc_count_iostats - tally up per-task stats
141 * @task: completed rpc_task
142 *
143 * Relies on the caller for serialization.
144 */
145void rpc_count_iostats(struct rpc_task *task)
146{
147 struct rpc_rqst *req = task->tk_rqstp;
148 struct rpc_iostats *stats = task->tk_client->cl_metrics;
149 struct rpc_iostats *op_metrics;
150 long rtt, execute, queue;
151
152 if (!stats || !req)
153 return;
154 op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
155
156 op_metrics->om_ops++;
157 op_metrics->om_ntrans += req->rq_ntrans;
158 op_metrics->om_timeouts += task->tk_timeouts;
159
160 op_metrics->om_bytes_sent += task->tk_bytes_sent;
161 op_metrics->om_bytes_recv += req->rq_received;
162
163 queue = (long)req->rq_xtime - task->tk_start;
164 if (queue < 0)
165 queue = -queue;
166 op_metrics->om_queue += queue;
167
168 rtt = task->tk_rtt;
169 if (rtt < 0)
170 rtt = -rtt;
171 op_metrics->om_rtt += rtt;
172
173 execute = (long)jiffies - task->tk_start;
174 if (execute < 0)
175 execute = -execute;
176 op_metrics->om_execute += execute;
177}
178
179void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs)
180{
181 if (procs[op].p_name)
182 seq_printf(seq, "\t%12s: ", procs[op].p_name);
183 else if (op == 0)
184 seq_printf(seq, "\t NULL: ");
185 else
186 seq_printf(seq, "\t%12u: ", op);
187}
188
189#define MILLISECS_PER_JIFFY (1000 / HZ)
190
191void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
192{
193 struct rpc_iostats *stats = clnt->cl_metrics;
194 struct rpc_xprt *xprt = clnt->cl_xprt;
195 unsigned int op, maxproc = clnt->cl_maxproc;
196
197 if (!stats)
198 return;
199
200 seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
201 seq_printf(seq, "p/v: %u/%u (%s)\n",
202 clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
203
204 if (xprt)
205 xprt->ops->print_stats(xprt, seq);
206
207 seq_printf(seq, "\tper-op statistics\n");
208 for (op = 0; op < maxproc; op++) {
209 struct rpc_iostats *metrics = &stats[op];
210 _print_name(seq, op, clnt->cl_procinfo);
211 seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
212 metrics->om_ops,
213 metrics->om_ntrans,
214 metrics->om_timeouts,
215 metrics->om_bytes_sent,
216 metrics->om_bytes_recv,
217 metrics->om_queue * MILLISECS_PER_JIFFY,
218 metrics->om_rtt * MILLISECS_PER_JIFFY,
219 metrics->om_execute * MILLISECS_PER_JIFFY);
220 }
221}
222EXPORT_SYMBOL(rpc_print_iostats);
223
109/* 224/*
110 * Register/unregister RPC proc files 225 * Register/unregister RPC proc files
111 */ 226 */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8ff2c8acb223..4dd5b3cfe754 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -44,13 +44,13 @@
44#include <linux/random.h> 44#include <linux/random.h>
45 45
46#include <linux/sunrpc/clnt.h> 46#include <linux/sunrpc/clnt.h>
47#include <linux/sunrpc/metrics.h>
47 48
48/* 49/*
49 * Local variables 50 * Local variables
50 */ 51 */
51 52
52#ifdef RPC_DEBUG 53#ifdef RPC_DEBUG
53# undef RPC_DEBUG_DATA
54# define RPCDBG_FACILITY RPCDBG_XPRT 54# define RPCDBG_FACILITY RPCDBG_XPRT
55#endif 55#endif
56 56
@@ -548,6 +548,7 @@ void xprt_connect(struct rpc_task *task)
548 548
549 task->tk_timeout = xprt->connect_timeout; 549 task->tk_timeout = xprt->connect_timeout;
550 rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); 550 rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
551 xprt->stat.connect_start = jiffies;
551 xprt->ops->connect(task); 552 xprt->ops->connect(task);
552 } 553 }
553 return; 554 return;
@@ -558,6 +559,8 @@ static void xprt_connect_status(struct rpc_task *task)
558 struct rpc_xprt *xprt = task->tk_xprt; 559 struct rpc_xprt *xprt = task->tk_xprt;
559 560
560 if (task->tk_status >= 0) { 561 if (task->tk_status >= 0) {
562 xprt->stat.connect_count++;
563 xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
561 dprintk("RPC: %4d xprt_connect_status: connection established\n", 564 dprintk("RPC: %4d xprt_connect_status: connection established\n",
562 task->tk_pid); 565 task->tk_pid);
563 return; 566 return;
@@ -601,16 +604,14 @@ static void xprt_connect_status(struct rpc_task *task)
601struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) 604struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
602{ 605{
603 struct list_head *pos; 606 struct list_head *pos;
604 struct rpc_rqst *req = NULL;
605 607
606 list_for_each(pos, &xprt->recv) { 608 list_for_each(pos, &xprt->recv) {
607 struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list); 609 struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
608 if (entry->rq_xid == xid) { 610 if (entry->rq_xid == xid)
609 req = entry; 611 return entry;
610 break;
611 }
612 } 612 }
613 return req; 613 xprt->stat.bad_xids++;
614 return NULL;
614} 615}
615 616
616/** 617/**
@@ -646,7 +647,12 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
646 dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", 647 dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
647 task->tk_pid, ntohl(req->rq_xid), copied); 648 task->tk_pid, ntohl(req->rq_xid), copied);
648 649
650 task->tk_xprt->stat.recvs++;
651 task->tk_rtt = (long)jiffies - req->rq_xtime;
652
649 list_del_init(&req->rq_list); 653 list_del_init(&req->rq_list);
654 /* Ensure all writes are done before we update req->rq_received */
655 smp_wmb();
650 req->rq_received = req->rq_private_buf.len = copied; 656 req->rq_received = req->rq_private_buf.len = copied;
651 rpc_wake_up_task(task); 657 rpc_wake_up_task(task);
652} 658}
@@ -723,7 +729,6 @@ void xprt_transmit(struct rpc_task *task)
723 729
724 dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); 730 dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
725 731
726 smp_rmb();
727 if (!req->rq_received) { 732 if (!req->rq_received) {
728 if (list_empty(&req->rq_list)) { 733 if (list_empty(&req->rq_list)) {
729 spin_lock_bh(&xprt->transport_lock); 734 spin_lock_bh(&xprt->transport_lock);
@@ -744,12 +749,19 @@ void xprt_transmit(struct rpc_task *task)
744 if (status == 0) { 749 if (status == 0) {
745 dprintk("RPC: %4d xmit complete\n", task->tk_pid); 750 dprintk("RPC: %4d xmit complete\n", task->tk_pid);
746 spin_lock_bh(&xprt->transport_lock); 751 spin_lock_bh(&xprt->transport_lock);
752
747 xprt->ops->set_retrans_timeout(task); 753 xprt->ops->set_retrans_timeout(task);
754
755 xprt->stat.sends++;
756 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
757 xprt->stat.bklog_u += xprt->backlog.qlen;
758
748 /* Don't race with disconnect */ 759 /* Don't race with disconnect */
749 if (!xprt_connected(xprt)) 760 if (!xprt_connected(xprt))
750 task->tk_status = -ENOTCONN; 761 task->tk_status = -ENOTCONN;
751 else if (!req->rq_received) 762 else if (!req->rq_received)
752 rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); 763 rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
764
753 xprt->ops->release_xprt(xprt, task); 765 xprt->ops->release_xprt(xprt, task);
754 spin_unlock_bh(&xprt->transport_lock); 766 spin_unlock_bh(&xprt->transport_lock);
755 return; 767 return;
@@ -848,6 +860,7 @@ void xprt_release(struct rpc_task *task)
848 860
849 if (!(req = task->tk_rqstp)) 861 if (!(req = task->tk_rqstp))
850 return; 862 return;
863 rpc_count_iostats(task);
851 spin_lock_bh(&xprt->transport_lock); 864 spin_lock_bh(&xprt->transport_lock);
852 xprt->ops->release_xprt(xprt, task); 865 xprt->ops->release_xprt(xprt, task);
853 if (xprt->ops->release_request) 866 if (xprt->ops->release_request)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c458f8d1d6d1..4b4e7dfdff14 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -382,6 +382,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
382 /* If we've sent the entire packet, immediately 382 /* If we've sent the entire packet, immediately
383 * reset the count of bytes sent. */ 383 * reset the count of bytes sent. */
384 req->rq_bytes_sent += status; 384 req->rq_bytes_sent += status;
385 task->tk_bytes_sent += status;
385 if (likely(req->rq_bytes_sent >= req->rq_slen)) { 386 if (likely(req->rq_bytes_sent >= req->rq_slen)) {
386 req->rq_bytes_sent = 0; 387 req->rq_bytes_sent = 0;
387 return 0; 388 return 0;
@@ -1114,6 +1115,8 @@ static void xs_tcp_connect_worker(void *args)
1114 } 1115 }
1115 1116
1116 /* Tell the socket layer to start connecting... */ 1117 /* Tell the socket layer to start connecting... */
1118 xprt->stat.connect_count++;
1119 xprt->stat.connect_start = jiffies;
1117 status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, 1120 status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
1118 sizeof(xprt->addr), O_NONBLOCK); 1121 sizeof(xprt->addr), O_NONBLOCK);
1119 dprintk("RPC: %p connect status %d connected %d sock state %d\n", 1122 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
@@ -1177,6 +1180,50 @@ static void xs_connect(struct rpc_task *task)
1177 } 1180 }
1178} 1181}
1179 1182
1183/**
1184 * xs_udp_print_stats - display UDP socket-specific stats
1185 * @xprt: rpc_xprt struct containing statistics
1186 * @seq: output file
1187 *
1188 */
1189static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
1190{
1191 seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
1192 xprt->port,
1193 xprt->stat.bind_count,
1194 xprt->stat.sends,
1195 xprt->stat.recvs,
1196 xprt->stat.bad_xids,
1197 xprt->stat.req_u,
1198 xprt->stat.bklog_u);
1199}
1200
1201/**
1202 * xs_tcp_print_stats - display TCP socket-specific stats
1203 * @xprt: rpc_xprt struct containing statistics
1204 * @seq: output file
1205 *
1206 */
1207static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
1208{
1209 long idle_time = 0;
1210
1211 if (xprt_connected(xprt))
1212 idle_time = (long)(jiffies - xprt->last_used) / HZ;
1213
1214 seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
1215 xprt->port,
1216 xprt->stat.bind_count,
1217 xprt->stat.connect_count,
1218 xprt->stat.connect_time,
1219 idle_time,
1220 xprt->stat.sends,
1221 xprt->stat.recvs,
1222 xprt->stat.bad_xids,
1223 xprt->stat.req_u,
1224 xprt->stat.bklog_u);
1225}
1226
1180static struct rpc_xprt_ops xs_udp_ops = { 1227static struct rpc_xprt_ops xs_udp_ops = {
1181 .set_buffer_size = xs_udp_set_buffer_size, 1228 .set_buffer_size = xs_udp_set_buffer_size,
1182 .reserve_xprt = xprt_reserve_xprt_cong, 1229 .reserve_xprt = xprt_reserve_xprt_cong,
@@ -1191,6 +1238,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
1191 .release_request = xprt_release_rqst_cong, 1238 .release_request = xprt_release_rqst_cong,
1192 .close = xs_close, 1239 .close = xs_close,
1193 .destroy = xs_destroy, 1240 .destroy = xs_destroy,
1241 .print_stats = xs_udp_print_stats,
1194}; 1242};
1195 1243
1196static struct rpc_xprt_ops xs_tcp_ops = { 1244static struct rpc_xprt_ops xs_tcp_ops = {
@@ -1204,6 +1252,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
1204 .set_retrans_timeout = xprt_set_retrans_timeout_def, 1252 .set_retrans_timeout = xprt_set_retrans_timeout_def,
1205 .close = xs_close, 1253 .close = xs_close,
1206 .destroy = xs_destroy, 1254 .destroy = xs_destroy,
1255 .print_stats = xs_tcp_print_stats,
1207}; 1256};
1208 1257
1209/** 1258/**