about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-04 22:55:11 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-04 22:55:11 -0400
commit 4d4700707c0d4be0efc968989fb1cd01c60c0a35 (patch)
tree 478453a4ae9453bd8d26ffc3df6eedcc30799a43
parent 7e20ef030dde0e52dd5a57220ee82fa9facbea4e (diff)
parent 84dde76c4a2d99ed2d7de6ec82c53b56620900a3 (diff)
Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
* git://git.linux-nfs.org/pub/linux/nfs-2.6: (28 commits)
  NFS: Fix a compile glitch on 64-bit systems
  NFS: Clean up nfs_create_request comments
  spkm3: initialize hash
  spkm3: remove bad kfree, unnecessary export
  spkm3: fix spkm3's use of hmac
  NFS4: invalidate cached acl on setacl
  NFS: Fix directory caching problem - with test case and patch.
  NFS: Set meaningful value for fattr->time_start in readdirplus results.
  NFS: Added support to turn off the NFSv3 READDIRPLUS RPC.
  SUNRPC: RPC client should retry with different versions of rpcbind
  SUNRPC: remove old portmapper
  NFS: switch NFSROOT to use new rpcbind client
  SUNRPC: switch the RPC server to use the new rpcbind registration API
  SUNRPC: switch socket-based RPC transports to use rpcbind
  SUNRPC: introduce rpcbind: replacement for in-kernel portmapper
  SUNRPC: Eliminate side effects from rpc_malloc
  SUNRPC: RPC buffer size estimates are too large
  NLM: Shrink the maximum request size of NLM4 requests
  NFS: Use pgoff_t in structures and functions that pass page cache offsets
  NFS: Clean up nfs_sync_mapping_wait()
  ...
-rw-r--r-- fs/Kconfig 12
-rw-r--r-- fs/lockd/mon.c 10
-rw-r--r-- fs/lockd/xdr.c 20
-rw-r--r-- fs/lockd/xdr4.c 24
-rw-r--r-- fs/nfs/client.c 3
-rw-r--r-- fs/nfs/dir.c 20
-rw-r--r-- fs/nfs/direct.c 5
-rw-r--r-- fs/nfs/internal.h 12
-rw-r--r-- fs/nfs/mount_clnt.c 7
-rw-r--r-- fs/nfs/nfs2xdr.c 7
-rw-r--r-- fs/nfs/nfs3xdr.c 13
-rw-r--r-- fs/nfs/nfs4proc.c 3
-rw-r--r-- fs/nfs/nfs4xdr.c 7
-rw-r--r-- fs/nfs/nfsroot.c 2
-rw-r--r-- fs/nfs/pagelist.c 242
-rw-r--r-- fs/nfs/read.c 92
-rw-r--r-- fs/nfs/super.c 10
-rw-r--r-- fs/nfs/write.c 258
-rw-r--r-- fs/nfsd/nfs4callback.c 7
-rw-r--r-- include/linux/lockd/lockd.h 2
-rw-r--r-- include/linux/nfs_fs.h 4
-rw-r--r-- include/linux/nfs_mount.h 1
-rw-r--r-- include/linux/nfs_page.h 33
-rw-r--r-- include/linux/sunrpc/clnt.h 9
-rw-r--r-- include/linux/sunrpc/debug.h 2
-rw-r--r-- include/linux/sunrpc/msg_prot.h 4
-rw-r--r-- include/linux/sunrpc/sched.h 2
-rw-r--r-- include/linux/sunrpc/xprt.h 7
-rw-r--r-- include/linux/writeback.h 2
-rw-r--r-- net/sunrpc/Makefile 2
-rw-r--r-- net/sunrpc/auth_gss/gss_spkm3_seal.c 13
-rw-r--r-- net/sunrpc/clnt.c 69
-rw-r--r-- net/sunrpc/pmap_clnt.c 383
-rw-r--r-- net/sunrpc/rpcb_clnt.c 625
-rw-r--r-- net/sunrpc/sched.c 65
-rw-r--r-- net/sunrpc/svc.c 2
-rw-r--r-- net/sunrpc/xprt.c 4
-rw-r--r-- net/sunrpc/xprtsock.c 4
38 files changed, 1105 insertions, 882 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index e33c08924572..8ea7b04c661f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1734,6 +1734,18 @@ config SUNRPC
1734config SUNRPC_GSS 1734config SUNRPC_GSS
1735 tristate 1735 tristate
1736 1736
1737config SUNRPC_BIND34
1738 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
1739 depends on SUNRPC && EXPERIMENTAL
1740 help
1741 Provides kernel support for querying rpcbind servers via versions 3
1742 and 4 of the rpcbind protocol. The kernel automatically falls back
1743 to version 2 if a remote rpcbind service does not support versions
1744 3 or 4.
1745
1746 If unsure, say N to get traditional behavior (version 2 rpcbind
1747 requests only).
1748
1737config RPCSEC_GSS_KRB5 1749config RPCSEC_GSS_KRB5
1738 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" 1750 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
1739 depends on SUNRPC && EXPERIMENTAL 1751 depends on SUNRPC && EXPERIMENTAL
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index eb243edf8932..2102e2d0134d 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -225,16 +225,13 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp)
225#define SM_monres_sz 2 225#define SM_monres_sz 2
226#define SM_unmonres_sz 1 226#define SM_unmonres_sz 1
227 227
228#ifndef MAX
229# define MAX(a, b) (((a) > (b))? (a) : (b))
230#endif
231
232static struct rpc_procinfo nsm_procedures[] = { 228static struct rpc_procinfo nsm_procedures[] = {
233[SM_MON] = { 229[SM_MON] = {
234 .p_proc = SM_MON, 230 .p_proc = SM_MON,
235 .p_encode = (kxdrproc_t) xdr_encode_mon, 231 .p_encode = (kxdrproc_t) xdr_encode_mon,
236 .p_decode = (kxdrproc_t) xdr_decode_stat_res, 232 .p_decode = (kxdrproc_t) xdr_decode_stat_res,
237 .p_bufsiz = MAX(SM_mon_sz, SM_monres_sz) << 2, 233 .p_arglen = SM_mon_sz,
234 .p_replen = SM_monres_sz,
238 .p_statidx = SM_MON, 235 .p_statidx = SM_MON,
239 .p_name = "MONITOR", 236 .p_name = "MONITOR",
240 }, 237 },
@@ -242,7 +239,8 @@ static struct rpc_procinfo nsm_procedures[] = {
242 .p_proc = SM_UNMON, 239 .p_proc = SM_UNMON,
243 .p_encode = (kxdrproc_t) xdr_encode_unmon, 240 .p_encode = (kxdrproc_t) xdr_encode_unmon,
244 .p_decode = (kxdrproc_t) xdr_decode_stat, 241 .p_decode = (kxdrproc_t) xdr_decode_stat,
245 .p_bufsiz = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2, 242 .p_arglen = SM_mon_id_sz,
243 .p_replen = SM_unmonres_sz,
246 .p_statidx = SM_UNMON, 244 .p_statidx = SM_UNMON,
247 .p_name = "UNMONITOR", 245 .p_name = "UNMONITOR",
248 }, 246 },
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 34dae5d70738..9702956d206c 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -510,17 +510,20 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
510 return 0; 510 return 0;
511} 511}
512 512
513#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
514# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
515#endif
516
513/* 517/*
514 * Buffer requirements for NLM 518 * Buffer requirements for NLM
515 */ 519 */
516#define NLM_void_sz 0 520#define NLM_void_sz 0
517#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) 521#define NLM_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
518#define NLM_caller_sz 1+XDR_QUADLEN(sizeof(utsname()->nodename)) 522#define NLM_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
519#define NLM_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) 523#define NLM_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
520/* #define NLM_owner_sz 1+XDR_QUADLEN(NLM_MAXOWNER) */
521#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE) 524#define NLM_fhandle_sz 1+XDR_QUADLEN(NFS2_FHSIZE)
522#define NLM_lock_sz 3+NLM_caller_sz+NLM_netobj_sz+NLM_fhandle_sz 525#define NLM_lock_sz 3+NLM_caller_sz+NLM_owner_sz+NLM_fhandle_sz
523#define NLM_holder_sz 4+NLM_netobj_sz 526#define NLM_holder_sz 4+NLM_owner_sz
524 527
525#define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz 528#define NLM_testargs_sz NLM_cookie_sz+1+NLM_lock_sz
526#define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz 529#define NLM_lockargs_sz NLM_cookie_sz+4+NLM_lock_sz
@@ -531,10 +534,6 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
531#define NLM_res_sz NLM_cookie_sz+1 534#define NLM_res_sz NLM_cookie_sz+1
532#define NLM_norep_sz 0 535#define NLM_norep_sz 0
533 536
534#ifndef MAX
535# define MAX(a, b) (((a) > (b))? (a) : (b))
536#endif
537
538/* 537/*
539 * For NLM, a void procedure really returns nothing 538 * For NLM, a void procedure really returns nothing
540 */ 539 */
@@ -545,7 +544,8 @@ nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
545 .p_proc = NLMPROC_##proc, \ 544 .p_proc = NLMPROC_##proc, \
546 .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \ 545 .p_encode = (kxdrproc_t) nlmclt_encode_##argtype, \
547 .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \ 546 .p_decode = (kxdrproc_t) nlmclt_decode_##restype, \
548 .p_bufsiz = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2, \ 547 .p_arglen = NLM_##argtype##_sz, \
548 .p_replen = NLM_##restype##_sz, \
549 .p_statidx = NLMPROC_##proc, \ 549 .p_statidx = NLMPROC_##proc, \
550 .p_name = #proc, \ 550 .p_name = #proc, \
551 } 551 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index a78240551219..ce1efdbe1b3a 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -516,17 +516,24 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
516 return 0; 516 return 0;
517} 517}
518 518
519#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
520# error "NLM host name cannot be larger than XDR_MAX_NETOBJ!"
521#endif
522
523#if (NLMCLNT_OHSIZE > NLM_MAXSTRLEN)
524# error "NLM host name cannot be larger than NLM's maximum string length!"
525#endif
526
519/* 527/*
520 * Buffer requirements for NLM 528 * Buffer requirements for NLM
521 */ 529 */
522#define NLM4_void_sz 0 530#define NLM4_void_sz 0
523#define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN) 531#define NLM4_cookie_sz 1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
524#define NLM4_caller_sz 1+XDR_QUADLEN(NLM_MAXSTRLEN) 532#define NLM4_caller_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
525#define NLM4_netobj_sz 1+XDR_QUADLEN(XDR_MAX_NETOBJ) 533#define NLM4_owner_sz 1+XDR_QUADLEN(NLMCLNT_OHSIZE)
526/* #define NLM4_owner_sz 1+XDR_QUADLEN(NLM4_MAXOWNER) */
527#define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE) 534#define NLM4_fhandle_sz 1+XDR_QUADLEN(NFS3_FHSIZE)
528#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_netobj_sz+NLM4_fhandle_sz 535#define NLM4_lock_sz 5+NLM4_caller_sz+NLM4_owner_sz+NLM4_fhandle_sz
529#define NLM4_holder_sz 6+NLM4_netobj_sz 536#define NLM4_holder_sz 6+NLM4_owner_sz
530 537
531#define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz 538#define NLM4_testargs_sz NLM4_cookie_sz+1+NLM4_lock_sz
532#define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz 539#define NLM4_lockargs_sz NLM4_cookie_sz+4+NLM4_lock_sz
@@ -537,10 +544,6 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
537#define NLM4_res_sz NLM4_cookie_sz+1 544#define NLM4_res_sz NLM4_cookie_sz+1
538#define NLM4_norep_sz 0 545#define NLM4_norep_sz 0
539 546
540#ifndef MAX
541# define MAX(a,b) (((a) > (b))? (a) : (b))
542#endif
543
544/* 547/*
545 * For NLM, a void procedure really returns nothing 548 * For NLM, a void procedure really returns nothing
546 */ 549 */
@@ -551,7 +554,8 @@ nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
551 .p_proc = NLMPROC_##proc, \ 554 .p_proc = NLMPROC_##proc, \
552 .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \ 555 .p_encode = (kxdrproc_t) nlm4clt_encode_##argtype, \
553 .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \ 556 .p_decode = (kxdrproc_t) nlm4clt_decode_##restype, \
554 .p_bufsiz = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2, \ 557 .p_arglen = NLM4_##argtype##_sz, \
558 .p_replen = NLM4_##restype##_sz, \
555 .p_statidx = NLMPROC_##proc, \ 559 .p_statidx = NLMPROC_##proc, \
556 .p_name = #proc, \ 560 .p_name = #proc, \
557 } 561 }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 2190e6c2792e..5bd03b97002e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -618,7 +618,8 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat
618 if (clp->cl_nfsversion == 3) { 618 if (clp->cl_nfsversion == 3) {
619 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) 619 if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
620 server->namelen = NFS3_MAXNAMLEN; 620 server->namelen = NFS3_MAXNAMLEN;
621 server->caps |= NFS_CAP_READDIRPLUS; 621 if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
622 server->caps |= NFS_CAP_READDIRPLUS;
622 } else { 623 } else {
623 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) 624 if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
624 server->namelen = NFS2_MAXNAMLEN; 625 server->namelen = NFS2_MAXNAMLEN;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index cd3469720cbf..e59fd31c9a22 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -154,6 +154,8 @@ typedef struct {
154 decode_dirent_t decode; 154 decode_dirent_t decode;
155 int plus; 155 int plus;
156 int error; 156 int error;
157 unsigned long timestamp;
158 int timestamp_valid;
157} nfs_readdir_descriptor_t; 159} nfs_readdir_descriptor_t;
158 160
159/* Now we cache directories properly, by stuffing the dirent 161/* Now we cache directories properly, by stuffing the dirent
@@ -195,6 +197,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
195 } 197 }
196 goto error; 198 goto error;
197 } 199 }
200 desc->timestamp = timestamp;
201 desc->timestamp_valid = 1;
198 SetPageUptodate(page); 202 SetPageUptodate(page);
199 spin_lock(&inode->i_lock); 203 spin_lock(&inode->i_lock);
200 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 204 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
@@ -225,6 +229,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc)
225 if (IS_ERR(p)) 229 if (IS_ERR(p))
226 return PTR_ERR(p); 230 return PTR_ERR(p);
227 desc->ptr = p; 231 desc->ptr = p;
232 if (desc->timestamp_valid)
233 desc->entry->fattr->time_start = desc->timestamp;
234 else
235 desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
228 return 0; 236 return 0;
229} 237}
230 238
@@ -316,6 +324,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
316 __FUNCTION__, desc->page_index, 324 __FUNCTION__, desc->page_index,
317 (long long) *desc->dir_cookie); 325 (long long) *desc->dir_cookie);
318 326
327 /* If we find the page in the page_cache, we cannot be sure
328 * how fresh the data is, so we will ignore readdir_plus attributes.
329 */
330 desc->timestamp_valid = 0;
319 page = read_cache_page(inode->i_mapping, desc->page_index, 331 page = read_cache_page(inode->i_mapping, desc->page_index,
320 (filler_t *)nfs_readdir_filler, desc); 332 (filler_t *)nfs_readdir_filler, desc);
321 if (IS_ERR(page)) { 333 if (IS_ERR(page)) {
@@ -468,6 +480,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
468 struct rpc_cred *cred = nfs_file_cred(file); 480 struct rpc_cred *cred = nfs_file_cred(file);
469 struct page *page = NULL; 481 struct page *page = NULL;
470 int status; 482 int status;
483 unsigned long timestamp;
471 484
472 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 485 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
473 (unsigned long long)*desc->dir_cookie); 486 (unsigned long long)*desc->dir_cookie);
@@ -477,6 +490,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
477 status = -ENOMEM; 490 status = -ENOMEM;
478 goto out; 491 goto out;
479 } 492 }
493 timestamp = jiffies;
480 desc->error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie, 494 desc->error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie,
481 page, 495 page,
482 NFS_SERVER(inode)->dtsize, 496 NFS_SERVER(inode)->dtsize,
@@ -487,6 +501,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
487 desc->page = page; 501 desc->page = page;
488 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 502 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
489 if (desc->error >= 0) { 503 if (desc->error >= 0) {
504 desc->timestamp = timestamp;
505 desc->timestamp_valid = 1;
490 if ((status = dir_decode(desc)) == 0) 506 if ((status = dir_decode(desc)) == 0)
491 desc->entry->prev_cookie = *desc->dir_cookie; 507 desc->entry->prev_cookie = *desc->dir_cookie;
492 } else 508 } else
@@ -849,6 +865,10 @@ static int nfs_dentry_delete(struct dentry *dentry)
849static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) 865static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
850{ 866{
851 nfs_inode_return_delegation(inode); 867 nfs_inode_return_delegation(inode);
868 if (S_ISDIR(inode->i_mode))
869 /* drop any readdir cache as it could easily be old */
870 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
871
852 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { 872 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
853 lock_kernel(); 873 lock_kernel();
854 drop_nlink(inode); 874 drop_nlink(inode);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2877744cb606..889de60f8a84 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -54,6 +54,7 @@
54#include <asm/uaccess.h> 54#include <asm/uaccess.h>
55#include <asm/atomic.h> 55#include <asm/atomic.h>
56 56
57#include "internal.h"
57#include "iostat.h" 58#include "iostat.h"
58 59
59#define NFSDBG_FACILITY NFSDBG_VFS 60#define NFSDBG_FACILITY NFSDBG_VFS
@@ -271,7 +272,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
271 bytes = min(rsize,count); 272 bytes = min(rsize,count);
272 273
273 result = -ENOMEM; 274 result = -ENOMEM;
274 data = nfs_readdata_alloc(pgbase + bytes); 275 data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
275 if (unlikely(!data)) 276 if (unlikely(!data))
276 break; 277 break;
277 278
@@ -602,7 +603,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
602 bytes = min(wsize,count); 603 bytes = min(wsize,count);
603 604
604 result = -ENOMEM; 605 result = -ENOMEM;
605 data = nfs_writedata_alloc(pgbase + bytes); 606 data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
606 if (unlikely(!data)) 607 if (unlikely(!data))
607 break; 608 break;
608 609
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 6610f2b02077..ad2b40db1e65 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -231,3 +231,15 @@ unsigned int nfs_page_length(struct page *page)
231 } 231 }
232 return 0; 232 return 0;
233} 233}
234
235/*
236 * Determine the number of pages in an array of length 'len' and
237 * with a base offset of 'base'
238 */
239static inline
240unsigned int nfs_page_array_len(unsigned int base, size_t len)
241{
242 return ((unsigned long)len + (unsigned long)base +
243 PAGE_SIZE - 1) >> PAGE_SHIFT;
244}
245
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index f75fe72b4160..ca5a266a3140 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -133,13 +133,15 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
133 133
134#define MNT_dirpath_sz (1 + 256) 134#define MNT_dirpath_sz (1 + 256)
135#define MNT_fhstatus_sz (1 + 8) 135#define MNT_fhstatus_sz (1 + 8)
136#define MNT_fhstatus3_sz (1 + 16)
136 137
137static struct rpc_procinfo mnt_procedures[] = { 138static struct rpc_procinfo mnt_procedures[] = {
138[MNTPROC_MNT] = { 139[MNTPROC_MNT] = {
139 .p_proc = MNTPROC_MNT, 140 .p_proc = MNTPROC_MNT,
140 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 141 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
141 .p_decode = (kxdrproc_t) xdr_decode_fhstatus, 142 .p_decode = (kxdrproc_t) xdr_decode_fhstatus,
142 .p_bufsiz = MNT_dirpath_sz << 2, 143 .p_arglen = MNT_dirpath_sz,
144 .p_replen = MNT_fhstatus_sz,
143 .p_statidx = MNTPROC_MNT, 145 .p_statidx = MNTPROC_MNT,
144 .p_name = "MOUNT", 146 .p_name = "MOUNT",
145 }, 147 },
@@ -150,7 +152,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
150 .p_proc = MOUNTPROC3_MNT, 152 .p_proc = MOUNTPROC3_MNT,
151 .p_encode = (kxdrproc_t) xdr_encode_dirpath, 153 .p_encode = (kxdrproc_t) xdr_encode_dirpath,
152 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, 154 .p_decode = (kxdrproc_t) xdr_decode_fhstatus3,
153 .p_bufsiz = MNT_dirpath_sz << 2, 155 .p_arglen = MNT_dirpath_sz,
156 .p_replen = MNT_fhstatus3_sz,
154 .p_statidx = MOUNTPROC3_MNT, 157 .p_statidx = MOUNTPROC3_MNT,
155 .p_name = "MOUNT", 158 .p_name = "MOUNT",
156 }, 159 },
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 3be4e72a0227..abd9f8b48943 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -687,16 +687,13 @@ nfs_stat_to_errno(int stat)
687 return nfs_errtbl[i].errno; 687 return nfs_errtbl[i].errno;
688} 688}
689 689
690#ifndef MAX
691# define MAX(a, b) (((a) > (b))? (a) : (b))
692#endif
693
694#define PROC(proc, argtype, restype, timer) \ 690#define PROC(proc, argtype, restype, timer) \
695[NFSPROC_##proc] = { \ 691[NFSPROC_##proc] = { \
696 .p_proc = NFSPROC_##proc, \ 692 .p_proc = NFSPROC_##proc, \
697 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \ 693 .p_encode = (kxdrproc_t) nfs_xdr_##argtype, \
698 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \ 694 .p_decode = (kxdrproc_t) nfs_xdr_##restype, \
699 .p_bufsiz = MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \ 695 .p_arglen = NFS_##argtype##_sz, \
696 .p_replen = NFS_##restype##_sz, \
700 .p_timer = timer, \ 697 .p_timer = timer, \
701 .p_statidx = NFSPROC_##proc, \ 698 .p_statidx = NFSPROC_##proc, \
702 .p_name = #proc, \ 699 .p_name = #proc, \
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0ace092d126f..b51df8eb9f01 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1102,16 +1102,13 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
1102} 1102}
1103#endif /* CONFIG_NFS_V3_ACL */ 1103#endif /* CONFIG_NFS_V3_ACL */
1104 1104
1105#ifndef MAX
1106# define MAX(a, b) (((a) > (b))? (a) : (b))
1107#endif
1108
1109#define PROC(proc, argtype, restype, timer) \ 1105#define PROC(proc, argtype, restype, timer) \
1110[NFS3PROC_##proc] = { \ 1106[NFS3PROC_##proc] = { \
1111 .p_proc = NFS3PROC_##proc, \ 1107 .p_proc = NFS3PROC_##proc, \
1112 .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \ 1108 .p_encode = (kxdrproc_t) nfs3_xdr_##argtype, \
1113 .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \ 1109 .p_decode = (kxdrproc_t) nfs3_xdr_##restype, \
1114 .p_bufsiz = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2, \ 1110 .p_arglen = NFS3_##argtype##_sz, \
1111 .p_replen = NFS3_##restype##_sz, \
1115 .p_timer = timer, \ 1112 .p_timer = timer, \
1116 .p_statidx = NFS3PROC_##proc, \ 1113 .p_statidx = NFS3PROC_##proc, \
1117 .p_name = #proc, \ 1114 .p_name = #proc, \
@@ -1153,7 +1150,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
1153 .p_proc = ACLPROC3_GETACL, 1150 .p_proc = ACLPROC3_GETACL,
1154 .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, 1151 .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs,
1155 .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, 1152 .p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
1156 .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, 1153 .p_arglen = ACL3_getaclargs_sz,
1154 .p_replen = ACL3_getaclres_sz,
1157 .p_timer = 1, 1155 .p_timer = 1,
1158 .p_name = "GETACL", 1156 .p_name = "GETACL",
1159 }, 1157 },
@@ -1161,7 +1159,8 @@ static struct rpc_procinfo nfs3_acl_procedures[] = {
1161 .p_proc = ACLPROC3_SETACL, 1159 .p_proc = ACLPROC3_SETACL,
1162 .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, 1160 .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs,
1163 .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, 1161 .p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
1164 .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, 1162 .p_arglen = ACL3_setaclargs_sz,
1163 .p_replen = ACL3_setaclres_sz,
1165 .p_timer = 0, 1164 .p_timer = 0,
1166 .p_name = "SETACL", 1165 .p_name = "SETACL",
1167 }, 1166 },
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f52cf5c33c6c..3b5ca1b15fe9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2647,8 +2647,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
2647 nfs_inode_return_delegation(inode); 2647 nfs_inode_return_delegation(inode);
2648 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 2648 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
2649 ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 2649 ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
2650 if (ret == 0) 2650 nfs_zap_caches(inode);
2651 nfs4_write_cached_acl(inode, buf, buflen);
2652 return ret; 2651 return ret;
2653} 2652}
2654 2653
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f02d522fd788..b8c28f2380a5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4546,16 +4546,13 @@ nfs4_stat_to_errno(int stat)
4546 return stat; 4546 return stat;
4547} 4547}
4548 4548
4549#ifndef MAX
4550# define MAX(a, b) (((a) > (b))? (a) : (b))
4551#endif
4552
4553#define PROC(proc, argtype, restype) \ 4549#define PROC(proc, argtype, restype) \
4554[NFSPROC4_CLNT_##proc] = { \ 4550[NFSPROC4_CLNT_##proc] = { \
4555 .p_proc = NFSPROC4_COMPOUND, \ 4551 .p_proc = NFSPROC4_COMPOUND, \
4556 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 4552 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
4557 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 4553 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
4558 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ 4554 .p_arglen = NFS4_##argtype##_sz, \
4555 .p_replen = NFS4_##restype##_sz, \
4559 .p_statidx = NFSPROC4_CLNT_##proc, \ 4556 .p_statidx = NFSPROC4_CLNT_##proc, \
4560 .p_name = #proc, \ 4557 .p_name = #proc, \
4561 } 4558 }
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 75f819dc0255..49d1008ce1d7 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n", 428 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
429 program, version, NIPQUAD(servaddr)); 429 program, version, NIPQUAD(servaddr));
430 set_sockaddr(&sin, servaddr, 0); 430 set_sockaddr(&sin, servaddr, 0);
431 return rpc_getport_external(&sin, program, version, proto); 431 return rpcb_getport_external(&sin, program, version, proto);
432} 432}
433 433
434 434
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ca4b1d4ff42b..388950118f59 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -17,7 +17,8 @@
17#include <linux/nfs_page.h> 17#include <linux/nfs_page.h>
18#include <linux/nfs_fs.h> 18#include <linux/nfs_fs.h>
19#include <linux/nfs_mount.h> 19#include <linux/nfs_mount.h>
20#include <linux/writeback.h> 20
21#include "internal.h"
21 22
22#define NFS_PARANOIA 1 23#define NFS_PARANOIA 1
23 24
@@ -50,9 +51,7 @@ nfs_page_free(struct nfs_page *p)
50 * @count: number of bytes to read/write 51 * @count: number of bytes to read/write
51 * 52 *
52 * The page must be locked by the caller. This makes sure we never 53 * The page must be locked by the caller. This makes sure we never
53 * create two different requests for the same page, and avoids 54 * create two different requests for the same page.
54 * a possible deadlock when we reach the hard limit on the number
55 * of dirty pages.
56 * User should ensure it is safe to sleep in this function. 55 * User should ensure it is safe to sleep in this function.
57 */ 56 */
58struct nfs_page * 57struct nfs_page *
@@ -63,16 +62,12 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
63 struct nfs_server *server = NFS_SERVER(inode); 62 struct nfs_server *server = NFS_SERVER(inode);
64 struct nfs_page *req; 63 struct nfs_page *req;
65 64
66 /* Deal with hard limits. */
67 for (;;) { 65 for (;;) {
68 /* try to allocate the request struct */ 66 /* try to allocate the request struct */
69 req = nfs_page_alloc(); 67 req = nfs_page_alloc();
70 if (req != NULL) 68 if (req != NULL)
71 break; 69 break;
72 70
73 /* Try to free up at least one request in order to stay
74 * below the hard limit
75 */
76 if (signalled() && (server->flags & NFS_MOUNT_INTR)) 71 if (signalled() && (server->flags & NFS_MOUNT_INTR))
77 return ERR_PTR(-ERESTARTSYS); 72 return ERR_PTR(-ERESTARTSYS);
78 yield(); 73 yield();
@@ -223,124 +218,151 @@ out:
223} 218}
224 219
225/** 220/**
226 * nfs_coalesce_requests - Split coalesced requests out from a list. 221 * nfs_pageio_init - initialise a page io descriptor
227 * @head: source list 222 * @desc: pointer to descriptor
228 * @dst: destination list 223 * @inode: pointer to inode
229 * @nmax: maximum number of requests to coalesce 224 * @doio: pointer to io function
230 * 225 * @bsize: io block size
231 * Moves a maximum of 'nmax' elements from one list to another. 226 * @io_flags: extra parameters for the io function
232 * The elements are checked to ensure that they form a contiguous set
233 * of pages, and that the RPC credentials are the same.
234 */ 227 */
235int 228void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
236nfs_coalesce_requests(struct list_head *head, struct list_head *dst, 229 struct inode *inode,
237 unsigned int nmax) 230 int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
231 size_t bsize,
232 int io_flags)
238{ 233{
239 struct nfs_page *req = NULL; 234 INIT_LIST_HEAD(&desc->pg_list);
240 unsigned int npages = 0; 235 desc->pg_bytes_written = 0;
241 236 desc->pg_count = 0;
242 while (!list_empty(head)) { 237 desc->pg_bsize = bsize;
243 struct nfs_page *prev = req; 238 desc->pg_base = 0;
244 239 desc->pg_inode = inode;
245 req = nfs_list_entry(head->next); 240 desc->pg_doio = doio;
246 if (prev) { 241 desc->pg_ioflags = io_flags;
247 if (req->wb_context->cred != prev->wb_context->cred) 242 desc->pg_error = 0;
248 break;
249 if (req->wb_context->lockowner != prev->wb_context->lockowner)
250 break;
251 if (req->wb_context->state != prev->wb_context->state)
252 break;
253 if (req->wb_index != (prev->wb_index + 1))
254 break;
255
256 if (req->wb_pgbase != 0)
257 break;
258 }
259 nfs_list_remove_request(req);
260 nfs_list_add_request(req, dst);
261 npages++;
262 if (req->wb_pgbase + req->wb_bytes != PAGE_CACHE_SIZE)
263 break;
264 if (npages >= nmax)
265 break;
266 }
267 return npages;
268} 243}
269 244
270#define NFS_SCAN_MAXENTRIES 16
271/** 245/**
272 * nfs_scan_dirty - Scan the radix tree for dirty requests 246 * nfs_can_coalesce_requests - test two requests for compatibility
273 * @mapping: pointer to address space 247 * @prev: pointer to nfs_page
274 * @wbc: writeback_control structure 248 * @req: pointer to nfs_page
275 * @dst: Destination list
276 * 249 *
277 * Moves elements from one of the inode request lists. 250 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
278 * If the number of requests is set to 0, the entire address_space 251 * page data area they describe is contiguous, and that their RPC
279 * starting at index idx_start, is scanned. 252 * credentials, NFSv4 open state, and lockowners are the same.
280 * The requests are *not* checked to ensure that they form a contiguous set. 253 *
281 * You must be holding the inode's req_lock when calling this function 254 * Return 'true' if this is the case, else return 'false'.
282 */ 255 */
283long nfs_scan_dirty(struct address_space *mapping, 256static int nfs_can_coalesce_requests(struct nfs_page *prev,
284 struct writeback_control *wbc, 257 struct nfs_page *req)
285 struct list_head *dst)
286{ 258{
287 struct nfs_inode *nfsi = NFS_I(mapping->host); 259 if (req->wb_context->cred != prev->wb_context->cred)
288 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
289 struct nfs_page *req;
290 pgoff_t idx_start, idx_end;
291 long res = 0;
292 int found, i;
293
294 if (nfsi->ndirty == 0)
295 return 0; 260 return 0;
296 if (wbc->range_cyclic) { 261 if (req->wb_context->lockowner != prev->wb_context->lockowner)
297 idx_start = 0; 262 return 0;
298 idx_end = ULONG_MAX; 263 if (req->wb_context->state != prev->wb_context->state)
299 } else if (wbc->range_end == 0) { 264 return 0;
300 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; 265 if (req->wb_index != (prev->wb_index + 1))
301 idx_end = ULONG_MAX; 266 return 0;
302 } else { 267 if (req->wb_pgbase != 0)
303 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; 268 return 0;
304 idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; 269 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
305 } 270 return 0;
271 return 1;
272}
306 273
307 for (;;) { 274/**
308 unsigned int toscan = NFS_SCAN_MAXENTRIES; 275 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
276 * @desc: destination io descriptor
277 * @req: request
278 *
279 * Returns true if the request 'req' was successfully coalesced into the
280 * existing list of pages 'desc'.
281 */
282static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
283 struct nfs_page *req)
284{
285 size_t newlen = req->wb_bytes;
309 286
310 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, 287 if (desc->pg_count != 0) {
311 (void **)&pgvec[0], idx_start, toscan, 288 struct nfs_page *prev;
312 NFS_PAGE_TAG_DIRTY);
313 289
314 /* Did we make progress? */ 290 /*
315 if (found <= 0) 291 * FIXME: ideally we should be able to coalesce all requests
316 break; 292 * that are not block boundary aligned, but currently this
293 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
294 * since nfs_flush_multi and nfs_pagein_multi assume you
295 * can have only one struct nfs_page.
296 */
297 if (desc->pg_bsize < PAGE_SIZE)
298 return 0;
299 newlen += desc->pg_count;
300 if (newlen > desc->pg_bsize)
301 return 0;
302 prev = nfs_list_entry(desc->pg_list.prev);
303 if (!nfs_can_coalesce_requests(prev, req))
304 return 0;
305 } else
306 desc->pg_base = req->wb_pgbase;
307 nfs_list_remove_request(req);
308 nfs_list_add_request(req, &desc->pg_list);
309 desc->pg_count = newlen;
310 return 1;
311}
317 312
318 for (i = 0; i < found; i++) { 313/*
319 req = pgvec[i]; 314 * Helper for nfs_pageio_add_request and nfs_pageio_complete
320 if (!wbc->range_cyclic && req->wb_index > idx_end) 315 */
321 goto out; 316static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
317{
318 if (!list_empty(&desc->pg_list)) {
319 int error = desc->pg_doio(desc->pg_inode,
320 &desc->pg_list,
321 nfs_page_array_len(desc->pg_base,
322 desc->pg_count),
323 desc->pg_count,
324 desc->pg_ioflags);
325 if (error < 0)
326 desc->pg_error = error;
327 else
328 desc->pg_bytes_written += desc->pg_count;
329 }
330 if (list_empty(&desc->pg_list)) {
331 desc->pg_count = 0;
332 desc->pg_base = 0;
333 }
334}
322 335
323 /* Try to lock request and mark it for writeback */ 336/**
324 if (!nfs_set_page_writeback_locked(req)) 337 * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
325 goto next; 338 * @desc: destination io descriptor
326 radix_tree_tag_clear(&nfsi->nfs_page_tree, 339 * @req: request
327 req->wb_index, NFS_PAGE_TAG_DIRTY); 340 *
328 nfsi->ndirty--; 341 * Returns true if the request 'req' was successfully coalesced into the
329 nfs_list_remove_request(req); 342 * existing list of pages 'desc'.
330 nfs_list_add_request(req, dst); 343 */
331 res++; 344int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
332 if (res == LONG_MAX) 345 struct nfs_page *req)
333 goto out; 346{
334next: 347 while (!nfs_pageio_do_add_request(desc, req)) {
335 idx_start = req->wb_index + 1; 348 nfs_pageio_doio(desc);
336 } 349 if (desc->pg_error < 0)
350 return 0;
337 } 351 }
338out: 352 return 1;
339 WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty));
340 return res;
341} 353}
342 354
343/** 355/**
356 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
357 * @desc: pointer to io descriptor
358 */
359void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
360{
361 nfs_pageio_doio(desc);
362}
363
364#define NFS_SCAN_MAXENTRIES 16
365/**
344 * nfs_scan_list - Scan a list for matching requests 366 * nfs_scan_list - Scan a list for matching requests
345 * @nfsi: NFS inode 367 * @nfsi: NFS inode
346 * @head: One of the NFS inode request lists 368 * @head: One of the NFS inode request lists
@@ -355,12 +377,12 @@ out:
355 * You must be holding the inode's req_lock when calling this function 377 * You must be holding the inode's req_lock when calling this function
356 */ 378 */
357int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, 379int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
358 struct list_head *dst, unsigned long idx_start, 380 struct list_head *dst, pgoff_t idx_start,
359 unsigned int npages) 381 unsigned int npages)
360{ 382{
361 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; 383 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
362 struct nfs_page *req; 384 struct nfs_page *req;
363 unsigned long idx_end; 385 pgoff_t idx_end;
364 int found, i; 386 int found, i;
365 int res; 387 int res;
366 388
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6ab4d5a9edf2..9a55807b2a70 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -27,7 +27,8 @@
27 27
28#define NFSDBG_FACILITY NFSDBG_PAGECACHE 28#define NFSDBG_FACILITY NFSDBG_PAGECACHE
29 29
30static int nfs_pagein_one(struct list_head *, struct inode *); 30static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
31static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
31static const struct rpc_call_ops nfs_read_partial_ops; 32static const struct rpc_call_ops nfs_read_partial_ops;
32static const struct rpc_call_ops nfs_read_full_ops; 33static const struct rpc_call_ops nfs_read_full_ops;
33 34
@@ -36,9 +37,8 @@ static mempool_t *nfs_rdata_mempool;
36 37
37#define MIN_POOL_READ (32) 38#define MIN_POOL_READ (32)
38 39
39struct nfs_read_data *nfs_readdata_alloc(size_t len) 40struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
40{ 41{
41 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
42 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); 42 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
43 43
44 if (p) { 44 if (p) {
@@ -133,7 +133,10 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
133 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); 133 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
134 134
135 nfs_list_add_request(new, &one_request); 135 nfs_list_add_request(new, &one_request);
136 nfs_pagein_one(&one_request, inode); 136 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
137 nfs_pagein_multi(inode, &one_request, 1, len, 0);
138 else
139 nfs_pagein_one(inode, &one_request, 1, len, 0);
137 return 0; 140 return 0;
138} 141}
139 142
@@ -230,7 +233,7 @@ static void nfs_execute_read(struct nfs_read_data *data)
230 * won't see the new data until our attribute cache is updated. This is more 233 * won't see the new data until our attribute cache is updated. This is more
231 * or less conventional NFS client behavior. 234 * or less conventional NFS client behavior.
232 */ 235 */
233static int nfs_pagein_multi(struct list_head *head, struct inode *inode) 236static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
234{ 237{
235 struct nfs_page *req = nfs_list_entry(head->next); 238 struct nfs_page *req = nfs_list_entry(head->next);
236 struct page *page = req->wb_page; 239 struct page *page = req->wb_page;
@@ -242,11 +245,11 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
242 245
243 nfs_list_remove_request(req); 246 nfs_list_remove_request(req);
244 247
245 nbytes = req->wb_bytes; 248 nbytes = count;
246 do { 249 do {
247 size_t len = min(nbytes,rsize); 250 size_t len = min(nbytes,rsize);
248 251
249 data = nfs_readdata_alloc(len); 252 data = nfs_readdata_alloc(1);
250 if (!data) 253 if (!data)
251 goto out_bad; 254 goto out_bad;
252 INIT_LIST_HEAD(&data->pages); 255 INIT_LIST_HEAD(&data->pages);
@@ -258,23 +261,19 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
258 261
259 ClearPageError(page); 262 ClearPageError(page);
260 offset = 0; 263 offset = 0;
261 nbytes = req->wb_bytes; 264 nbytes = count;
262 do { 265 do {
263 data = list_entry(list.next, struct nfs_read_data, pages); 266 data = list_entry(list.next, struct nfs_read_data, pages);
264 list_del_init(&data->pages); 267 list_del_init(&data->pages);
265 268
266 data->pagevec[0] = page; 269 data->pagevec[0] = page;
267 270
268 if (nbytes > rsize) { 271 if (nbytes < rsize)
269 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 272 rsize = nbytes;
270 rsize, offset); 273 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
271 offset += rsize; 274 rsize, offset);
272 nbytes -= rsize; 275 offset += rsize;
273 } else { 276 nbytes -= rsize;
274 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
275 nbytes, offset);
276 nbytes = 0;
277 }
278 nfs_execute_read(data); 277 nfs_execute_read(data);
279 } while (nbytes != 0); 278 } while (nbytes != 0);
280 279
@@ -291,30 +290,24 @@ out_bad:
291 return -ENOMEM; 290 return -ENOMEM;
292} 291}
293 292
294static int nfs_pagein_one(struct list_head *head, struct inode *inode) 293static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
295{ 294{
296 struct nfs_page *req; 295 struct nfs_page *req;
297 struct page **pages; 296 struct page **pages;
298 struct nfs_read_data *data; 297 struct nfs_read_data *data;
299 unsigned int count;
300 298
301 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 299 data = nfs_readdata_alloc(npages);
302 return nfs_pagein_multi(head, inode);
303
304 data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
305 if (!data) 300 if (!data)
306 goto out_bad; 301 goto out_bad;
307 302
308 INIT_LIST_HEAD(&data->pages); 303 INIT_LIST_HEAD(&data->pages);
309 pages = data->pagevec; 304 pages = data->pagevec;
310 count = 0;
311 while (!list_empty(head)) { 305 while (!list_empty(head)) {
312 req = nfs_list_entry(head->next); 306 req = nfs_list_entry(head->next);
313 nfs_list_remove_request(req); 307 nfs_list_remove_request(req);
314 nfs_list_add_request(req, &data->pages); 308 nfs_list_add_request(req, &data->pages);
315 ClearPageError(req->wb_page); 309 ClearPageError(req->wb_page);
316 *pages++ = req->wb_page; 310 *pages++ = req->wb_page;
317 count += req->wb_bytes;
318 } 311 }
319 req = nfs_list_entry(data->pages.next); 312 req = nfs_list_entry(data->pages.next);
320 313
@@ -327,28 +320,6 @@ out_bad:
327 return -ENOMEM; 320 return -ENOMEM;
328} 321}
329 322
330static int
331nfs_pagein_list(struct list_head *head, int rpages)
332{
333 LIST_HEAD(one_request);
334 struct nfs_page *req;
335 int error = 0;
336 unsigned int pages = 0;
337
338 while (!list_empty(head)) {
339 pages += nfs_coalesce_requests(head, &one_request, rpages);
340 req = nfs_list_entry(one_request.next);
341 error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
342 if (error < 0)
343 break;
344 }
345 if (error >= 0)
346 return pages;
347
348 nfs_async_read_error(head);
349 return error;
350}
351
352/* 323/*
353 * This is the callback from RPC telling us whether a reply was 324 * This is the callback from RPC telling us whether a reply was
354 * received or some error occurred (timeout or socket shutdown). 325 * received or some error occurred (timeout or socket shutdown).
@@ -538,7 +509,7 @@ out_error:
538} 509}
539 510
540struct nfs_readdesc { 511struct nfs_readdesc {
541 struct list_head *head; 512 struct nfs_pageio_descriptor *pgio;
542 struct nfs_open_context *ctx; 513 struct nfs_open_context *ctx;
543}; 514};
544 515
@@ -562,19 +533,21 @@ readpage_async_filler(void *data, struct page *page)
562 } 533 }
563 if (len < PAGE_CACHE_SIZE) 534 if (len < PAGE_CACHE_SIZE)
564 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); 535 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
565 nfs_list_add_request(new, desc->head); 536 nfs_pageio_add_request(desc->pgio, new);
566 return 0; 537 return 0;
567} 538}
568 539
569int nfs_readpages(struct file *filp, struct address_space *mapping, 540int nfs_readpages(struct file *filp, struct address_space *mapping,
570 struct list_head *pages, unsigned nr_pages) 541 struct list_head *pages, unsigned nr_pages)
571{ 542{
572 LIST_HEAD(head); 543 struct nfs_pageio_descriptor pgio;
573 struct nfs_readdesc desc = { 544 struct nfs_readdesc desc = {
574 .head = &head, 545 .pgio = &pgio,
575 }; 546 };
576 struct inode *inode = mapping->host; 547 struct inode *inode = mapping->host;
577 struct nfs_server *server = NFS_SERVER(inode); 548 struct nfs_server *server = NFS_SERVER(inode);
549 size_t rsize = server->rsize;
550 unsigned long npages;
578 int ret = -ESTALE; 551 int ret = -ESTALE;
579 552
580 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 553 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
@@ -593,13 +566,16 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
593 } else 566 } else
594 desc.ctx = get_nfs_open_context((struct nfs_open_context *) 567 desc.ctx = get_nfs_open_context((struct nfs_open_context *)
595 filp->private_data); 568 filp->private_data);
569 if (rsize < PAGE_CACHE_SIZE)
570 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
571 else
572 nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
573
596 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 574 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
597 if (!list_empty(&head)) { 575
598 int err = nfs_pagein_list(&head, server->rpages); 576 nfs_pageio_complete(&pgio);
599 if (!ret) 577 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
600 nfs_add_stats(inode, NFSIOS_READPAGES, err); 578 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
601 ret = err;
602 }
603 put_nfs_open_context(desc.ctx); 579 put_nfs_open_context(desc.ctx);
604out: 580out:
605 return ret; 581 return ret;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f1eae44b9a1a..ca20d3cc2609 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -204,9 +204,9 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
204 lock_kernel(); 204 lock_kernel();
205 205
206 error = server->nfs_client->rpc_ops->statfs(server, fh, &res); 206 error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
207 buf->f_type = NFS_SUPER_MAGIC;
208 if (error < 0) 207 if (error < 0)
209 goto out_err; 208 goto out_err;
209 buf->f_type = NFS_SUPER_MAGIC;
210 210
211 /* 211 /*
212 * Current versions of glibc do not correctly handle the 212 * Current versions of glibc do not correctly handle the
@@ -233,15 +233,14 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
233 buf->f_ffree = res.afiles; 233 buf->f_ffree = res.afiles;
234 234
235 buf->f_namelen = server->namelen; 235 buf->f_namelen = server->namelen;
236 out: 236
237 unlock_kernel(); 237 unlock_kernel();
238 return 0; 238 return 0;
239 239
240 out_err: 240 out_err:
241 dprintk("%s: statfs error = %d\n", __FUNCTION__, -error); 241 dprintk("%s: statfs error = %d\n", __FUNCTION__, -error);
242 buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; 242 unlock_kernel();
243 goto out; 243 return error;
244
245} 244}
246 245
247/* 246/*
@@ -291,6 +290,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
291 { NFS_MOUNT_NOAC, ",noac", "" }, 290 { NFS_MOUNT_NOAC, ",noac", "" },
292 { NFS_MOUNT_NONLM, ",nolock", "" }, 291 { NFS_MOUNT_NONLM, ",nolock", "" },
293 { NFS_MOUNT_NOACL, ",noacl", "" }, 292 { NFS_MOUNT_NOACL, ",noacl", "" },
293 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
294 { 0, NULL, NULL } 294 { 0, NULL, NULL }
295 }; 295 };
296 const struct proc_nfs_info *nfs_infop; 296 const struct proc_nfs_info *nfs_infop;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 797558941745..5d44b8bd1070 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -38,7 +38,8 @@
38static struct nfs_page * nfs_update_request(struct nfs_open_context*, 38static struct nfs_page * nfs_update_request(struct nfs_open_context*,
39 struct page *, 39 struct page *,
40 unsigned int, unsigned int); 40 unsigned int, unsigned int);
41static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); 41static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
42 struct inode *inode, int ioflags);
42static const struct rpc_call_ops nfs_write_partial_ops; 43static const struct rpc_call_ops nfs_write_partial_ops;
43static const struct rpc_call_ops nfs_write_full_ops; 44static const struct rpc_call_ops nfs_write_full_ops;
44static const struct rpc_call_ops nfs_commit_ops; 45static const struct rpc_call_ops nfs_commit_ops;
@@ -71,9 +72,8 @@ void nfs_commit_free(struct nfs_write_data *wdata)
71 call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free); 72 call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
72} 73}
73 74
74struct nfs_write_data *nfs_writedata_alloc(size_t len) 75struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
75{ 76{
76 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
77 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); 77 struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
78 78
79 if (p) { 79 if (p) {
@@ -139,7 +139,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
139{ 139{
140 struct inode *inode = page->mapping->host; 140 struct inode *inode = page->mapping->host;
141 loff_t end, i_size = i_size_read(inode); 141 loff_t end, i_size = i_size_read(inode);
142 unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; 142 pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
143 143
144 if (i_size > 0 && page->index < end_index) 144 if (i_size > 0 && page->index < end_index)
145 return; 145 return;
@@ -201,7 +201,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
201static int wb_priority(struct writeback_control *wbc) 201static int wb_priority(struct writeback_control *wbc)
202{ 202{
203 if (wbc->for_reclaim) 203 if (wbc->for_reclaim)
204 return FLUSH_HIGHPRI; 204 return FLUSH_HIGHPRI | FLUSH_STABLE;
205 if (wbc->for_kupdate) 205 if (wbc->for_kupdate)
206 return FLUSH_LOWPRI; 206 return FLUSH_LOWPRI;
207 return 0; 207 return 0;
@@ -251,7 +251,8 @@ static void nfs_end_page_writeback(struct page *page)
251 * was not tagged. 251 * was not tagged.
252 * May also return an error if the user signalled nfs_wait_on_request(). 252 * May also return an error if the user signalled nfs_wait_on_request().
253 */ 253 */
254static int nfs_page_mark_flush(struct page *page) 254static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
255 struct page *page)
255{ 256{
256 struct nfs_page *req; 257 struct nfs_page *req;
257 struct nfs_inode *nfsi = NFS_I(page->mapping->host); 258 struct nfs_inode *nfsi = NFS_I(page->mapping->host);
@@ -273,6 +274,8 @@ static int nfs_page_mark_flush(struct page *page)
273 * request as dirty (in which case we don't care). 274 * request as dirty (in which case we don't care).
274 */ 275 */
275 spin_unlock(req_lock); 276 spin_unlock(req_lock);
277 /* Prevent deadlock! */
278 nfs_pageio_complete(pgio);
276 ret = nfs_wait_on_request(req); 279 ret = nfs_wait_on_request(req);
277 nfs_release_request(req); 280 nfs_release_request(req);
278 if (ret != 0) 281 if (ret != 0)
@@ -283,21 +286,18 @@ static int nfs_page_mark_flush(struct page *page)
283 /* This request is marked for commit */ 286 /* This request is marked for commit */
284 spin_unlock(req_lock); 287 spin_unlock(req_lock);
285 nfs_unlock_request(req); 288 nfs_unlock_request(req);
289 nfs_pageio_complete(pgio);
286 return 1; 290 return 1;
287 } 291 }
288 if (nfs_set_page_writeback(page) == 0) { 292 if (nfs_set_page_writeback(page) != 0) {
289 nfs_list_remove_request(req);
290 /* add the request to the inode's dirty list. */
291 radix_tree_tag_set(&nfsi->nfs_page_tree,
292 req->wb_index, NFS_PAGE_TAG_DIRTY);
293 nfs_list_add_request(req, &nfsi->dirty);
294 nfsi->ndirty++;
295 spin_unlock(req_lock);
296 __mark_inode_dirty(page->mapping->host, I_DIRTY_PAGES);
297 } else
298 spin_unlock(req_lock); 293 spin_unlock(req_lock);
294 BUG();
295 }
296 radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
297 NFS_PAGE_TAG_WRITEBACK);
299 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); 298 ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
300 nfs_unlock_request(req); 299 spin_unlock(req_lock);
300 nfs_pageio_add_request(pgio, req);
301 return ret; 301 return ret;
302} 302}
303 303
@@ -306,6 +306,7 @@ static int nfs_page_mark_flush(struct page *page)
306 */ 306 */
307static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) 307static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
308{ 308{
309 struct nfs_pageio_descriptor mypgio, *pgio;
309 struct nfs_open_context *ctx; 310 struct nfs_open_context *ctx;
310 struct inode *inode = page->mapping->host; 311 struct inode *inode = page->mapping->host;
311 unsigned offset; 312 unsigned offset;
@@ -314,7 +315,14 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
314 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); 315 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
315 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 316 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
316 317
317 err = nfs_page_mark_flush(page); 318 if (wbc->for_writepages)
319 pgio = wbc->fs_private;
320 else {
321 nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc));
322 pgio = &mypgio;
323 }
324
325 err = nfs_page_async_flush(pgio, page);
318 if (err <= 0) 326 if (err <= 0)
319 goto out; 327 goto out;
320 err = 0; 328 err = 0;
@@ -331,12 +339,12 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
331 put_nfs_open_context(ctx); 339 put_nfs_open_context(ctx);
332 if (err != 0) 340 if (err != 0)
333 goto out; 341 goto out;
334 err = nfs_page_mark_flush(page); 342 err = nfs_page_async_flush(pgio, page);
335 if (err > 0) 343 if (err > 0)
336 err = 0; 344 err = 0;
337out: 345out:
338 if (!wbc->for_writepages) 346 if (!wbc->for_writepages)
339 nfs_flush_mapping(page->mapping, wbc, FLUSH_STABLE|wb_priority(wbc)); 347 nfs_pageio_complete(pgio);
340 return err; 348 return err;
341} 349}
342 350
@@ -352,20 +360,20 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
352int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) 360int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
353{ 361{
354 struct inode *inode = mapping->host; 362 struct inode *inode = mapping->host;
363 struct nfs_pageio_descriptor pgio;
355 int err; 364 int err;
356 365
357 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); 366 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
358 367
368 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
369 wbc->fs_private = &pgio;
359 err = generic_writepages(mapping, wbc); 370 err = generic_writepages(mapping, wbc);
371 nfs_pageio_complete(&pgio);
360 if (err) 372 if (err)
361 return err; 373 return err;
362 err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); 374 if (pgio.pg_error)
363 if (err < 0) 375 return pgio.pg_error;
364 goto out; 376 return 0;
365 nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
366 err = 0;
367out:
368 return err;
369} 377}
370 378
371/* 379/*
@@ -503,11 +511,11 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
503 * 511 *
504 * Interruptible by signals only if mounted with intr flag. 512 * Interruptible by signals only if mounted with intr flag.
505 */ 513 */
506static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages) 514static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
507{ 515{
508 struct nfs_inode *nfsi = NFS_I(inode); 516 struct nfs_inode *nfsi = NFS_I(inode);
509 struct nfs_page *req; 517 struct nfs_page *req;
510 unsigned long idx_end, next; 518 pgoff_t idx_end, next;
511 unsigned int res = 0; 519 unsigned int res = 0;
512 int error; 520 int error;
513 521
@@ -536,18 +544,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_st
536 return res; 544 return res;
537} 545}
538 546
539static void nfs_cancel_dirty_list(struct list_head *head)
540{
541 struct nfs_page *req;
542 while(!list_empty(head)) {
543 req = nfs_list_entry(head->next);
544 nfs_list_remove_request(req);
545 nfs_end_page_writeback(req->wb_page);
546 nfs_inode_remove_request(req);
547 nfs_clear_page_writeback(req);
548 }
549}
550
551static void nfs_cancel_commit_list(struct list_head *head) 547static void nfs_cancel_commit_list(struct list_head *head)
552{ 548{
553 struct nfs_page *req; 549 struct nfs_page *req;
@@ -574,7 +570,7 @@ static void nfs_cancel_commit_list(struct list_head *head)
574 * The requests are *not* checked to ensure that they form a contiguous set. 570 * The requests are *not* checked to ensure that they form a contiguous set.
575 */ 571 */
576static int 572static int
577nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) 573nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
578{ 574{
579 struct nfs_inode *nfsi = NFS_I(inode); 575 struct nfs_inode *nfsi = NFS_I(inode);
580 int res = 0; 576 int res = 0;
@@ -588,40 +584,12 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
588 return res; 584 return res;
589} 585}
590#else 586#else
591static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) 587static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
592{ 588{
593 return 0; 589 return 0;
594} 590}
595#endif 591#endif
596 592
597static int nfs_wait_on_write_congestion(struct address_space *mapping)
598{
599 struct inode *inode = mapping->host;
600 struct backing_dev_info *bdi = mapping->backing_dev_info;
601 int ret = 0;
602
603 might_sleep();
604
605 if (!bdi_write_congested(bdi))
606 return 0;
607
608 nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT);
609
610 do {
611 struct rpc_clnt *clnt = NFS_CLIENT(inode);
612 sigset_t oldset;
613
614 rpc_clnt_sigmask(clnt, &oldset);
615 ret = congestion_wait_interruptible(WRITE, HZ/10);
616 rpc_clnt_sigunmask(clnt, &oldset);
617 if (ret == -ERESTARTSYS)
618 break;
619 ret = 0;
620 } while (bdi_write_congested(bdi));
621
622 return ret;
623}
624
625/* 593/*
626 * Try to update any existing write request, or create one if there is none. 594 * Try to update any existing write request, or create one if there is none.
627 * In order to match, the request's credentials must match those of 595 * In order to match, the request's credentials must match those of
@@ -636,12 +604,10 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
636 struct inode *inode = mapping->host; 604 struct inode *inode = mapping->host;
637 struct nfs_inode *nfsi = NFS_I(inode); 605 struct nfs_inode *nfsi = NFS_I(inode);
638 struct nfs_page *req, *new = NULL; 606 struct nfs_page *req, *new = NULL;
639 unsigned long rqend, end; 607 pgoff_t rqend, end;
640 608
641 end = offset + bytes; 609 end = offset + bytes;
642 610
643 if (nfs_wait_on_write_congestion(mapping))
644 return ERR_PTR(-ERESTARTSYS);
645 for (;;) { 611 for (;;) {
646 /* Loop over all inode entries and see if we find 612 /* Loop over all inode entries and see if we find
647 * A request for the page we wish to update 613 * A request for the page we wish to update
@@ -865,7 +831,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
865 * Generate multiple small requests to write out a single 831 * Generate multiple small requests to write out a single
866 * contiguous dirty area on one page. 832 * contiguous dirty area on one page.
867 */ 833 */
868static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) 834static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
869{ 835{
870 struct nfs_page *req = nfs_list_entry(head->next); 836 struct nfs_page *req = nfs_list_entry(head->next);
871 struct page *page = req->wb_page; 837 struct page *page = req->wb_page;
@@ -877,11 +843,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
877 843
878 nfs_list_remove_request(req); 844 nfs_list_remove_request(req);
879 845
880 nbytes = req->wb_bytes; 846 nbytes = count;
881 do { 847 do {
882 size_t len = min(nbytes, wsize); 848 size_t len = min(nbytes, wsize);
883 849
884 data = nfs_writedata_alloc(len); 850 data = nfs_writedata_alloc(1);
885 if (!data) 851 if (!data)
886 goto out_bad; 852 goto out_bad;
887 list_add(&data->pages, &list); 853 list_add(&data->pages, &list);
@@ -892,23 +858,19 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
892 858
893 ClearPageError(page); 859 ClearPageError(page);
894 offset = 0; 860 offset = 0;
895 nbytes = req->wb_bytes; 861 nbytes = count;
896 do { 862 do {
897 data = list_entry(list.next, struct nfs_write_data, pages); 863 data = list_entry(list.next, struct nfs_write_data, pages);
898 list_del_init(&data->pages); 864 list_del_init(&data->pages);
899 865
900 data->pagevec[0] = page; 866 data->pagevec[0] = page;
901 867
902 if (nbytes > wsize) { 868 if (nbytes < wsize)
903 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 869 wsize = nbytes;
904 wsize, offset, how); 870 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
905 offset += wsize; 871 wsize, offset, how);
906 nbytes -= wsize; 872 offset += wsize;
907 } else { 873 nbytes -= wsize;
908 nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
909 nbytes, offset, how);
910 nbytes = 0;
911 }
912 nfs_execute_write(data); 874 nfs_execute_write(data);
913 } while (nbytes != 0); 875 } while (nbytes != 0);
914 876
@@ -934,26 +896,23 @@ out_bad:
934 * This is the case if nfs_updatepage detects a conflicting request 896 * This is the case if nfs_updatepage detects a conflicting request
935 * that has been written but not committed. 897 * that has been written but not committed.
936 */ 898 */
937static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) 899static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
938{ 900{
939 struct nfs_page *req; 901 struct nfs_page *req;
940 struct page **pages; 902 struct page **pages;
941 struct nfs_write_data *data; 903 struct nfs_write_data *data;
942 unsigned int count;
943 904
944 data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize); 905 data = nfs_writedata_alloc(npages);
945 if (!data) 906 if (!data)
946 goto out_bad; 907 goto out_bad;
947 908
948 pages = data->pagevec; 909 pages = data->pagevec;
949 count = 0;
950 while (!list_empty(head)) { 910 while (!list_empty(head)) {
951 req = nfs_list_entry(head->next); 911 req = nfs_list_entry(head->next);
952 nfs_list_remove_request(req); 912 nfs_list_remove_request(req);
953 nfs_list_add_request(req, &data->pages); 913 nfs_list_add_request(req, &data->pages);
954 ClearPageError(req->wb_page); 914 ClearPageError(req->wb_page);
955 *pages++ = req->wb_page; 915 *pages++ = req->wb_page;
956 count += req->wb_bytes;
957 } 916 }
958 req = nfs_list_entry(data->pages.next); 917 req = nfs_list_entry(data->pages.next);
959 918
@@ -973,40 +932,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
973 return -ENOMEM; 932 return -ENOMEM;
974} 933}
975 934
976static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how) 935static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
936 struct inode *inode, int ioflags)
977{ 937{
978 LIST_HEAD(one_request);
979 int (*flush_one)(struct inode *, struct list_head *, int);
980 struct nfs_page *req;
981 int wpages = NFS_SERVER(inode)->wpages;
982 int wsize = NFS_SERVER(inode)->wsize; 938 int wsize = NFS_SERVER(inode)->wsize;
983 int error;
984 939
985 flush_one = nfs_flush_one;
986 if (wsize < PAGE_CACHE_SIZE) 940 if (wsize < PAGE_CACHE_SIZE)
987 flush_one = nfs_flush_multi; 941 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
988 /* For single writes, FLUSH_STABLE is more efficient */ 942 else
989 if (npages <= wpages && npages == NFS_I(inode)->npages 943 nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
990 && nfs_list_entry(head->next)->wb_bytes <= wsize)
991 how |= FLUSH_STABLE;
992
993 do {
994 nfs_coalesce_requests(head, &one_request, wpages);
995 req = nfs_list_entry(one_request.next);
996 error = flush_one(inode, &one_request, how);
997 if (error < 0)
998 goto out_err;
999 } while (!list_empty(head));
1000 return 0;
1001out_err:
1002 while (!list_empty(head)) {
1003 req = nfs_list_entry(head->next);
1004 nfs_list_remove_request(req);
1005 nfs_redirty_request(req);
1006 nfs_end_page_writeback(req->wb_page);
1007 nfs_clear_page_writeback(req);
1008 }
1009 return error;
1010} 944}
1011 945
1012/* 946/*
@@ -1330,31 +1264,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
1330 .rpc_call_done = nfs_commit_done, 1264 .rpc_call_done = nfs_commit_done,
1331 .rpc_release = nfs_commit_release, 1265 .rpc_release = nfs_commit_release,
1332}; 1266};
1333#else
1334static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1335{
1336 return 0;
1337}
1338#endif
1339
1340static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
1341{
1342 struct nfs_inode *nfsi = NFS_I(mapping->host);
1343 LIST_HEAD(head);
1344 long res;
1345
1346 spin_lock(&nfsi->req_lock);
1347 res = nfs_scan_dirty(mapping, wbc, &head);
1348 spin_unlock(&nfsi->req_lock);
1349 if (res) {
1350 int error = nfs_flush_list(mapping->host, &head, res, how);
1351 if (error < 0)
1352 return error;
1353 }
1354 return res;
1355}
1356 1267
1357#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1358int nfs_commit_inode(struct inode *inode, int how) 1268int nfs_commit_inode(struct inode *inode, int how)
1359{ 1269{
1360 struct nfs_inode *nfsi = NFS_I(inode); 1270 struct nfs_inode *nfsi = NFS_I(inode);
@@ -1371,13 +1281,18 @@ int nfs_commit_inode(struct inode *inode, int how)
1371 } 1281 }
1372 return res; 1282 return res;
1373} 1283}
1284#else
1285static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1286{
1287 return 0;
1288}
1374#endif 1289#endif
1375 1290
1376long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) 1291long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
1377{ 1292{
1378 struct inode *inode = mapping->host; 1293 struct inode *inode = mapping->host;
1379 struct nfs_inode *nfsi = NFS_I(inode); 1294 struct nfs_inode *nfsi = NFS_I(inode);
1380 unsigned long idx_start, idx_end; 1295 pgoff_t idx_start, idx_end;
1381 unsigned int npages = 0; 1296 unsigned int npages = 0;
1382 LIST_HEAD(head); 1297 LIST_HEAD(head);
1383 int nocommit = how & FLUSH_NOCOMMIT; 1298 int nocommit = how & FLUSH_NOCOMMIT;
@@ -1390,41 +1305,24 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
1390 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; 1305 idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
1391 idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; 1306 idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
1392 if (idx_end > idx_start) { 1307 if (idx_end > idx_start) {
1393 unsigned long l_npages = 1 + idx_end - idx_start; 1308 pgoff_t l_npages = 1 + idx_end - idx_start;
1394 npages = l_npages; 1309 npages = l_npages;
1395 if (sizeof(npages) != sizeof(l_npages) && 1310 if (sizeof(npages) != sizeof(l_npages) &&
1396 (unsigned long)npages != l_npages) 1311 (pgoff_t)npages != l_npages)
1397 npages = 0; 1312 npages = 0;
1398 } 1313 }
1399 } 1314 }
1400 how &= ~FLUSH_NOCOMMIT; 1315 how &= ~FLUSH_NOCOMMIT;
1401 spin_lock(&nfsi->req_lock); 1316 spin_lock(&nfsi->req_lock);
1402 do { 1317 do {
1403 wbc->pages_skipped = 0;
1404 ret = nfs_wait_on_requests_locked(inode, idx_start, npages); 1318 ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
1405 if (ret != 0) 1319 if (ret != 0)
1406 continue; 1320 continue;
1407 pages = nfs_scan_dirty(mapping, wbc, &head);
1408 if (pages != 0) {
1409 spin_unlock(&nfsi->req_lock);
1410 if (how & FLUSH_INVALIDATE) {
1411 nfs_cancel_dirty_list(&head);
1412 ret = pages;
1413 } else
1414 ret = nfs_flush_list(inode, &head, pages, how);
1415 spin_lock(&nfsi->req_lock);
1416 continue;
1417 }
1418 if (wbc->pages_skipped != 0)
1419 continue;
1420 if (nocommit) 1321 if (nocommit)
1421 break; 1322 break;
1422 pages = nfs_scan_commit(inode, &head, idx_start, npages); 1323 pages = nfs_scan_commit(inode, &head, idx_start, npages);
1423 if (pages == 0) { 1324 if (pages == 0)
1424 if (wbc->pages_skipped != 0)
1425 continue;
1426 break; 1325 break;
1427 }
1428 if (how & FLUSH_INVALIDATE) { 1326 if (how & FLUSH_INVALIDATE) {
1429 spin_unlock(&nfsi->req_lock); 1327 spin_unlock(&nfsi->req_lock);
1430 nfs_cancel_commit_list(&head); 1328 nfs_cancel_commit_list(&head);
@@ -1456,7 +1354,7 @@ int nfs_wb_all(struct inode *inode)
1456 }; 1354 };
1457 int ret; 1355 int ret;
1458 1356
1459 ret = generic_writepages(mapping, &wbc); 1357 ret = nfs_writepages(mapping, &wbc);
1460 if (ret < 0) 1358 if (ret < 0)
1461 goto out; 1359 goto out;
1462 ret = nfs_sync_mapping_wait(mapping, &wbc, 0); 1360 ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
@@ -1479,11 +1377,9 @@ int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, lo
1479 }; 1377 };
1480 int ret; 1378 int ret;
1481 1379
1482 if (!(how & FLUSH_NOWRITEPAGE)) { 1380 ret = nfs_writepages(mapping, &wbc);
1483 ret = generic_writepages(mapping, &wbc); 1381 if (ret < 0)
1484 if (ret < 0) 1382 goto out;
1485 goto out;
1486 }
1487 ret = nfs_sync_mapping_wait(mapping, &wbc, how); 1383 ret = nfs_sync_mapping_wait(mapping, &wbc, how);
1488 if (ret >= 0) 1384 if (ret >= 0)
1489 return 0; 1385 return 0;
@@ -1506,7 +1402,7 @@ int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
1506 int ret; 1402 int ret;
1507 1403
1508 BUG_ON(!PageLocked(page)); 1404 BUG_ON(!PageLocked(page));
1509 if (!(how & FLUSH_NOWRITEPAGE) && clear_page_dirty_for_io(page)) { 1405 if (clear_page_dirty_for_io(page)) {
1510 ret = nfs_writepage_locked(page, &wbc); 1406 ret = nfs_writepage_locked(page, &wbc);
1511 if (ret < 0) 1407 if (ret < 0)
1512 goto out; 1408 goto out;
@@ -1531,10 +1427,18 @@ int nfs_wb_page(struct inode *inode, struct page* page)
1531 1427
1532int nfs_set_page_dirty(struct page *page) 1428int nfs_set_page_dirty(struct page *page)
1533{ 1429{
1534 spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; 1430 struct address_space *mapping = page->mapping;
1431 struct inode *inode;
1432 spinlock_t *req_lock;
1535 struct nfs_page *req; 1433 struct nfs_page *req;
1536 int ret; 1434 int ret;
1537 1435
1436 if (!mapping)
1437 goto out_raced;
1438 inode = mapping->host;
1439 if (!inode)
1440 goto out_raced;
1441 req_lock = &NFS_I(inode)->req_lock;
1538 spin_lock(req_lock); 1442 spin_lock(req_lock);
1539 req = nfs_page_find_request_locked(page); 1443 req = nfs_page_find_request_locked(page);
1540 if (req != NULL) { 1444 if (req != NULL) {
@@ -1547,6 +1451,8 @@ int nfs_set_page_dirty(struct page *page)
1547 ret = __set_page_dirty_nobuffers(page); 1451 ret = __set_page_dirty_nobuffers(page);
1548 spin_unlock(req_lock); 1452 spin_unlock(req_lock);
1549 return ret; 1453 return ret;
1454out_raced:
1455 return !TestSetPageDirty(page);
1550} 1456}
1551 1457
1552 1458
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index fb14d68eacab..32ffea033c7a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -315,16 +315,13 @@ out:
315/* 315/*
316 * RPC procedure tables 316 * RPC procedure tables
317 */ 317 */
318#ifndef MAX
319# define MAX(a, b) (((a) > (b))? (a) : (b))
320#endif
321
322#define PROC(proc, call, argtype, restype) \ 318#define PROC(proc, call, argtype, restype) \
323[NFSPROC4_CLNT_##proc] = { \ 319[NFSPROC4_CLNT_##proc] = { \
324 .p_proc = NFSPROC4_CB_##call, \ 320 .p_proc = NFSPROC4_CB_##call, \
325 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ 321 .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
326 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ 322 .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
327 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ 323 .p_arglen = NFS4_##argtype##_sz, \
324 .p_replen = NFS4_##restype##_sz, \
328 .p_statidx = NFSPROC4_CB_##call, \ 325 .p_statidx = NFSPROC4_CB_##call, \
329 .p_name = #proc, \ 326 .p_name = #proc, \
330} 327}
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ac25b5649c59..f6a81e0b1b93 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -88,7 +88,7 @@ struct nlm_wait;
88/* 88/*
89 * Memory chunk for NLM client RPC request. 89 * Memory chunk for NLM client RPC request.
90 */ 90 */
91#define NLMCLNT_OHSIZE (sizeof(utsname()->nodename)+10) 91#define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u)
92struct nlm_rqst { 92struct nlm_rqst {
93 unsigned int a_flags; /* initial RPC task flags */ 93 unsigned int a_flags; /* initial RPC task flags */
94 struct nlm_host * a_host; /* host handle */ 94 struct nlm_host * a_host; /* host handle */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index e9ae0c6e2c62..0543439a97af 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -455,7 +455,7 @@ nfs_have_writebacks(struct inode *inode)
455/* 455/*
456 * Allocate nfs_write_data structures 456 * Allocate nfs_write_data structures
457 */ 457 */
458extern struct nfs_write_data *nfs_writedata_alloc(size_t len); 458extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
459 459
460/* 460/*
461 * linux/fs/nfs/read.c 461 * linux/fs/nfs/read.c
@@ -469,7 +469,7 @@ extern void nfs_readdata_release(void *data);
469/* 469/*
470 * Allocate nfs_read_data structures 470 * Allocate nfs_read_data structures
471 */ 471 */
472extern struct nfs_read_data *nfs_readdata_alloc(size_t len); 472extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
473 473
474/* 474/*
475 * linux/fs/nfs3proc.c 475 * linux/fs/nfs3proc.c
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 659c75438454..cc8b9c59acb8 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -61,6 +61,7 @@ struct nfs_mount_data {
61#define NFS_MOUNT_NOACL 0x0800 /* 4 */ 61#define NFS_MOUNT_NOACL 0x0800 /* 4 */
62#define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ 62#define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */
63#define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ 63#define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */
64#define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
64#define NFS_MOUNT_FLAGMASK 0xFFFF 65#define NFS_MOUNT_FLAGMASK 0xFFFF
65 66
66#endif 67#endif
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 16b0266b14fd..41afab6b5f09 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -21,8 +21,7 @@
21/* 21/*
22 * Valid flags for the radix tree 22 * Valid flags for the radix tree
23 */ 23 */
24#define NFS_PAGE_TAG_DIRTY 0 24#define NFS_PAGE_TAG_WRITEBACK 0
25#define NFS_PAGE_TAG_WRITEBACK 1
26 25
27/* 26/*
28 * Valid flags for a dirty buffer 27 * Valid flags for a dirty buffer
@@ -39,7 +38,7 @@ struct nfs_page {
39 struct page *wb_page; /* page to read in/write out */ 38 struct page *wb_page; /* page to read in/write out */
40 struct nfs_open_context *wb_context; /* File state context info */ 39 struct nfs_open_context *wb_context; /* File state context info */
41 atomic_t wb_complete; /* i/os we're waiting for */ 40 atomic_t wb_complete; /* i/os we're waiting for */
42 unsigned long wb_index; /* Offset >> PAGE_CACHE_SHIFT */ 41 pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */
43 unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ 42 unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */
44 wb_pgbase, /* Start of page data */ 43 wb_pgbase, /* Start of page data */
45 wb_bytes; /* Length of request */ 44 wb_bytes; /* Length of request */
@@ -48,6 +47,19 @@ struct nfs_page {
48 struct nfs_writeverf wb_verf; /* Commit cookie */ 47 struct nfs_writeverf wb_verf; /* Commit cookie */
49}; 48};
50 49
50struct nfs_pageio_descriptor {
51 struct list_head pg_list;
52 unsigned long pg_bytes_written;
53 size_t pg_count;
54 size_t pg_bsize;
55 unsigned int pg_base;
56
57 struct inode *pg_inode;
58 int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int);
59 int pg_ioflags;
60 int pg_error;
61};
62
51#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) 63#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
52 64
53extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, 65extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
@@ -59,13 +71,16 @@ extern void nfs_clear_request(struct nfs_page *req);
59extern void nfs_release_request(struct nfs_page *req); 71extern void nfs_release_request(struct nfs_page *req);
60 72
61 73
62extern long nfs_scan_dirty(struct address_space *mapping,
63 struct writeback_control *wbc,
64 struct list_head *dst);
65extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, 74extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
66 unsigned long idx_start, unsigned int npages); 75 pgoff_t idx_start, unsigned int npages);
67extern int nfs_coalesce_requests(struct list_head *, struct list_head *, 76extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
68 unsigned int); 77 struct inode *inode,
78 int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
79 size_t bsize,
80 int how);
81extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
82 struct nfs_page *);
83extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
69extern int nfs_wait_on_request(struct nfs_page *); 84extern int nfs_wait_on_request(struct nfs_page *);
70extern void nfs_unlock_request(struct nfs_page *req); 85extern void nfs_unlock_request(struct nfs_page *req);
71extern int nfs_set_page_writeback_locked(struct nfs_page *req); 86extern int nfs_set_page_writeback_locked(struct nfs_page *req);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c7a78eef2b4f..66611423c8ee 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -84,7 +84,8 @@ struct rpc_procinfo {
84 u32 p_proc; /* RPC procedure number */ 84 u32 p_proc; /* RPC procedure number */
85 kxdrproc_t p_encode; /* XDR encode function */ 85 kxdrproc_t p_encode; /* XDR encode function */
86 kxdrproc_t p_decode; /* XDR decode function */ 86 kxdrproc_t p_decode; /* XDR decode function */
87 unsigned int p_bufsiz; /* req. buffer size */ 87 unsigned int p_arglen; /* argument hdr length (u32) */
88 unsigned int p_replen; /* reply hdr length (u32) */
88 unsigned int p_count; /* call count */ 89 unsigned int p_count; /* call count */
89 unsigned int p_timer; /* Which RTT timer to use */ 90 unsigned int p_timer; /* Which RTT timer to use */
90 u32 p_statidx; /* Which procedure to account */ 91 u32 p_statidx; /* Which procedure to account */
@@ -121,8 +122,8 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
121int rpc_shutdown_client(struct rpc_clnt *); 122int rpc_shutdown_client(struct rpc_clnt *);
122int rpc_destroy_client(struct rpc_clnt *); 123int rpc_destroy_client(struct rpc_clnt *);
123void rpc_release_client(struct rpc_clnt *); 124void rpc_release_client(struct rpc_clnt *);
124void rpc_getport(struct rpc_task *); 125int rpcb_register(u32, u32, int, unsigned short, int *);
125int rpc_register(u32, u32, int, unsigned short, int *); 126void rpcb_getport(struct rpc_task *);
126 127
127void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); 128void rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
128 129
@@ -144,7 +145,7 @@ char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
144/* 145/*
145 * Helper function for NFSroot support 146 * Helper function for NFSroot support
146 */ 147 */
147int rpc_getport_external(struct sockaddr_in *, __u32, __u32, int); 148int rpcb_getport_external(struct sockaddr_in *, __u32, __u32, int);
148 149
149#endif /* __KERNEL__ */ 150#endif /* __KERNEL__ */
150#endif /* _LINUX_SUNRPC_CLNT_H */ 151#endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index b7c7307ceec6..3912cf16361e 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -17,7 +17,7 @@
17#define RPCDBG_DEBUG 0x0004 17#define RPCDBG_DEBUG 0x0004
18#define RPCDBG_NFS 0x0008 18#define RPCDBG_NFS 0x0008
19#define RPCDBG_AUTH 0x0010 19#define RPCDBG_AUTH 0x0010
20#define RPCDBG_PMAP 0x0020 20#define RPCDBG_BIND 0x0020
21#define RPCDBG_SCHED 0x0040 21#define RPCDBG_SCHED 0x0040
22#define RPCDBG_TRANS 0x0080 22#define RPCDBG_TRANS 0x0080
23#define RPCDBG_SVCSOCK 0x0100 23#define RPCDBG_SVCSOCK 0x0100
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index 606cb2165232..784d4c3ef651 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -78,10 +78,6 @@ enum rpc_auth_stat {
78 RPCSEC_GSS_CTXPROBLEM = 14 78 RPCSEC_GSS_CTXPROBLEM = 14
79}; 79};
80 80
81#define RPC_PMAP_PROGRAM 100000
82#define RPC_PMAP_VERSION 2
83#define RPC_PMAP_PORT 111
84
85#define RPC_MAXNETNAMELEN 256 81#define RPC_MAXNETNAMELEN 256
86 82
87/* 83/*
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 3069ecca0129..2047fb202a13 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -264,7 +264,7 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
264void rpc_wake_up_status(struct rpc_wait_queue *, int); 264void rpc_wake_up_status(struct rpc_wait_queue *, int);
265void rpc_delay(struct rpc_task *, unsigned long); 265void rpc_delay(struct rpc_task *, unsigned long);
266void * rpc_malloc(struct rpc_task *, size_t); 266void * rpc_malloc(struct rpc_task *, size_t);
267void rpc_free(struct rpc_task *); 267void rpc_free(void *);
268int rpciod_up(void); 268int rpciod_up(void);
269void rpciod_down(void); 269void rpciod_down(void);
270int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); 270int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index f780e72fc417..fa89ce6ce076 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -84,7 +84,9 @@ struct rpc_rqst {
84 struct list_head rq_list; 84 struct list_head rq_list;
85 85
86 __u32 * rq_buffer; /* XDR encode buffer */ 86 __u32 * rq_buffer; /* XDR encode buffer */
87 size_t rq_bufsize; 87 size_t rq_bufsize,
88 rq_callsize,
89 rq_rcvsize;
88 90
89 struct xdr_buf rq_private_buf; /* The receive buffer 91 struct xdr_buf rq_private_buf; /* The receive buffer
90 * used in the softirq. 92 * used in the softirq.
@@ -112,7 +114,7 @@ struct rpc_xprt_ops {
112 void (*set_port)(struct rpc_xprt *xprt, unsigned short port); 114 void (*set_port)(struct rpc_xprt *xprt, unsigned short port);
113 void (*connect)(struct rpc_task *task); 115 void (*connect)(struct rpc_task *task);
114 void * (*buf_alloc)(struct rpc_task *task, size_t size); 116 void * (*buf_alloc)(struct rpc_task *task, size_t size);
115 void (*buf_free)(struct rpc_task *task); 117 void (*buf_free)(void *buffer);
116 int (*send_request)(struct rpc_task *task); 118 int (*send_request)(struct rpc_task *task);
117 void (*set_retrans_timeout)(struct rpc_task *task); 119 void (*set_retrans_timeout)(struct rpc_task *task);
118 void (*timer)(struct rpc_task *task); 120 void (*timer)(struct rpc_task *task);
@@ -150,6 +152,7 @@ struct rpc_xprt {
150 unsigned long state; /* transport state */ 152 unsigned long state; /* transport state */
151 unsigned char shutdown : 1, /* being shut down */ 153 unsigned char shutdown : 1, /* being shut down */
152 resvport : 1; /* use a reserved port */ 154 resvport : 1; /* use a reserved port */
155 unsigned int bind_index; /* bind function index */
153 156
154 /* 157 /*
155 * Connection of transports 158 * Connection of transports
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0c78f7f4a976..daa6c125f66e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -59,6 +59,8 @@ struct writeback_control {
59 unsigned for_reclaim:1; /* Invoked from the page allocator */ 59 unsigned for_reclaim:1; /* Invoked from the page allocator */
60 unsigned for_writepages:1; /* This is a writepages() call */ 60 unsigned for_writepages:1; /* This is a writepages() call */
61 unsigned range_cyclic:1; /* range_start is cyclic */ 61 unsigned range_cyclic:1; /* range_start is cyclic */
62
63 void *fs_private; /* For use by ->writepages() */
62}; 64};
63 65
64/* 66/*
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index cdcab9ca4c60..8ebfc4db7f51 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
9sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ 9sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
10 auth.o auth_null.o auth_unix.o \ 10 auth.o auth_null.o auth_unix.o \
11 svc.o svcsock.o svcauth.o svcauth_unix.o \ 11 svc.o svcsock.o svcauth.o svcauth_unix.o \
12 pmap_clnt.o timer.o xdr.o \ 12 rpcb_clnt.o timer.o xdr.o \
13 sunrpc_syms.o cache.o rpc_pipe.o 13 sunrpc_syms.o cache.o rpc_pipe.o
14sunrpc-$(CONFIG_PROC_FS) += stats.o 14sunrpc-$(CONFIG_PROC_FS) += stats.o
15sunrpc-$(CONFIG_SYSCTL) += sysctl.o 15sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index 104cbf4f769f..d158635de6c0 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -123,9 +123,6 @@ spkm3_make_token(struct spkm3_ctx *ctx,
123 123
124 return GSS_S_COMPLETE; 124 return GSS_S_COMPLETE;
125out_err: 125out_err:
126 if (md5cksum.data)
127 kfree(md5cksum.data);
128
129 token->data = NULL; 126 token->data = NULL;
130 token->len = 0; 127 token->len = 0;
131 return GSS_S_FAILURE; 128 return GSS_S_FAILURE;
@@ -152,7 +149,7 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
152 149
153 switch (cksumtype) { 150 switch (cksumtype) {
154 case CKSUMTYPE_HMAC_MD5: 151 case CKSUMTYPE_HMAC_MD5:
155 cksumname = "md5"; 152 cksumname = "hmac(md5)";
156 break; 153 break;
157 default: 154 default:
158 dprintk("RPC: spkm3_make_checksum:" 155 dprintk("RPC: spkm3_make_checksum:"
@@ -172,8 +169,12 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
172 if (err) 169 if (err)
173 goto out; 170 goto out;
174 171
172 err = crypto_hash_init(&desc);
173 if (err)
174 goto out;
175
175 sg_set_buf(sg, header, hdrlen); 176 sg_set_buf(sg, header, hdrlen);
176 crypto_hash_update(&desc, sg, 1); 177 crypto_hash_update(&desc, sg, sg->length);
177 178
178 xdr_process_buf(body, body_offset, body->len - body_offset, 179 xdr_process_buf(body, body_offset, body->len - body_offset,
179 spkm3_checksummer, &desc); 180 spkm3_checksummer, &desc);
@@ -184,5 +185,3 @@ out:
184 185
185 return err ? GSS_S_FAILURE : 0; 186 return err ? GSS_S_FAILURE : 0;
186} 187}
187
188EXPORT_SYMBOL(make_spkm3_checksum);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 396cdbe249d1..d8fbee40a19c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -36,8 +36,6 @@
36#include <linux/sunrpc/metrics.h> 36#include <linux/sunrpc/metrics.h>
37 37
38 38
39#define RPC_SLACK_SPACE (1024) /* total overkill */
40
41#ifdef RPC_DEBUG 39#ifdef RPC_DEBUG
42# define RPCDBG_FACILITY RPCDBG_CALL 40# define RPCDBG_FACILITY RPCDBG_CALL
43#endif 41#endif
@@ -747,21 +745,38 @@ call_reserveresult(struct rpc_task *task)
747static void 745static void
748call_allocate(struct rpc_task *task) 746call_allocate(struct rpc_task *task)
749{ 747{
748 unsigned int slack = task->tk_auth->au_cslack;
750 struct rpc_rqst *req = task->tk_rqstp; 749 struct rpc_rqst *req = task->tk_rqstp;
751 struct rpc_xprt *xprt = task->tk_xprt; 750 struct rpc_xprt *xprt = task->tk_xprt;
752 unsigned int bufsiz; 751 struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
753 752
754 dprint_status(task); 753 dprint_status(task);
755 754
755 task->tk_status = 0;
756 task->tk_action = call_bind; 756 task->tk_action = call_bind;
757
757 if (req->rq_buffer) 758 if (req->rq_buffer)
758 return; 759 return;
759 760
760 /* FIXME: compute buffer requirements more exactly using 761 if (proc->p_proc != 0) {
761 * auth->au_wslack */ 762 BUG_ON(proc->p_arglen == 0);
762 bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; 763 if (proc->p_decode != NULL)
764 BUG_ON(proc->p_replen == 0);
765 }
763 766
764 if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) 767 /*
768 * Calculate the size (in quads) of the RPC call
769 * and reply headers, and convert both values
770 * to byte sizes.
771 */
772 req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen;
773 req->rq_callsize <<= 2;
774 req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
775 req->rq_rcvsize <<= 2;
776
777 req->rq_buffer = xprt->ops->buf_alloc(task,
778 req->rq_callsize + req->rq_rcvsize);
779 if (req->rq_buffer != NULL)
765 return; 780 return;
766 781
767 dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); 782 dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
@@ -788,6 +803,17 @@ rpc_task_force_reencode(struct rpc_task *task)
788 task->tk_rqstp->rq_snd_buf.len = 0; 803 task->tk_rqstp->rq_snd_buf.len = 0;
789} 804}
790 805
806static inline void
807rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
808{
809 buf->head[0].iov_base = start;
810 buf->head[0].iov_len = len;
811 buf->tail[0].iov_len = 0;
812 buf->page_len = 0;
813 buf->len = 0;
814 buf->buflen = len;
815}
816
791/* 817/*
792 * 3. Encode arguments of an RPC call 818 * 3. Encode arguments of an RPC call
793 */ 819 */
@@ -795,28 +821,17 @@ static void
795call_encode(struct rpc_task *task) 821call_encode(struct rpc_task *task)
796{ 822{
797 struct rpc_rqst *req = task->tk_rqstp; 823 struct rpc_rqst *req = task->tk_rqstp;
798 struct xdr_buf *sndbuf = &req->rq_snd_buf;
799 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
800 unsigned int bufsiz;
801 kxdrproc_t encode; 824 kxdrproc_t encode;
802 __be32 *p; 825 __be32 *p;
803 826
804 dprint_status(task); 827 dprint_status(task);
805 828
806 /* Default buffer setup */ 829 rpc_xdr_buf_init(&req->rq_snd_buf,
807 bufsiz = req->rq_bufsize >> 1; 830 req->rq_buffer,
808 sndbuf->head[0].iov_base = (void *)req->rq_buffer; 831 req->rq_callsize);
809 sndbuf->head[0].iov_len = bufsiz; 832 rpc_xdr_buf_init(&req->rq_rcv_buf,
810 sndbuf->tail[0].iov_len = 0; 833 (char *)req->rq_buffer + req->rq_callsize,
811 sndbuf->page_len = 0; 834 req->rq_rcvsize);
812 sndbuf->len = 0;
813 sndbuf->buflen = bufsiz;
814 rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz);
815 rcvbuf->head[0].iov_len = bufsiz;
816 rcvbuf->tail[0].iov_len = 0;
817 rcvbuf->page_len = 0;
818 rcvbuf->len = 0;
819 rcvbuf->buflen = bufsiz;
820 835
821 /* Encode header and provided arguments */ 836 /* Encode header and provided arguments */
822 encode = task->tk_msg.rpc_proc->p_encode; 837 encode = task->tk_msg.rpc_proc->p_encode;
@@ -887,9 +902,11 @@ call_bind_status(struct rpc_task *task)
887 task->tk_pid); 902 task->tk_pid);
888 break; 903 break;
889 case -EPROTONOSUPPORT: 904 case -EPROTONOSUPPORT:
890 dprintk("RPC: %5u remote rpcbind version 2 unavailable\n", 905 dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n",
891 task->tk_pid); 906 task->tk_pid);
892 break; 907 task->tk_status = 0;
908 task->tk_action = call_bind;
909 return;
893 default: 910 default:
894 dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", 911 dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
895 task->tk_pid, -task->tk_status); 912 task->tk_pid, -task->tk_status);
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
deleted file mode 100644
index d9f765344589..000000000000
--- a/net/sunrpc/pmap_clnt.c
+++ /dev/null
@@ -1,383 +0,0 @@
1/*
2 * linux/net/sunrpc/pmap_clnt.c
3 *
4 * In-kernel RPC portmapper client.
5 *
6 * Portmapper supports version 2 of the rpcbind protocol (RFC 1833).
7 *
8 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
9 */
10
11#include <linux/types.h>
12#include <linux/socket.h>
13#include <linux/kernel.h>
14#include <linux/errno.h>
15#include <linux/uio.h>
16#include <linux/in.h>
17#include <linux/sunrpc/clnt.h>
18#include <linux/sunrpc/sched.h>
19
20#ifdef RPC_DEBUG
21# define RPCDBG_FACILITY RPCDBG_PMAP
22#endif
23
24#define PMAP_SET 1
25#define PMAP_UNSET 2
26#define PMAP_GETPORT 3
27
28struct portmap_args {
29 u32 pm_prog;
30 u32 pm_vers;
31 u32 pm_prot;
32 unsigned short pm_port;
33 struct rpc_xprt * pm_xprt;
34};
35
36static struct rpc_procinfo pmap_procedures[];
37static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int);
38static void pmap_getport_done(struct rpc_task *, void *);
39static struct rpc_program pmap_program;
40
41static void pmap_getport_prepare(struct rpc_task *task, void *calldata)
42{
43 struct portmap_args *map = calldata;
44 struct rpc_message msg = {
45 .rpc_proc = &pmap_procedures[PMAP_GETPORT],
46 .rpc_argp = map,
47 .rpc_resp = &map->pm_port,
48 };
49
50 rpc_call_setup(task, &msg, 0);
51}
52
53static inline struct portmap_args *pmap_map_alloc(void)
54{
55 return kmalloc(sizeof(struct portmap_args), GFP_NOFS);
56}
57
58static inline void pmap_map_free(struct portmap_args *map)
59{
60 kfree(map);
61}
62
63static void pmap_map_release(void *data)
64{
65 struct portmap_args *map = data;
66
67 xprt_put(map->pm_xprt);
68 pmap_map_free(map);
69}
70
71static const struct rpc_call_ops pmap_getport_ops = {
72 .rpc_call_prepare = pmap_getport_prepare,
73 .rpc_call_done = pmap_getport_done,
74 .rpc_release = pmap_map_release,
75};
76
77static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status)
78{
79 xprt_clear_binding(xprt);
80 rpc_wake_up_status(&xprt->binding, status);
81}
82
83/**
84 * rpc_getport - obtain the port for a given RPC service on a given host
85 * @task: task that is waiting for portmapper request
86 *
87 * This one can be called for an ongoing RPC request, and can be used in
88 * an async (rpciod) context.
89 */
90void rpc_getport(struct rpc_task *task)
91{
92 struct rpc_clnt *clnt = task->tk_client;
93 struct rpc_xprt *xprt = task->tk_xprt;
94 struct sockaddr_in addr;
95 struct portmap_args *map;
96 struct rpc_clnt *pmap_clnt;
97 struct rpc_task *child;
98 int status;
99
100 dprintk("RPC: %5u rpc_getport(%s, %u, %u, %d)\n",
101 task->tk_pid, clnt->cl_server,
102 clnt->cl_prog, clnt->cl_vers, xprt->prot);
103
104 /* Autobind on cloned rpc clients is discouraged */
105 BUG_ON(clnt->cl_parent != clnt);
106
107 status = -EACCES; /* tell caller to check again */
108 if (xprt_test_and_set_binding(xprt))
109 goto bailout_nowake;
110
111 /* Put self on queue before sending rpcbind request, in case
112 * pmap_getport_done completes before we return from rpc_run_task */
113 rpc_sleep_on(&xprt->binding, task, NULL, NULL);
114
115 /* Someone else may have bound if we slept */
116 status = 0;
117 if (xprt_bound(xprt))
118 goto bailout_nofree;
119
120 status = -ENOMEM;
121 map = pmap_map_alloc();
122 if (!map)
123 goto bailout_nofree;
124 map->pm_prog = clnt->cl_prog;
125 map->pm_vers = clnt->cl_vers;
126 map->pm_prot = xprt->prot;
127 map->pm_port = 0;
128 map->pm_xprt = xprt_get(xprt);
129
130 rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr));
131 pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0);
132 status = PTR_ERR(pmap_clnt);
133 if (IS_ERR(pmap_clnt))
134 goto bailout;
135
136 status = -EIO;
137 child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
138 if (IS_ERR(child))
139 goto bailout_nofree;
140 rpc_put_task(child);
141
142 task->tk_xprt->stat.bind_count++;
143 return;
144
145bailout:
146 pmap_map_free(map);
147 xprt_put(xprt);
148bailout_nofree:
149 pmap_wake_portmap_waiters(xprt, status);
150bailout_nowake:
151 task->tk_status = status;
152}
153
154#ifdef CONFIG_ROOT_NFS
155/**
156 * rpc_getport_external - obtain the port for a given RPC service on a given host
157 * @sin: address of remote peer
158 * @prog: RPC program number to bind
159 * @vers: RPC version number to bind
160 * @prot: transport protocol to use to make this request
161 *
162 * This one is called from outside the RPC client in a synchronous task context.
163 */
164int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
165{
166 struct portmap_args map = {
167 .pm_prog = prog,
168 .pm_vers = vers,
169 .pm_prot = prot,
170 .pm_port = 0
171 };
172 struct rpc_message msg = {
173 .rpc_proc = &pmap_procedures[PMAP_GETPORT],
174 .rpc_argp = &map,
175 .rpc_resp = &map.pm_port,
176 };
177 struct rpc_clnt *pmap_clnt;
178 char hostname[32];
179 int status;
180
181 dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
182 NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
183
184 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
185 pmap_clnt = pmap_create(hostname, sin, prot, 0);
186 if (IS_ERR(pmap_clnt))
187 return PTR_ERR(pmap_clnt);
188
189 /* Setup the call info struct */
190 status = rpc_call_sync(pmap_clnt, &msg, 0);
191
192 if (status >= 0) {
193 if (map.pm_port != 0)
194 return map.pm_port;
195 status = -EACCES;
196 }
197 return status;
198}
199#endif
200
201/*
202 * Portmapper child task invokes this callback via tk_exit.
203 */
204static void pmap_getport_done(struct rpc_task *child, void *data)
205{
206 struct portmap_args *map = data;
207 struct rpc_xprt *xprt = map->pm_xprt;
208 int status = child->tk_status;
209
210 if (status < 0) {
211 /* Portmapper not available */
212 xprt->ops->set_port(xprt, 0);
213 } else if (map->pm_port == 0) {
214 /* Requested RPC service wasn't registered */
215 xprt->ops->set_port(xprt, 0);
216 status = -EACCES;
217 } else {
218 /* Succeeded */
219 xprt->ops->set_port(xprt, map->pm_port);
220 xprt_set_bound(xprt);
221 status = 0;
222 }
223
224 dprintk("RPC: %5u pmap_getport_done(status %d, port %u)\n",
225 child->tk_pid, status, map->pm_port);
226
227 pmap_wake_portmap_waiters(xprt, status);
228}
229
230/**
231 * rpc_register - set or unset a port registration with the local portmapper
232 * @prog: RPC program number to bind
233 * @vers: RPC version number to bind
234 * @prot: transport protocol to use to make this request
235 * @port: port value to register
236 * @okay: result code
237 *
238 * port == 0 means unregister, port != 0 means register.
239 */
240int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
241{
242 struct sockaddr_in sin = {
243 .sin_family = AF_INET,
244 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
245 };
246 struct portmap_args map = {
247 .pm_prog = prog,
248 .pm_vers = vers,
249 .pm_prot = prot,
250 .pm_port = port,
251 };
252 struct rpc_message msg = {
253 .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET],
254 .rpc_argp = &map,
255 .rpc_resp = okay,
256 };
257 struct rpc_clnt *pmap_clnt;
258 int error = 0;
259
260 dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n",
261 prog, vers, prot, port);
262
263 pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
264 if (IS_ERR(pmap_clnt)) {
265 error = PTR_ERR(pmap_clnt);
266 dprintk("RPC: couldn't create pmap client. Error = %d\n",
267 error);
268 return error;
269 }
270
271 error = rpc_call_sync(pmap_clnt, &msg, 0);
272
273 if (error < 0) {
274 printk(KERN_WARNING
275 "RPC: failed to contact portmap (errno %d).\n",
276 error);
277 }
278 dprintk("RPC: registration status %d/%d\n", error, *okay);
279
280 /* Client deleted automatically because cl_oneshot == 1 */
281 return error;
282}
283
284static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged)
285{
286 struct rpc_create_args args = {
287 .protocol = proto,
288 .address = (struct sockaddr *)srvaddr,
289 .addrsize = sizeof(*srvaddr),
290 .servername = hostname,
291 .program = &pmap_program,
292 .version = RPC_PMAP_VERSION,
293 .authflavor = RPC_AUTH_UNIX,
294 .flags = (RPC_CLNT_CREATE_ONESHOT |
295 RPC_CLNT_CREATE_NOPING),
296 };
297
298 srvaddr->sin_port = htons(RPC_PMAP_PORT);
299 if (!privileged)
300 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
301 return rpc_create(&args);
302}
303
304/*
305 * XDR encode/decode functions for PMAP
306 */
307static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map)
308{
309 dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n",
310 map->pm_prog, map->pm_vers,
311 map->pm_prot, map->pm_port);
312 *p++ = htonl(map->pm_prog);
313 *p++ = htonl(map->pm_vers);
314 *p++ = htonl(map->pm_prot);
315 *p++ = htonl(map->pm_port);
316
317 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
318 return 0;
319}
320
321static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp)
322{
323 *portp = (unsigned short) ntohl(*p++);
324 return 0;
325}
326
327static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp)
328{
329 *boolp = (unsigned int) ntohl(*p++);
330 return 0;
331}
332
333static struct rpc_procinfo pmap_procedures[] = {
334[PMAP_SET] = {
335 .p_proc = PMAP_SET,
336 .p_encode = (kxdrproc_t) xdr_encode_mapping,
337 .p_decode = (kxdrproc_t) xdr_decode_bool,
338 .p_bufsiz = 4,
339 .p_count = 1,
340 .p_statidx = PMAP_SET,
341 .p_name = "SET",
342 },
343[PMAP_UNSET] = {
344 .p_proc = PMAP_UNSET,
345 .p_encode = (kxdrproc_t) xdr_encode_mapping,
346 .p_decode = (kxdrproc_t) xdr_decode_bool,
347 .p_bufsiz = 4,
348 .p_count = 1,
349 .p_statidx = PMAP_UNSET,
350 .p_name = "UNSET",
351 },
352[PMAP_GETPORT] = {
353 .p_proc = PMAP_GETPORT,
354 .p_encode = (kxdrproc_t) xdr_encode_mapping,
355 .p_decode = (kxdrproc_t) xdr_decode_port,
356 .p_bufsiz = 4,
357 .p_count = 1,
358 .p_statidx = PMAP_GETPORT,
359 .p_name = "GETPORT",
360 },
361};
362
363static struct rpc_version pmap_version2 = {
364 .number = 2,
365 .nrprocs = 4,
366 .procs = pmap_procedures
367};
368
369static struct rpc_version * pmap_version[] = {
370 NULL,
371 NULL,
372 &pmap_version2
373};
374
375static struct rpc_stat pmap_stats;
376
377static struct rpc_program pmap_program = {
378 .name = "portmap",
379 .number = RPC_PMAP_PROGRAM,
380 .nrvers = ARRAY_SIZE(pmap_version),
381 .version = pmap_version,
382 .stats = &pmap_stats,
383};
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
new file mode 100644
index 000000000000..6c7aa8a1f0c6
--- /dev/null
+++ b/net/sunrpc/rpcb_clnt.c
@@ -0,0 +1,625 @@
1/*
2 * In-kernel rpcbind client supporting versions 2, 3, and 4 of the rpcbind
3 * protocol
4 *
5 * Based on RFC 1833: "Binding Protocols for ONC RPC Version 2" and
6 * RFC 3530: "Network File System (NFS) version 4 Protocol"
7 *
8 * Original: Gilles Quillard, Bull Open Source, 2005 <gilles.quillard@bull.net>
9 * Updated: Chuck Lever, Oracle Corporation, 2007 <chuck.lever@oracle.com>
10 *
11 * Descended from net/sunrpc/pmap_clnt.c,
12 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
13 */
14
15#include <linux/types.h>
16#include <linux/socket.h>
17#include <linux/kernel.h>
18#include <linux/errno.h>
19
20#include <linux/sunrpc/clnt.h>
21#include <linux/sunrpc/sched.h>
22
23#ifdef RPC_DEBUG
24# define RPCDBG_FACILITY RPCDBG_BIND
25#endif
26
27#define RPCBIND_PROGRAM (100000u)
28#define RPCBIND_PORT (111u)
29
/*
 * rpcbind procedure numbers, spelled out explicitly.
 * GETADDR shares a number with GETPORT, and BCAST with CALLIT.
 */
enum {
	RPCBPROC_NULL		= 0,
	RPCBPROC_SET		= 1,
	RPCBPROC_UNSET		= 2,
	RPCBPROC_GETPORT	= 3,
	RPCBPROC_GETADDR	= 3,	/* alias for GETPORT */
	RPCBPROC_DUMP		= 4,
	RPCBPROC_CALLIT		= 5,
	RPCBPROC_BCAST		= 5,	/* alias for CALLIT */
	RPCBPROC_GETTIME	= 6,
	RPCBPROC_UADDR2TADDR	= 7,
	RPCBPROC_TADDR2UADDR	= 8,
	RPCBPROC_GETVERSADDR	= 9,
	RPCBPROC_INDIRECT	= 10,
	RPCBPROC_GETADDRLIST	= 11,
	RPCBPROC_GETSTAT	= 12,
};
47
48#define RPCB_HIGHPROC_2 RPCBPROC_CALLIT
49#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR
50#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT
51
52/*
53 * r_addr
54 *
55 * Quoting RFC 3530, section 2.2:
56 *
57 * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
58 * US-ASCII string:
59 *
60 * h1.h2.h3.h4.p1.p2
61 *
62 * The prefix, "h1.h2.h3.h4", is the standard textual form for
63 * representing an IPv4 address, which is always four octets long.
64 * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
65 * the first through fourth octets each converted to ASCII-decimal.
66 * Assuming big-endian ordering, p1 and p2 are, respectively, the first
67 * and second octets each converted to ASCII-decimal. For example, if a
68 * host, in big-endian order, has an address of 0x0A010307 and there is
69 * a service listening on, in big endian order, port 0x020F (decimal
70 * 527), then the complete universal address is "10.1.3.7.2.15".
71 *
72 * ...
73 *
74 * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
75 * US-ASCII string:
76 *
77 * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
78 *
79 * The suffix "p1.p2" is the service port, and is computed the same way
80 * as with universal addresses for TCP and UDP over IPv4. The prefix,
81 * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
82 * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
83 * Additionally, the two alternative forms specified in Section 2.2 of
84 * [RFC2373] are also acceptable.
85 *
86 * XXX: Currently this implementation does not explicitly convert the
87 * stored address to US-ASCII on non-ASCII systems.
88 */
89#define RPCB_MAXADDRLEN (128u)
90
91/*
92 * r_netid
93 *
94 * Quoting RFC 3530, section 2.2:
95 *
96 * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP
97 * over IPv4 the value of r_netid is the string "udp".
98 *
99 * ...
100 *
101 * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP
102 * over IPv6 the value of r_netid is the string "udp6".
103 */
104#define RPCB_NETID_UDP "\165\144\160" /* "udp" */
105#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */
106#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */
107#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */
108
109#define RPCB_MAXNETIDLEN (4u)
110
111/*
112 * r_owner
113 *
114 * The "owner" is allowed to unset a service in the rpcbind database.
115 * We always use the following (arbitrary) fixed string.
116 */
117#define RPCB_OWNER_STRING "rpcb"
118#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
119
120static void rpcb_getport_done(struct rpc_task *, void *);
121extern struct rpc_program rpcb_program;
122
123struct rpcbind_args {
124 struct rpc_xprt * r_xprt;
125
126 u32 r_prog;
127 u32 r_vers;
128 u32 r_prot;
129 unsigned short r_port;
130 char * r_netid;
131 char r_addr[RPCB_MAXADDRLEN];
132 char * r_owner;
133};
134
135static struct rpc_procinfo rpcb_procedures2[];
136static struct rpc_procinfo rpcb_procedures3[];
137
/* One entry of the rpcbind version fallback list. */
struct rpcb_info {
	int			rpc_vers;	/* rpcbind protocol version */
	struct rpc_procinfo *	rpc_proc;	/* lookup procedure for that version */
};

/* Forward declaration; the table is defined after the procedure tables. */
static struct rpcb_info rpcb_next_version[];
142
143static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
144{
145 struct rpcbind_args *map = calldata;
146 struct rpc_xprt *xprt = map->r_xprt;
147 struct rpc_message msg = {
148 .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc,
149 .rpc_argp = map,
150 .rpc_resp = &map->r_port,
151 };
152
153 rpc_call_setup(task, &msg, 0);
154}
155
156static void rpcb_map_release(void *data)
157{
158 struct rpcbind_args *map = data;
159
160 xprt_put(map->r_xprt);
161 kfree(map);
162}
163
164static const struct rpc_call_ops rpcb_getport_ops = {
165 .rpc_call_prepare = rpcb_getport_prepare,
166 .rpc_call_done = rpcb_getport_done,
167 .rpc_release = rpcb_map_release,
168};
169
170static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
171{
172 xprt_clear_binding(xprt);
173 rpc_wake_up_status(&xprt->binding, status);
174}
175
176static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
177 int proto, int version, int privileged)
178{
179 struct rpc_create_args args = {
180 .protocol = proto,
181 .address = srvaddr,
182 .addrsize = sizeof(struct sockaddr_in),
183 .servername = hostname,
184 .program = &rpcb_program,
185 .version = version,
186 .authflavor = RPC_AUTH_UNIX,
187 .flags = (RPC_CLNT_CREATE_ONESHOT |
188 RPC_CLNT_CREATE_NOPING),
189 };
190
191 ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
192 if (!privileged)
193 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
194 return rpc_create(&args);
195}
196
197/**
198 * rpcb_register - set or unset a port registration with the local rpcbind svc
199 * @prog: RPC program number to bind
200 * @vers: RPC version number to bind
201 * @prot: transport protocol to use to make this request
202 * @port: port value to register
203 * @okay: result code
204 *
205 * port == 0 means unregister, port != 0 means register.
206 *
207 * This routine supports only rpcbind version 2.
208 */
209int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
210{
211 struct sockaddr_in sin = {
212 .sin_family = AF_INET,
213 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
214 };
215 struct rpcbind_args map = {
216 .r_prog = prog,
217 .r_vers = vers,
218 .r_prot = prot,
219 .r_port = port,
220 };
221 struct rpc_message msg = {
222 .rpc_proc = &rpcb_procedures2[port ?
223 RPCBPROC_SET : RPCBPROC_UNSET],
224 .rpc_argp = &map,
225 .rpc_resp = okay,
226 };
227 struct rpc_clnt *rpcb_clnt;
228 int error = 0;
229
230 dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
231 "rpcbind\n", (port ? "" : "un"),
232 prog, vers, prot, port);
233
234 rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
235 IPPROTO_UDP, 2, 1);
236 if (IS_ERR(rpcb_clnt))
237 return PTR_ERR(rpcb_clnt);
238
239 error = rpc_call_sync(rpcb_clnt, &msg, 0);
240
241 if (error < 0)
242 printk(KERN_WARNING "RPC: failed to contact local rpcbind "
243 "server (errno %d).\n", -error);
244 dprintk("RPC: registration status %d/%d\n", error, *okay);
245
246 return error;
247}
248
249#ifdef CONFIG_ROOT_NFS
250/**
251 * rpcb_getport_external - obtain the port for an RPC service on a given host
252 * @sin: address of remote peer
253 * @prog: RPC program number to bind
254 * @vers: RPC version number to bind
255 * @prot: transport protocol to use to make this request
256 *
257 * Called from outside the RPC client in a synchronous task context.
258 *
259 * For now, this supports only version 2 queries, but is used only by
260 * mount_clnt for NFS_ROOT.
261 */
262int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog,
263 __u32 vers, int prot)
264{
265 struct rpcbind_args map = {
266 .r_prog = prog,
267 .r_vers = vers,
268 .r_prot = prot,
269 .r_port = 0,
270 };
271 struct rpc_message msg = {
272 .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
273 .rpc_argp = &map,
274 .rpc_resp = &map.r_port,
275 };
276 struct rpc_clnt *rpcb_clnt;
277 char hostname[40];
278 int status;
279
280 dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n",
281 NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
282
283 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
284 rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
285 if (IS_ERR(rpcb_clnt))
286 return PTR_ERR(rpcb_clnt);
287
288 status = rpc_call_sync(rpcb_clnt, &msg, 0);
289
290 if (status >= 0) {
291 if (map.r_port != 0)
292 return map.r_port;
293 status = -EACCES;
294 }
295 return status;
296}
297#endif
298
299/**
300 * rpcb_getport - obtain the port for a given RPC service on a given host
301 * @task: task that is waiting for portmapper request
302 *
303 * This one can be called for an ongoing RPC request, and can be used in
304 * an async (rpciod) context.
305 */
306void rpcb_getport(struct rpc_task *task)
307{
308 struct rpc_clnt *clnt = task->tk_client;
309 int bind_version;
310 struct rpc_xprt *xprt = task->tk_xprt;
311 struct rpc_clnt *rpcb_clnt;
312 static struct rpcbind_args *map;
313 struct rpc_task *child;
314 struct sockaddr addr;
315 int status;
316
317 dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n",
318 task->tk_pid, clnt->cl_server,
319 clnt->cl_prog, clnt->cl_vers, xprt->prot);
320
321 /* Autobind on cloned rpc clients is discouraged */
322 BUG_ON(clnt->cl_parent != clnt);
323
324 if (xprt_test_and_set_binding(xprt)) {
325 status = -EACCES; /* tell caller to check again */
326 dprintk("RPC: %5u rpcb_getport waiting for another binder\n",
327 task->tk_pid);
328 goto bailout_nowake;
329 }
330
331 /* Put self on queue before sending rpcbind request, in case
332 * rpcb_getport_done completes before we return from rpc_run_task */
333 rpc_sleep_on(&xprt->binding, task, NULL, NULL);
334
335 /* Someone else may have bound if we slept */
336 if (xprt_bound(xprt)) {
337 status = 0;
338 dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid);
339 goto bailout_nofree;
340 }
341
342 if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
343 xprt->bind_index = 0;
344 status = -EACCES; /* tell caller to try again later */
345 dprintk("RPC: %5u rpcb_getport no more getport versions "
346 "available\n", task->tk_pid);
347 goto bailout_nofree;
348 }
349 bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
350
351 dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n",
352 task->tk_pid, bind_version);
353
354 map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
355 if (!map) {
356 status = -ENOMEM;
357 dprintk("RPC: %5u rpcb_getport no memory available\n",
358 task->tk_pid);
359 goto bailout_nofree;
360 }
361 map->r_prog = clnt->cl_prog;
362 map->r_vers = clnt->cl_vers;
363 map->r_prot = xprt->prot;
364 map->r_port = 0;
365 map->r_xprt = xprt_get(xprt);
366 map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP :
367 RPCB_NETID_UDP;
368 memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR),
369 sizeof(map->r_addr));
370 map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
371
372 rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
373 rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
374 if (IS_ERR(rpcb_clnt)) {
375 status = PTR_ERR(rpcb_clnt);
376 dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n",
377 task->tk_pid, PTR_ERR(rpcb_clnt));
378 goto bailout;
379 }
380
381 child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
382 if (IS_ERR(child)) {
383 status = -EIO;
384 dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n",
385 task->tk_pid);
386 goto bailout_nofree;
387 }
388 rpc_put_task(child);
389
390 task->tk_xprt->stat.bind_count++;
391 return;
392
393bailout:
394 kfree(map);
395 xprt_put(xprt);
396bailout_nofree:
397 rpcb_wake_rpcbind_waiters(xprt, status);
398bailout_nowake:
399 task->tk_status = status;
400}
401
402/*
403 * Rpcbind child task calls this callback via tk_exit.
404 */
405static void rpcb_getport_done(struct rpc_task *child, void *data)
406{
407 struct rpcbind_args *map = data;
408 struct rpc_xprt *xprt = map->r_xprt;
409 int status = child->tk_status;
410
411 /* rpcbind server doesn't support this rpcbind protocol version */
412 if (status == -EPROTONOSUPPORT)
413 xprt->bind_index++;
414
415 if (status < 0) {
416 /* rpcbind server not available on remote host? */
417 xprt->ops->set_port(xprt, 0);
418 } else if (map->r_port == 0) {
419 /* Requested RPC service wasn't registered on remote host */
420 xprt->ops->set_port(xprt, 0);
421 status = -EACCES;
422 } else {
423 /* Succeeded */
424 xprt->ops->set_port(xprt, map->r_port);
425 xprt_set_bound(xprt);
426 status = 0;
427 }
428
429 dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n",
430 child->tk_pid, status, map->r_port);
431
432 rpcb_wake_rpcbind_waiters(xprt, status);
433}
434
435static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
436 struct rpcbind_args *rpcb)
437{
438 dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n",
439 rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
440 *p++ = htonl(rpcb->r_prog);
441 *p++ = htonl(rpcb->r_vers);
442 *p++ = htonl(rpcb->r_prot);
443 *p++ = htonl(rpcb->r_port);
444
445 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
446 return 0;
447}
448
449static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
450 unsigned short *portp)
451{
452 *portp = (unsigned short) ntohl(*p++);
453 dprintk("RPC: rpcb_decode_getport result %u\n",
454 *portp);
455 return 0;
456}
457
458static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
459 unsigned int *boolp)
460{
461 *boolp = (unsigned int) ntohl(*p++);
462 dprintk("RPC: rpcb_decode_set result %u\n",
463 *boolp);
464 return 0;
465}
466
467static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
468 struct rpcbind_args *rpcb)
469{
470 dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n",
471 rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
472 *p++ = htonl(rpcb->r_prog);
473 *p++ = htonl(rpcb->r_vers);
474
475 p = xdr_encode_string(p, rpcb->r_netid);
476 p = xdr_encode_string(p, rpcb->r_addr);
477 p = xdr_encode_string(p, rpcb->r_owner);
478
479 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
480
481 return 0;
482}
483
484static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
485 unsigned short *portp)
486{
487 char *addr;
488 int addr_len, c, i, f, first, val;
489
490 *portp = 0;
491 addr_len = (unsigned int) ntohl(*p++);
492 if (addr_len > RPCB_MAXADDRLEN) /* sanity */
493 return -EINVAL;
494
495 dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n",
496 (char *) p);
497
498 addr = (char *)p;
499 val = 0;
500 first = 1;
501 f = 1;
502 for (i = addr_len - 1; i > 0; i--) {
503 c = addr[i];
504 if (c >= '0' && c <= '9') {
505 val += (c - '0') * f;
506 f *= 10;
507 } else if (c == '.') {
508 if (first) {
509 *portp = val;
510 val = first = 0;
511 f = 1;
512 } else {
513 *portp |= (val << 8);
514 break;
515 }
516 }
517 }
518
519 dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp);
520 return 0;
521}
522
523#define RPCB_program_sz (1u)
524#define RPCB_version_sz (1u)
525#define RPCB_protocol_sz (1u)
526#define RPCB_port_sz (1u)
527#define RPCB_boolean_sz (1u)
528
529#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN))
530#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
531#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
532
/*
 * Argument sizes in XDR words. Parenthesized so the sums stay intact
 * when the macros are used inside a larger expression.
 */
#define RPCB_mappingargs_sz	(RPCB_program_sz+RPCB_version_sz+	\
				 RPCB_protocol_sz+RPCB_port_sz)
#define RPCB_getaddrargs_sz	(RPCB_program_sz+RPCB_version_sz+	\
				 RPCB_netid_sz+RPCB_addr_sz+		\
				 RPCB_ownerstring_sz)
538
539#define RPCB_setres_sz RPCB_boolean_sz
540#define RPCB_getportres_sz RPCB_port_sz
541
542/*
543 * Note that RFC 1833 does not put any size restrictions on the
544 * address string returned by the remote rpcbind database.
545 */
546#define RPCB_getaddrres_sz RPCB_addr_sz
547
/*
 * Build the rpc_procinfo table slot for rpcbind procedure RPCBPROC_<proc>.
 * @argtype selects rpcb_encode_<argtype> and RPCB_<argtype>args_sz;
 * @restype selects rpcb_decode_<restype> and RPCB_<restype>res_sz.
 * The procedure name string is the literal text of @proc.
 */
#define PROC(proc, argtype, restype)					\
	[RPCBPROC_##proc] = {						\
		.p_name		= #proc,				\
		.p_proc		= RPCBPROC_##proc,			\
		.p_statidx	= RPCBPROC_##proc,			\
		.p_timer	= 0,					\
		.p_encode	= (kxdrproc_t) rpcb_encode_##argtype,	\
		.p_decode	= (kxdrproc_t) rpcb_decode_##restype,	\
		.p_arglen	= RPCB_##argtype##args_sz,		\
		.p_replen	= RPCB_##restype##res_sz,		\
	}
559
560/*
561 * Not all rpcbind procedures described in RFC 1833 are implemented
562 * since the Linux kernel RPC code requires only these.
563 */
564static struct rpc_procinfo rpcb_procedures2[] = {
565 PROC(SET, mapping, set),
566 PROC(UNSET, mapping, set),
567 PROC(GETADDR, mapping, getport),
568};
569
570static struct rpc_procinfo rpcb_procedures3[] = {
571 PROC(SET, mapping, set),
572 PROC(UNSET, mapping, set),
573 PROC(GETADDR, getaddr, getaddr),
574};
575
576static struct rpc_procinfo rpcb_procedures4[] = {
577 PROC(SET, mapping, set),
578 PROC(UNSET, mapping, set),
579 PROC(GETVERSADDR, getaddr, getaddr),
580};
581
582static struct rpcb_info rpcb_next_version[] = {
583#ifdef CONFIG_SUNRPC_BIND34
584 { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
585 { 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
586#endif
587 { 2, &rpcb_procedures2[RPCBPROC_GETPORT] },
588 { 0, NULL },
589};
590
591static struct rpc_version rpcb_version2 = {
592 .number = 2,
593 .nrprocs = RPCB_HIGHPROC_2,
594 .procs = rpcb_procedures2
595};
596
597static struct rpc_version rpcb_version3 = {
598 .number = 3,
599 .nrprocs = RPCB_HIGHPROC_3,
600 .procs = rpcb_procedures3
601};
602
603static struct rpc_version rpcb_version4 = {
604 .number = 4,
605 .nrprocs = RPCB_HIGHPROC_4,
606 .procs = rpcb_procedures4
607};
608
609static struct rpc_version *rpcb_version[] = {
610 NULL,
611 NULL,
612 &rpcb_version2,
613 &rpcb_version3,
614 &rpcb_version4
615};
616
617static struct rpc_stat rpcb_stats;
618
619struct rpc_program rpcb_program = {
620 .name = "rpcbind",
621 .number = RPCBIND_PROGRAM,
622 .nrvers = ARRAY_SIZE(rpcb_version),
623 .version = rpcb_version,
624 .stats = &rpcb_stats,
625};
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6d87320074b1..4a53e94f8134 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -741,50 +741,53 @@ static void rpc_async_schedule(struct work_struct *work)
741 * @task: RPC task that will use this buffer 741 * @task: RPC task that will use this buffer
742 * @size: requested byte size 742 * @size: requested byte size
743 * 743 *
744 * We try to ensure that some NFS reads and writes can always proceed 744 * To prevent rpciod from hanging, this allocator never sleeps,
745 * by using a mempool when allocating 'small' buffers. 745 * returning NULL if the request cannot be serviced immediately.
746 * The caller can arrange to sleep in a way that is safe for rpciod.
747 *
748 * Most requests are 'small' (under 2KiB) and can be serviced from a
749 * mempool, ensuring that NFS reads and writes can always proceed,
750 * and that there is good locality of reference for these buffers.
751 *
746 * In order to avoid memory starvation triggering more writebacks of 752 * In order to avoid memory starvation triggering more writebacks of
747 * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. 753 * NFS requests, we avoid using GFP_KERNEL.
748 */ 754 */
749void * rpc_malloc(struct rpc_task *task, size_t size) 755void *rpc_malloc(struct rpc_task *task, size_t size)
750{ 756{
751 struct rpc_rqst *req = task->tk_rqstp; 757 size_t *buf;
752 gfp_t gfp; 758 gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
753 759
754 if (task->tk_flags & RPC_TASK_SWAPPER) 760 size += sizeof(size_t);
755 gfp = GFP_ATOMIC; 761 if (size <= RPC_BUFFER_MAXSIZE)
762 buf = mempool_alloc(rpc_buffer_mempool, gfp);
756 else 763 else
757 gfp = GFP_NOFS; 764 buf = kmalloc(size, gfp);
758 765 *buf = size;
759 if (size > RPC_BUFFER_MAXSIZE) { 766 dprintk("RPC: %5u allocated buffer of size %u at %p\n",
760 req->rq_buffer = kmalloc(size, gfp); 767 task->tk_pid, size, buf);
761 if (req->rq_buffer) 768 return (void *) ++buf;
762 req->rq_bufsize = size;
763 } else {
764 req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
765 if (req->rq_buffer)
766 req->rq_bufsize = RPC_BUFFER_MAXSIZE;
767 }
768 return req->rq_buffer;
769} 769}
770 770
771/** 771/**
772 * rpc_free - free buffer allocated via rpc_malloc 772 * rpc_free - free buffer allocated via rpc_malloc
773 * @task: RPC task with a buffer to be freed 773 * @buffer: buffer to free
774 * 774 *
775 */ 775 */
776void rpc_free(struct rpc_task *task) 776void rpc_free(void *buffer)
777{ 777{
778 struct rpc_rqst *req = task->tk_rqstp; 778 size_t size, *buf = (size_t *) buffer;
779 779
780 if (req->rq_buffer) { 780 if (!buffer)
781 if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) 781 return;
782 mempool_free(req->rq_buffer, rpc_buffer_mempool); 782 size = *buf;
783 else 783 buf--;
784 kfree(req->rq_buffer); 784
785 req->rq_buffer = NULL; 785 dprintk("RPC: freeing buffer of size %u at %p\n",
786 req->rq_bufsize = 0; 786 size, buf);
787 } 787 if (size <= RPC_BUFFER_MAXSIZE)
788 mempool_free(buf, rpc_buffer_mempool);
789 else
790 kfree(buf);
788} 791}
789 792
790/* 793/*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b4db53ff1435..b7503c103ae8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -757,7 +757,7 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
757 if (progp->pg_vers[i]->vs_hidden) 757 if (progp->pg_vers[i]->vs_hidden)
758 continue; 758 continue;
759 759
760 error = rpc_register(progp->pg_prog, i, proto, port, &dummy); 760 error = rpcb_register(progp->pg_prog, i, proto, port, &dummy);
761 if (error < 0) 761 if (error < 0)
762 break; 762 break;
763 if (port && !dummy) { 763 if (port && !dummy) {
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 456a14510308..5b05b73e4c1d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -823,7 +823,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
823 req->rq_task = task; 823 req->rq_task = task;
824 req->rq_xprt = xprt; 824 req->rq_xprt = xprt;
825 req->rq_buffer = NULL; 825 req->rq_buffer = NULL;
826 req->rq_bufsize = 0;
827 req->rq_xid = xprt_alloc_xid(xprt); 826 req->rq_xid = xprt_alloc_xid(xprt);
828 req->rq_release_snd_buf = NULL; 827 req->rq_release_snd_buf = NULL;
829 xprt_reset_majortimeo(req); 828 xprt_reset_majortimeo(req);
@@ -855,7 +854,7 @@ void xprt_release(struct rpc_task *task)
855 mod_timer(&xprt->timer, 854 mod_timer(&xprt->timer,
856 xprt->last_used + xprt->idle_timeout); 855 xprt->last_used + xprt->idle_timeout);
857 spin_unlock_bh(&xprt->transport_lock); 856 spin_unlock_bh(&xprt->transport_lock);
858 xprt->ops->buf_free(task); 857 xprt->ops->buf_free(req->rq_buffer);
859 task->tk_rqstp = NULL; 858 task->tk_rqstp = NULL;
860 if (req->rq_release_snd_buf) 859 if (req->rq_release_snd_buf)
861 req->rq_release_snd_buf(req); 860 req->rq_release_snd_buf(req);
@@ -928,6 +927,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si
928 xprt->timer.data = (unsigned long) xprt; 927 xprt->timer.data = (unsigned long) xprt;
929 xprt->last_used = jiffies; 928 xprt->last_used = jiffies;
930 xprt->cwnd = RPC_INITCWND; 929 xprt->cwnd = RPC_INITCWND;
930 xprt->bind_index = 0;
931 931
932 rpc_init_wait_queue(&xprt->binding, "xprt_binding"); 932 rpc_init_wait_queue(&xprt->binding, "xprt_binding");
933 rpc_init_wait_queue(&xprt->pending, "xprt_pending"); 933 rpc_init_wait_queue(&xprt->pending, "xprt_pending");
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a5a32029e728..cc33c5880abb 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1476,7 +1476,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
1476 .set_buffer_size = xs_udp_set_buffer_size, 1476 .set_buffer_size = xs_udp_set_buffer_size,
1477 .reserve_xprt = xprt_reserve_xprt_cong, 1477 .reserve_xprt = xprt_reserve_xprt_cong,
1478 .release_xprt = xprt_release_xprt_cong, 1478 .release_xprt = xprt_release_xprt_cong,
1479 .rpcbind = rpc_getport, 1479 .rpcbind = rpcb_getport,
1480 .set_port = xs_set_port, 1480 .set_port = xs_set_port,
1481 .connect = xs_connect, 1481 .connect = xs_connect,
1482 .buf_alloc = rpc_malloc, 1482 .buf_alloc = rpc_malloc,
@@ -1493,7 +1493,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
1493static struct rpc_xprt_ops xs_tcp_ops = { 1493static struct rpc_xprt_ops xs_tcp_ops = {
1494 .reserve_xprt = xprt_reserve_xprt, 1494 .reserve_xprt = xprt_reserve_xprt,
1495 .release_xprt = xs_tcp_release_xprt, 1495 .release_xprt = xs_tcp_release_xprt,
1496 .rpcbind = rpc_getport, 1496 .rpcbind = rpcb_getport,
1497 .set_port = xs_set_port, 1497 .set_port = xs_set_port,
1498 .connect = xs_connect, 1498 .connect = xs_connect,
1499 .buf_alloc = rpc_malloc, 1499 .buf_alloc = rpc_malloc,