aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-10-14 15:31:14 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-14 15:31:14 -0400
commit8acd3a60bcca17c6d89c73cee3ad6057eb83ba1e (patch)
treed610c8d39246c33c499ee9d92d302d3ca9e89ae3
parentc269bc00fcb876ae3b85f178f1e34601185c8ccc (diff)
parent107e0008dfb8bd6366bc8827f5bbbc0c1f795d2d (diff)
Merge branch 'for-2.6.28' of git://linux-nfs.org/~bfields/linux
* 'for-2.6.28' of git://linux-nfs.org/~bfields/linux: (59 commits) svcrdma: Fix IRD/ORD polarity svcrdma: Update svc_rdma_send_error to use DMA LKEY svcrdma: Modify the RPC reply path to use FRMR when available svcrdma: Modify the RPC recv path to use FRMR when available svcrdma: Add support to svc_rdma_send to handle chained WR svcrdma: Modify post recv path to use local dma key svcrdma: Add a service to register a Fast Reg MR with the device svcrdma: Query device for Fast Reg support during connection setup svcrdma: Add FRMR get/put services NLM: Remove unused argument from svc_addsock() function NLM: Remove "proto" argument from lockd_up() NLM: Always start both UDP and TCP listeners lockd: Remove unused fields in the nlm_reboot structure lockd: Add helper to sanity check incoming NOTIFY requests lockd: change nlmclnt_grant() to take a "struct sockaddr *" lockd: Adjust nlmsvc_lookup_host() to accomodate AF_INET6 addresses lockd: Adjust nlmclnt_lookup_host() signature to accomodate non-AF_INET lockd: Support non-AF_INET addresses in nlm_lookup_host() NLM: Convert nlm_lookup_host() to use a single argument svcrdma: Add Fast Reg MR Data Types ...
-rw-r--r--fs/Kconfig30
-rw-r--r--fs/Makefile3
-rw-r--r--fs/lockd/Makefile2
-rw-r--r--fs/lockd/clntlock.c13
-rw-r--r--fs/lockd/grace.c59
-rw-r--r--fs/lockd/host.c350
-rw-r--r--fs/lockd/mon.c2
-rw-r--r--fs/lockd/svc.c88
-rw-r--r--fs/lockd/svc4proc.c31
-rw-r--r--fs/lockd/svclock.c18
-rw-r--r--fs/lockd/svcproc.c31
-rw-r--r--fs/lockd/svcsubs.c2
-rw-r--r--fs/lockd/xdr.c2
-rw-r--r--fs/lockd/xdr4.c2
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfsd/lockd.c1
-rw-r--r--fs/nfsd/nfs3proc.c8
-rw-r--r--fs/nfsd/nfs4callback.c7
-rw-r--r--fs/nfsd/nfs4proc.c8
-rw-r--r--fs/nfsd/nfs4state.c34
-rw-r--r--fs/nfsd/nfs4xdr.c171
-rw-r--r--fs/nfsd/nfsctl.c5
-rw-r--r--fs/nfsd/nfsfh.c30
-rw-r--r--fs/nfsd/nfsproc.c6
-rw-r--r--fs/nfsd/nfssvc.c20
-rw-r--r--fs/nfsd/vfs.c63
-rw-r--r--fs/proc/proc_misc.c4
-rw-r--r--include/linux/fs.h65
-rw-r--r--include/linux/lockd/bind.h11
-rw-r--r--include/linux/lockd/lockd.h137
-rw-r--r--include/linux/lockd/xdr.h2
-rw-r--r--include/linux/nfsd/nfsd.h3
-rw-r--r--include/linux/sunrpc/clnt.h5
-rw-r--r--include/linux/sunrpc/svc.h19
-rw-r--r--include/linux/sunrpc/svc_rdma.h27
-rw-r--r--include/linux/sunrpc/svcsock.h5
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c6
-rw-r--r--net/sunrpc/clnt.c2
-rw-r--r--net/sunrpc/rpcb_clnt.c81
-rw-r--r--net/sunrpc/svc.c251
-rw-r--r--net/sunrpc/svc_xprt.c39
-rw-r--r--net/sunrpc/svcsock.c17
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c187
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c255
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c364
46 files changed, 1842 insertions, 628 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index f54a157a0296..501f012e0c6f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -433,6 +433,14 @@ config FS_POSIX_ACL
433 bool 433 bool
434 default n 434 default n
435 435
436config FILE_LOCKING
437 bool "Enable POSIX file locking API" if EMBEDDED
438 default y
439 help
440 This option enables standard file locking support, required
441 for filesystems like NFS and for the flock() system
442 call. Disabling this option saves about 11k.
443
436source "fs/xfs/Kconfig" 444source "fs/xfs/Kconfig"
437source "fs/gfs2/Kconfig" 445source "fs/gfs2/Kconfig"
438 446
@@ -1779,6 +1787,28 @@ config SUNRPC_XPRT_RDMA
1779 1787
1780 If unsure, say N. 1788 If unsure, say N.
1781 1789
1790config SUNRPC_REGISTER_V4
1791 bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
1792 depends on SUNRPC && EXPERIMENTAL
1793 default n
1794 help
1795 Sun added support for registering RPC services at an IPv6
1796 address by creating two new versions of the rpcbind protocol
1797 (RFC 1833).
1798
1799 This option enables support in the kernel RPC server for
1800 registering kernel RPC services via version 4 of the rpcbind
1801 protocol. If you enable this option, you must run a portmapper
1802 daemon that supports rpcbind protocol version 4.
1803
1804 Serving NFS over IPv6 from knfsd (the kernel's NFS server)
1805 requires that you enable this option and use a portmapper that
1806 supports rpcbind version 4.
1807
1808 If unsure, say N to get traditional behavior (register kernel
1809 RPC services using only rpcbind version 2). Distributions
1810 using the legacy Linux portmapper daemon must say N here.
1811
1782config RPCSEC_GSS_KRB5 1812config RPCSEC_GSS_KRB5
1783 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" 1813 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
1784 depends on SUNRPC && EXPERIMENTAL 1814 depends on SUNRPC && EXPERIMENTAL
diff --git a/fs/Makefile b/fs/Makefile
index de404b00eb0c..b6f27dc26b72 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -7,7 +7,7 @@
7 7
8obj-y := open.o read_write.o file_table.o super.o \ 8obj-y := open.o read_write.o file_table.o super.o \
9 char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ 9 char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
10 ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o fifo.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 pnode.o drop_caches.o splice.o sync.o utimes.o \ 13 pnode.o drop_caches.o splice.o sync.o utimes.o \
@@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o
27obj-$(CONFIG_SIGNALFD) += signalfd.o 27obj-$(CONFIG_SIGNALFD) += signalfd.o
28obj-$(CONFIG_TIMERFD) += timerfd.o 28obj-$(CONFIG_TIMERFD) += timerfd.o
29obj-$(CONFIG_EVENTFD) += eventfd.o 29obj-$(CONFIG_EVENTFD) += eventfd.o
30obj-$(CONFIG_FILE_LOCKING) += locks.o
30obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 31obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
31 32
32nfsd-$(CONFIG_NFSD) := nfsctl.o 33nfsd-$(CONFIG_NFSD) := nfsctl.o
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 7725a0a9a555..97f6073ab339 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -5,6 +5,6 @@
5obj-$(CONFIG_LOCKD) += lockd.o 5obj-$(CONFIG_LOCKD) += lockd.o
6 6
7lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ 7lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \
8 svcproc.o svcsubs.o mon.o xdr.o 8 svcproc.o svcsubs.o mon.o xdr.o grace.o
9lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o 9lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o
10lockd-objs := $(lockd-objs-y) 10lockd-objs := $(lockd-objs-y)
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 0b45fd3a4bfd..8307dd64bf46 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -54,14 +54,13 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
54 u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; 54 u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4;
55 int status; 55 int status;
56 56
57 status = lockd_up(nlm_init->protocol); 57 status = lockd_up();
58 if (status < 0) 58 if (status < 0)
59 return ERR_PTR(status); 59 return ERR_PTR(status);
60 60
61 host = nlmclnt_lookup_host((struct sockaddr_in *)nlm_init->address, 61 host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
62 nlm_init->protocol, nlm_version, 62 nlm_init->protocol, nlm_version,
63 nlm_init->hostname, 63 nlm_init->hostname);
64 strlen(nlm_init->hostname));
65 if (host == NULL) { 64 if (host == NULL) {
66 lockd_down(); 65 lockd_down();
67 return ERR_PTR(-ENOLCK); 66 return ERR_PTR(-ENOLCK);
@@ -142,7 +141,7 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
142/* 141/*
143 * The server lockd has called us back to tell us the lock was granted 142 * The server lockd has called us back to tell us the lock was granted
144 */ 143 */
145__be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) 144__be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
146{ 145{
147 const struct file_lock *fl = &lock->fl; 146 const struct file_lock *fl = &lock->fl;
148 const struct nfs_fh *fh = &lock->fh; 147 const struct nfs_fh *fh = &lock->fh;
@@ -166,7 +165,7 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock
166 */ 165 */
167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) 166 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
168 continue; 167 continue;
169 if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) 168 if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
170 continue; 169 continue;
171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 170 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
172 continue; 171 continue;
@@ -216,7 +215,7 @@ reclaimer(void *ptr)
216 /* This one ensures that our parent doesn't terminate while the 215 /* This one ensures that our parent doesn't terminate while the
217 * reclaim is in progress */ 216 * reclaim is in progress */
218 lock_kernel(); 217 lock_kernel();
219 lockd_up(0); /* note: this cannot fail as lockd is already running */ 218 lockd_up(); /* note: this cannot fail as lockd is already running */
220 219
221 dprintk("lockd: reclaiming locks for host %s\n", host->h_name); 220 dprintk("lockd: reclaiming locks for host %s\n", host->h_name);
222 221
diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c
new file mode 100644
index 000000000000..183cc1f0af1c
--- /dev/null
+++ b/fs/lockd/grace.c
@@ -0,0 +1,59 @@
1/*
2 * Common code for control of lockd and nfsv4 grace periods.
3 */
4
5#include <linux/module.h>
6#include <linux/lockd/bind.h>
7
8static LIST_HEAD(grace_list);
9static DEFINE_SPINLOCK(grace_lock);
10
11/**
12 * locks_start_grace
13 * @lm: who this grace period is for
14 *
15 * A grace period is a period during which locks should not be given
16 * out. Currently grace periods are only enforced by the two lock
17 * managers (lockd and nfsd), using the locks_in_grace() function to
18 * check when they are in a grace period.
19 *
20 * This function is called to start a grace period.
21 */
22void locks_start_grace(struct lock_manager *lm)
23{
24 spin_lock(&grace_lock);
25 list_add(&lm->list, &grace_list);
26 spin_unlock(&grace_lock);
27}
28EXPORT_SYMBOL_GPL(locks_start_grace);
29
30/**
31 * locks_end_grace
32 * @lm: who this grace period is for
33 *
34 * Call this function to state that the given lock manager is ready to
35 * resume regular locking. The grace period will not end until all lock
36 * managers that called locks_start_grace() also call locks_end_grace().
37 * Note that callers count on it being safe to call this more than once,
38 * and the second call should be a no-op.
39 */
40void locks_end_grace(struct lock_manager *lm)
41{
42 spin_lock(&grace_lock);
43 list_del_init(&lm->list);
44 spin_unlock(&grace_lock);
45}
46EXPORT_SYMBOL_GPL(locks_end_grace);
47
48/**
49 * locks_in_grace
50 *
51 * Lock managers call this function to determine when it is OK for them
52 * to answer ordinary lock requests, and when they should accept only
53 * lock reclaims.
54 */
55int locks_in_grace(void)
56{
57 return !list_empty(&grace_list);
58}
59EXPORT_SYMBOL_GPL(locks_in_grace);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index a17664c7eacc..9fd8889097b7 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -11,16 +11,17 @@
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/in.h> 13#include <linux/in.h>
14#include <linux/in6.h>
14#include <linux/sunrpc/clnt.h> 15#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/svc.h> 16#include <linux/sunrpc/svc.h>
16#include <linux/lockd/lockd.h> 17#include <linux/lockd/lockd.h>
17#include <linux/lockd/sm_inter.h> 18#include <linux/lockd/sm_inter.h>
18#include <linux/mutex.h> 19#include <linux/mutex.h>
19 20
21#include <net/ipv6.h>
20 22
21#define NLMDBG_FACILITY NLMDBG_HOSTCACHE 23#define NLMDBG_FACILITY NLMDBG_HOSTCACHE
22#define NLM_HOST_NRHASH 32 24#define NLM_HOST_NRHASH 32
23#define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1))
24#define NLM_HOST_REBIND (60 * HZ) 25#define NLM_HOST_REBIND (60 * HZ)
25#define NLM_HOST_EXPIRE (300 * HZ) 26#define NLM_HOST_EXPIRE (300 * HZ)
26#define NLM_HOST_COLLECT (120 * HZ) 27#define NLM_HOST_COLLECT (120 * HZ)
@@ -30,42 +31,115 @@ static unsigned long next_gc;
30static int nrhosts; 31static int nrhosts;
31static DEFINE_MUTEX(nlm_host_mutex); 32static DEFINE_MUTEX(nlm_host_mutex);
32 33
33
34static void nlm_gc_hosts(void); 34static void nlm_gc_hosts(void);
35static struct nsm_handle * __nsm_find(const struct sockaddr_in *, 35static struct nsm_handle *nsm_find(const struct sockaddr *sap,
36 const char *, unsigned int, int); 36 const size_t salen,
37static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, 37 const char *hostname,
38 const char *hostname, 38 const size_t hostname_len,
39 unsigned int hostname_len); 39 const int create);
40
41struct nlm_lookup_host_info {
42 const int server; /* search for server|client */
43 const struct sockaddr *sap; /* address to search for */
44 const size_t salen; /* it's length */
45 const unsigned short protocol; /* transport to search for*/
46 const u32 version; /* NLM version to search for */
47 const char *hostname; /* remote's hostname */
48 const size_t hostname_len; /* it's length */
49 const struct sockaddr *src_sap; /* our address (optional) */
50 const size_t src_len; /* it's length */
51};
52
53/*
54 * Hash function must work well on big- and little-endian platforms
55 */
56static unsigned int __nlm_hash32(const __be32 n)
57{
58 unsigned int hash = (__force u32)n ^ ((__force u32)n >> 16);
59 return hash ^ (hash >> 8);
60}
61
62static unsigned int __nlm_hash_addr4(const struct sockaddr *sap)
63{
64 const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
65 return __nlm_hash32(sin->sin_addr.s_addr);
66}
67
68static unsigned int __nlm_hash_addr6(const struct sockaddr *sap)
69{
70 const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
71 const struct in6_addr addr = sin6->sin6_addr;
72 return __nlm_hash32(addr.s6_addr32[0]) ^
73 __nlm_hash32(addr.s6_addr32[1]) ^
74 __nlm_hash32(addr.s6_addr32[2]) ^
75 __nlm_hash32(addr.s6_addr32[3]);
76}
77
78static unsigned int nlm_hash_address(const struct sockaddr *sap)
79{
80 unsigned int hash;
81
82 switch (sap->sa_family) {
83 case AF_INET:
84 hash = __nlm_hash_addr4(sap);
85 break;
86 case AF_INET6:
87 hash = __nlm_hash_addr6(sap);
88 break;
89 default:
90 hash = 0;
91 }
92 return hash & (NLM_HOST_NRHASH - 1);
93}
94
95static void nlm_clear_port(struct sockaddr *sap)
96{
97 switch (sap->sa_family) {
98 case AF_INET:
99 ((struct sockaddr_in *)sap)->sin_port = 0;
100 break;
101 case AF_INET6:
102 ((struct sockaddr_in6 *)sap)->sin6_port = 0;
103 break;
104 }
105}
106
107static void nlm_display_address(const struct sockaddr *sap,
108 char *buf, const size_t len)
109{
110 const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
111 const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
112
113 switch (sap->sa_family) {
114 case AF_UNSPEC:
115 snprintf(buf, len, "unspecified");
116 break;
117 case AF_INET:
118 snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
119 break;
120 case AF_INET6:
121 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
122 snprintf(buf, len, NIPQUAD_FMT,
123 NIPQUAD(sin6->sin6_addr.s6_addr32[3]));
124 else
125 snprintf(buf, len, NIP6_FMT, NIP6(sin6->sin6_addr));
126 break;
127 default:
128 snprintf(buf, len, "unsupported address family");
129 break;
130 }
131}
40 132
41/* 133/*
42 * Common host lookup routine for server & client 134 * Common host lookup routine for server & client
43 */ 135 */
44static struct nlm_host *nlm_lookup_host(int server, 136static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
45 const struct sockaddr_in *sin,
46 int proto, u32 version,
47 const char *hostname,
48 unsigned int hostname_len,
49 const struct sockaddr_in *ssin)
50{ 137{
51 struct hlist_head *chain; 138 struct hlist_head *chain;
52 struct hlist_node *pos; 139 struct hlist_node *pos;
53 struct nlm_host *host; 140 struct nlm_host *host;
54 struct nsm_handle *nsm = NULL; 141 struct nsm_handle *nsm = NULL;
55 int hash;
56
57 dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
58 ", p=%d, v=%u, my role=%s, name=%.*s)\n",
59 NIPQUAD(ssin->sin_addr.s_addr),
60 NIPQUAD(sin->sin_addr.s_addr), proto, version,
61 server? "server" : "client",
62 hostname_len,
63 hostname? hostname : "<none>");
64 142
65
66 hash = NLM_ADDRHASH(sin->sin_addr.s_addr);
67
68 /* Lock hash table */
69 mutex_lock(&nlm_host_mutex); 143 mutex_lock(&nlm_host_mutex);
70 144
71 if (time_after_eq(jiffies, next_gc)) 145 if (time_after_eq(jiffies, next_gc))
@@ -78,22 +152,22 @@ static struct nlm_host *nlm_lookup_host(int server,
78 * different NLM rpc_clients into one single nlm_host object. 152 * different NLM rpc_clients into one single nlm_host object.
79 * This would allow us to have one nlm_host per address. 153 * This would allow us to have one nlm_host per address.
80 */ 154 */
81 chain = &nlm_hosts[hash]; 155 chain = &nlm_hosts[nlm_hash_address(ni->sap)];
82 hlist_for_each_entry(host, pos, chain, h_hash) { 156 hlist_for_each_entry(host, pos, chain, h_hash) {
83 if (!nlm_cmp_addr(&host->h_addr, sin)) 157 if (!nlm_cmp_addr(nlm_addr(host), ni->sap))
84 continue; 158 continue;
85 159
86 /* See if we have an NSM handle for this client */ 160 /* See if we have an NSM handle for this client */
87 if (!nsm) 161 if (!nsm)
88 nsm = host->h_nsmhandle; 162 nsm = host->h_nsmhandle;
89 163
90 if (host->h_proto != proto) 164 if (host->h_proto != ni->protocol)
91 continue; 165 continue;
92 if (host->h_version != version) 166 if (host->h_version != ni->version)
93 continue; 167 continue;
94 if (host->h_server != server) 168 if (host->h_server != ni->server)
95 continue; 169 continue;
96 if (!nlm_cmp_addr(&host->h_saddr, ssin)) 170 if (!nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap))
97 continue; 171 continue;
98 172
99 /* Move to head of hash chain. */ 173 /* Move to head of hash chain. */
@@ -101,30 +175,41 @@ static struct nlm_host *nlm_lookup_host(int server,
101 hlist_add_head(&host->h_hash, chain); 175 hlist_add_head(&host->h_hash, chain);
102 176
103 nlm_get_host(host); 177 nlm_get_host(host);
178 dprintk("lockd: nlm_lookup_host found host %s (%s)\n",
179 host->h_name, host->h_addrbuf);
104 goto out; 180 goto out;
105 } 181 }
106 if (nsm)
107 atomic_inc(&nsm->sm_count);
108
109 host = NULL;
110 182
111 /* Sadly, the host isn't in our hash table yet. See if 183 /*
112 * we have an NSM handle for it. If not, create one. 184 * The host wasn't in our hash table. If we don't
185 * have an NSM handle for it yet, create one.
113 */ 186 */
114 if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len))) 187 if (nsm)
115 goto out; 188 atomic_inc(&nsm->sm_count);
189 else {
190 host = NULL;
191 nsm = nsm_find(ni->sap, ni->salen,
192 ni->hostname, ni->hostname_len, 1);
193 if (!nsm) {
194 dprintk("lockd: nlm_lookup_host failed; "
195 "no nsm handle\n");
196 goto out;
197 }
198 }
116 199
117 host = kzalloc(sizeof(*host), GFP_KERNEL); 200 host = kzalloc(sizeof(*host), GFP_KERNEL);
118 if (!host) { 201 if (!host) {
119 nsm_release(nsm); 202 nsm_release(nsm);
203 dprintk("lockd: nlm_lookup_host failed; no memory\n");
120 goto out; 204 goto out;
121 } 205 }
122 host->h_name = nsm->sm_name; 206 host->h_name = nsm->sm_name;
123 host->h_addr = *sin; 207 memcpy(nlm_addr(host), ni->sap, ni->salen);
124 host->h_addr.sin_port = 0; /* ouch! */ 208 host->h_addrlen = ni->salen;
125 host->h_saddr = *ssin; 209 nlm_clear_port(nlm_addr(host));
126 host->h_version = version; 210 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
127 host->h_proto = proto; 211 host->h_version = ni->version;
212 host->h_proto = ni->protocol;
128 host->h_rpcclnt = NULL; 213 host->h_rpcclnt = NULL;
129 mutex_init(&host->h_mutex); 214 mutex_init(&host->h_mutex);
130 host->h_nextrebind = jiffies + NLM_HOST_REBIND; 215 host->h_nextrebind = jiffies + NLM_HOST_REBIND;
@@ -135,7 +220,7 @@ static struct nlm_host *nlm_lookup_host(int server,
135 host->h_state = 0; /* pseudo NSM state */ 220 host->h_state = 0; /* pseudo NSM state */
136 host->h_nsmstate = 0; /* real NSM state */ 221 host->h_nsmstate = 0; /* real NSM state */
137 host->h_nsmhandle = nsm; 222 host->h_nsmhandle = nsm;
138 host->h_server = server; 223 host->h_server = ni->server;
139 hlist_add_head(&host->h_hash, chain); 224 hlist_add_head(&host->h_hash, chain);
140 INIT_LIST_HEAD(&host->h_lockowners); 225 INIT_LIST_HEAD(&host->h_lockowners);
141 spin_lock_init(&host->h_lock); 226 spin_lock_init(&host->h_lock);
@@ -143,6 +228,15 @@ static struct nlm_host *nlm_lookup_host(int server,
143 INIT_LIST_HEAD(&host->h_reclaim); 228 INIT_LIST_HEAD(&host->h_reclaim);
144 229
145 nrhosts++; 230 nrhosts++;
231
232 nlm_display_address((struct sockaddr *)&host->h_addr,
233 host->h_addrbuf, sizeof(host->h_addrbuf));
234 nlm_display_address((struct sockaddr *)&host->h_srcaddr,
235 host->h_srcaddrbuf, sizeof(host->h_srcaddrbuf));
236
237 dprintk("lockd: nlm_lookup_host created host %s\n",
238 host->h_name);
239
146out: 240out:
147 mutex_unlock(&nlm_host_mutex); 241 mutex_unlock(&nlm_host_mutex);
148 return host; 242 return host;
@@ -170,33 +264,103 @@ nlm_destroy_host(struct nlm_host *host)
170 kfree(host); 264 kfree(host);
171} 265}
172 266
173/* 267/**
174 * Find an NLM server handle in the cache. If there is none, create it. 268 * nlmclnt_lookup_host - Find an NLM host handle matching a remote server
269 * @sap: network address of server
270 * @salen: length of server address
271 * @protocol: transport protocol to use
272 * @version: NLM protocol version
273 * @hostname: '\0'-terminated hostname of server
274 *
275 * Returns an nlm_host structure that matches the passed-in
276 * [server address, transport protocol, NLM version, server hostname].
277 * If one doesn't already exist in the host cache, a new handle is
278 * created and returned.
175 */ 279 */
176struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, 280struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
177 int proto, u32 version, 281 const size_t salen,
178 const char *hostname, 282 const unsigned short protocol,
179 unsigned int hostname_len) 283 const u32 version, const char *hostname)
180{ 284{
181 struct sockaddr_in ssin = {0}; 285 const struct sockaddr source = {
182 286 .sa_family = AF_UNSPEC,
183 return nlm_lookup_host(0, sin, proto, version, 287 };
184 hostname, hostname_len, &ssin); 288 struct nlm_lookup_host_info ni = {
289 .server = 0,
290 .sap = sap,
291 .salen = salen,
292 .protocol = protocol,
293 .version = version,
294 .hostname = hostname,
295 .hostname_len = strlen(hostname),
296 .src_sap = &source,
297 .src_len = sizeof(source),
298 };
299
300 dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
301 (hostname ? hostname : "<none>"), version,
302 (protocol == IPPROTO_UDP ? "udp" : "tcp"));
303
304 return nlm_lookup_host(&ni);
185} 305}
186 306
187/* 307/**
188 * Find an NLM client handle in the cache. If there is none, create it. 308 * nlmsvc_lookup_host - Find an NLM host handle matching a remote client
309 * @rqstp: incoming NLM request
310 * @hostname: name of client host
311 * @hostname_len: length of client hostname
312 *
313 * Returns an nlm_host structure that matches the [client address,
314 * transport protocol, NLM version, client hostname] of the passed-in
315 * NLM request. If one doesn't already exist in the host cache, a
316 * new handle is created and returned.
317 *
318 * Before possibly creating a new nlm_host, construct a sockaddr
319 * for a specific source address in case the local system has
320 * multiple network addresses. The family of the address in
321 * rq_daddr is guaranteed to be the same as the family of the
322 * address in rq_addr, so it's safe to use the same family for
323 * the source address.
189 */ 324 */
190struct nlm_host * 325struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
191nlmsvc_lookup_host(struct svc_rqst *rqstp, 326 const char *hostname,
192 const char *hostname, unsigned int hostname_len) 327 const size_t hostname_len)
193{ 328{
194 struct sockaddr_in ssin = {0}; 329 struct sockaddr_in sin = {
330 .sin_family = AF_INET,
331 };
332 struct sockaddr_in6 sin6 = {
333 .sin6_family = AF_INET6,
334 };
335 struct nlm_lookup_host_info ni = {
336 .server = 1,
337 .sap = svc_addr(rqstp),
338 .salen = rqstp->rq_addrlen,
339 .protocol = rqstp->rq_prot,
340 .version = rqstp->rq_vers,
341 .hostname = hostname,
342 .hostname_len = hostname_len,
343 .src_len = rqstp->rq_addrlen,
344 };
345
346 dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
347 (int)hostname_len, hostname, rqstp->rq_vers,
348 (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
349
350 switch (ni.sap->sa_family) {
351 case AF_INET:
352 sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr;
353 ni.src_sap = (struct sockaddr *)&sin;
354 break;
355 case AF_INET6:
356 ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6);
357 ni.src_sap = (struct sockaddr *)&sin6;
358 break;
359 default:
360 return NULL;
361 }
195 362
196 ssin.sin_addr = rqstp->rq_daddr.addr; 363 return nlm_lookup_host(&ni);
197 return nlm_lookup_host(1, svc_addr_in(rqstp),
198 rqstp->rq_prot, rqstp->rq_vers,
199 hostname, hostname_len, &ssin);
200} 364}
201 365
202/* 366/*
@@ -207,9 +371,8 @@ nlm_bind_host(struct nlm_host *host)
207{ 371{
208 struct rpc_clnt *clnt; 372 struct rpc_clnt *clnt;
209 373
210 dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n", 374 dprintk("lockd: nlm_bind_host %s (%s), my addr=%s\n",
211 NIPQUAD(host->h_saddr.sin_addr), 375 host->h_name, host->h_addrbuf, host->h_srcaddrbuf);
212 NIPQUAD(host->h_addr.sin_addr));
213 376
214 /* Lock host handle */ 377 /* Lock host handle */
215 mutex_lock(&host->h_mutex); 378 mutex_lock(&host->h_mutex);
@@ -221,7 +384,7 @@ nlm_bind_host(struct nlm_host *host)
221 if (time_after_eq(jiffies, host->h_nextrebind)) { 384 if (time_after_eq(jiffies, host->h_nextrebind)) {
222 rpc_force_rebind(clnt); 385 rpc_force_rebind(clnt);
223 host->h_nextrebind = jiffies + NLM_HOST_REBIND; 386 host->h_nextrebind = jiffies + NLM_HOST_REBIND;
224 dprintk("lockd: next rebind in %ld jiffies\n", 387 dprintk("lockd: next rebind in %lu jiffies\n",
225 host->h_nextrebind - jiffies); 388 host->h_nextrebind - jiffies);
226 } 389 }
227 } else { 390 } else {
@@ -234,9 +397,9 @@ nlm_bind_host(struct nlm_host *host)
234 }; 397 };
235 struct rpc_create_args args = { 398 struct rpc_create_args args = {
236 .protocol = host->h_proto, 399 .protocol = host->h_proto,
237 .address = (struct sockaddr *)&host->h_addr, 400 .address = nlm_addr(host),
238 .addrsize = sizeof(host->h_addr), 401 .addrsize = host->h_addrlen,
239 .saddress = (struct sockaddr *)&host->h_saddr, 402 .saddress = nlm_srcaddr(host),
240 .timeout = &timeparms, 403 .timeout = &timeparms,
241 .servername = host->h_name, 404 .servername = host->h_name,
242 .program = &nlm_program, 405 .program = &nlm_program,
@@ -324,12 +487,16 @@ void nlm_host_rebooted(const struct sockaddr_in *sin,
324 struct nsm_handle *nsm; 487 struct nsm_handle *nsm;
325 struct nlm_host *host; 488 struct nlm_host *host;
326 489
327 dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n", 490 nsm = nsm_find((struct sockaddr *)sin, sizeof(*sin),
328 hostname, NIPQUAD(sin->sin_addr)); 491 hostname, hostname_len, 0);
329 492 if (nsm == NULL) {
330 /* Find the NSM handle for this peer */ 493 dprintk("lockd: never saw rebooted peer '%.*s' before\n",
331 if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0))) 494 hostname_len, hostname);
332 return; 495 return;
496 }
497
498 dprintk("lockd: nlm_host_rebooted(%.*s, %s)\n",
499 hostname_len, hostname, nsm->sm_addrbuf);
333 500
334 /* When reclaiming locks on this peer, make sure that 501 /* When reclaiming locks on this peer, make sure that
335 * we set up a new notification */ 502 * we set up a new notification */
@@ -461,22 +628,23 @@ nlm_gc_hosts(void)
461static LIST_HEAD(nsm_handles); 628static LIST_HEAD(nsm_handles);
462static DEFINE_SPINLOCK(nsm_lock); 629static DEFINE_SPINLOCK(nsm_lock);
463 630
464static struct nsm_handle * 631static struct nsm_handle *nsm_find(const struct sockaddr *sap,
465__nsm_find(const struct sockaddr_in *sin, 632 const size_t salen,
466 const char *hostname, unsigned int hostname_len, 633 const char *hostname,
467 int create) 634 const size_t hostname_len,
635 const int create)
468{ 636{
469 struct nsm_handle *nsm = NULL; 637 struct nsm_handle *nsm = NULL;
470 struct nsm_handle *pos; 638 struct nsm_handle *pos;
471 639
472 if (!sin) 640 if (!sap)
473 return NULL; 641 return NULL;
474 642
475 if (hostname && memchr(hostname, '/', hostname_len) != NULL) { 643 if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
476 if (printk_ratelimit()) { 644 if (printk_ratelimit()) {
477 printk(KERN_WARNING "Invalid hostname \"%.*s\" " 645 printk(KERN_WARNING "Invalid hostname \"%.*s\" "
478 "in NFS lock request\n", 646 "in NFS lock request\n",
479 hostname_len, hostname); 647 (int)hostname_len, hostname);
480 } 648 }
481 return NULL; 649 return NULL;
482 } 650 }
@@ -489,7 +657,7 @@ retry:
489 if (strlen(pos->sm_name) != hostname_len 657 if (strlen(pos->sm_name) != hostname_len
490 || memcmp(pos->sm_name, hostname, hostname_len)) 658 || memcmp(pos->sm_name, hostname, hostname_len))
491 continue; 659 continue;
492 } else if (!nlm_cmp_addr(&pos->sm_addr, sin)) 660 } else if (!nlm_cmp_addr(nsm_addr(pos), sap))
493 continue; 661 continue;
494 atomic_inc(&pos->sm_count); 662 atomic_inc(&pos->sm_count);
495 kfree(nsm); 663 kfree(nsm);
@@ -509,10 +677,13 @@ retry:
509 if (nsm == NULL) 677 if (nsm == NULL)
510 return NULL; 678 return NULL;
511 679
512 nsm->sm_addr = *sin; 680 memcpy(nsm_addr(nsm), sap, salen);
681 nsm->sm_addrlen = salen;
513 nsm->sm_name = (char *) (nsm + 1); 682 nsm->sm_name = (char *) (nsm + 1);
514 memcpy(nsm->sm_name, hostname, hostname_len); 683 memcpy(nsm->sm_name, hostname, hostname_len);
515 nsm->sm_name[hostname_len] = '\0'; 684 nsm->sm_name[hostname_len] = '\0';
685 nlm_display_address((struct sockaddr *)&nsm->sm_addr,
686 nsm->sm_addrbuf, sizeof(nsm->sm_addrbuf));
516 atomic_set(&nsm->sm_count, 1); 687 atomic_set(&nsm->sm_count, 1);
517 goto retry; 688 goto retry;
518 689
@@ -521,13 +692,6 @@ found:
521 return nsm; 692 return nsm;
522} 693}
523 694
524static struct nsm_handle *
525nsm_find(const struct sockaddr_in *sin, const char *hostname,
526 unsigned int hostname_len)
527{
528 return __nsm_find(sin, hostname, hostname_len, 1);
529}
530
531/* 695/*
532 * Release an NSM handle 696 * Release an NSM handle
533 */ 697 */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e4d563543b11..4e7e958e8f67 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -51,7 +51,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
51 51
52 memset(&args, 0, sizeof(args)); 52 memset(&args, 0, sizeof(args));
53 args.mon_name = nsm->sm_name; 53 args.mon_name = nsm->sm_name;
54 args.addr = nsm->sm_addr.sin_addr.s_addr; 54 args.addr = nsm_addr_in(nsm)->sin_addr.s_addr;
55 args.prog = NLM_PROGRAM; 55 args.prog = NLM_PROGRAM;
56 args.vers = 3; 56 args.vers = 3;
57 args.proc = NLMPROC_NSM_NOTIFY; 57 args.proc = NLMPROC_NSM_NOTIFY;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5bd9bf0fa9df..c631a83931ce 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -51,7 +51,6 @@ static DEFINE_MUTEX(nlmsvc_mutex);
51static unsigned int nlmsvc_users; 51static unsigned int nlmsvc_users;
52static struct task_struct *nlmsvc_task; 52static struct task_struct *nlmsvc_task;
53static struct svc_rqst *nlmsvc_rqst; 53static struct svc_rqst *nlmsvc_rqst;
54int nlmsvc_grace_period;
55unsigned long nlmsvc_timeout; 54unsigned long nlmsvc_timeout;
56 55
57/* 56/*
@@ -85,27 +84,23 @@ static unsigned long get_lockd_grace_period(void)
85 return nlm_timeout * 5 * HZ; 84 return nlm_timeout * 5 * HZ;
86} 85}
87 86
88unsigned long get_nfs_grace_period(void) 87static struct lock_manager lockd_manager = {
89{ 88};
90 unsigned long lockdgrace = get_lockd_grace_period();
91 unsigned long nfsdgrace = 0;
92
93 if (nlmsvc_ops)
94 nfsdgrace = nlmsvc_ops->get_grace_period();
95
96 return max(lockdgrace, nfsdgrace);
97}
98EXPORT_SYMBOL(get_nfs_grace_period);
99 89
100static unsigned long set_grace_period(void) 90static void grace_ender(struct work_struct *not_used)
101{ 91{
102 nlmsvc_grace_period = 1; 92 locks_end_grace(&lockd_manager);
103 return get_nfs_grace_period() + jiffies;
104} 93}
105 94
106static inline void clear_grace_period(void) 95static DECLARE_DELAYED_WORK(grace_period_end, grace_ender);
96
97static void set_grace_period(void)
107{ 98{
108 nlmsvc_grace_period = 0; 99 unsigned long grace_period = get_lockd_grace_period();
100
101 locks_start_grace(&lockd_manager);
102 cancel_delayed_work_sync(&grace_period_end);
103 schedule_delayed_work(&grace_period_end, grace_period);
109} 104}
110 105
111/* 106/*
@@ -116,7 +111,6 @@ lockd(void *vrqstp)
116{ 111{
117 int err = 0, preverr = 0; 112 int err = 0, preverr = 0;
118 struct svc_rqst *rqstp = vrqstp; 113 struct svc_rqst *rqstp = vrqstp;
119 unsigned long grace_period_expire;
120 114
121 /* try_to_freeze() is called from svc_recv() */ 115 /* try_to_freeze() is called from svc_recv() */
122 set_freezable(); 116 set_freezable();
@@ -139,7 +133,7 @@ lockd(void *vrqstp)
139 nlm_timeout = LOCKD_DFLT_TIMEO; 133 nlm_timeout = LOCKD_DFLT_TIMEO;
140 nlmsvc_timeout = nlm_timeout * HZ; 134 nlmsvc_timeout = nlm_timeout * HZ;
141 135
142 grace_period_expire = set_grace_period(); 136 set_grace_period();
143 137
144 /* 138 /*
145 * The main request loop. We don't terminate until the last 139 * The main request loop. We don't terminate until the last
@@ -153,21 +147,12 @@ lockd(void *vrqstp)
153 flush_signals(current); 147 flush_signals(current);
154 if (nlmsvc_ops) { 148 if (nlmsvc_ops) {
155 nlmsvc_invalidate_all(); 149 nlmsvc_invalidate_all();
156 grace_period_expire = set_grace_period(); 150 set_grace_period();
157 } 151 }
158 continue; 152 continue;
159 } 153 }
160 154
161 /* 155 timeout = nlmsvc_retry_blocked();
162 * Retry any blocked locks that have been notified by
163 * the VFS. Don't do this during grace period.
164 * (Theoretically, there shouldn't even be blocked locks
165 * during grace period).
166 */
167 if (!nlmsvc_grace_period) {
168 timeout = nlmsvc_retry_blocked();
169 } else if (time_before(grace_period_expire, jiffies))
170 clear_grace_period();
171 156
172 /* 157 /*
173 * Find a socket with data available and call its 158 * Find a socket with data available and call its
@@ -195,6 +180,7 @@ lockd(void *vrqstp)
195 svc_process(rqstp); 180 svc_process(rqstp);
196 } 181 }
197 flush_signals(current); 182 flush_signals(current);
183 cancel_delayed_work_sync(&grace_period_end);
198 if (nlmsvc_ops) 184 if (nlmsvc_ops)
199 nlmsvc_invalidate_all(); 185 nlmsvc_invalidate_all();
200 nlm_shutdown_hosts(); 186 nlm_shutdown_hosts();
@@ -203,25 +189,28 @@ lockd(void *vrqstp)
203} 189}
204 190
205/* 191/*
206 * Make any sockets that are needed but not present. 192 * Ensure there are active UDP and TCP listeners for lockd.
207 * If nlm_udpport or nlm_tcpport were set as module 193 *
208 * options, make those sockets unconditionally 194 * Even if we have only TCP NFS mounts and/or TCP NFSDs, some
195 * local services (such as rpc.statd) still require UDP, and
196 * some NFS servers do not yet support NLM over TCP.
197 *
198 * Returns zero if all listeners are available; otherwise a
199 * negative errno value is returned.
209 */ 200 */
210static int make_socks(struct svc_serv *serv, int proto) 201static int make_socks(struct svc_serv *serv)
211{ 202{
212 static int warned; 203 static int warned;
213 struct svc_xprt *xprt; 204 struct svc_xprt *xprt;
214 int err = 0; 205 int err = 0;
215 206
216 if (proto == IPPROTO_UDP || nlm_udpport) { 207 xprt = svc_find_xprt(serv, "udp", 0, 0);
217 xprt = svc_find_xprt(serv, "udp", 0, 0); 208 if (!xprt)
218 if (!xprt) 209 err = svc_create_xprt(serv, "udp", nlm_udpport,
219 err = svc_create_xprt(serv, "udp", nlm_udpport, 210 SVC_SOCK_DEFAULTS);
220 SVC_SOCK_DEFAULTS); 211 else
221 else 212 svc_xprt_put(xprt);
222 svc_xprt_put(xprt); 213 if (err >= 0) {
223 }
224 if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) {
225 xprt = svc_find_xprt(serv, "tcp", 0, 0); 214 xprt = svc_find_xprt(serv, "tcp", 0, 0);
226 if (!xprt) 215 if (!xprt)
227 err = svc_create_xprt(serv, "tcp", nlm_tcpport, 216 err = svc_create_xprt(serv, "tcp", nlm_tcpport,
@@ -241,8 +230,7 @@ static int make_socks(struct svc_serv *serv, int proto)
241/* 230/*
242 * Bring up the lockd process if it's not already up. 231 * Bring up the lockd process if it's not already up.
243 */ 232 */
244int 233int lockd_up(void)
245lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
246{ 234{
247 struct svc_serv *serv; 235 struct svc_serv *serv;
248 int error = 0; 236 int error = 0;
@@ -251,11 +239,8 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
251 /* 239 /*
252 * Check whether we're already up and running. 240 * Check whether we're already up and running.
253 */ 241 */
254 if (nlmsvc_rqst) { 242 if (nlmsvc_rqst)
255 if (proto)
256 error = make_socks(nlmsvc_rqst->rq_server, proto);
257 goto out; 243 goto out;
258 }
259 244
260 /* 245 /*
261 * Sanity check: if there's no pid, 246 * Sanity check: if there's no pid,
@@ -266,13 +251,14 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
266 "lockd_up: no pid, %d users??\n", nlmsvc_users); 251 "lockd_up: no pid, %d users??\n", nlmsvc_users);
267 252
268 error = -ENOMEM; 253 error = -ENOMEM;
269 serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); 254 serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, AF_INET, NULL);
270 if (!serv) { 255 if (!serv) {
271 printk(KERN_WARNING "lockd_up: create service failed\n"); 256 printk(KERN_WARNING "lockd_up: create service failed\n");
272 goto out; 257 goto out;
273 } 258 }
274 259
275 if ((error = make_socks(serv, proto)) < 0) 260 error = make_socks(serv);
261 if (error < 0)
276 goto destroy_and_out; 262 goto destroy_and_out;
277 263
278 /* 264 /*
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4a714f64515b..014f6ce48172 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -88,12 +88,6 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
88 dprintk("lockd: TEST4 called\n"); 88 dprintk("lockd: TEST4 called\n");
89 resp->cookie = argp->cookie; 89 resp->cookie = argp->cookie;
90 90
91 /* Don't accept test requests during grace period */
92 if (nlmsvc_grace_period) {
93 resp->status = nlm_lck_denied_grace_period;
94 return rc;
95 }
96
97 /* Obtain client and file */ 91 /* Obtain client and file */
98 if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) 92 if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
99 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; 93 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -122,12 +116,6 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
122 116
123 resp->cookie = argp->cookie; 117 resp->cookie = argp->cookie;
124 118
125 /* Don't accept new lock requests during grace period */
126 if (nlmsvc_grace_period && !argp->reclaim) {
127 resp->status = nlm_lck_denied_grace_period;
128 return rc;
129 }
130
131 /* Obtain client and file */ 119 /* Obtain client and file */
132 if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) 120 if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
133 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; 121 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -146,7 +134,8 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
146 134
147 /* Now try to lock the file */ 135 /* Now try to lock the file */
148 resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, 136 resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
149 argp->block, &argp->cookie); 137 argp->block, &argp->cookie,
138 argp->reclaim);
150 if (resp->status == nlm_drop_reply) 139 if (resp->status == nlm_drop_reply)
151 rc = rpc_drop_reply; 140 rc = rpc_drop_reply;
152 else 141 else
@@ -169,7 +158,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
169 resp->cookie = argp->cookie; 158 resp->cookie = argp->cookie;
170 159
171 /* Don't accept requests during grace period */ 160 /* Don't accept requests during grace period */
172 if (nlmsvc_grace_period) { 161 if (locks_in_grace()) {
173 resp->status = nlm_lck_denied_grace_period; 162 resp->status = nlm_lck_denied_grace_period;
174 return rpc_success; 163 return rpc_success;
175 } 164 }
@@ -202,7 +191,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
202 resp->cookie = argp->cookie; 191 resp->cookie = argp->cookie;
203 192
204 /* Don't accept new lock requests during grace period */ 193 /* Don't accept new lock requests during grace period */
205 if (nlmsvc_grace_period) { 194 if (locks_in_grace()) {
206 resp->status = nlm_lck_denied_grace_period; 195 resp->status = nlm_lck_denied_grace_period;
207 return rpc_success; 196 return rpc_success;
208 } 197 }
@@ -231,7 +220,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
231 resp->cookie = argp->cookie; 220 resp->cookie = argp->cookie;
232 221
233 dprintk("lockd: GRANTED called\n"); 222 dprintk("lockd: GRANTED called\n");
234 resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); 223 resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock);
235 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 224 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
236 return rpc_success; 225 return rpc_success;
237} 226}
@@ -341,7 +330,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
341 resp->cookie = argp->cookie; 330 resp->cookie = argp->cookie;
342 331
343 /* Don't accept new lock requests during grace period */ 332 /* Don't accept new lock requests during grace period */
344 if (nlmsvc_grace_period && !argp->reclaim) { 333 if (locks_in_grace() && !argp->reclaim) {
345 resp->status = nlm_lck_denied_grace_period; 334 resp->status = nlm_lck_denied_grace_period;
346 return rpc_success; 335 return rpc_success;
347 } 336 }
@@ -374,7 +363,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
374 resp->cookie = argp->cookie; 363 resp->cookie = argp->cookie;
375 364
376 /* Don't accept requests during grace period */ 365 /* Don't accept requests during grace period */
377 if (nlmsvc_grace_period) { 366 if (locks_in_grace()) {
378 resp->status = nlm_lck_denied_grace_period; 367 resp->status = nlm_lck_denied_grace_period;
379 return rpc_success; 368 return rpc_success;
380 } 369 }
@@ -432,11 +421,9 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
432{ 421{
433 struct sockaddr_in saddr; 422 struct sockaddr_in saddr;
434 423
435 memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr));
436
437 dprintk("lockd: SM_NOTIFY called\n"); 424 dprintk("lockd: SM_NOTIFY called\n");
438 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) 425
439 || ntohs(saddr.sin_port) >= 1024) { 426 if (!nlm_privileged_requester(rqstp)) {
440 char buf[RPC_MAX_ADDRBUFLEN]; 427 char buf[RPC_MAX_ADDRBUFLEN];
441 printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", 428 printk(KERN_WARNING "lockd: rejected NSM callback from %s\n",
442 svc_print_addr(rqstp, buf, sizeof(buf))); 429 svc_print_addr(rqstp, buf, sizeof(buf)));
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index cf0d5c2c318d..6063a8e4b9f3 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -360,7 +360,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
360__be32 360__be32
361nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, 361nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
362 struct nlm_host *host, struct nlm_lock *lock, int wait, 362 struct nlm_host *host, struct nlm_lock *lock, int wait,
363 struct nlm_cookie *cookie) 363 struct nlm_cookie *cookie, int reclaim)
364{ 364{
365 struct nlm_block *block = NULL; 365 struct nlm_block *block = NULL;
366 int error; 366 int error;
@@ -406,6 +406,15 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
406 goto out; 406 goto out;
407 } 407 }
408 408
409 if (locks_in_grace() && !reclaim) {
410 ret = nlm_lck_denied_grace_period;
411 goto out;
412 }
413 if (reclaim && !locks_in_grace()) {
414 ret = nlm_lck_denied_grace_period;
415 goto out;
416 }
417
409 if (!wait) 418 if (!wait)
410 lock->fl.fl_flags &= ~FL_SLEEP; 419 lock->fl.fl_flags &= ~FL_SLEEP;
411 error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); 420 error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
@@ -502,6 +511,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
502 goto out; 511 goto out;
503 } 512 }
504 513
514 if (locks_in_grace()) {
515 ret = nlm_lck_denied_grace_period;
516 goto out;
517 }
505 error = vfs_test_lock(file->f_file, &lock->fl); 518 error = vfs_test_lock(file->f_file, &lock->fl);
506 if (error == FILE_LOCK_DEFERRED) { 519 if (error == FILE_LOCK_DEFERRED) {
507 ret = nlmsvc_defer_lock_rqst(rqstp, block); 520 ret = nlmsvc_defer_lock_rqst(rqstp, block);
@@ -582,6 +595,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
582 (long long)lock->fl.fl_start, 595 (long long)lock->fl.fl_start,
583 (long long)lock->fl.fl_end); 596 (long long)lock->fl.fl_end);
584 597
598 if (locks_in_grace())
599 return nlm_lck_denied_grace_period;
600
585 mutex_lock(&file->f_mutex); 601 mutex_lock(&file->f_mutex);
586 block = nlmsvc_lookup_block(file, lock); 602 block = nlmsvc_lookup_block(file, lock);
587 mutex_unlock(&file->f_mutex); 603 mutex_unlock(&file->f_mutex);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 76262c1986f2..548b0bb2b84d 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -117,12 +117,6 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
117 dprintk("lockd: TEST called\n"); 117 dprintk("lockd: TEST called\n");
118 resp->cookie = argp->cookie; 118 resp->cookie = argp->cookie;
119 119
120 /* Don't accept test requests during grace period */
121 if (nlmsvc_grace_period) {
122 resp->status = nlm_lck_denied_grace_period;
123 return rc;
124 }
125
126 /* Obtain client and file */ 120 /* Obtain client and file */
127 if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) 121 if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
128 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; 122 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -152,12 +146,6 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
152 146
153 resp->cookie = argp->cookie; 147 resp->cookie = argp->cookie;
154 148
155 /* Don't accept new lock requests during grace period */
156 if (nlmsvc_grace_period && !argp->reclaim) {
157 resp->status = nlm_lck_denied_grace_period;
158 return rc;
159 }
160
161 /* Obtain client and file */ 149 /* Obtain client and file */
162 if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) 150 if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
163 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; 151 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
@@ -176,7 +164,8 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
176 164
177 /* Now try to lock the file */ 165 /* Now try to lock the file */
178 resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, 166 resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
179 argp->block, &argp->cookie)); 167 argp->block, &argp->cookie,
168 argp->reclaim));
180 if (resp->status == nlm_drop_reply) 169 if (resp->status == nlm_drop_reply)
181 rc = rpc_drop_reply; 170 rc = rpc_drop_reply;
182 else 171 else
@@ -199,7 +188,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
199 resp->cookie = argp->cookie; 188 resp->cookie = argp->cookie;
200 189
201 /* Don't accept requests during grace period */ 190 /* Don't accept requests during grace period */
202 if (nlmsvc_grace_period) { 191 if (locks_in_grace()) {
203 resp->status = nlm_lck_denied_grace_period; 192 resp->status = nlm_lck_denied_grace_period;
204 return rpc_success; 193 return rpc_success;
205 } 194 }
@@ -232,7 +221,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
232 resp->cookie = argp->cookie; 221 resp->cookie = argp->cookie;
233 222
234 /* Don't accept new lock requests during grace period */ 223 /* Don't accept new lock requests during grace period */
235 if (nlmsvc_grace_period) { 224 if (locks_in_grace()) {
236 resp->status = nlm_lck_denied_grace_period; 225 resp->status = nlm_lck_denied_grace_period;
237 return rpc_success; 226 return rpc_success;
238 } 227 }
@@ -261,7 +250,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
261 resp->cookie = argp->cookie; 250 resp->cookie = argp->cookie;
262 251
263 dprintk("lockd: GRANTED called\n"); 252 dprintk("lockd: GRANTED called\n");
264 resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); 253 resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock);
265 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 254 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
266 return rpc_success; 255 return rpc_success;
267} 256}
@@ -373,7 +362,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
373 resp->cookie = argp->cookie; 362 resp->cookie = argp->cookie;
374 363
375 /* Don't accept new lock requests during grace period */ 364 /* Don't accept new lock requests during grace period */
376 if (nlmsvc_grace_period && !argp->reclaim) { 365 if (locks_in_grace() && !argp->reclaim) {
377 resp->status = nlm_lck_denied_grace_period; 366 resp->status = nlm_lck_denied_grace_period;
378 return rpc_success; 367 return rpc_success;
379 } 368 }
@@ -406,7 +395,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
406 resp->cookie = argp->cookie; 395 resp->cookie = argp->cookie;
407 396
408 /* Don't accept requests during grace period */ 397 /* Don't accept requests during grace period */
409 if (nlmsvc_grace_period) { 398 if (locks_in_grace()) {
410 resp->status = nlm_lck_denied_grace_period; 399 resp->status = nlm_lck_denied_grace_period;
411 return rpc_success; 400 return rpc_success;
412 } 401 }
@@ -464,11 +453,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
464{ 453{
465 struct sockaddr_in saddr; 454 struct sockaddr_in saddr;
466 455
467 memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr));
468
469 dprintk("lockd: SM_NOTIFY called\n"); 456 dprintk("lockd: SM_NOTIFY called\n");
470 if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) 457
471 || ntohs(saddr.sin_port) >= 1024) { 458 if (!nlm_privileged_requester(rqstp)) {
472 char buf[RPC_MAX_ADDRBUFLEN]; 459 char buf[RPC_MAX_ADDRBUFLEN];
473 printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", 460 printk(KERN_WARNING "lockd: rejected NSM callback from %s\n",
474 svc_print_addr(rqstp, buf, sizeof(buf))); 461 svc_print_addr(rqstp, buf, sizeof(buf)));
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 198b4e55b373..34c2766e27c7 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
418static int 418static int
419nlmsvc_match_ip(void *datap, struct nlm_host *host) 419nlmsvc_match_ip(void *datap, struct nlm_host *host)
420{ 420{
421 return nlm_cmp_addr(&host->h_saddr, datap); 421 return nlm_cmp_addr(nlm_srcaddr(host), datap);
422} 422}
423 423
424/** 424/**
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 3e459e18cc31..1f226290c67c 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -351,8 +351,6 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
351 argp->state = ntohl(*p++); 351 argp->state = ntohl(*p++);
352 /* Preserve the address in network byte order */ 352 /* Preserve the address in network byte order */
353 argp->addr = *p++; 353 argp->addr = *p++;
354 argp->vers = *p++;
355 argp->proto = *p++;
356 return xdr_argsize_check(rqstp, p); 354 return xdr_argsize_check(rqstp, p);
357} 355}
358 356
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 43ff9397e6c6..50c493a8ad8e 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -358,8 +358,6 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp
358 argp->state = ntohl(*p++); 358 argp->state = ntohl(*p++);
359 /* Preserve the address in network byte order */ 359 /* Preserve the address in network byte order */
360 argp->addr = *p++; 360 argp->addr = *p++;
361 argp->vers = *p++;
362 argp->proto = *p++;
363 return xdr_argsize_check(rqstp, p); 361 return xdr_argsize_check(rqstp, p);
364} 362}
365 363
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index f447f4b4476c..6a09760c5960 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -105,7 +105,8 @@ int nfs_callback_up(void)
105 mutex_lock(&nfs_callback_mutex); 105 mutex_lock(&nfs_callback_mutex);
106 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) 106 if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
107 goto out; 107 goto out;
108 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); 108 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
109 AF_INET, NULL);
109 ret = -ENOMEM; 110 ret = -ENOMEM;
110 if (!serv) 111 if (!serv)
111 goto out_err; 112 goto out_err;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 15c6faeec77c..b2786a5f9afe 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -70,7 +70,6 @@ nlm_fclose(struct file *filp)
70static struct nlmsvc_binding nfsd_nlm_ops = { 70static struct nlmsvc_binding nfsd_nlm_ops = {
71 .fopen = nlm_fopen, /* open file for locking */ 71 .fopen = nlm_fopen, /* open file for locking */
72 .fclose = nlm_fclose, /* close file */ 72 .fclose = nlm_fclose, /* close file */
73 .get_grace_period = get_nfs4_grace_period,
74}; 73};
75 74
76void 75void
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 4d617ea28cfc..9dbd2eb91281 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -63,7 +63,8 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
63 SVCFH_fmt(&argp->fh)); 63 SVCFH_fmt(&argp->fh));
64 64
65 fh_copy(&resp->fh, &argp->fh); 65 fh_copy(&resp->fh, &argp->fh);
66 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); 66 nfserr = fh_verify(rqstp, &resp->fh, 0,
67 NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
67 if (nfserr) 68 if (nfserr)
68 RETURN_STATUS(nfserr); 69 RETURN_STATUS(nfserr);
69 70
@@ -530,7 +531,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
530 dprintk("nfsd: FSSTAT(3) %s\n", 531 dprintk("nfsd: FSSTAT(3) %s\n",
531 SVCFH_fmt(&argp->fh)); 532 SVCFH_fmt(&argp->fh));
532 533
533 nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); 534 nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0);
534 fh_put(&argp->fh); 535 fh_put(&argp->fh);
535 RETURN_STATUS(nfserr); 536 RETURN_STATUS(nfserr);
536} 537}
@@ -558,7 +559,8 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
558 resp->f_maxfilesize = ~(u32) 0; 559 resp->f_maxfilesize = ~(u32) 0;
559 resp->f_properties = NFS3_FSF_DEFAULT; 560 resp->f_properties = NFS3_FSF_DEFAULT;
560 561
561 nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); 562 nfserr = fh_verify(rqstp, &argp->fh, 0,
563 NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
562 564
563 /* Check special features of the file system. May request 565 /* Check special features of the file system. May request
564 * different read/write sizes for file systems known to have 566 * different read/write sizes for file systems known to have
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 702fa577aa6e..094747a1227c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -225,7 +225,8 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
225 225
226 RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); 226 RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len);
227 WRITE32(OP_CB_RECALL); 227 WRITE32(OP_CB_RECALL);
228 WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t)); 228 WRITE32(cb_rec->cbr_stateid.si_generation);
229 WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t));
229 WRITE32(cb_rec->cbr_trunc); 230 WRITE32(cb_rec->cbr_trunc);
230 WRITE32(len); 231 WRITE32(len);
231 WRITEMEM(cb_rec->cbr_fhval, len); 232 WRITEMEM(cb_rec->cbr_fhval, len);
@@ -379,6 +380,7 @@ static int do_probe_callback(void *data)
379 .addrsize = sizeof(addr), 380 .addrsize = sizeof(addr),
380 .timeout = &timeparms, 381 .timeout = &timeparms,
381 .program = &cb_program, 382 .program = &cb_program,
383 .prognumber = cb->cb_prog,
382 .version = nfs_cb_version[1]->number, 384 .version = nfs_cb_version[1]->number,
383 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ 385 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
384 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 386 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
@@ -396,9 +398,6 @@ static int do_probe_callback(void *data)
396 addr.sin_port = htons(cb->cb_port); 398 addr.sin_port = htons(cb->cb_port);
397 addr.sin_addr.s_addr = htonl(cb->cb_addr); 399 addr.sin_addr.s_addr = htonl(cb->cb_addr);
398 400
399 /* Initialize rpc_stat */
400 memset(args.program->stats, 0, sizeof(struct rpc_stat));
401
402 /* Create RPC client */ 401 /* Create RPC client */
403 client = rpc_create(&args); 402 client = rpc_create(&args);
404 if (IS_ERR(client)) { 403 if (IS_ERR(client)) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e5b51ffafc6c..669461e291ae 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -201,10 +201,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
201 /* Openowner is now set, so sequence id will get bumped. Now we need 201 /* Openowner is now set, so sequence id will get bumped. Now we need
202 * these checks before we do any creates: */ 202 * these checks before we do any creates: */
203 status = nfserr_grace; 203 status = nfserr_grace;
204 if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) 204 if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
205 goto out; 205 goto out;
206 status = nfserr_no_grace; 206 status = nfserr_no_grace;
207 if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) 207 if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
208 goto out; 208 goto out;
209 209
210 switch (open->op_claim_type) { 210 switch (open->op_claim_type) {
@@ -575,7 +575,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
575{ 575{
576 __be32 status; 576 __be32 status;
577 577
578 if (nfs4_in_grace()) 578 if (locks_in_grace())
579 return nfserr_grace; 579 return nfserr_grace;
580 status = nfsd_unlink(rqstp, &cstate->current_fh, 0, 580 status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
581 remove->rm_name, remove->rm_namelen); 581 remove->rm_name, remove->rm_namelen);
@@ -596,7 +596,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
596 596
597 if (!cstate->save_fh.fh_dentry) 597 if (!cstate->save_fh.fh_dentry)
598 return status; 598 return status;
599 if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags 599 if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags
600 & NFSEXP_NOSUBTREECHECK)) 600 & NFSEXP_NOSUBTREECHECK))
601 return nfserr_grace; 601 return nfserr_grace;
602 status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, 602 status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1578d7a2667e..0cc7ff5d5ab5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -61,7 +61,6 @@
61static time_t lease_time = 90; /* default lease time */ 61static time_t lease_time = 90; /* default lease time */
62static time_t user_lease_time = 90; 62static time_t user_lease_time = 90;
63static time_t boot_time; 63static time_t boot_time;
64static int in_grace = 1;
65static u32 current_ownerid = 1; 64static u32 current_ownerid = 1;
66static u32 current_fileid = 1; 65static u32 current_fileid = 1;
67static u32 current_delegid = 1; 66static u32 current_delegid = 1;
@@ -1640,7 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
1640 case NFS4_OPEN_CLAIM_NULL: 1639 case NFS4_OPEN_CLAIM_NULL:
1641 /* Let's not give out any delegations till everyone's 1640 /* Let's not give out any delegations till everyone's
1642 * had the chance to reclaim theirs.... */ 1641 * had the chance to reclaim theirs.... */
1643 if (nfs4_in_grace()) 1642 if (locks_in_grace())
1644 goto out; 1643 goto out;
1645 if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) 1644 if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
1646 goto out; 1645 goto out;
@@ -1816,12 +1815,15 @@ out:
1816 return status; 1815 return status;
1817} 1816}
1818 1817
1818struct lock_manager nfsd4_manager = {
1819};
1820
1819static void 1821static void
1820end_grace(void) 1822nfsd4_end_grace(void)
1821{ 1823{
1822 dprintk("NFSD: end of grace period\n"); 1824 dprintk("NFSD: end of grace period\n");
1823 nfsd4_recdir_purge_old(); 1825 nfsd4_recdir_purge_old();
1824 in_grace = 0; 1826 locks_end_grace(&nfsd4_manager);
1825} 1827}
1826 1828
1827static time_t 1829static time_t
@@ -1838,8 +1840,8 @@ nfs4_laundromat(void)
1838 nfs4_lock_state(); 1840 nfs4_lock_state();
1839 1841
1840 dprintk("NFSD: laundromat service - starting\n"); 1842 dprintk("NFSD: laundromat service - starting\n");
1841 if (in_grace) 1843 if (locks_in_grace())
1842 end_grace(); 1844 nfsd4_end_grace();
1843 list_for_each_safe(pos, next, &client_lru) { 1845 list_for_each_safe(pos, next, &client_lru) {
1844 clp = list_entry(pos, struct nfs4_client, cl_lru); 1846 clp = list_entry(pos, struct nfs4_client, cl_lru);
1845 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { 1847 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -1974,7 +1976,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
1974 return nfserr_bad_stateid; 1976 return nfserr_bad_stateid;
1975 else if (ONE_STATEID(stateid) && (flags & RD_STATE)) 1977 else if (ONE_STATEID(stateid) && (flags & RD_STATE))
1976 return nfs_ok; 1978 return nfs_ok;
1977 else if (nfs4_in_grace()) { 1979 else if (locks_in_grace()) {
1978 /* Answer in remaining cases depends on existance of 1980 /* Answer in remaining cases depends on existance of
1979 * conflicting state; so we must wait out the grace period. */ 1981 * conflicting state; so we must wait out the grace period. */
1980 return nfserr_grace; 1982 return nfserr_grace;
@@ -1993,7 +1995,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
1993static inline int 1995static inline int
1994io_during_grace_disallowed(struct inode *inode, int flags) 1996io_during_grace_disallowed(struct inode *inode, int flags)
1995{ 1997{
1996 return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) 1998 return locks_in_grace() && (flags & (RD_STATE | WR_STATE))
1997 && mandatory_lock(inode); 1999 && mandatory_lock(inode);
1998} 2000}
1999 2001
@@ -2693,10 +2695,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2693 filp = lock_stp->st_vfs_file; 2695 filp = lock_stp->st_vfs_file;
2694 2696
2695 status = nfserr_grace; 2697 status = nfserr_grace;
2696 if (nfs4_in_grace() && !lock->lk_reclaim) 2698 if (locks_in_grace() && !lock->lk_reclaim)
2697 goto out; 2699 goto out;
2698 status = nfserr_no_grace; 2700 status = nfserr_no_grace;
2699 if (!nfs4_in_grace() && lock->lk_reclaim) 2701 if (!locks_in_grace() && lock->lk_reclaim)
2700 goto out; 2702 goto out;
2701 2703
2702 locks_init_lock(&file_lock); 2704 locks_init_lock(&file_lock);
@@ -2779,7 +2781,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2779 int error; 2781 int error;
2780 __be32 status; 2782 __be32 status;
2781 2783
2782 if (nfs4_in_grace()) 2784 if (locks_in_grace())
2783 return nfserr_grace; 2785 return nfserr_grace;
2784 2786
2785 if (check_lock_length(lockt->lt_offset, lockt->lt_length)) 2787 if (check_lock_length(lockt->lt_offset, lockt->lt_length))
@@ -3192,9 +3194,9 @@ __nfs4_state_start(void)
3192 unsigned long grace_time; 3194 unsigned long grace_time;
3193 3195
3194 boot_time = get_seconds(); 3196 boot_time = get_seconds();
3195 grace_time = get_nfs_grace_period(); 3197 grace_time = get_nfs4_grace_period();
3196 lease_time = user_lease_time; 3198 lease_time = user_lease_time;
3197 in_grace = 1; 3199 locks_start_grace(&nfsd4_manager);
3198 printk(KERN_INFO "NFSD: starting %ld-second grace period\n", 3200 printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
3199 grace_time/HZ); 3201 grace_time/HZ);
3200 laundry_wq = create_singlethread_workqueue("nfsd4"); 3202 laundry_wq = create_singlethread_workqueue("nfsd4");
@@ -3213,12 +3215,6 @@ nfs4_state_start(void)
3213 return; 3215 return;
3214} 3216}
3215 3217
3216int
3217nfs4_in_grace(void)
3218{
3219 return in_grace;
3220}
3221
3222time_t 3218time_t
3223nfs4_lease_time(void) 3219nfs4_lease_time(void)
3224{ 3220{
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 14ba4d9b2859..afcdf4b76843 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -413,6 +413,18 @@ out_nfserr:
413} 413}
414 414
415static __be32 415static __be32
416nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid)
417{
418 DECODE_HEAD;
419
420 READ_BUF(sizeof(stateid_t));
421 READ32(sid->si_generation);
422 COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
423
424 DECODE_TAIL;
425}
426
427static __be32
416nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) 428nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access)
417{ 429{
418 DECODE_HEAD; 430 DECODE_HEAD;
@@ -429,10 +441,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
429 DECODE_HEAD; 441 DECODE_HEAD;
430 442
431 close->cl_stateowner = NULL; 443 close->cl_stateowner = NULL;
432 READ_BUF(4 + sizeof(stateid_t)); 444 READ_BUF(4);
433 READ32(close->cl_seqid); 445 READ32(close->cl_seqid);
434 READ32(close->cl_stateid.si_generation); 446 return nfsd4_decode_stateid(argp, &close->cl_stateid);
435 COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t));
436 447
437 DECODE_TAIL; 448 DECODE_TAIL;
438} 449}
@@ -493,13 +504,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
493static inline __be32 504static inline __be32
494nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) 505nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
495{ 506{
496 DECODE_HEAD; 507 return nfsd4_decode_stateid(argp, &dr->dr_stateid);
497
498 READ_BUF(sizeof(stateid_t));
499 READ32(dr->dr_stateid.si_generation);
500 COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t));
501
502 DECODE_TAIL;
503} 508}
504 509
505static inline __be32 510static inline __be32
@@ -542,20 +547,22 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
542 READ32(lock->lk_is_new); 547 READ32(lock->lk_is_new);
543 548
544 if (lock->lk_is_new) { 549 if (lock->lk_is_new) {
545 READ_BUF(36); 550 READ_BUF(4);
546 READ32(lock->lk_new_open_seqid); 551 READ32(lock->lk_new_open_seqid);
547 READ32(lock->lk_new_open_stateid.si_generation); 552 status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid);
548 553 if (status)
549 COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t)); 554 return status;
555 READ_BUF(8 + sizeof(clientid_t));
550 READ32(lock->lk_new_lock_seqid); 556 READ32(lock->lk_new_lock_seqid);
551 COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); 557 COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
552 READ32(lock->lk_new_owner.len); 558 READ32(lock->lk_new_owner.len);
553 READ_BUF(lock->lk_new_owner.len); 559 READ_BUF(lock->lk_new_owner.len);
554 READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); 560 READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
555 } else { 561 } else {
556 READ_BUF(20); 562 status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid);
557 READ32(lock->lk_old_lock_stateid.si_generation); 563 if (status)
558 COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t)); 564 return status;
565 READ_BUF(4);
559 READ32(lock->lk_old_lock_seqid); 566 READ32(lock->lk_old_lock_seqid);
560 } 567 }
561 568
@@ -587,13 +594,15 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
587 DECODE_HEAD; 594 DECODE_HEAD;
588 595
589 locku->lu_stateowner = NULL; 596 locku->lu_stateowner = NULL;
590 READ_BUF(24 + sizeof(stateid_t)); 597 READ_BUF(8);
591 READ32(locku->lu_type); 598 READ32(locku->lu_type);
592 if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) 599 if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
593 goto xdr_error; 600 goto xdr_error;
594 READ32(locku->lu_seqid); 601 READ32(locku->lu_seqid);
595 READ32(locku->lu_stateid.si_generation); 602 status = nfsd4_decode_stateid(argp, &locku->lu_stateid);
596 COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); 603 if (status)
604 return status;
605 READ_BUF(16);
597 READ64(locku->lu_offset); 606 READ64(locku->lu_offset);
598 READ64(locku->lu_length); 607 READ64(locku->lu_length);
599 608
@@ -678,8 +687,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
678 READ32(open->op_delegate_type); 687 READ32(open->op_delegate_type);
679 break; 688 break;
680 case NFS4_OPEN_CLAIM_DELEGATE_CUR: 689 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
681 READ_BUF(sizeof(stateid_t) + 4); 690 status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
682 COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t)); 691 if (status)
692 return status;
693 READ_BUF(4);
683 READ32(open->op_fname.len); 694 READ32(open->op_fname.len);
684 READ_BUF(open->op_fname.len); 695 READ_BUF(open->op_fname.len);
685 SAVEMEM(open->op_fname.data, open->op_fname.len); 696 SAVEMEM(open->op_fname.data, open->op_fname.len);
@@ -699,9 +710,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
699 DECODE_HEAD; 710 DECODE_HEAD;
700 711
701 open_conf->oc_stateowner = NULL; 712 open_conf->oc_stateowner = NULL;
702 READ_BUF(4 + sizeof(stateid_t)); 713 status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid);
703 READ32(open_conf->oc_req_stateid.si_generation); 714 if (status)
704 COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); 715 return status;
716 READ_BUF(4);
705 READ32(open_conf->oc_seqid); 717 READ32(open_conf->oc_seqid);
706 718
707 DECODE_TAIL; 719 DECODE_TAIL;
@@ -713,9 +725,10 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
713 DECODE_HEAD; 725 DECODE_HEAD;
714 726
715 open_down->od_stateowner = NULL; 727 open_down->od_stateowner = NULL;
716 READ_BUF(12 + sizeof(stateid_t)); 728 status = nfsd4_decode_stateid(argp, &open_down->od_stateid);
717 READ32(open_down->od_stateid.si_generation); 729 if (status)
718 COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); 730 return status;
731 READ_BUF(12);
719 READ32(open_down->od_seqid); 732 READ32(open_down->od_seqid);
720 READ32(open_down->od_share_access); 733 READ32(open_down->od_share_access);
721 READ32(open_down->od_share_deny); 734 READ32(open_down->od_share_deny);
@@ -743,9 +756,10 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
743{ 756{
744 DECODE_HEAD; 757 DECODE_HEAD;
745 758
746 READ_BUF(sizeof(stateid_t) + 12); 759 status = nfsd4_decode_stateid(argp, &read->rd_stateid);
747 READ32(read->rd_stateid.si_generation); 760 if (status)
748 COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t)); 761 return status;
762 READ_BUF(12);
749 READ64(read->rd_offset); 763 READ64(read->rd_offset);
750 READ32(read->rd_length); 764 READ32(read->rd_length);
751 765
@@ -834,15 +848,13 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
834static __be32 848static __be32
835nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) 849nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
836{ 850{
837 DECODE_HEAD; 851 __be32 status;
838
839 READ_BUF(sizeof(stateid_t));
840 READ32(setattr->sa_stateid.si_generation);
841 COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t));
842 if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl)))
843 goto out;
844 852
845 DECODE_TAIL; 853 status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
854 if (status)
855 return status;
856 return nfsd4_decode_fattr(argp, setattr->sa_bmval,
857 &setattr->sa_iattr, &setattr->sa_acl);
846} 858}
847 859
848static __be32 860static __be32
@@ -927,9 +939,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
927 int len; 939 int len;
928 DECODE_HEAD; 940 DECODE_HEAD;
929 941
930 READ_BUF(sizeof(stateid_opaque_t) + 20); 942 status = nfsd4_decode_stateid(argp, &write->wr_stateid);
931 READ32(write->wr_stateid.si_generation); 943 if (status)
932 COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); 944 return status;
945 READ_BUF(16);
933 READ64(write->wr_offset); 946 READ64(write->wr_offset);
934 READ32(write->wr_stable_how); 947 READ32(write->wr_stable_how);
935 if (write->wr_stable_how > 2) 948 if (write->wr_stable_how > 2)
@@ -1183,7 +1196,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1183 * Header routine to setup seqid operation replay cache 1196 * Header routine to setup seqid operation replay cache
1184 */ 1197 */
1185#define ENCODE_SEQID_OP_HEAD \ 1198#define ENCODE_SEQID_OP_HEAD \
1186 __be32 *p; \
1187 __be32 *save; \ 1199 __be32 *save; \
1188 \ 1200 \
1189 save = resp->p; 1201 save = resp->p;
@@ -1950,6 +1962,17 @@ fail:
1950 return -EINVAL; 1962 return -EINVAL;
1951} 1963}
1952 1964
1965static void
1966nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
1967{
1968 ENCODE_HEAD;
1969
1970 RESERVE_SPACE(sizeof(stateid_t));
1971 WRITE32(sid->si_generation);
1972 WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
1973 ADJUST_ARGS();
1974}
1975
1953static __be32 1976static __be32
1954nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) 1977nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
1955{ 1978{
@@ -1969,12 +1992,9 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
1969{ 1992{
1970 ENCODE_SEQID_OP_HEAD; 1993 ENCODE_SEQID_OP_HEAD;
1971 1994
1972 if (!nfserr) { 1995 if (!nfserr)
1973 RESERVE_SPACE(sizeof(stateid_t)); 1996 nfsd4_encode_stateid(resp, &close->cl_stateid);
1974 WRITE32(close->cl_stateid.si_generation); 1997
1975 WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t));
1976 ADJUST_ARGS();
1977 }
1978 ENCODE_SEQID_OP_TAIL(close->cl_stateowner); 1998 ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
1979 return nfserr; 1999 return nfserr;
1980} 2000}
@@ -2074,12 +2094,9 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
2074{ 2094{
2075 ENCODE_SEQID_OP_HEAD; 2095 ENCODE_SEQID_OP_HEAD;
2076 2096
2077 if (!nfserr) { 2097 if (!nfserr)
2078 RESERVE_SPACE(4 + sizeof(stateid_t)); 2098 nfsd4_encode_stateid(resp, &lock->lk_resp_stateid);
2079 WRITE32(lock->lk_resp_stateid.si_generation); 2099 else if (nfserr == nfserr_denied)
2080 WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t));
2081 ADJUST_ARGS();
2082 } else if (nfserr == nfserr_denied)
2083 nfsd4_encode_lock_denied(resp, &lock->lk_denied); 2100 nfsd4_encode_lock_denied(resp, &lock->lk_denied);
2084 2101
2085 ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); 2102 ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner);
@@ -2099,13 +2116,9 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
2099{ 2116{
2100 ENCODE_SEQID_OP_HEAD; 2117 ENCODE_SEQID_OP_HEAD;
2101 2118
2102 if (!nfserr) { 2119 if (!nfserr)
2103 RESERVE_SPACE(sizeof(stateid_t)); 2120 nfsd4_encode_stateid(resp, &locku->lu_stateid);
2104 WRITE32(locku->lu_stateid.si_generation); 2121
2105 WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t));
2106 ADJUST_ARGS();
2107 }
2108
2109 ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); 2122 ENCODE_SEQID_OP_TAIL(locku->lu_stateowner);
2110 return nfserr; 2123 return nfserr;
2111} 2124}
@@ -2128,14 +2141,14 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
2128static __be32 2141static __be32
2129nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) 2142nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
2130{ 2143{
2144 ENCODE_HEAD;
2131 ENCODE_SEQID_OP_HEAD; 2145 ENCODE_SEQID_OP_HEAD;
2132 2146
2133 if (nfserr) 2147 if (nfserr)
2134 goto out; 2148 goto out;
2135 2149
2136 RESERVE_SPACE(36 + sizeof(stateid_t)); 2150 nfsd4_encode_stateid(resp, &open->op_stateid);
2137 WRITE32(open->op_stateid.si_generation); 2151 RESERVE_SPACE(40);
2138 WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t));
2139 WRITECINFO(open->op_cinfo); 2152 WRITECINFO(open->op_cinfo);
2140 WRITE32(open->op_rflags); 2153 WRITE32(open->op_rflags);
2141 WRITE32(2); 2154 WRITE32(2);
@@ -2148,8 +2161,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
2148 case NFS4_OPEN_DELEGATE_NONE: 2161 case NFS4_OPEN_DELEGATE_NONE:
2149 break; 2162 break;
2150 case NFS4_OPEN_DELEGATE_READ: 2163 case NFS4_OPEN_DELEGATE_READ:
2151 RESERVE_SPACE(20 + sizeof(stateid_t)); 2164 nfsd4_encode_stateid(resp, &open->op_delegate_stateid);
2152 WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); 2165 RESERVE_SPACE(20);
2153 WRITE32(open->op_recall); 2166 WRITE32(open->op_recall);
2154 2167
2155 /* 2168 /*
@@ -2162,8 +2175,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
2162 ADJUST_ARGS(); 2175 ADJUST_ARGS();
2163 break; 2176 break;
2164 case NFS4_OPEN_DELEGATE_WRITE: 2177 case NFS4_OPEN_DELEGATE_WRITE:
2165 RESERVE_SPACE(32 + sizeof(stateid_t)); 2178 nfsd4_encode_stateid(resp, &open->op_delegate_stateid);
2166 WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); 2179 RESERVE_SPACE(32);
2167 WRITE32(0); 2180 WRITE32(0);
2168 2181
2169 /* 2182 /*
@@ -2195,13 +2208,9 @@ static __be32
2195nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) 2208nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
2196{ 2209{
2197 ENCODE_SEQID_OP_HEAD; 2210 ENCODE_SEQID_OP_HEAD;
2198 2211
2199 if (!nfserr) { 2212 if (!nfserr)
2200 RESERVE_SPACE(sizeof(stateid_t)); 2213 nfsd4_encode_stateid(resp, &oc->oc_resp_stateid);
2201 WRITE32(oc->oc_resp_stateid.si_generation);
2202 WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t));
2203 ADJUST_ARGS();
2204 }
2205 2214
2206 ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); 2215 ENCODE_SEQID_OP_TAIL(oc->oc_stateowner);
2207 return nfserr; 2216 return nfserr;
@@ -2211,13 +2220,9 @@ static __be32
2211nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) 2220nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
2212{ 2221{
2213 ENCODE_SEQID_OP_HEAD; 2222 ENCODE_SEQID_OP_HEAD;
2214 2223
2215 if (!nfserr) { 2224 if (!nfserr)
2216 RESERVE_SPACE(sizeof(stateid_t)); 2225 nfsd4_encode_stateid(resp, &od->od_stateid);
2217 WRITE32(od->od_stateid.si_generation);
2218 WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t));
2219 ADJUST_ARGS();
2220 }
2221 2226
2222 ENCODE_SEQID_OP_TAIL(od->od_stateowner); 2227 ENCODE_SEQID_OP_TAIL(od->od_stateowner);
2223 return nfserr; 2228 return nfserr;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c53e65f8f3a2..97543df58242 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -614,10 +614,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
614 return -EINVAL; 614 return -EINVAL;
615 err = nfsd_create_serv(); 615 err = nfsd_create_serv();
616 if (!err) { 616 if (!err) {
617 int proto = 0; 617 err = svc_addsock(nfsd_serv, fd, buf);
618 err = svc_addsock(nfsd_serv, fd, buf, &proto);
619 if (err >= 0) { 618 if (err >= 0) {
620 err = lockd_up(proto); 619 err = lockd_up();
621 if (err < 0) 620 if (err < 0)
622 svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); 621 svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf);
623 } 622 }
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ea37c96f0445..cd25d91895a1 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -302,17 +302,27 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
302 if (error) 302 if (error)
303 goto out; 303 goto out;
304 304
305 if (!(access & NFSD_MAY_LOCK)) { 305 /*
306 /* 306 * pseudoflavor restrictions are not enforced on NLM,
307 * pseudoflavor restrictions are not enforced on NLM, 307 * which clients virtually always use auth_sys for,
308 * which clients virtually always use auth_sys for, 308 * even while using RPCSEC_GSS for NFS.
309 * even while using RPCSEC_GSS for NFS. 309 */
310 */ 310 if (access & NFSD_MAY_LOCK)
311 error = check_nfsd_access(exp, rqstp); 311 goto skip_pseudoflavor_check;
312 if (error) 312 /*
313 goto out; 313 * Clients may expect to be able to use auth_sys during mount,
314 } 314 * even if they use gss for everything else; see section 2.3.2
315 * of rfc 2623.
316 */
317 if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT
318 && exp->ex_path.dentry == dentry)
319 goto skip_pseudoflavor_check;
320
321 error = check_nfsd_access(exp, rqstp);
322 if (error)
323 goto out;
315 324
325skip_pseudoflavor_check:
316 /* Finally, check access permissions. */ 326 /* Finally, check access permissions. */
317 error = nfsd_permission(rqstp, exp, dentry, access); 327 error = nfsd_permission(rqstp, exp, dentry, access);
318 328
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0766f95d236a..5cffeca7acef 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -65,7 +65,8 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
65 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); 65 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
66 66
67 fh_copy(&resp->fh, &argp->fh); 67 fh_copy(&resp->fh, &argp->fh);
68 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); 68 nfserr = fh_verify(rqstp, &resp->fh, 0,
69 NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
69 return nfsd_return_attrs(nfserr, resp); 70 return nfsd_return_attrs(nfserr, resp);
70} 71}
71 72
@@ -521,7 +522,8 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
521 522
522 dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); 523 dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh));
523 524
524 nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); 525 nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats,
526 NFSD_MAY_BYPASS_GSS_ON_ROOT);
525 fh_put(&argp->fh); 527 fh_put(&argp->fh);
526 return nfserr; 528 return nfserr;
527} 529}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 80292ff5e924..59eeb46f82c5 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -229,6 +229,7 @@ int nfsd_create_serv(void)
229 229
230 atomic_set(&nfsd_busy, 0); 230 atomic_set(&nfsd_busy, 0);
231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, 231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
232 AF_INET,
232 nfsd_last_thread, nfsd, THIS_MODULE); 233 nfsd_last_thread, nfsd, THIS_MODULE);
233 if (nfsd_serv == NULL) 234 if (nfsd_serv == NULL)
234 err = -ENOMEM; 235 err = -ENOMEM;
@@ -243,25 +244,20 @@ static int nfsd_init_socks(int port)
243 if (!list_empty(&nfsd_serv->sv_permsocks)) 244 if (!list_empty(&nfsd_serv->sv_permsocks))
244 return 0; 245 return 0;
245 246
246 error = lockd_up(IPPROTO_UDP); 247 error = svc_create_xprt(nfsd_serv, "udp", port,
247 if (error >= 0) {
248 error = svc_create_xprt(nfsd_serv, "udp", port,
249 SVC_SOCK_DEFAULTS); 248 SVC_SOCK_DEFAULTS);
250 if (error < 0)
251 lockd_down();
252 }
253 if (error < 0) 249 if (error < 0)
254 return error; 250 return error;
255 251
256 error = lockd_up(IPPROTO_TCP); 252 error = svc_create_xprt(nfsd_serv, "tcp", port,
257 if (error >= 0) {
258 error = svc_create_xprt(nfsd_serv, "tcp", port,
259 SVC_SOCK_DEFAULTS); 253 SVC_SOCK_DEFAULTS);
260 if (error < 0)
261 lockd_down();
262 }
263 if (error < 0) 254 if (error < 0)
264 return error; 255 return error;
256
257 error = lockd_up();
258 if (error < 0)
259 return error;
260
265 return 0; 261 return 0;
266} 262}
267 263
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 18060bed5267..aa1d0d6489a1 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -83,7 +83,6 @@ struct raparm_hbucket {
83 spinlock_t pb_lock; 83 spinlock_t pb_lock;
84} ____cacheline_aligned_in_smp; 84} ____cacheline_aligned_in_smp;
85 85
86static struct raparms * raparml;
87#define RAPARM_HASH_BITS 4 86#define RAPARM_HASH_BITS 4
88#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) 87#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
89#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) 88#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
@@ -1866,9 +1865,9 @@ out:
1866 * N.B. After this call fhp needs an fh_put 1865 * N.B. After this call fhp needs an fh_put
1867 */ 1866 */
1868__be32 1867__be32
1869nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) 1868nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
1870{ 1869{
1871 __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); 1870 __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
1872 if (!err && vfs_statfs(fhp->fh_dentry,stat)) 1871 if (!err && vfs_statfs(fhp->fh_dentry,stat))
1873 err = nfserr_io; 1872 err = nfserr_io;
1874 return err; 1873 return err;
@@ -1966,11 +1965,20 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1966void 1965void
1967nfsd_racache_shutdown(void) 1966nfsd_racache_shutdown(void)
1968{ 1967{
1969 if (!raparml) 1968 struct raparms *raparm, *last_raparm;
1970 return; 1969 unsigned int i;
1970
1971 dprintk("nfsd: freeing readahead buffers.\n"); 1971 dprintk("nfsd: freeing readahead buffers.\n");
1972 kfree(raparml); 1972
1973 raparml = NULL; 1973 for (i = 0; i < RAPARM_HASH_SIZE; i++) {
1974 raparm = raparm_hash[i].pb_head;
1975 while(raparm) {
1976 last_raparm = raparm;
1977 raparm = raparm->p_next;
1978 kfree(last_raparm);
1979 }
1980 raparm_hash[i].pb_head = NULL;
1981 }
1974} 1982}
1975/* 1983/*
1976 * Initialize readahead param cache 1984 * Initialize readahead param cache
@@ -1981,35 +1989,38 @@ nfsd_racache_init(int cache_size)
1981 int i; 1989 int i;
1982 int j = 0; 1990 int j = 0;
1983 int nperbucket; 1991 int nperbucket;
1992 struct raparms **raparm = NULL;
1984 1993
1985 1994
1986 if (raparml) 1995 if (raparm_hash[0].pb_head)
1987 return 0; 1996 return 0;
1988 if (cache_size < 2*RAPARM_HASH_SIZE) 1997 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
1989 cache_size = 2*RAPARM_HASH_SIZE; 1998 if (nperbucket < 2)
1990 raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL); 1999 nperbucket = 2;
1991 2000 cache_size = nperbucket * RAPARM_HASH_SIZE;
1992 if (!raparml) {
1993 printk(KERN_WARNING
1994 "nfsd: Could not allocate memory read-ahead cache.\n");
1995 return -ENOMEM;
1996 }
1997 2001
1998 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); 2002 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
1999 for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { 2003
2000 raparm_hash[i].pb_head = NULL; 2004 for (i = 0; i < RAPARM_HASH_SIZE; i++) {
2001 spin_lock_init(&raparm_hash[i].pb_lock); 2005 spin_lock_init(&raparm_hash[i].pb_lock);
2002 } 2006
2003 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); 2007 raparm = &raparm_hash[i].pb_head;
2004 for (i = 0; i < cache_size - 1; i++) { 2008 for (j = 0; j < nperbucket; j++) {
2005 if (i % nperbucket == 0) 2009 *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
2006 raparm_hash[j++].pb_head = raparml + i; 2010 if (!*raparm)
2007 if (i % nperbucket < nperbucket-1) 2011 goto out_nomem;
2008 raparml[i].p_next = raparml + i + 1; 2012 raparm = &(*raparm)->p_next;
2013 }
2014 *raparm = NULL;
2009 } 2015 }
2010 2016
2011 nfsdstats.ra_size = cache_size; 2017 nfsdstats.ra_size = cache_size;
2012 return 0; 2018 return 0;
2019
2020out_nomem:
2021 dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
2022 nfsd_racache_shutdown();
2023 return -ENOMEM;
2013} 2024}
2014 2025
2015#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 2026#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 66c1ab87656c..b675a49c1823 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -683,6 +683,7 @@ static int cmdline_read_proc(char *page, char **start, off_t off,
683 return proc_calc_metrics(page, start, off, count, eof, len); 683 return proc_calc_metrics(page, start, off, count, eof, len);
684} 684}
685 685
686#ifdef CONFIG_FILE_LOCKING
686static int locks_open(struct inode *inode, struct file *filp) 687static int locks_open(struct inode *inode, struct file *filp)
687{ 688{
688 return seq_open(filp, &locks_seq_operations); 689 return seq_open(filp, &locks_seq_operations);
@@ -694,6 +695,7 @@ static const struct file_operations proc_locks_operations = {
694 .llseek = seq_lseek, 695 .llseek = seq_lseek,
695 .release = seq_release, 696 .release = seq_release,
696}; 697};
698#endif /* CONFIG_FILE_LOCKING */
697 699
698static int execdomains_read_proc(char *page, char **start, off_t off, 700static int execdomains_read_proc(char *page, char **start, off_t off,
699 int count, int *eof, void *data) 701 int count, int *eof, void *data)
@@ -887,7 +889,9 @@ void __init proc_misc_init(void)
887#ifdef CONFIG_PRINTK 889#ifdef CONFIG_PRINTK
888 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); 890 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
889#endif 891#endif
892#ifdef CONFIG_FILE_LOCKING
890 proc_create("locks", 0, NULL, &proc_locks_operations); 893 proc_create("locks", 0, NULL, &proc_locks_operations);
894#endif
891 proc_create("devices", 0, NULL, &proc_devinfo_operations); 895 proc_create("devices", 0, NULL, &proc_devinfo_operations);
892 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); 896 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
893#ifdef CONFIG_BLOCK 897#ifdef CONFIG_BLOCK
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 44e3cb2f1966..a6a625be13fc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -947,6 +947,14 @@ struct lock_manager_operations {
947 int (*fl_change)(struct file_lock **, int); 947 int (*fl_change)(struct file_lock **, int);
948}; 948};
949 949
950struct lock_manager {
951 struct list_head list;
952};
953
954void locks_start_grace(struct lock_manager *);
955void locks_end_grace(struct lock_manager *);
956int locks_in_grace(void);
957
950/* that will die - we need it for nfs_lock_info */ 958/* that will die - we need it for nfs_lock_info */
951#include <linux/nfs_fs_i.h> 959#include <linux/nfs_fs_i.h>
952 960
@@ -988,6 +996,13 @@ struct file_lock {
988 996
989#include <linux/fcntl.h> 997#include <linux/fcntl.h>
990 998
999extern void send_sigio(struct fown_struct *fown, int fd, int band);
1000
1001/* fs/sync.c */
1002extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
1003 loff_t endbyte, unsigned int flags);
1004
1005#ifdef CONFIG_FILE_LOCKING
991extern int fcntl_getlk(struct file *, struct flock __user *); 1006extern int fcntl_getlk(struct file *, struct flock __user *);
992extern int fcntl_setlk(unsigned int, struct file *, unsigned int, 1007extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
993 struct flock __user *); 1008 struct flock __user *);
@@ -998,14 +1013,9 @@ extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
998 struct flock64 __user *); 1013 struct flock64 __user *);
999#endif 1014#endif
1000 1015
1001extern void send_sigio(struct fown_struct *fown, int fd, int band);
1002extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); 1016extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
1003extern int fcntl_getlease(struct file *filp); 1017extern int fcntl_getlease(struct file *filp);
1004 1018
1005/* fs/sync.c */
1006extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
1007 loff_t endbyte, unsigned int flags);
1008
1009/* fs/locks.c */ 1019/* fs/locks.c */
1010extern void locks_init_lock(struct file_lock *); 1020extern void locks_init_lock(struct file_lock *);
1011extern void locks_copy_lock(struct file_lock *, struct file_lock *); 1021extern void locks_copy_lock(struct file_lock *, struct file_lock *);
@@ -1028,6 +1038,37 @@ extern int lease_modify(struct file_lock **, int);
1028extern int lock_may_read(struct inode *, loff_t start, unsigned long count); 1038extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
1029extern int lock_may_write(struct inode *, loff_t start, unsigned long count); 1039extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
1030extern struct seq_operations locks_seq_operations; 1040extern struct seq_operations locks_seq_operations;
1041#else /* !CONFIG_FILE_LOCKING */
1042#define fcntl_getlk(a, b) ({ -EINVAL; })
1043#define fcntl_setlk(a, b, c, d) ({ -EACCES; })
1044#if BITS_PER_LONG == 32
1045#define fcntl_getlk64(a, b) ({ -EINVAL; })
1046#define fcntl_setlk64(a, b, c, d) ({ -EACCES; })
1047#endif
1048#define fcntl_setlease(a, b, c) ({ 0; })
1049#define fcntl_getlease(a) ({ 0; })
1050#define locks_init_lock(a) ({ })
1051#define __locks_copy_lock(a, b) ({ })
1052#define locks_copy_lock(a, b) ({ })
1053#define locks_remove_posix(a, b) ({ })
1054#define locks_remove_flock(a) ({ })
1055#define posix_test_lock(a, b) ({ 0; })
1056#define posix_lock_file(a, b, c) ({ -ENOLCK; })
1057#define posix_lock_file_wait(a, b) ({ -ENOLCK; })
1058#define posix_unblock_lock(a, b) (-ENOENT)
1059#define vfs_test_lock(a, b) ({ 0; })
1060#define vfs_lock_file(a, b, c, d) (-ENOLCK)
1061#define vfs_cancel_lock(a, b) ({ 0; })
1062#define flock_lock_file_wait(a, b) ({ -ENOLCK; })
1063#define __break_lease(a, b) ({ 0; })
1064#define lease_get_mtime(a, b) ({ })
1065#define generic_setlease(a, b, c) ({ -EINVAL; })
1066#define vfs_setlease(a, b, c) ({ -EINVAL; })
1067#define lease_modify(a, b) ({ -EINVAL; })
1068#define lock_may_read(a, b, c) ({ 1; })
1069#define lock_may_write(a, b, c) ({ 1; })
1070#endif /* !CONFIG_FILE_LOCKING */
1071
1031 1072
1032struct fasync_struct { 1073struct fasync_struct {
1033 int magic; 1074 int magic;
@@ -1575,9 +1616,12 @@ extern int vfs_statfs(struct dentry *, struct kstatfs *);
1575/* /sys/fs */ 1616/* /sys/fs */
1576extern struct kobject *fs_kobj; 1617extern struct kobject *fs_kobj;
1577 1618
1619extern int rw_verify_area(int, struct file *, loff_t *, size_t);
1620
1578#define FLOCK_VERIFY_READ 1 1621#define FLOCK_VERIFY_READ 1
1579#define FLOCK_VERIFY_WRITE 2 1622#define FLOCK_VERIFY_WRITE 2
1580 1623
1624#ifdef CONFIG_FILE_LOCKING
1581extern int locks_mandatory_locked(struct inode *); 1625extern int locks_mandatory_locked(struct inode *);
1582extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); 1626extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
1583 1627
@@ -1608,8 +1652,6 @@ static inline int locks_verify_locked(struct inode *inode)
1608 return 0; 1652 return 0;
1609} 1653}
1610 1654
1611extern int rw_verify_area(int, struct file *, loff_t *, size_t);
1612
1613static inline int locks_verify_truncate(struct inode *inode, 1655static inline int locks_verify_truncate(struct inode *inode,
1614 struct file *filp, 1656 struct file *filp,
1615 loff_t size) 1657 loff_t size)
@@ -1630,6 +1672,15 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
1630 return __break_lease(inode, mode); 1672 return __break_lease(inode, mode);
1631 return 0; 1673 return 0;
1632} 1674}
1675#else /* !CONFIG_FILE_LOCKING */
1676#define locks_mandatory_locked(a) ({ 0; })
1677#define locks_mandatory_area(a, b, c, d, e) ({ 0; })
1678#define __mandatory_lock(a) ({ 0; })
1679#define mandatory_lock(a) ({ 0; })
1680#define locks_verify_locked(a) ({ 0; })
1681#define locks_verify_truncate(a, b, c) ({ 0; })
1682#define break_lease(a, b) ({ 0; })
1683#endif /* CONFIG_FILE_LOCKING */
1633 1684
1634/* fs/open.c */ 1685/* fs/open.c */
1635 1686
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 3d25bcd139d1..e5872dc994c0 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -27,7 +27,6 @@ struct nlmsvc_binding {
27 struct nfs_fh *, 27 struct nfs_fh *,
28 struct file **); 28 struct file **);
29 void (*fclose)(struct file *); 29 void (*fclose)(struct file *);
30 unsigned long (*get_grace_period)(void);
31}; 30};
32 31
33extern struct nlmsvc_binding * nlmsvc_ops; 32extern struct nlmsvc_binding * nlmsvc_ops;
@@ -53,15 +52,7 @@ extern void nlmclnt_done(struct nlm_host *host);
53 52
54extern int nlmclnt_proc(struct nlm_host *host, int cmd, 53extern int nlmclnt_proc(struct nlm_host *host, int cmd,
55 struct file_lock *fl); 54 struct file_lock *fl);
56extern int lockd_up(int proto); 55extern int lockd_up(void);
57extern void lockd_down(void); 56extern void lockd_down(void);
58 57
59unsigned long get_nfs_grace_period(void);
60
61#ifdef CONFIG_NFSD_V4
62unsigned long get_nfs4_grace_period(void);
63#else
64static inline unsigned long get_nfs4_grace_period(void) {return 0;}
65#endif
66
67#endif /* LINUX_LOCKD_BIND_H */ 58#endif /* LINUX_LOCKD_BIND_H */
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index dbb87ab282e8..b56d5aa9b194 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -12,6 +12,8 @@
12#ifdef __KERNEL__ 12#ifdef __KERNEL__
13 13
14#include <linux/in.h> 14#include <linux/in.h>
15#include <linux/in6.h>
16#include <net/ipv6.h>
15#include <linux/fs.h> 17#include <linux/fs.h>
16#include <linux/kref.h> 18#include <linux/kref.h>
17#include <linux/utsname.h> 19#include <linux/utsname.h>
@@ -38,8 +40,9 @@
38 */ 40 */
39struct nlm_host { 41struct nlm_host {
40 struct hlist_node h_hash; /* doubly linked list */ 42 struct hlist_node h_hash; /* doubly linked list */
41 struct sockaddr_in h_addr; /* peer address */ 43 struct sockaddr_storage h_addr; /* peer address */
42 struct sockaddr_in h_saddr; /* our address (optional) */ 44 size_t h_addrlen;
45 struct sockaddr_storage h_srcaddr; /* our address (optional) */
43 struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ 46 struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */
44 char * h_name; /* remote hostname */ 47 char * h_name; /* remote hostname */
45 u32 h_version; /* interface version */ 48 u32 h_version; /* interface version */
@@ -61,18 +64,56 @@ struct nlm_host {
61 struct list_head h_granted; /* Locks in GRANTED state */ 64 struct list_head h_granted; /* Locks in GRANTED state */
62 struct list_head h_reclaim; /* Locks in RECLAIM state */ 65 struct list_head h_reclaim; /* Locks in RECLAIM state */
63 struct nsm_handle * h_nsmhandle; /* NSM status handle */ 66 struct nsm_handle * h_nsmhandle; /* NSM status handle */
67
68 char h_addrbuf[48], /* address eyecatchers */
69 h_srcaddrbuf[48];
64}; 70};
65 71
66struct nsm_handle { 72struct nsm_handle {
67 struct list_head sm_link; 73 struct list_head sm_link;
68 atomic_t sm_count; 74 atomic_t sm_count;
69 char * sm_name; 75 char * sm_name;
70 struct sockaddr_in sm_addr; 76 struct sockaddr_storage sm_addr;
77 size_t sm_addrlen;
71 unsigned int sm_monitored : 1, 78 unsigned int sm_monitored : 1,
72 sm_sticky : 1; /* don't unmonitor */ 79 sm_sticky : 1; /* don't unmonitor */
80 char sm_addrbuf[48]; /* address eyecatcher */
73}; 81};
74 82
75/* 83/*
84 * Rigorous type checking on sockaddr type conversions
85 */
86static inline struct sockaddr_in *nlm_addr_in(const struct nlm_host *host)
87{
88 return (struct sockaddr_in *)&host->h_addr;
89}
90
91static inline struct sockaddr *nlm_addr(const struct nlm_host *host)
92{
93 return (struct sockaddr *)&host->h_addr;
94}
95
96static inline struct sockaddr_in *nlm_srcaddr_in(const struct nlm_host *host)
97{
98 return (struct sockaddr_in *)&host->h_srcaddr;
99}
100
101static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
102{
103 return (struct sockaddr *)&host->h_srcaddr;
104}
105
106static inline struct sockaddr_in *nsm_addr_in(const struct nsm_handle *handle)
107{
108 return (struct sockaddr_in *)&handle->sm_addr;
109}
110
111static inline struct sockaddr *nsm_addr(const struct nsm_handle *handle)
112{
113 return (struct sockaddr *)&handle->sm_addr;
114}
115
116/*
76 * Map an fl_owner_t into a unique 32-bit "pid" 117 * Map an fl_owner_t into a unique 32-bit "pid"
77 */ 118 */
78struct nlm_lockowner { 119struct nlm_lockowner {
@@ -166,7 +207,8 @@ int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
166struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); 207struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
167void nlmclnt_finish_block(struct nlm_wait *block); 208void nlmclnt_finish_block(struct nlm_wait *block);
168int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); 209int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
169__be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); 210__be32 nlmclnt_grant(const struct sockaddr *addr,
211 const struct nlm_lock *lock);
170void nlmclnt_recovery(struct nlm_host *); 212void nlmclnt_recovery(struct nlm_host *);
171int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); 213int nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
172void nlmclnt_next_cookie(struct nlm_cookie *); 214void nlmclnt_next_cookie(struct nlm_cookie *);
@@ -174,12 +216,14 @@ void nlmclnt_next_cookie(struct nlm_cookie *);
174/* 216/*
175 * Host cache 217 * Host cache
176 */ 218 */
177struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, 219struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
178 int proto, u32 version, 220 const size_t salen,
221 const unsigned short protocol,
222 const u32 version,
223 const char *hostname);
224struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
179 const char *hostname, 225 const char *hostname,
180 unsigned int hostname_len); 226 const size_t hostname_len);
181struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *,
182 unsigned int);
183struct rpc_clnt * nlm_bind_host(struct nlm_host *); 227struct rpc_clnt * nlm_bind_host(struct nlm_host *);
184void nlm_rebind_host(struct nlm_host *); 228void nlm_rebind_host(struct nlm_host *);
185struct nlm_host * nlm_get_host(struct nlm_host *); 229struct nlm_host * nlm_get_host(struct nlm_host *);
@@ -201,7 +245,7 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
201 */ 245 */
202__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, 246__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
203 struct nlm_host *, struct nlm_lock *, int, 247 struct nlm_host *, struct nlm_lock *, int,
204 struct nlm_cookie *); 248 struct nlm_cookie *, int);
205__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); 249__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
206__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, 250__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
207 struct nlm_host *, struct nlm_lock *, 251 struct nlm_host *, struct nlm_lock *,
@@ -233,15 +277,82 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
233 return file->f_file->f_path.dentry->d_inode; 277 return file->f_file->f_path.dentry->d_inode;
234} 278}
235 279
280static inline int __nlm_privileged_request4(const struct sockaddr *sap)
281{
282 const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
283 return (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) &&
284 (ntohs(sin->sin_port) < 1024);
285}
286
287#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
288static inline int __nlm_privileged_request6(const struct sockaddr *sap)
289{
290 const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
291 return (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK) &&
292 (ntohs(sin6->sin6_port) < 1024);
293}
294#else /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
295static inline int __nlm_privileged_request6(const struct sockaddr *sap)
296{
297 return 0;
298}
299#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
300
236/* 301/*
237 * Compare two host addresses (needs modifying for ipv6) 302 * Ensure incoming requests are from local privileged callers.
303 *
304 * Return TRUE if sender is local and is connecting via a privileged port;
305 * otherwise return FALSE.
238 */ 306 */
239static inline int nlm_cmp_addr(const struct sockaddr_in *sin1, 307static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
240 const struct sockaddr_in *sin2)
241{ 308{
309 const struct sockaddr *sap = svc_addr(rqstp);
310
311 switch (sap->sa_family) {
312 case AF_INET:
313 return __nlm_privileged_request4(sap);
314 case AF_INET6:
315 return __nlm_privileged_request6(sap);
316 default:
317 return 0;
318 }
319}
320
321static inline int __nlm_cmp_addr4(const struct sockaddr *sap1,
322 const struct sockaddr *sap2)
323{
324 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
325 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
242 return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; 326 return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
243} 327}
244 328
329static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
330 const struct sockaddr *sap2)
331{
332 const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
333 const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
334 return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
335}
336
337/*
338 * Compare two host addresses
339 *
340 * Return TRUE if the addresses are the same; otherwise FALSE.
341 */
342static inline int nlm_cmp_addr(const struct sockaddr *sap1,
343 const struct sockaddr *sap2)
344{
345 if (sap1->sa_family == sap2->sa_family) {
346 switch (sap1->sa_family) {
347 case AF_INET:
348 return __nlm_cmp_addr4(sap1, sap2);
349 case AF_INET6:
350 return __nlm_cmp_addr6(sap1, sap2);
351 }
352 }
353 return 0;
354}
355
245/* 356/*
246 * Compare two NLM locks. 357 * Compare two NLM locks.
247 * When the second lock is of type F_UNLCK, this acts like a wildcard. 358 * When the second lock is of type F_UNLCK, this acts like a wildcard.
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index df18fa053bcd..d6b3a802c046 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -81,8 +81,6 @@ struct nlm_reboot {
81 unsigned int len; 81 unsigned int len;
82 u32 state; 82 u32 state;
83 __be32 addr; 83 __be32 addr;
84 __be32 vers;
85 __be32 proto;
86}; 84};
87 85
88/* 86/*
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 108f47e5fd95..21269405ffe2 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -38,6 +38,7 @@
38#define NFSD_MAY_LOCK 32 38#define NFSD_MAY_LOCK 32
39#define NFSD_MAY_OWNER_OVERRIDE 64 39#define NFSD_MAY_OWNER_OVERRIDE 64
40#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ 40#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
41#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
41 42
42#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) 43#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
43#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) 44#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
@@ -125,7 +126,7 @@ int nfsd_truncate(struct svc_rqst *, struct svc_fh *,
125__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, 126__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
126 loff_t *, struct readdir_cd *, filldir_t); 127 loff_t *, struct readdir_cd *, filldir_t);
127__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, 128__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
128 struct kstatfs *); 129 struct kstatfs *, int access);
129 130
130int nfsd_notify_change(struct inode *, struct iattr *); 131int nfsd_notify_change(struct inode *, struct iattr *);
131__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, 132__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index e5bfe01ee305..6f0ee1b84a4f 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -104,6 +104,7 @@ struct rpc_create_args {
104 const struct rpc_timeout *timeout; 104 const struct rpc_timeout *timeout;
105 char *servername; 105 char *servername;
106 struct rpc_program *program; 106 struct rpc_program *program;
107 u32 prognumber; /* overrides program->number */
107 u32 version; 108 u32 version;
108 rpc_authflavor_t authflavor; 109 rpc_authflavor_t authflavor;
109 unsigned long flags; 110 unsigned long flags;
@@ -124,10 +125,10 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
124void rpc_shutdown_client(struct rpc_clnt *); 125void rpc_shutdown_client(struct rpc_clnt *);
125void rpc_release_client(struct rpc_clnt *); 126void rpc_release_client(struct rpc_clnt *);
126 127
127int rpcb_register(u32, u32, int, unsigned short, int *); 128int rpcb_register(u32, u32, int, unsigned short);
128int rpcb_v4_register(const u32 program, const u32 version, 129int rpcb_v4_register(const u32 program, const u32 version,
129 const struct sockaddr *address, 130 const struct sockaddr *address,
130 const char *netid, int *result); 131 const char *netid);
131int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); 132int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
132void rpcb_getport_async(struct rpc_task *); 133void rpcb_getport_async(struct rpc_task *);
133 134
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index dc69068d94c7..3afe7fb403b2 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -66,6 +66,7 @@ struct svc_serv {
66 struct list_head sv_tempsocks; /* all temporary sockets */ 66 struct list_head sv_tempsocks; /* all temporary sockets */
67 int sv_tmpcnt; /* count of temporary sockets */ 67 int sv_tmpcnt; /* count of temporary sockets */
68 struct timer_list sv_temptimer; /* timer for aging temporary sockets */ 68 struct timer_list sv_temptimer; /* timer for aging temporary sockets */
69 sa_family_t sv_family; /* listener's address family */
69 70
70 char * sv_name; /* service name */ 71 char * sv_name; /* service name */
71 72
@@ -265,17 +266,17 @@ struct svc_rqst {
265/* 266/*
266 * Rigorous type checking on sockaddr type conversions 267 * Rigorous type checking on sockaddr type conversions
267 */ 268 */
268static inline struct sockaddr_in *svc_addr_in(struct svc_rqst *rqst) 269static inline struct sockaddr_in *svc_addr_in(const struct svc_rqst *rqst)
269{ 270{
270 return (struct sockaddr_in *) &rqst->rq_addr; 271 return (struct sockaddr_in *) &rqst->rq_addr;
271} 272}
272 273
273static inline struct sockaddr_in6 *svc_addr_in6(struct svc_rqst *rqst) 274static inline struct sockaddr_in6 *svc_addr_in6(const struct svc_rqst *rqst)
274{ 275{
275 return (struct sockaddr_in6 *) &rqst->rq_addr; 276 return (struct sockaddr_in6 *) &rqst->rq_addr;
276} 277}
277 278
278static inline struct sockaddr *svc_addr(struct svc_rqst *rqst) 279static inline struct sockaddr *svc_addr(const struct svc_rqst *rqst)
279{ 280{
280 return (struct sockaddr *) &rqst->rq_addr; 281 return (struct sockaddr *) &rqst->rq_addr;
281} 282}
@@ -381,18 +382,20 @@ struct svc_procedure {
381/* 382/*
382 * Function prototypes. 383 * Function prototypes.
383 */ 384 */
384struct svc_serv * svc_create(struct svc_program *, unsigned int, 385struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t,
385 void (*shutdown)(struct svc_serv*)); 386 void (*shutdown)(struct svc_serv *));
386struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, 387struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
387 struct svc_pool *pool); 388 struct svc_pool *pool);
388void svc_exit_thread(struct svc_rqst *); 389void svc_exit_thread(struct svc_rqst *);
389struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, 390struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
390 void (*shutdown)(struct svc_serv*), svc_thread_fn, 391 sa_family_t, void (*shutdown)(struct svc_serv *),
391 struct module *); 392 svc_thread_fn, struct module *);
392int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); 393int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
393void svc_destroy(struct svc_serv *); 394void svc_destroy(struct svc_serv *);
394int svc_process(struct svc_rqst *); 395int svc_process(struct svc_rqst *);
395int svc_register(struct svc_serv *, int, unsigned short); 396int svc_register(const struct svc_serv *, const unsigned short,
397 const unsigned short);
398
396void svc_wake_up(struct svc_serv *); 399void svc_wake_up(struct svc_serv *);
397void svc_reserve(struct svc_rqst *rqstp, int space); 400void svc_reserve(struct svc_rqst *rqstp, int space);
398struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); 401struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index dc05b54bd3a3..c14fe86dac59 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod;
72 */ 72 */
73struct svc_rdma_op_ctxt { 73struct svc_rdma_op_ctxt {
74 struct svc_rdma_op_ctxt *read_hdr; 74 struct svc_rdma_op_ctxt *read_hdr;
75 struct svc_rdma_fastreg_mr *frmr;
75 int hdr_count; 76 int hdr_count;
76 struct xdr_buf arg; 77 struct xdr_buf arg;
77 struct list_head dto_q; 78 struct list_head dto_q;
@@ -103,16 +104,30 @@ struct svc_rdma_chunk_sge {
103 int start; /* sge no for this chunk */ 104 int start; /* sge no for this chunk */
104 int count; /* sge count for this chunk */ 105 int count; /* sge count for this chunk */
105}; 106};
107struct svc_rdma_fastreg_mr {
108 struct ib_mr *mr;
109 void *kva;
110 struct ib_fast_reg_page_list *page_list;
111 int page_list_len;
112 unsigned long access_flags;
113 unsigned long map_len;
114 enum dma_data_direction direction;
115 struct list_head frmr_list;
116};
106struct svc_rdma_req_map { 117struct svc_rdma_req_map {
118 struct svc_rdma_fastreg_mr *frmr;
107 unsigned long count; 119 unsigned long count;
108 union { 120 union {
109 struct kvec sge[RPCSVC_MAXPAGES]; 121 struct kvec sge[RPCSVC_MAXPAGES];
110 struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; 122 struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
111 }; 123 };
112}; 124};
113 125#define RDMACTXT_F_FAST_UNREG 1
114#define RDMACTXT_F_LAST_CTXT 2 126#define RDMACTXT_F_LAST_CTXT 2
115 127
128#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */
129#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */
130
116struct svcxprt_rdma { 131struct svcxprt_rdma {
117 struct svc_xprt sc_xprt; /* SVC transport structure */ 132 struct svc_xprt sc_xprt; /* SVC transport structure */
118 struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ 133 struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
@@ -136,6 +151,11 @@ struct svcxprt_rdma {
136 struct ib_cq *sc_rq_cq; 151 struct ib_cq *sc_rq_cq;
137 struct ib_cq *sc_sq_cq; 152 struct ib_cq *sc_sq_cq;
138 struct ib_mr *sc_phys_mr; /* MR for server memory */ 153 struct ib_mr *sc_phys_mr; /* MR for server memory */
154 u32 sc_dev_caps; /* distilled device caps */
155 u32 sc_dma_lkey; /* local dma key */
156 unsigned int sc_frmr_pg_list_len;
157 struct list_head sc_frmr_q;
158 spinlock_t sc_frmr_q_lock;
139 159
140 spinlock_t sc_lock; /* transport lock */ 160 spinlock_t sc_lock; /* transport lock */
141 161
@@ -192,8 +212,13 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *);
192extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); 212extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
193extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); 213extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
194extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); 214extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
215extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
195extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); 216extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
196extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); 217extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
218extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
219extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
220extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
221 struct svc_rdma_fastreg_mr *);
197extern void svc_sq_reap(struct svcxprt_rdma *); 222extern void svc_sq_reap(struct svcxprt_rdma *);
198extern void svc_rq_reap(struct svcxprt_rdma *); 223extern void svc_rq_reap(struct svcxprt_rdma *);
199extern struct svc_xprt_class svc_rdma_class; 224extern struct svc_xprt_class svc_rdma_class;
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 8cff696dedf5..483e10380aae 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -39,10 +39,7 @@ int svc_send(struct svc_rqst *);
39void svc_drop(struct svc_rqst *); 39void svc_drop(struct svc_rqst *);
40void svc_sock_update_bufs(struct svc_serv *serv); 40void svc_sock_update_bufs(struct svc_serv *serv);
41int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); 41int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
42int svc_addsock(struct svc_serv *serv, 42int svc_addsock(struct svc_serv *serv, int fd, char *name_return);
43 int fd,
44 char *name_return,
45 int *proto);
46void svc_init_xprt_sock(void); 43void svc_init_xprt_sock(void);
47void svc_cleanup_xprt_sock(void); 44void svc_cleanup_xprt_sock(void);
48 45
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 08d6e1bb99ac..503d8d4eb80a 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -125,6 +125,7 @@ cond_syscall(sys_vm86old);
125cond_syscall(sys_vm86); 125cond_syscall(sys_vm86);
126cond_syscall(compat_sys_ipc); 126cond_syscall(compat_sys_ipc);
127cond_syscall(compat_sys_sysctl); 127cond_syscall(compat_sys_sysctl);
128cond_syscall(sys_flock);
128 129
129/* arch-specific weak syscall entries */ 130/* arch-specific weak syscall entries */
130cond_syscall(sys_pciconfig_read); 131cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c468c3c6dfc5..cfc5295f1e82 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -96,7 +96,7 @@ static int sixty = 60;
96static int neg_one = -1; 96static int neg_one = -1;
97#endif 97#endif
98 98
99#ifdef CONFIG_MMU 99#if defined(CONFIG_MMU) && defined(CONFIG_FILE_LOCKING)
100static int two = 2; 100static int two = 2;
101#endif 101#endif
102 102
@@ -1248,6 +1248,7 @@ static struct ctl_table fs_table[] = {
1248 .extra1 = &minolduid, 1248 .extra1 = &minolduid,
1249 .extra2 = &maxolduid, 1249 .extra2 = &maxolduid,
1250 }, 1250 },
1251#ifdef CONFIG_FILE_LOCKING
1251 { 1252 {
1252 .ctl_name = FS_LEASES, 1253 .ctl_name = FS_LEASES,
1253 .procname = "leases-enable", 1254 .procname = "leases-enable",
@@ -1256,6 +1257,7 @@ static struct ctl_table fs_table[] = {
1256 .mode = 0644, 1257 .mode = 0644,
1257 .proc_handler = &proc_dointvec, 1258 .proc_handler = &proc_dointvec,
1258 }, 1259 },
1260#endif
1259#ifdef CONFIG_DNOTIFY 1261#ifdef CONFIG_DNOTIFY
1260 { 1262 {
1261 .ctl_name = FS_DIR_NOTIFY, 1263 .ctl_name = FS_DIR_NOTIFY,
@@ -1267,6 +1269,7 @@ static struct ctl_table fs_table[] = {
1267 }, 1269 },
1268#endif 1270#endif
1269#ifdef CONFIG_MMU 1271#ifdef CONFIG_MMU
1272#ifdef CONFIG_FILE_LOCKING
1270 { 1273 {
1271 .ctl_name = FS_LEASE_TIME, 1274 .ctl_name = FS_LEASE_TIME,
1272 .procname = "lease-break-time", 1275 .procname = "lease-break-time",
@@ -1278,6 +1281,7 @@ static struct ctl_table fs_table[] = {
1278 .extra1 = &zero, 1281 .extra1 = &zero,
1279 .extra2 = &two, 1282 .extra2 = &two,
1280 }, 1283 },
1284#endif
1281 { 1285 {
1282 .procname = "aio-nr", 1286 .procname = "aio-nr",
1283 .data = &aio_nr, 1287 .data = &aio_nr,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 76739e928d0d..da0789fa1b88 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
174 clnt->cl_procinfo = version->procs; 174 clnt->cl_procinfo = version->procs;
175 clnt->cl_maxproc = version->nrprocs; 175 clnt->cl_maxproc = version->nrprocs;
176 clnt->cl_protname = program->name; 176 clnt->cl_protname = program->name;
177 clnt->cl_prog = program->number; 177 clnt->cl_prog = args->prognumber ? : program->number;
178 clnt->cl_vers = version->number; 178 clnt->cl_vers = version->number;
179 clnt->cl_stats = program->stats; 179 clnt->cl_stats = program->stats;
180 clnt->cl_metrics = rpc_alloc_iostats(clnt); 180 clnt->cl_metrics = rpc_alloc_iostats(clnt);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 24db2b4d12d3..34abc91058d8 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -20,6 +20,7 @@
20#include <linux/in6.h> 20#include <linux/in6.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/errno.h> 22#include <linux/errno.h>
23#include <net/ipv6.h>
23 24
24#include <linux/sunrpc/clnt.h> 25#include <linux/sunrpc/clnt.h>
25#include <linux/sunrpc/sched.h> 26#include <linux/sunrpc/sched.h>
@@ -176,13 +177,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
176} 177}
177 178
178static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, 179static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
179 u32 version, struct rpc_message *msg, 180 u32 version, struct rpc_message *msg)
180 int *result)
181{ 181{
182 struct rpc_clnt *rpcb_clnt; 182 struct rpc_clnt *rpcb_clnt;
183 int error = 0; 183 int result, error = 0;
184 184
185 *result = 0; 185 msg->rpc_resp = &result;
186 186
187 rpcb_clnt = rpcb_create_local(addr, addrlen, version); 187 rpcb_clnt = rpcb_create_local(addr, addrlen, version);
188 if (!IS_ERR(rpcb_clnt)) { 188 if (!IS_ERR(rpcb_clnt)) {
@@ -191,12 +191,15 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
191 } else 191 } else
192 error = PTR_ERR(rpcb_clnt); 192 error = PTR_ERR(rpcb_clnt);
193 193
194 if (error < 0) 194 if (error < 0) {
195 printk(KERN_WARNING "RPC: failed to contact local rpcbind " 195 printk(KERN_WARNING "RPC: failed to contact local rpcbind "
196 "server (errno %d).\n", -error); 196 "server (errno %d).\n", -error);
197 dprintk("RPC: registration status %d/%d\n", error, *result); 197 return error;
198 }
198 199
199 return error; 200 if (!result)
201 return -EACCES;
202 return 0;
200} 203}
201 204
202/** 205/**
@@ -205,7 +208,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
205 * @vers: RPC version number to bind 208 * @vers: RPC version number to bind
206 * @prot: transport protocol to register 209 * @prot: transport protocol to register
207 * @port: port value to register 210 * @port: port value to register
208 * @okay: OUT: result code 211 *
212 * Returns zero if the registration request was dispatched successfully
213 * and the rpcbind daemon returned success. Otherwise, returns an errno
214 * value that reflects the nature of the error (request could not be
215 * dispatched, timed out, or rpcbind returned an error).
209 * 216 *
210 * RPC services invoke this function to advertise their contact 217 * RPC services invoke this function to advertise their contact
211 * information via the system's rpcbind daemon. RPC services 218 * information via the system's rpcbind daemon. RPC services
@@ -217,15 +224,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
217 * all registered transports for [program, version] from the local 224 * all registered transports for [program, version] from the local
218 * rpcbind database. 225 * rpcbind database.
219 * 226 *
220 * Returns zero if the registration request was dispatched
221 * successfully and a reply was received. The rpcbind daemon's
222 * boolean result code is stored in *okay.
223 *
224 * Returns an errno value and sets *result to zero if there was
225 * some problem that prevented the rpcbind request from being
226 * dispatched, or if the rpcbind daemon did not respond within
227 * the timeout.
228 *
229 * This function uses rpcbind protocol version 2 to contact the 227 * This function uses rpcbind protocol version 2 to contact the
230 * local rpcbind daemon. 228 * local rpcbind daemon.
231 * 229 *
@@ -236,7 +234,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
236 * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 234 * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
237 * addresses). 235 * addresses).
238 */ 236 */
239int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) 237int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
240{ 238{
241 struct rpcbind_args map = { 239 struct rpcbind_args map = {
242 .r_prog = prog, 240 .r_prog = prog,
@@ -246,7 +244,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
246 }; 244 };
247 struct rpc_message msg = { 245 struct rpc_message msg = {
248 .rpc_argp = &map, 246 .rpc_argp = &map,
249 .rpc_resp = okay,
250 }; 247 };
251 248
252 dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " 249 dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
@@ -259,7 +256,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
259 256
260 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 257 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
261 sizeof(rpcb_inaddr_loopback), 258 sizeof(rpcb_inaddr_loopback),
262 RPCBVERS_2, &msg, okay); 259 RPCBVERS_2, &msg);
263} 260}
264 261
265/* 262/*
@@ -290,7 +287,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
290 287
291 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 288 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
292 sizeof(rpcb_inaddr_loopback), 289 sizeof(rpcb_inaddr_loopback),
293 RPCBVERS_4, msg, msg->rpc_resp); 290 RPCBVERS_4, msg);
294} 291}
295 292
296/* 293/*
@@ -304,10 +301,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
304 char buf[64]; 301 char buf[64];
305 302
306 /* Construct AF_INET6 universal address */ 303 /* Construct AF_INET6 universal address */
307 snprintf(buf, sizeof(buf), 304 if (ipv6_addr_any(&address_to_register->sin6_addr))
308 NIP6_FMT".%u.%u", 305 snprintf(buf, sizeof(buf), "::.%u.%u",
309 NIP6(address_to_register->sin6_addr), 306 port >> 8, port & 0xff);
310 port >> 8, port & 0xff); 307 else
308 snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u",
309 NIP6(address_to_register->sin6_addr),
310 port >> 8, port & 0xff);
311 map->r_addr = buf; 311 map->r_addr = buf;
312 312
313 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " 313 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
321 321
322 return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, 322 return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
323 sizeof(rpcb_in6addr_loopback), 323 sizeof(rpcb_in6addr_loopback),
324 RPCBVERS_4, msg, msg->rpc_resp); 324 RPCBVERS_4, msg);
325} 325}
326 326
327/** 327/**
@@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
330 * @version: RPC version number of service to (un)register 330 * @version: RPC version number of service to (un)register
331 * @address: address family, IP address, and port to (un)register 331 * @address: address family, IP address, and port to (un)register
332 * @netid: netid of transport protocol to (un)register 332 * @netid: netid of transport protocol to (un)register
333 * @result: result code from rpcbind RPC call 333 *
334 * Returns zero if the registration request was dispatched successfully
335 * and the rpcbind daemon returned success. Otherwise, returns an errno
336 * value that reflects the nature of the error (request could not be
337 * dispatched, timed out, or rpcbind returned an error).
334 * 338 *
335 * RPC services invoke this function to advertise their contact 339 * RPC services invoke this function to advertise their contact
336 * information via the system's rpcbind daemon. RPC services 340 * information via the system's rpcbind daemon. RPC services
@@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
342 * to zero. Callers pass a netid of "" to unregister all 346 * to zero. Callers pass a netid of "" to unregister all
343 * transport netids associated with [program, version, address]. 347 * transport netids associated with [program, version, address].
344 * 348 *
345 * Returns zero if the registration request was dispatched
346 * successfully and a reply was received. The rpcbind daemon's
347 * result code is stored in *result.
348 *
349 * Returns an errno value and sets *result to zero if there was
350 * some problem that prevented the rpcbind request from being
351 * dispatched, or if the rpcbind daemon did not respond within
352 * the timeout.
353 *
354 * This function uses rpcbind protocol version 4 to contact the 349 * This function uses rpcbind protocol version 4 to contact the
355 * local rpcbind daemon. The local rpcbind daemon must support 350 * local rpcbind daemon. The local rpcbind daemon must support
356 * version 4 of the rpcbind protocol in order for these functions 351 * version 4 of the rpcbind protocol in order for these functions
@@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
372 * advertises the service on all IPv4 and IPv6 addresses. 367 * advertises the service on all IPv4 and IPv6 addresses.
373 */ 368 */
374int rpcb_v4_register(const u32 program, const u32 version, 369int rpcb_v4_register(const u32 program, const u32 version,
375 const struct sockaddr *address, const char *netid, 370 const struct sockaddr *address, const char *netid)
376 int *result)
377{ 371{
378 struct rpcbind_args map = { 372 struct rpcbind_args map = {
379 .r_prog = program, 373 .r_prog = program,
@@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version,
383 }; 377 };
384 struct rpc_message msg = { 378 struct rpc_message msg = {
385 .rpc_argp = &map, 379 .rpc_argp = &map,
386 .rpc_resp = result,
387 }; 380 };
388 381
389 *result = 0;
390
391 switch (address->sa_family) { 382 switch (address->sa_family) {
392 case AF_INET: 383 case AF_INET:
393 return rpcb_register_netid4((struct sockaddr_in *)address, 384 return rpcb_register_netid4((struct sockaddr_in *)address,
@@ -633,7 +624,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
633static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, 624static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
634 struct rpcbind_args *rpcb) 625 struct rpcbind_args *rpcb)
635{ 626{
636 dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", 627 dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n",
637 rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); 628 rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
638 *p++ = htonl(rpcb->r_prog); 629 *p++ = htonl(rpcb->r_prog);
639 *p++ = htonl(rpcb->r_vers); 630 *p++ = htonl(rpcb->r_vers);
@@ -648,7 +639,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
648 unsigned short *portp) 639 unsigned short *portp)
649{ 640{
650 *portp = (unsigned short) ntohl(*p++); 641 *portp = (unsigned short) ntohl(*p++);
651 dprintk("RPC: rpcb_decode_getport result %u\n", 642 dprintk("RPC: rpcb getport result: %u\n",
652 *portp); 643 *portp);
653 return 0; 644 return 0;
654} 645}
@@ -657,7 +648,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
657 unsigned int *boolp) 648 unsigned int *boolp)
658{ 649{
659 *boolp = (unsigned int) ntohl(*p++); 650 *boolp = (unsigned int) ntohl(*p++);
660 dprintk("RPC: rpcb_decode_set: call %s\n", 651 dprintk("RPC: rpcb set/unset call %s\n",
661 (*boolp ? "succeeded" : "failed")); 652 (*boolp ? "succeeded" : "failed"));
662 return 0; 653 return 0;
663} 654}
@@ -665,7 +656,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
665static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, 656static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
666 struct rpcbind_args *rpcb) 657 struct rpcbind_args *rpcb)
667{ 658{
668 dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", 659 dprintk("RPC: encoding rpcb request (%u, %u, %s)\n",
669 rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); 660 rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
670 *p++ = htonl(rpcb->r_prog); 661 *p++ = htonl(rpcb->r_prog);
671 *p++ = htonl(rpcb->r_vers); 662 *p++ = htonl(rpcb->r_vers);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 5a32cb7c4bb4..54c98d876847 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -28,6 +28,8 @@
28 28
29#define RPCDBG_FACILITY RPCDBG_SVCDSP 29#define RPCDBG_FACILITY RPCDBG_SVCDSP
30 30
31static void svc_unregister(const struct svc_serv *serv);
32
31#define svc_serv_is_pooled(serv) ((serv)->sv_function) 33#define svc_serv_is_pooled(serv) ((serv)->sv_function)
32 34
33/* 35/*
@@ -357,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
357 */ 359 */
358static struct svc_serv * 360static struct svc_serv *
359__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 361__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
360 void (*shutdown)(struct svc_serv *serv)) 362 sa_family_t family, void (*shutdown)(struct svc_serv *serv))
361{ 363{
362 struct svc_serv *serv; 364 struct svc_serv *serv;
363 unsigned int vers; 365 unsigned int vers;
@@ -366,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
366 368
367 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) 369 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
368 return NULL; 370 return NULL;
371 serv->sv_family = family;
369 serv->sv_name = prog->pg_name; 372 serv->sv_name = prog->pg_name;
370 serv->sv_program = prog; 373 serv->sv_program = prog;
371 serv->sv_nrthreads = 1; 374 serv->sv_nrthreads = 1;
@@ -416,30 +419,29 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
416 spin_lock_init(&pool->sp_lock); 419 spin_lock_init(&pool->sp_lock);
417 } 420 }
418 421
419
420 /* Remove any stale portmap registrations */ 422 /* Remove any stale portmap registrations */
421 svc_register(serv, 0, 0); 423 svc_unregister(serv);
422 424
423 return serv; 425 return serv;
424} 426}
425 427
426struct svc_serv * 428struct svc_serv *
427svc_create(struct svc_program *prog, unsigned int bufsize, 429svc_create(struct svc_program *prog, unsigned int bufsize,
428 void (*shutdown)(struct svc_serv *serv)) 430 sa_family_t family, void (*shutdown)(struct svc_serv *serv))
429{ 431{
430 return __svc_create(prog, bufsize, /*npools*/1, shutdown); 432 return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
431} 433}
432EXPORT_SYMBOL(svc_create); 434EXPORT_SYMBOL(svc_create);
433 435
434struct svc_serv * 436struct svc_serv *
435svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 437svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
436 void (*shutdown)(struct svc_serv *serv), 438 sa_family_t family, void (*shutdown)(struct svc_serv *serv),
437 svc_thread_fn func, struct module *mod) 439 svc_thread_fn func, struct module *mod)
438{ 440{
439 struct svc_serv *serv; 441 struct svc_serv *serv;
440 unsigned int npools = svc_pool_map_get(); 442 unsigned int npools = svc_pool_map_get();
441 443
442 serv = __svc_create(prog, bufsize, npools, shutdown); 444 serv = __svc_create(prog, bufsize, npools, family, shutdown);
443 445
444 if (serv != NULL) { 446 if (serv != NULL) {
445 serv->sv_function = func; 447 serv->sv_function = func;
@@ -486,8 +488,7 @@ svc_destroy(struct svc_serv *serv)
486 if (svc_serv_is_pooled(serv)) 488 if (svc_serv_is_pooled(serv))
487 svc_pool_map_put(); 489 svc_pool_map_put();
488 490
489 /* Unregister service with the portmapper */ 491 svc_unregister(serv);
490 svc_register(serv, 0, 0);
491 kfree(serv->sv_pools); 492 kfree(serv->sv_pools);
492 kfree(serv); 493 kfree(serv);
493} 494}
@@ -718,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp)
718} 719}
719EXPORT_SYMBOL(svc_exit_thread); 720EXPORT_SYMBOL(svc_exit_thread);
720 721
722#ifdef CONFIG_SUNRPC_REGISTER_V4
723
721/* 724/*
722 * Register an RPC service with the local portmapper. 725 * Register an "inet" protocol family netid with the local
723 * To unregister a service, call this routine with 726 * rpcbind daemon via an rpcbind v4 SET request.
724 * proto and port == 0. 727 *
728 * No netconfig infrastructure is available in the kernel, so
729 * we map IP_ protocol numbers to netids by hand.
730 *
731 * Returns zero on success; a negative errno value is returned
732 * if any error occurs.
725 */ 733 */
726int 734static int __svc_rpcb_register4(const u32 program, const u32 version,
727svc_register(struct svc_serv *serv, int proto, unsigned short port) 735 const unsigned short protocol,
736 const unsigned short port)
737{
738 struct sockaddr_in sin = {
739 .sin_family = AF_INET,
740 .sin_addr.s_addr = htonl(INADDR_ANY),
741 .sin_port = htons(port),
742 };
743 char *netid;
744
745 switch (protocol) {
746 case IPPROTO_UDP:
747 netid = RPCBIND_NETID_UDP;
748 break;
749 case IPPROTO_TCP:
750 netid = RPCBIND_NETID_TCP;
751 break;
752 default:
753 return -EPROTONOSUPPORT;
754 }
755
756 return rpcb_v4_register(program, version,
757 (struct sockaddr *)&sin, netid);
758}
759
760/*
761 * Register an "inet6" protocol family netid with the local
762 * rpcbind daemon via an rpcbind v4 SET request.
763 *
764 * No netconfig infrastructure is available in the kernel, so
765 * we map IP_ protocol numbers to netids by hand.
766 *
767 * Returns zero on success; a negative errno value is returned
768 * if any error occurs.
769 */
770static int __svc_rpcb_register6(const u32 program, const u32 version,
771 const unsigned short protocol,
772 const unsigned short port)
773{
774 struct sockaddr_in6 sin6 = {
775 .sin6_family = AF_INET6,
776 .sin6_addr = IN6ADDR_ANY_INIT,
777 .sin6_port = htons(port),
778 };
779 char *netid;
780
781 switch (protocol) {
782 case IPPROTO_UDP:
783 netid = RPCBIND_NETID_UDP6;
784 break;
785 case IPPROTO_TCP:
786 netid = RPCBIND_NETID_TCP6;
787 break;
788 default:
789 return -EPROTONOSUPPORT;
790 }
791
792 return rpcb_v4_register(program, version,
793 (struct sockaddr *)&sin6, netid);
794}
795
796/*
797 * Register a kernel RPC service via rpcbind version 4.
798 *
799 * Returns zero on success; a negative errno value is returned
800 * if any error occurs.
801 */
802static int __svc_register(const u32 program, const u32 version,
803 const sa_family_t family,
804 const unsigned short protocol,
805 const unsigned short port)
806{
807 int error;
808
809 switch (family) {
810 case AF_INET:
811 return __svc_rpcb_register4(program, version,
812 protocol, port);
813 case AF_INET6:
814 error = __svc_rpcb_register6(program, version,
815 protocol, port);
816 if (error < 0)
817 return error;
818
819 /*
820 * Work around bug in some versions of Linux rpcbind
821 * which don't allow registration of both inet and
822 * inet6 netids.
823 *
824 * Error return ignored for now.
825 */
826 __svc_rpcb_register4(program, version,
827 protocol, port);
828 return 0;
829 }
830
831 return -EAFNOSUPPORT;
832}
833
834#else /* CONFIG_SUNRPC_REGISTER_V4 */
835
836/*
837 * Register a kernel RPC service via rpcbind version 2.
838 *
839 * Returns zero on success; a negative errno value is returned
840 * if any error occurs.
841 */
842static int __svc_register(const u32 program, const u32 version,
843 sa_family_t family,
844 const unsigned short protocol,
845 const unsigned short port)
846{
847 if (family != AF_INET)
848 return -EAFNOSUPPORT;
849
850 return rpcb_register(program, version, protocol, port);
851}
852
853#endif /* CONFIG_SUNRPC_REGISTER_V4 */
854
855/**
856 * svc_register - register an RPC service with the local portmapper
857 * @serv: svc_serv struct for the service to register
858 * @proto: transport protocol number to advertise
859 * @port: port to advertise
860 *
861 * Service is registered for any address in serv's address family
862 */
863int svc_register(const struct svc_serv *serv, const unsigned short proto,
864 const unsigned short port)
728{ 865{
729 struct svc_program *progp; 866 struct svc_program *progp;
730 unsigned long flags;
731 unsigned int i; 867 unsigned int i;
732 int error = 0, dummy; 868 int error = 0;
733 869
734 if (!port) 870 BUG_ON(proto == 0 && port == 0);
735 clear_thread_flag(TIF_SIGPENDING);
736 871
737 for (progp = serv->sv_program; progp; progp = progp->pg_next) { 872 for (progp = serv->sv_program; progp; progp = progp->pg_next) {
738 for (i = 0; i < progp->pg_nvers; i++) { 873 for (i = 0; i < progp->pg_nvers; i++) {
739 if (progp->pg_vers[i] == NULL) 874 if (progp->pg_vers[i] == NULL)
740 continue; 875 continue;
741 876
742 dprintk("svc: svc_register(%s, %s, %d, %d)%s\n", 877 dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
743 progp->pg_name, 878 progp->pg_name,
879 i,
744 proto == IPPROTO_UDP? "udp" : "tcp", 880 proto == IPPROTO_UDP? "udp" : "tcp",
745 port, 881 port,
746 i, 882 serv->sv_family,
747 progp->pg_vers[i]->vs_hidden? 883 progp->pg_vers[i]->vs_hidden?
748 " (but not telling portmap)" : ""); 884 " (but not telling portmap)" : "");
749 885
750 if (progp->pg_vers[i]->vs_hidden) 886 if (progp->pg_vers[i]->vs_hidden)
751 continue; 887 continue;
752 888
753 error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); 889 error = __svc_register(progp->pg_prog, i,
890 serv->sv_family, proto, port);
754 if (error < 0) 891 if (error < 0)
755 break; 892 break;
756 if (port && !dummy) {
757 error = -EACCES;
758 break;
759 }
760 } 893 }
761 } 894 }
762 895
763 if (!port) { 896 return error;
764 spin_lock_irqsave(&current->sighand->siglock, flags); 897}
765 recalc_sigpending(); 898
766 spin_unlock_irqrestore(&current->sighand->siglock, flags); 899#ifdef CONFIG_SUNRPC_REGISTER_V4
900
901static void __svc_unregister(const u32 program, const u32 version,
902 const char *progname)
903{
904 struct sockaddr_in6 sin6 = {
905 .sin6_family = AF_INET6,
906 .sin6_addr = IN6ADDR_ANY_INIT,
907 .sin6_port = 0,
908 };
909 int error;
910
911 error = rpcb_v4_register(program, version,
912 (struct sockaddr *)&sin6, "");
913 dprintk("svc: %s(%sv%u), error %d\n",
914 __func__, progname, version, error);
915}
916
917#else /* CONFIG_SUNRPC_REGISTER_V4 */
918
919static void __svc_unregister(const u32 program, const u32 version,
920 const char *progname)
921{
922 int error;
923
924 error = rpcb_register(program, version, 0, 0);
925 dprintk("svc: %s(%sv%u), error %d\n",
926 __func__, progname, version, error);
927}
928
929#endif /* CONFIG_SUNRPC_REGISTER_V4 */
930
931/*
932 * All netids, bind addresses and ports registered for [program, version]
933 * are removed from the local rpcbind database (if the service is not
934 * hidden) to make way for a new instance of the service.
935 *
936 * The result of unregistration is reported via dprintk for those who want
937 * verification of the result, but is otherwise not important.
938 */
939static void svc_unregister(const struct svc_serv *serv)
940{
941 struct svc_program *progp;
942 unsigned long flags;
943 unsigned int i;
944
945 clear_thread_flag(TIF_SIGPENDING);
946
947 for (progp = serv->sv_program; progp; progp = progp->pg_next) {
948 for (i = 0; i < progp->pg_nvers; i++) {
949 if (progp->pg_vers[i] == NULL)
950 continue;
951 if (progp->pg_vers[i]->vs_hidden)
952 continue;
953
954 __svc_unregister(progp->pg_prog, i, progp->pg_name);
955 }
767 } 956 }
768 957
769 return error; 958 spin_lock_irqsave(&current->sighand->siglock, flags);
959 recalc_sigpending();
960 spin_unlock_irqrestore(&current->sighand->siglock, flags);
770} 961}
771 962
772/* 963/*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e46c825f4954..bf5b5cdafebf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
159} 159}
160EXPORT_SYMBOL_GPL(svc_xprt_init); 160EXPORT_SYMBOL_GPL(svc_xprt_init);
161 161
162int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, 162static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
163 int flags) 163 struct svc_serv *serv,
164 unsigned short port, int flags)
164{ 165{
165 struct svc_xprt_class *xcl;
166 struct sockaddr_in sin = { 166 struct sockaddr_in sin = {
167 .sin_family = AF_INET, 167 .sin_family = AF_INET,
168 .sin_addr.s_addr = htonl(INADDR_ANY), 168 .sin_addr.s_addr = htonl(INADDR_ANY),
169 .sin_port = htons(port), 169 .sin_port = htons(port),
170 }; 170 };
171 struct sockaddr_in6 sin6 = {
172 .sin6_family = AF_INET6,
173 .sin6_addr = IN6ADDR_ANY_INIT,
174 .sin6_port = htons(port),
175 };
176 struct sockaddr *sap;
177 size_t len;
178
179 switch (serv->sv_family) {
180 case AF_INET:
181 sap = (struct sockaddr *)&sin;
182 len = sizeof(sin);
183 break;
184 case AF_INET6:
185 sap = (struct sockaddr *)&sin6;
186 len = sizeof(sin6);
187 break;
188 default:
189 return ERR_PTR(-EAFNOSUPPORT);
190 }
191
192 return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
193}
194
195int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
196 int flags)
197{
198 struct svc_xprt_class *xcl;
199
171 dprintk("svc: creating transport %s[%d]\n", xprt_name, port); 200 dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
172 spin_lock(&svc_xprt_class_lock); 201 spin_lock(&svc_xprt_class_lock);
173 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { 202 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
@@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
180 goto err; 209 goto err;
181 210
182 spin_unlock(&svc_xprt_class_lock); 211 spin_unlock(&svc_xprt_class_lock);
183 newxprt = xcl->xcl_ops-> 212 newxprt = __svc_xpo_create(xcl, serv, port, flags);
184 xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
185 flags);
186 if (IS_ERR(newxprt)) { 213 if (IS_ERR(newxprt)) {
187 module_put(xcl->xcl_owner); 214 module_put(xcl->xcl_owner);
188 return PTR_ERR(newxprt); 215 return PTR_ERR(newxprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3e65719f1ef6..95293f549e9c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1114 struct svc_sock *svsk; 1114 struct svc_sock *svsk;
1115 struct sock *inet; 1115 struct sock *inet;
1116 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1116 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1117 int val;
1117 1118
1118 dprintk("svc: svc_setup_socket %p\n", sock); 1119 dprintk("svc: svc_setup_socket %p\n", sock);
1119 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1120 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1146 else 1147 else
1147 svc_tcp_init(svsk, serv); 1148 svc_tcp_init(svsk, serv);
1148 1149
1150 /*
1151 * We start one listener per sv_serv. We want AF_INET
1152 * requests to be automatically shunted to our AF_INET6
1153 * listener using a mapped IPv4 address. Make sure
1154 * no-one starts an equivalent IPv4 listener, which
1155 * would steal our incoming connections.
1156 */
1157 val = 0;
1158 if (serv->sv_family == AF_INET6)
1159 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1160 (char *)&val, sizeof(val));
1161
1149 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1162 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1150 svsk, svsk->sk_sk); 1163 svsk, svsk->sk_sk);
1151 1164
@@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1154 1167
1155int svc_addsock(struct svc_serv *serv, 1168int svc_addsock(struct svc_serv *serv,
1156 int fd, 1169 int fd,
1157 char *name_return, 1170 char *name_return)
1158 int *proto)
1159{ 1171{
1160 int err = 0; 1172 int err = 0;
1161 struct socket *so = sockfd_lookup(fd, &err); 1173 struct socket *so = sockfd_lookup(fd, &err);
@@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv,
1190 sockfd_put(so); 1202 sockfd_put(so);
1191 return err; 1203 return err;
1192 } 1204 }
1193 if (proto) *proto = so->sk->sk_protocol;
1194 return one_sock_name(name_return, svsk); 1205 return one_sock_name(name_return, svsk);
1195} 1206}
1196EXPORT_SYMBOL_GPL(svc_addsock); 1207EXPORT_SYMBOL_GPL(svc_addsock);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 74de31a06616..a4756576d687 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
116 * 116 *
117 * Assumptions: 117 * Assumptions:
118 * - chunk[0]->position points to pages[0] at an offset of 0 118 * - chunk[0]->position points to pages[0] at an offset of 0
119 * - pages[] is not physically or virtually contigous and consists of 119 * - pages[] is not physically or virtually contiguous and consists of
120 * PAGE_SIZE elements. 120 * PAGE_SIZE elements.
121 * 121 *
122 * Output: 122 * Output:
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
125 * chunk in the read list 125 * chunk in the read list
126 * 126 *
127 */ 127 */
128static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, 128static int map_read_chunks(struct svcxprt_rdma *xprt,
129 struct svc_rqst *rqstp, 129 struct svc_rqst *rqstp,
130 struct svc_rdma_op_ctxt *head, 130 struct svc_rdma_op_ctxt *head,
131 struct rpcrdma_msg *rmsgp, 131 struct rpcrdma_msg *rmsgp,
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
211 return sge_no; 211 return sge_no;
212} 212}
213 213
214static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, 214/* Map a read-chunk-list to an XDR and fast register the page-list.
215 struct svc_rdma_op_ctxt *ctxt, 215 *
216 struct kvec *vec, 216 * Assumptions:
217 u64 *sgl_offset, 217 * - chunk[0] position points to pages[0] at an offset of 0
218 int count) 218 * - pages[] will be made physically contiguous by creating a one-off memory
219 * region using the fastreg verb.
220 * - byte_count is # of bytes in read-chunk-list
221 * - ch_count is # of chunks in read-chunk-list
222 *
223 * Output:
224 * - sge array pointing into pages[] array.
225 * - chunk_sge array specifying sge index and count for each
226 * chunk in the read list
227 */
228static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
229 struct svc_rqst *rqstp,
230 struct svc_rdma_op_ctxt *head,
231 struct rpcrdma_msg *rmsgp,
232 struct svc_rdma_req_map *rpl_map,
233 struct svc_rdma_req_map *chl_map,
234 int ch_count,
235 int byte_count)
236{
237 int page_no;
238 int ch_no;
239 u32 offset;
240 struct rpcrdma_read_chunk *ch;
241 struct svc_rdma_fastreg_mr *frmr;
242 int ret = 0;
243
244 frmr = svc_rdma_get_frmr(xprt);
245 if (IS_ERR(frmr))
246 return -ENOMEM;
247
248 head->frmr = frmr;
249 head->arg.head[0] = rqstp->rq_arg.head[0];
250 head->arg.tail[0] = rqstp->rq_arg.tail[0];
251 head->arg.pages = &head->pages[head->count];
252 head->hdr_count = head->count; /* save count of hdr pages */
253 head->arg.page_base = 0;
254 head->arg.page_len = byte_count;
255 head->arg.len = rqstp->rq_arg.len + byte_count;
256 head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
257
258 /* Fast register the page list */
259 frmr->kva = page_address(rqstp->rq_arg.pages[0]);
260 frmr->direction = DMA_FROM_DEVICE;
261 frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
262 frmr->map_len = byte_count;
263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]),
268 PAGE_SIZE, DMA_TO_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no]))
271 goto fatal_err;
272 atomic_inc(&xprt->sc_dma_used);
273 head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
274 }
275 head->count += page_no;
276
277 /* rq_respages points one past arg pages */
278 rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
279
280 /* Create the reply and chunk maps */
281 offset = 0;
282 ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
283 for (ch_no = 0; ch_no < ch_count; ch_no++) {
284 rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
285 rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
286 chl_map->ch[ch_no].count = 1;
287 chl_map->ch[ch_no].start = ch_no;
288 offset += ch->rc_target.rs_length;
289 ch++;
290 }
291
292 ret = svc_rdma_fastreg(xprt, frmr);
293 if (ret)
294 goto fatal_err;
295
296 return ch_no;
297
298 fatal_err:
299 printk("svcrdma: error fast registering xdr for xprt %p", xprt);
300 svc_rdma_put_frmr(xprt, frmr);
301 return -EIO;
302}
303
304static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
305 struct svc_rdma_op_ctxt *ctxt,
306 struct svc_rdma_fastreg_mr *frmr,
307 struct kvec *vec,
308 u64 *sgl_offset,
309 int count)
219{ 310{
220 int i; 311 int i;
221 312
222 ctxt->count = count; 313 ctxt->count = count;
223 ctxt->direction = DMA_FROM_DEVICE; 314 ctxt->direction = DMA_FROM_DEVICE;
224 for (i = 0; i < count; i++) { 315 for (i = 0; i < count; i++) {
225 atomic_inc(&xprt->sc_dma_used); 316 ctxt->sge[i].length = 0; /* in case map fails */
226 ctxt->sge[i].addr = 317 if (!frmr) {
227 ib_dma_map_single(xprt->sc_cm_id->device, 318 ctxt->sge[i].addr =
228 vec[i].iov_base, vec[i].iov_len, 319 ib_dma_map_single(xprt->sc_cm_id->device,
229 DMA_FROM_DEVICE); 320 vec[i].iov_base,
321 vec[i].iov_len,
322 DMA_FROM_DEVICE);
323 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324 ctxt->sge[i].addr))
325 return -EINVAL;
326 ctxt->sge[i].lkey = xprt->sc_dma_lkey;
327 atomic_inc(&xprt->sc_dma_used);
328 } else {
329 ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
330 ctxt->sge[i].lkey = frmr->mr->lkey;
331 }
230 ctxt->sge[i].length = vec[i].iov_len; 332 ctxt->sge[i].length = vec[i].iov_len;
231 ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
232 *sgl_offset = *sgl_offset + vec[i].iov_len; 333 *sgl_offset = *sgl_offset + vec[i].iov_len;
233 } 334 }
335 return 0;
234} 336}
235 337
236static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) 338static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
278 struct svc_rdma_op_ctxt *hdr_ctxt) 380 struct svc_rdma_op_ctxt *hdr_ctxt)
279{ 381{
280 struct ib_send_wr read_wr; 382 struct ib_send_wr read_wr;
383 struct ib_send_wr inv_wr;
281 int err = 0; 384 int err = 0;
282 int ch_no; 385 int ch_no;
283 int ch_count; 386 int ch_count;
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
301 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); 404 svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
302 if (ch_count > RPCSVC_MAXPAGES) 405 if (ch_count > RPCSVC_MAXPAGES)
303 return -EINVAL; 406 return -EINVAL;
304 sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, 407
305 rpl_map, chl_map, 408 if (!xprt->sc_frmr_pg_list_len)
306 ch_count, byte_count); 409 sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
410 rpl_map, chl_map, ch_count,
411 byte_count);
412 else
413 sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
414 rpl_map, chl_map, ch_count,
415 byte_count);
416 if (sge_count < 0) {
417 err = -EIO;
418 goto out;
419 }
420
307 sgl_offset = 0; 421 sgl_offset = 0;
308 ch_no = 0; 422 ch_no = 0;
309 423
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
312next_sge: 426next_sge:
313 ctxt = svc_rdma_get_context(xprt); 427 ctxt = svc_rdma_get_context(xprt);
314 ctxt->direction = DMA_FROM_DEVICE; 428 ctxt->direction = DMA_FROM_DEVICE;
429 ctxt->frmr = hdr_ctxt->frmr;
430 ctxt->read_hdr = NULL;
315 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 431 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
432 clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
316 433
317 /* Prepare READ WR */ 434 /* Prepare READ WR */
318 memset(&read_wr, 0, sizeof read_wr); 435 memset(&read_wr, 0, sizeof read_wr);
319 ctxt->wr_op = IB_WR_RDMA_READ;
320 read_wr.wr_id = (unsigned long)ctxt; 436 read_wr.wr_id = (unsigned long)ctxt;
321 read_wr.opcode = IB_WR_RDMA_READ; 437 read_wr.opcode = IB_WR_RDMA_READ;
438 ctxt->wr_op = read_wr.opcode;
322 read_wr.send_flags = IB_SEND_SIGNALED; 439 read_wr.send_flags = IB_SEND_SIGNALED;
323 read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; 440 read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
324 read_wr.wr.rdma.remote_addr = 441 read_wr.wr.rdma.remote_addr =
@@ -327,10 +444,15 @@ next_sge:
327 read_wr.sg_list = ctxt->sge; 444 read_wr.sg_list = ctxt->sge;
328 read_wr.num_sge = 445 read_wr.num_sge =
329 rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); 446 rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
330 rdma_set_ctxt_sge(xprt, ctxt, 447 err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
331 &rpl_map->sge[chl_map->ch[ch_no].start], 448 &rpl_map->sge[chl_map->ch[ch_no].start],
332 &sgl_offset, 449 &sgl_offset,
333 read_wr.num_sge); 450 read_wr.num_sge);
451 if (err) {
452 svc_rdma_unmap_dma(ctxt);
453 svc_rdma_put_context(ctxt, 0);
454 goto out;
455 }
334 if (((ch+1)->rc_discrim == 0) && 456 if (((ch+1)->rc_discrim == 0) &&
335 (read_wr.num_sge == chl_map->ch[ch_no].count)) { 457 (read_wr.num_sge == chl_map->ch[ch_no].count)) {
336 /* 458 /*
@@ -339,6 +461,29 @@ next_sge:
339 * the client and the RPC needs to be enqueued. 461 * the client and the RPC needs to be enqueued.
340 */ 462 */
341 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 463 set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
464 if (hdr_ctxt->frmr) {
465 set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
466 /*
467 * Invalidate the local MR used to map the data
468 * sink.
469 */
470 if (xprt->sc_dev_caps &
471 SVCRDMA_DEVCAP_READ_W_INV) {
472 read_wr.opcode =
473 IB_WR_RDMA_READ_WITH_INV;
474 ctxt->wr_op = read_wr.opcode;
475 read_wr.ex.invalidate_rkey =
476 ctxt->frmr->mr->lkey;
477 } else {
478 /* Prepare INVALIDATE WR */
479 memset(&inv_wr, 0, sizeof inv_wr);
480 inv_wr.opcode = IB_WR_LOCAL_INV;
481 inv_wr.send_flags = IB_SEND_SIGNALED;
482 inv_wr.ex.invalidate_rkey =
483 hdr_ctxt->frmr->mr->lkey;
484 read_wr.next = &inv_wr;
485 }
486 }
342 ctxt->read_hdr = hdr_ctxt; 487 ctxt->read_hdr = hdr_ctxt;
343 } 488 }
344 /* Post the read */ 489 /* Post the read */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 84d328329d98..9a7a8e7ae038 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -69,9 +69,127 @@
69 * array is only concerned with the reply we are assured that we have 69 * array is only concerned with the reply we are assured that we have
70 * on extra page for the RPCRMDA header. 70 * on extra page for the RPCRMDA header.
71 */ 71 */
72static void xdr_to_sge(struct svcxprt_rdma *xprt, 72int fast_reg_xdr(struct svcxprt_rdma *xprt,
73 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
74 struct svc_rdma_req_map *vec) 74 struct svc_rdma_req_map *vec)
75{
76 int sge_no;
77 u32 sge_bytes;
78 u32 page_bytes;
79 u32 page_off;
80 int page_no = 0;
81 u8 *frva;
82 struct svc_rdma_fastreg_mr *frmr;
83
84 frmr = svc_rdma_get_frmr(xprt);
85 if (IS_ERR(frmr))
86 return -ENOMEM;
87 vec->frmr = frmr;
88
89 /* Skip the RPCRDMA header */
90 sge_no = 1;
91
92 /* Map the head. */
93 frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
94 vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
95 vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
96 vec->count = 2;
97 sge_no++;
98
99 /* Build the FRMR */
100 frmr->kva = frva;
101 frmr->direction = DMA_TO_DEVICE;
102 frmr->access_flags = 0;
103 frmr->map_len = PAGE_SIZE;
104 frmr->page_list_len = 1;
105 frmr->page_list->page_list[page_no] =
106 ib_dma_map_single(xprt->sc_cm_id->device,
107 (void *)xdr->head[0].iov_base,
108 PAGE_SIZE, DMA_TO_DEVICE);
109 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110 frmr->page_list->page_list[page_no]))
111 goto fatal_err;
112 atomic_inc(&xprt->sc_dma_used);
113
114 page_off = xdr->page_base;
115 page_bytes = xdr->page_len + page_off;
116 if (!page_bytes)
117 goto encode_tail;
118
119 /* Map the pages */
120 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
121 vec->sge[sge_no].iov_len = page_bytes;
122 sge_no++;
123 while (page_bytes) {
124 struct page *page;
125
126 page = xdr->pages[page_no++];
127 sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
128 page_bytes -= sge_bytes;
129
130 frmr->page_list->page_list[page_no] =
131 ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
132 PAGE_SIZE, DMA_TO_DEVICE);
133 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
134 frmr->page_list->page_list[page_no]))
135 goto fatal_err;
136
137 atomic_inc(&xprt->sc_dma_used);
138 page_off = 0; /* reset for next time through loop */
139 frmr->map_len += PAGE_SIZE;
140 frmr->page_list_len++;
141 }
142 vec->count++;
143
144 encode_tail:
145 /* Map tail */
146 if (0 == xdr->tail[0].iov_len)
147 goto done;
148
149 vec->count++;
150 vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
151
152 if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
153 ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
154 /*
155 * If head and tail use the same page, we don't need
156 * to map it again.
157 */
158 vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
159 } else {
160 void *va;
161
162 /* Map another page for the tail */
163 page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
164 va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
165 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
166
167 frmr->page_list->page_list[page_no] =
168 ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
169 DMA_TO_DEVICE);
170 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
171 frmr->page_list->page_list[page_no]))
172 goto fatal_err;
173 atomic_inc(&xprt->sc_dma_used);
174 frmr->map_len += PAGE_SIZE;
175 frmr->page_list_len++;
176 }
177
178 done:
179 if (svc_rdma_fastreg(xprt, frmr))
180 goto fatal_err;
181
182 return 0;
183
184 fatal_err:
185 printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
186 svc_rdma_put_frmr(xprt, frmr);
187 return -EIO;
188}
189
190static int map_xdr(struct svcxprt_rdma *xprt,
191 struct xdr_buf *xdr,
192 struct svc_rdma_req_map *vec)
75{ 193{
76 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; 194 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
77 int sge_no; 195 int sge_no;
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
83 BUG_ON(xdr->len != 201 BUG_ON(xdr->len !=
84 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); 202 (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
85 203
204 if (xprt->sc_frmr_pg_list_len)
205 return fast_reg_xdr(xprt, xdr, vec);
206
86 /* Skip the first sge, this is for the RPCRDMA header */ 207 /* Skip the first sge, this is for the RPCRDMA header */
87 sge_no = 1; 208 sge_no = 1;
88 209
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
116 237
117 BUG_ON(sge_no > sge_max); 238 BUG_ON(sge_no > sge_max);
118 vec->count = sge_no; 239 vec->count = sge_no;
240 return 0;
119} 241}
120 242
121/* Assumptions: 243/* Assumptions:
244 * - We are using FRMR
245 * - or -
122 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE 246 * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
123 */ 247 */
124static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, 248static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
158 sge_no = 0; 282 sge_no = 0;
159 283
160 /* Copy the remaining SGE */ 284 /* Copy the remaining SGE */
161 while (bc != 0 && xdr_sge_no < vec->count) { 285 while (bc != 0) {
162 sge[sge_no].lkey = xprt->sc_phys_mr->lkey; 286 sge_bytes = min_t(size_t,
163 sge_bytes = min((size_t)bc, 287 bc, vec->sge[xdr_sge_no].iov_len-sge_off);
164 (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
165 sge[sge_no].length = sge_bytes; 288 sge[sge_no].length = sge_bytes;
166 atomic_inc(&xprt->sc_dma_used); 289 if (!vec->frmr) {
167 sge[sge_no].addr = 290 sge[sge_no].addr =
168 ib_dma_map_single(xprt->sc_cm_id->device, 291 ib_dma_map_single(xprt->sc_cm_id->device,
169 (void *) 292 (void *)
170 vec->sge[xdr_sge_no].iov_base + sge_off, 293 vec->sge[xdr_sge_no].iov_base + sge_off,
171 sge_bytes, DMA_TO_DEVICE); 294 sge_bytes, DMA_TO_DEVICE);
172 if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, 295 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
173 sge[sge_no].addr)) 296 sge[sge_no].addr))
174 goto err; 297 goto err;
298 atomic_inc(&xprt->sc_dma_used);
299 sge[sge_no].lkey = xprt->sc_dma_lkey;
300 } else {
301 sge[sge_no].addr = (unsigned long)
302 vec->sge[xdr_sge_no].iov_base + sge_off;
303 sge[sge_no].lkey = vec->frmr->mr->lkey;
304 }
305 ctxt->count++;
306 ctxt->frmr = vec->frmr;
175 sge_off = 0; 307 sge_off = 0;
176 sge_no++; 308 sge_no++;
177 ctxt->count++;
178 xdr_sge_no++; 309 xdr_sge_no++;
310 BUG_ON(xdr_sge_no > vec->count);
179 bc -= sge_bytes; 311 bc -= sge_bytes;
180 } 312 }
181 313
182 BUG_ON(bc != 0);
183 BUG_ON(xdr_sge_no > vec->count);
184
185 /* Prepare WRITE WR */ 314 /* Prepare WRITE WR */
186 memset(&write_wr, 0, sizeof write_wr); 315 memset(&write_wr, 0, sizeof write_wr);
187 ctxt->wr_op = IB_WR_RDMA_WRITE; 316 ctxt->wr_op = IB_WR_RDMA_WRITE;
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
226 res_ary = (struct rpcrdma_write_array *) 355 res_ary = (struct rpcrdma_write_array *)
227 &rdma_resp->rm_body.rm_chunks[1]; 356 &rdma_resp->rm_body.rm_chunks[1];
228 357
229 max_write = xprt->sc_max_sge * PAGE_SIZE; 358 if (vec->frmr)
359 max_write = vec->frmr->map_len;
360 else
361 max_write = xprt->sc_max_sge * PAGE_SIZE;
230 362
231 /* Write chunks start at the pagelist */ 363 /* Write chunks start at the pagelist */
232 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; 364 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
297 res_ary = (struct rpcrdma_write_array *) 429 res_ary = (struct rpcrdma_write_array *)
298 &rdma_resp->rm_body.rm_chunks[2]; 430 &rdma_resp->rm_body.rm_chunks[2];
299 431
300 max_write = xprt->sc_max_sge * PAGE_SIZE; 432 if (vec->frmr)
433 max_write = vec->frmr->map_len;
434 else
435 max_write = xprt->sc_max_sge * PAGE_SIZE;
301 436
302 /* xdr offset starts at RPC message */ 437 /* xdr offset starts at RPC message */
303 for (xdr_off = 0, chunk_no = 0; 438 for (xdr_off = 0, chunk_no = 0;
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
307 ch = &arg_ary->wc_array[chunk_no].wc_target; 442 ch = &arg_ary->wc_array[chunk_no].wc_target;
308 write_len = min(xfer_len, ch->rs_length); 443 write_len = min(xfer_len, ch->rs_length);
309 444
310
311 /* Prepare the reply chunk given the length actually 445 /* Prepare the reply chunk given the length actually
312 * written */ 446 * written */
313 rs_offset = get_unaligned(&(ch->rs_offset)); 447 rs_offset = get_unaligned(&(ch->rs_offset));
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
366 int byte_count) 500 int byte_count)
367{ 501{
368 struct ib_send_wr send_wr; 502 struct ib_send_wr send_wr;
503 struct ib_send_wr inv_wr;
369 int sge_no; 504 int sge_no;
370 int sge_bytes; 505 int sge_bytes;
371 int page_no; 506 int page_no;
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
385 /* Prepare the context */ 520 /* Prepare the context */
386 ctxt->pages[0] = page; 521 ctxt->pages[0] = page;
387 ctxt->count = 1; 522 ctxt->count = 1;
523 ctxt->frmr = vec->frmr;
524 if (vec->frmr)
525 set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
526 else
527 clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
388 528
389 /* Prepare the SGE for the RPCRDMA Header */ 529 /* Prepare the SGE for the RPCRDMA Header */
390 atomic_inc(&rdma->sc_dma_used);
391 ctxt->sge[0].addr = 530 ctxt->sge[0].addr =
392 ib_dma_map_page(rdma->sc_cm_id->device, 531 ib_dma_map_page(rdma->sc_cm_id->device,
393 page, 0, PAGE_SIZE, DMA_TO_DEVICE); 532 page, 0, PAGE_SIZE, DMA_TO_DEVICE);
533 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
534 goto err;
535 atomic_inc(&rdma->sc_dma_used);
536
394 ctxt->direction = DMA_TO_DEVICE; 537 ctxt->direction = DMA_TO_DEVICE;
538
395 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 539 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
396 ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; 540 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
397 541
398 /* Determine how many of our SGE are to be transmitted */ 542 /* Determine how many of our SGE are to be transmitted */
399 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 543 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
400 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 544 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
401 byte_count -= sge_bytes; 545 byte_count -= sge_bytes;
402 atomic_inc(&rdma->sc_dma_used); 546 if (!vec->frmr) {
403 ctxt->sge[sge_no].addr = 547 ctxt->sge[sge_no].addr =
404 ib_dma_map_single(rdma->sc_cm_id->device, 548 ib_dma_map_single(rdma->sc_cm_id->device,
405 vec->sge[sge_no].iov_base, 549 vec->sge[sge_no].iov_base,
406 sge_bytes, DMA_TO_DEVICE); 550 sge_bytes, DMA_TO_DEVICE);
551 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
552 ctxt->sge[sge_no].addr))
553 goto err;
554 atomic_inc(&rdma->sc_dma_used);
555 ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
556 } else {
557 ctxt->sge[sge_no].addr = (unsigned long)
558 vec->sge[sge_no].iov_base;
559 ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
560 }
407 ctxt->sge[sge_no].length = sge_bytes; 561 ctxt->sge[sge_no].length = sge_bytes;
408 ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
409 } 562 }
410 BUG_ON(byte_count != 0); 563 BUG_ON(byte_count != 0);
411 564
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
417 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; 570 ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
418 ctxt->count++; 571 ctxt->count++;
419 rqstp->rq_respages[page_no] = NULL; 572 rqstp->rq_respages[page_no] = NULL;
420 /* If there are more pages than SGE, terminate SGE list */ 573 /*
574 * If there are more pages than SGE, terminate SGE
575 * list so that svc_rdma_unmap_dma doesn't attempt to
576 * unmap garbage.
577 */
421 if (page_no+1 >= sge_no) 578 if (page_no+1 >= sge_no)
422 ctxt->sge[page_no+1].length = 0; 579 ctxt->sge[page_no+1].length = 0;
423 } 580 }
424 BUG_ON(sge_no > rdma->sc_max_sge); 581 BUG_ON(sge_no > rdma->sc_max_sge);
582 BUG_ON(sge_no > ctxt->count);
425 memset(&send_wr, 0, sizeof send_wr); 583 memset(&send_wr, 0, sizeof send_wr);
426 ctxt->wr_op = IB_WR_SEND; 584 ctxt->wr_op = IB_WR_SEND;
427 send_wr.wr_id = (unsigned long)ctxt; 585 send_wr.wr_id = (unsigned long)ctxt;
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
429 send_wr.num_sge = sge_no; 587 send_wr.num_sge = sge_no;
430 send_wr.opcode = IB_WR_SEND; 588 send_wr.opcode = IB_WR_SEND;
431 send_wr.send_flags = IB_SEND_SIGNALED; 589 send_wr.send_flags = IB_SEND_SIGNALED;
590 if (vec->frmr) {
591 /* Prepare INVALIDATE WR */
592 memset(&inv_wr, 0, sizeof inv_wr);
593 inv_wr.opcode = IB_WR_LOCAL_INV;
594 inv_wr.send_flags = IB_SEND_SIGNALED;
595 inv_wr.ex.invalidate_rkey =
596 vec->frmr->mr->lkey;
597 send_wr.next = &inv_wr;
598 }
432 599
433 ret = svc_rdma_send(rdma, &send_wr); 600 ret = svc_rdma_send(rdma, &send_wr);
434 if (ret) 601 if (ret)
435 svc_rdma_put_context(ctxt, 1); 602 goto err;
436 603
437 return ret; 604 return 0;
605
606 err:
607 svc_rdma_put_frmr(rdma, vec->frmr);
608 svc_rdma_put_context(ctxt, 1);
609 return -EIO;
438} 610}
439 611
440void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) 612void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
477 ctxt = svc_rdma_get_context(rdma); 649 ctxt = svc_rdma_get_context(rdma);
478 ctxt->direction = DMA_TO_DEVICE; 650 ctxt->direction = DMA_TO_DEVICE;
479 vec = svc_rdma_get_req_map(); 651 vec = svc_rdma_get_req_map();
480 xdr_to_sge(rdma, &rqstp->rq_res, vec); 652 ret = map_xdr(rdma, &rqstp->rq_res, vec);
481 653 if (ret)
654 goto err0;
482 inline_bytes = rqstp->rq_res.len; 655 inline_bytes = rqstp->rq_res.len;
483 656
484 /* Create the RDMA response header */ 657 /* Create the RDMA response header */
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
498 if (ret < 0) { 671 if (ret < 0) {
499 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", 672 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
500 ret); 673 ret);
501 goto error; 674 goto err1;
502 } 675 }
503 inline_bytes -= ret; 676 inline_bytes -= ret;
504 677
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
508 if (ret < 0) { 681 if (ret < 0) {
509 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", 682 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
510 ret); 683 ret);
511 goto error; 684 goto err1;
512 } 685 }
513 inline_bytes -= ret; 686 inline_bytes -= ret;
514 687
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
517 svc_rdma_put_req_map(vec); 690 svc_rdma_put_req_map(vec);
518 dprintk("svcrdma: send_reply returns %d\n", ret); 691 dprintk("svcrdma: send_reply returns %d\n", ret);
519 return ret; 692 return ret;
520 error: 693
694 err1:
695 put_page(res_page);
696 err0:
521 svc_rdma_put_req_map(vec); 697 svc_rdma_put_req_map(vec);
522 svc_rdma_put_context(ctxt, 0); 698 svc_rdma_put_context(ctxt, 0);
523 put_page(res_page);
524 return ret; 699 return ret;
525} 700}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 900cb69728c6..6fb493cbd29f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
100 ctxt->xprt = xprt; 100 ctxt->xprt = xprt;
101 INIT_LIST_HEAD(&ctxt->dto_q); 101 INIT_LIST_HEAD(&ctxt->dto_q);
102 ctxt->count = 0; 102 ctxt->count = 0;
103 ctxt->frmr = NULL;
103 atomic_inc(&xprt->sc_ctxt_used); 104 atomic_inc(&xprt->sc_ctxt_used);
104 return ctxt; 105 return ctxt;
105} 106}
106 107
107static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) 108void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
108{ 109{
109 struct svcxprt_rdma *xprt = ctxt->xprt; 110 struct svcxprt_rdma *xprt = ctxt->xprt;
110 int i; 111 int i;
111 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { 112 for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
112 atomic_dec(&xprt->sc_dma_used); 113 /*
113 ib_dma_unmap_single(xprt->sc_cm_id->device, 114 * Unmap the DMA addr in the SGE if the lkey matches
114 ctxt->sge[i].addr, 115 * the sc_dma_lkey, otherwise, ignore it since it is
115 ctxt->sge[i].length, 116 * an FRMR lkey and will be unmapped later when the
116 ctxt->direction); 117 * last WR that uses it completes.
118 */
119 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
120 atomic_dec(&xprt->sc_dma_used);
121 ib_dma_unmap_single(xprt->sc_cm_id->device,
122 ctxt->sge[i].addr,
123 ctxt->sge[i].length,
124 ctxt->direction);
125 }
117 } 126 }
118} 127}
119 128
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
150 schedule_timeout_uninterruptible(msecs_to_jiffies(500)); 159 schedule_timeout_uninterruptible(msecs_to_jiffies(500));
151 } 160 }
152 map->count = 0; 161 map->count = 0;
162 map->frmr = NULL;
153 return map; 163 return map;
154} 164}
155 165
@@ -316,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
316} 326}
317 327
318/* 328/*
329 * Processs a completion context
330 */
331static void process_context(struct svcxprt_rdma *xprt,
332 struct svc_rdma_op_ctxt *ctxt)
333{
334 svc_rdma_unmap_dma(ctxt);
335
336 switch (ctxt->wr_op) {
337 case IB_WR_SEND:
338 if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
339 svc_rdma_put_frmr(xprt, ctxt->frmr);
340 svc_rdma_put_context(ctxt, 1);
341 break;
342
343 case IB_WR_RDMA_WRITE:
344 svc_rdma_put_context(ctxt, 0);
345 break;
346
347 case IB_WR_RDMA_READ:
348 case IB_WR_RDMA_READ_WITH_INV:
349 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
350 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
351 BUG_ON(!read_hdr);
352 if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
353 svc_rdma_put_frmr(xprt, ctxt->frmr);
354 spin_lock_bh(&xprt->sc_rq_dto_lock);
355 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
356 list_add_tail(&read_hdr->dto_q,
357 &xprt->sc_read_complete_q);
358 spin_unlock_bh(&xprt->sc_rq_dto_lock);
359 svc_xprt_enqueue(&xprt->sc_xprt);
360 }
361 svc_rdma_put_context(ctxt, 0);
362 break;
363
364 default:
365 printk(KERN_ERR "svcrdma: unexpected completion type, "
366 "opcode=%d\n",
367 ctxt->wr_op);
368 break;
369 }
370}
371
372/*
319 * Send Queue Completion Handler - potentially called on interrupt context. 373 * Send Queue Completion Handler - potentially called on interrupt context.
320 * 374 *
321 * Note that caller must hold a transport reference. 375 * Note that caller must hold a transport reference.
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
327 struct ib_cq *cq = xprt->sc_sq_cq; 381 struct ib_cq *cq = xprt->sc_sq_cq;
328 int ret; 382 int ret;
329 383
330
331 if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) 384 if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
332 return; 385 return;
333 386
334 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); 387 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
335 atomic_inc(&rdma_stat_sq_poll); 388 atomic_inc(&rdma_stat_sq_poll);
336 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { 389 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
337 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
338 xprt = ctxt->xprt;
339
340 svc_rdma_unmap_dma(ctxt);
341 if (wc.status != IB_WC_SUCCESS) 390 if (wc.status != IB_WC_SUCCESS)
342 /* Close the transport */ 391 /* Close the transport */
343 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 392 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
346 atomic_dec(&xprt->sc_sq_count); 395 atomic_dec(&xprt->sc_sq_count);
347 wake_up(&xprt->sc_send_wait); 396 wake_up(&xprt->sc_send_wait);
348 397
349 switch (ctxt->wr_op) { 398 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
350 case IB_WR_SEND: 399 if (ctxt)
351 svc_rdma_put_context(ctxt, 1); 400 process_context(xprt, ctxt);
352 break;
353
354 case IB_WR_RDMA_WRITE:
355 svc_rdma_put_context(ctxt, 0);
356 break;
357
358 case IB_WR_RDMA_READ:
359 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
360 struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
361 BUG_ON(!read_hdr);
362 spin_lock_bh(&xprt->sc_rq_dto_lock);
363 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
364 list_add_tail(&read_hdr->dto_q,
365 &xprt->sc_read_complete_q);
366 spin_unlock_bh(&xprt->sc_rq_dto_lock);
367 svc_xprt_enqueue(&xprt->sc_xprt);
368 }
369 svc_rdma_put_context(ctxt, 0);
370 break;
371 401
372 default:
373 printk(KERN_ERR "svcrdma: unexpected completion type, "
374 "opcode=%d, status=%d\n",
375 wc.opcode, wc.status);
376 break;
377 }
378 svc_xprt_put(&xprt->sc_xprt); 402 svc_xprt_put(&xprt->sc_xprt);
379 } 403 }
380 404
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
425 INIT_LIST_HEAD(&cma_xprt->sc_dto_q); 449 INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
426 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 450 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
427 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); 451 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
452 INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
428 init_waitqueue_head(&cma_xprt->sc_send_wait); 453 init_waitqueue_head(&cma_xprt->sc_send_wait);
429 454
430 spin_lock_init(&cma_xprt->sc_lock); 455 spin_lock_init(&cma_xprt->sc_lock);
431 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 456 spin_lock_init(&cma_xprt->sc_rq_dto_lock);
457 spin_lock_init(&cma_xprt->sc_frmr_q_lock);
432 458
433 cma_xprt->sc_ord = svcrdma_ord; 459 cma_xprt->sc_ord = svcrdma_ord;
434 460
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
462 struct ib_recv_wr recv_wr, *bad_recv_wr; 488 struct ib_recv_wr recv_wr, *bad_recv_wr;
463 struct svc_rdma_op_ctxt *ctxt; 489 struct svc_rdma_op_ctxt *ctxt;
464 struct page *page; 490 struct page *page;
465 unsigned long pa; 491 dma_addr_t pa;
466 int sge_no; 492 int sge_no;
467 int buflen; 493 int buflen;
468 int ret; 494 int ret;
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
474 BUG_ON(sge_no >= xprt->sc_max_sge); 500 BUG_ON(sge_no >= xprt->sc_max_sge);
475 page = svc_rdma_get_page(); 501 page = svc_rdma_get_page();
476 ctxt->pages[sge_no] = page; 502 ctxt->pages[sge_no] = page;
477 atomic_inc(&xprt->sc_dma_used);
478 pa = ib_dma_map_page(xprt->sc_cm_id->device, 503 pa = ib_dma_map_page(xprt->sc_cm_id->device,
479 page, 0, PAGE_SIZE, 504 page, 0, PAGE_SIZE,
480 DMA_FROM_DEVICE); 505 DMA_FROM_DEVICE);
506 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
507 goto err_put_ctxt;
508 atomic_inc(&xprt->sc_dma_used);
481 ctxt->sge[sge_no].addr = pa; 509 ctxt->sge[sge_no].addr = pa;
482 ctxt->sge[sge_no].length = PAGE_SIZE; 510 ctxt->sge[sge_no].length = PAGE_SIZE;
483 ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; 511 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
484 buflen += PAGE_SIZE; 512 buflen += PAGE_SIZE;
485 } 513 }
486 ctxt->count = sge_no; 514 ctxt->count = sge_no;
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
496 svc_rdma_put_context(ctxt, 1); 524 svc_rdma_put_context(ctxt, 1);
497 } 525 }
498 return ret; 526 return ret;
527
528 err_put_ctxt:
529 svc_rdma_put_context(ctxt, 1);
530 return -ENOMEM;
499} 531}
500 532
501/* 533/*
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
566 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " 598 dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
567 "event=%d\n", cma_id, cma_id->context, event->event); 599 "event=%d\n", cma_id, cma_id->context, event->event);
568 handle_connect_req(cma_id, 600 handle_connect_req(cma_id,
569 event->param.conn.responder_resources); 601 event->param.conn.initiator_depth);
570 break; 602 break;
571 603
572 case RDMA_CM_EVENT_ESTABLISHED: 604 case RDMA_CM_EVENT_ESTABLISHED:
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
686 return ERR_PTR(ret); 718 return ERR_PTR(ret);
687} 719}
688 720
721static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
722{
723 struct ib_mr *mr;
724 struct ib_fast_reg_page_list *pl;
725 struct svc_rdma_fastreg_mr *frmr;
726
727 frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
728 if (!frmr)
729 goto err;
730
731 mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
732 if (!mr)
733 goto err_free_frmr;
734
735 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
736 RPCSVC_MAXPAGES);
737 if (!pl)
738 goto err_free_mr;
739
740 frmr->mr = mr;
741 frmr->page_list = pl;
742 INIT_LIST_HEAD(&frmr->frmr_list);
743 return frmr;
744
745 err_free_mr:
746 ib_dereg_mr(mr);
747 err_free_frmr:
748 kfree(frmr);
749 err:
750 return ERR_PTR(-ENOMEM);
751}
752
753static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
754{
755 struct svc_rdma_fastreg_mr *frmr;
756
757 while (!list_empty(&xprt->sc_frmr_q)) {
758 frmr = list_entry(xprt->sc_frmr_q.next,
759 struct svc_rdma_fastreg_mr, frmr_list);
760 list_del_init(&frmr->frmr_list);
761 ib_dereg_mr(frmr->mr);
762 ib_free_fast_reg_page_list(frmr->page_list);
763 kfree(frmr);
764 }
765}
766
767struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
768{
769 struct svc_rdma_fastreg_mr *frmr = NULL;
770
771 spin_lock_bh(&rdma->sc_frmr_q_lock);
772 if (!list_empty(&rdma->sc_frmr_q)) {
773 frmr = list_entry(rdma->sc_frmr_q.next,
774 struct svc_rdma_fastreg_mr, frmr_list);
775 list_del_init(&frmr->frmr_list);
776 frmr->map_len = 0;
777 frmr->page_list_len = 0;
778 }
779 spin_unlock_bh(&rdma->sc_frmr_q_lock);
780 if (frmr)
781 return frmr;
782
783 return rdma_alloc_frmr(rdma);
784}
785
786static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
787 struct svc_rdma_fastreg_mr *frmr)
788{
789 int page_no;
790 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
791 dma_addr_t addr = frmr->page_list->page_list[page_no];
792 if (ib_dma_mapping_error(frmr->mr->device, addr))
793 continue;
794 atomic_dec(&xprt->sc_dma_used);
795 ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
796 frmr->direction);
797 }
798}
799
800void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
801 struct svc_rdma_fastreg_mr *frmr)
802{
803 if (frmr) {
804 frmr_unmap_dma(rdma, frmr);
805 spin_lock_bh(&rdma->sc_frmr_q_lock);
806 BUG_ON(!list_empty(&frmr->frmr_list));
807 list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
808 spin_unlock_bh(&rdma->sc_frmr_q_lock);
809 }
810}
811
689/* 812/*
690 * This is the xpo_recvfrom function for listening endpoints. Its 813 * This is the xpo_recvfrom function for listening endpoints. Its
691 * purpose is to accept incoming connections. The CMA callback handler 814 * purpose is to accept incoming connections. The CMA callback handler
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
704 struct rdma_conn_param conn_param; 827 struct rdma_conn_param conn_param;
705 struct ib_qp_init_attr qp_attr; 828 struct ib_qp_init_attr qp_attr;
706 struct ib_device_attr devattr; 829 struct ib_device_attr devattr;
830 int dma_mr_acc;
831 int need_dma_mr;
707 int ret; 832 int ret;
708 int i; 833 int i;
709 834
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
819 } 944 }
820 newxprt->sc_qp = newxprt->sc_cm_id->qp; 945 newxprt->sc_qp = newxprt->sc_cm_id->qp;
821 946
822 /* Register all of physical memory */ 947 /*
823 newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, 948 * Use the most secure set of MR resources based on the
824 IB_ACCESS_LOCAL_WRITE | 949 * transport type and available memory management features in
825 IB_ACCESS_REMOTE_WRITE); 950 * the device. Here's the table implemented below:
826 if (IS_ERR(newxprt->sc_phys_mr)) { 951 *
827 dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); 952 * Fast Global DMA Remote WR
953 * Reg LKEY MR Access
954 * Sup'd Sup'd Needed Needed
955 *
956 * IWARP N N Y Y
957 * N Y Y Y
958 * Y N Y N
959 * Y Y N -
960 *
961 * IB N N Y N
962 * N Y N -
963 * Y N Y N
964 * Y Y N -
965 *
966 * NB: iWARP requires remote write access for the data sink
967 * of an RDMA_READ. IB does not.
968 */
969 if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
970 newxprt->sc_frmr_pg_list_len =
971 devattr.max_fast_reg_page_list_len;
972 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
973 }
974
975 /*
976 * Determine if a DMA MR is required and if so, what privs are required
977 */
978 switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
979 case RDMA_TRANSPORT_IWARP:
980 newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
981 if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
982 need_dma_mr = 1;
983 dma_mr_acc =
984 (IB_ACCESS_LOCAL_WRITE |
985 IB_ACCESS_REMOTE_WRITE);
986 } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
987 need_dma_mr = 1;
988 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
989 } else
990 need_dma_mr = 0;
991 break;
992 case RDMA_TRANSPORT_IB:
993 if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
994 need_dma_mr = 1;
995 dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
996 } else
997 need_dma_mr = 0;
998 break;
999 default:
828 goto errout; 1000 goto errout;
829 } 1001 }
830 1002
1003 /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
1004 if (need_dma_mr) {
1005 /* Register all of physical memory */
1006 newxprt->sc_phys_mr =
1007 ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
1008 if (IS_ERR(newxprt->sc_phys_mr)) {
1009 dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
1010 ret);
1011 goto errout;
1012 }
1013 newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
1014 } else
1015 newxprt->sc_dma_lkey =
1016 newxprt->sc_cm_id->device->local_dma_lkey;
1017
831 /* Post receive buffers */ 1018 /* Post receive buffers */
832 for (i = 0; i < newxprt->sc_max_requests; i++) { 1019 for (i = 0; i < newxprt->sc_max_requests; i++) {
833 ret = svc_rdma_post_recv(newxprt); 1020 ret = svc_rdma_post_recv(newxprt);
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work)
961 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); 1148 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
962 WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); 1149 WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
963 1150
1151 /* De-allocate fastreg mr */
1152 rdma_dealloc_frmr_q(rdma);
1153
964 /* Destroy the QP if present (not a listener) */ 1154 /* Destroy the QP if present (not a listener) */
965 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 1155 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
966 ib_destroy_qp(rdma->sc_qp); 1156 ib_destroy_qp(rdma->sc_qp);
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
1014 return 1; 1204 return 1;
1015} 1205}
1016 1206
1207/*
1208 * Attempt to register the kvec representing the RPC memory with the
1209 * device.
1210 *
1211 * Returns:
1212 * NULL : The device does not support fastreg or there were no more
1213 * fastreg mr.
1214 * frmr : The kvec register request was successfully posted.
1215 * <0 : An error was encountered attempting to register the kvec.
1216 */
1217int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
1218 struct svc_rdma_fastreg_mr *frmr)
1219{
1220 struct ib_send_wr fastreg_wr;
1221 u8 key;
1222
1223 /* Bump the key */
1224 key = (u8)(frmr->mr->lkey & 0x000000FF);
1225 ib_update_fast_reg_key(frmr->mr, ++key);
1226
1227 /* Prepare FASTREG WR */
1228 memset(&fastreg_wr, 0, sizeof fastreg_wr);
1229 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1230 fastreg_wr.send_flags = IB_SEND_SIGNALED;
1231 fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
1232 fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
1233 fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
1234 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1235 fastreg_wr.wr.fast_reg.length = frmr->map_len;
1236 fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
1237 fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
1238 return svc_rdma_send(xprt, &fastreg_wr);
1239}
1240
1017int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) 1241int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1018{ 1242{
1019 struct ib_send_wr *bad_wr; 1243 struct ib_send_wr *bad_wr, *n_wr;
1244 int wr_count;
1245 int i;
1020 int ret; 1246 int ret;
1021 1247
1022 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1248 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1023 return -ENOTCONN; 1249 return -ENOTCONN;
1024 1250
1025 BUG_ON(wr->send_flags != IB_SEND_SIGNALED); 1251 BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
1026 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != 1252 wr_count = 1;
1027 wr->opcode); 1253 for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
1254 wr_count++;
1255
1028 /* If the SQ is full, wait until an SQ entry is available */ 1256 /* If the SQ is full, wait until an SQ entry is available */
1029 while (1) { 1257 while (1) {
1030 spin_lock_bh(&xprt->sc_lock); 1258 spin_lock_bh(&xprt->sc_lock);
1031 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { 1259 if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
1032 spin_unlock_bh(&xprt->sc_lock); 1260 spin_unlock_bh(&xprt->sc_lock);
1033 atomic_inc(&rdma_stat_sq_starve); 1261 atomic_inc(&rdma_stat_sq_starve);
1034 1262
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1043 return 0; 1271 return 0;
1044 continue; 1272 continue;
1045 } 1273 }
1046 /* Bumped used SQ WR count and post */ 1274 /* Take a transport ref for each WR posted */
1047 svc_xprt_get(&xprt->sc_xprt); 1275 for (i = 0; i < wr_count; i++)
1276 svc_xprt_get(&xprt->sc_xprt);
1277
1278 /* Bump used SQ WR count and post */
1279 atomic_add(wr_count, &xprt->sc_sq_count);
1048 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); 1280 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1049 if (!ret) 1281 if (ret) {
1050 atomic_inc(&xprt->sc_sq_count); 1282 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
1051 else { 1283 atomic_sub(wr_count, &xprt->sc_sq_count);
1052 svc_xprt_put(&xprt->sc_xprt); 1284 for (i = 0; i < wr_count; i ++)
1285 svc_xprt_put(&xprt->sc_xprt);
1053 dprintk("svcrdma: failed to post SQ WR rc=%d, " 1286 dprintk("svcrdma: failed to post SQ WR rc=%d, "
1054 "sc_sq_count=%d, sc_sq_depth=%d\n", 1287 "sc_sq_count=%d, sc_sq_depth=%d\n",
1055 ret, atomic_read(&xprt->sc_sq_count), 1288 ret, atomic_read(&xprt->sc_sq_count),
1056 xprt->sc_sq_depth); 1289 xprt->sc_sq_depth);
1057 } 1290 }
1058 spin_unlock_bh(&xprt->sc_lock); 1291 spin_unlock_bh(&xprt->sc_lock);
1292 if (ret)
1293 wake_up(&xprt->sc_send_wait);
1059 break; 1294 break;
1060 } 1295 }
1061 return ret; 1296 return ret;
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1079 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1314 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1080 1315
1081 /* Prepare SGE for local address */ 1316 /* Prepare SGE for local address */
1082 atomic_inc(&xprt->sc_dma_used);
1083 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, 1317 sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
1084 p, 0, PAGE_SIZE, DMA_FROM_DEVICE); 1318 p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
1085 sge.lkey = xprt->sc_phys_mr->lkey; 1319 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
1320 put_page(p);
1321 return;
1322 }
1323 atomic_inc(&xprt->sc_dma_used);
1324 sge.lkey = xprt->sc_dma_lkey;
1086 sge.length = length; 1325 sge.length = length;
1087 1326
1088 ctxt = svc_rdma_get_context(xprt); 1327 ctxt = svc_rdma_get_context(xprt);
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1103 if (ret) { 1342 if (ret) {
1104 dprintk("svcrdma: Error %d posting send for protocol error\n", 1343 dprintk("svcrdma: Error %d posting send for protocol error\n",
1105 ret); 1344 ret);
1345 ib_dma_unmap_page(xprt->sc_cm_id->device,
1346 sge.addr, PAGE_SIZE,
1347 DMA_FROM_DEVICE);
1106 svc_rdma_put_context(ctxt, 1); 1348 svc_rdma_put_context(ctxt, 1);
1107 } 1349 }
1108} 1350}