path: root/net/sunrpc
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/sunrpc
tag	v2.6.12-rc2 (Linux-2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it.

Let it rip!
Diffstat (limited to 'net/sunrpc')
-rw-r--r--	net/sunrpc/Makefile	15
-rw-r--r--	net/sunrpc/auth.c	395
-rw-r--r--	net/sunrpc/auth_gss/Makefile	18
-rw-r--r--	net/sunrpc/auth_gss/auth_gss.c	1152
-rw-r--r--	net/sunrpc/auth_gss/gss_generic_token.c	235
-rw-r--r--	net/sunrpc/auth_gss/gss_krb5_crypto.c	209
-rw-r--r--	net/sunrpc/auth_gss/gss_krb5_mech.c	275
-rw-r--r--	net/sunrpc/auth_gss/gss_krb5_seal.c	176
-rw-r--r--	net/sunrpc/auth_gss/gss_krb5_seqnum.c	88
-rw-r--r--	net/sunrpc/auth_gss/gss_krb5_unseal.c	202
-rw-r--r--	net/sunrpc/auth_gss/gss_mech_switch.c	301
-rw-r--r--	net/sunrpc/auth_gss/gss_spkm3_mech.c	300
-rw-r--r--	net/sunrpc/auth_gss/gss_spkm3_seal.c	132
-rw-r--r--	net/sunrpc/auth_gss/gss_spkm3_token.c	266
-rw-r--r--	net/sunrpc/auth_gss/gss_spkm3_unseal.c	128
-rw-r--r--	net/sunrpc/auth_gss/svcauth_gss.c	1080
-rw-r--r--	net/sunrpc/auth_null.c	143
-rw-r--r--	net/sunrpc/auth_unix.c	242
-rw-r--r--	net/sunrpc/cache.c	1189
-rw-r--r--	net/sunrpc/clnt.c	1085
-rw-r--r--	net/sunrpc/pmap_clnt.c	298
-rw-r--r--	net/sunrpc/rpc_pipe.c	838
-rw-r--r--	net/sunrpc/sched.c	1119
-rw-r--r--	net/sunrpc/stats.c	175
-rw-r--r--	net/sunrpc/sunrpc_syms.c	185
-rw-r--r--	net/sunrpc/svc.c	490
-rw-r--r--	net/sunrpc/svcauth.c	216
-rw-r--r--	net/sunrpc/svcauth_unix.c	502
-rw-r--r--	net/sunrpc/svcsock.c	1585
-rw-r--r--	net/sunrpc/sysctl.c	193
-rw-r--r--	net/sunrpc/timer.c	107
-rw-r--r--	net/sunrpc/xdr.c	917
-rw-r--r--	net/sunrpc/xprt.c	1678
33 files changed, 15934 insertions(+), 0 deletions(-)
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
new file mode 100644
index 000000000000..46a2ce00a29b
--- /dev/null
+++ b/net/sunrpc/Makefile
@@ -0,0 +1,15 @@
1#
2# Makefile for Linux kernel SUN RPC
3#
4
5
6obj-$(CONFIG_SUNRPC) += sunrpc.o
7obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
8
9sunrpc-y := clnt.o xprt.o sched.o \
10 auth.o auth_null.o auth_unix.o \
11 svc.o svcsock.o svcauth.o svcauth_unix.o \
12 pmap_clnt.o timer.o xdr.o \
13 sunrpc_syms.o cache.o rpc_pipe.o
14sunrpc-$(CONFIG_PROC_FS) += stats.o
15sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
new file mode 100644
index 000000000000..9bcec9b927b9
--- /dev/null
+++ b/net/sunrpc/auth.c
@@ -0,0 +1,395 @@
1/*
2 * linux/net/sunrpc/auth.c
3 *
4 * Generic RPC client authentication API.
5 *
6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/types.h>
10#include <linux/sched.h>
11#include <linux/module.h>
12#include <linux/slab.h>
13#include <linux/errno.h>
14#include <linux/socket.h>
15#include <linux/sunrpc/clnt.h>
16#include <linux/spinlock.h>
17
18#ifdef RPC_DEBUG
19# define RPCDBG_FACILITY RPCDBG_AUTH
20#endif
21
22static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = {
23 &authnull_ops, /* AUTH_NULL */
24 &authunix_ops, /* AUTH_UNIX */
25 NULL, /* others can be loadable modules */
26};
27
28static u32
29pseudoflavor_to_flavor(u32 flavor) {
30 if (flavor >= RPC_AUTH_MAXFLAVOR)
31 return RPC_AUTH_GSS;
32 return flavor;
33}
34
35int
36rpcauth_register(struct rpc_authops *ops)
37{
38 rpc_authflavor_t flavor;
39
40 if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
41 return -EINVAL;
42 if (auth_flavors[flavor] != NULL)
43 return -EPERM; /* what else? */
44 auth_flavors[flavor] = ops;
45 return 0;
46}
47
48int
49rpcauth_unregister(struct rpc_authops *ops)
50{
51 rpc_authflavor_t flavor;
52
53 if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
54 return -EINVAL;
55 if (auth_flavors[flavor] != ops)
56 return -EPERM; /* what else? */
57 auth_flavors[flavor] = NULL;
58 return 0;
59}
60
61struct rpc_auth *
62rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
63{
64 struct rpc_auth *auth;
65 struct rpc_authops *ops;
66 u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
67
68 if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
69 return NULL;
70 auth = ops->create(clnt, pseudoflavor);
71 if (!auth)
72 return NULL;
73 if (clnt->cl_auth)
74 rpcauth_destroy(clnt->cl_auth);
75 clnt->cl_auth = auth;
76 return auth;
77}
78
79void
80rpcauth_destroy(struct rpc_auth *auth)
81{
82 if (!atomic_dec_and_test(&auth->au_count))
83 return;
84 auth->au_ops->destroy(auth);
85}
86
87static DEFINE_SPINLOCK(rpc_credcache_lock);
88
89/*
90 * Initialize RPC credential cache
91 */
92int
93rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
94{
95 struct rpc_cred_cache *new;
96 int i;
97
98 new = (struct rpc_cred_cache *)kmalloc(sizeof(*new), GFP_KERNEL);
99 if (!new)
100 return -ENOMEM;
101 for (i = 0; i < RPC_CREDCACHE_NR; i++)
102 INIT_HLIST_HEAD(&new->hashtable[i]);
103 new->expire = expire;
104 new->nextgc = jiffies + (expire >> 1);
105 auth->au_credcache = new;
106 return 0;
107}
108
109/*
110 * Destroy a list of credentials
111 */
112static inline
113void rpcauth_destroy_credlist(struct hlist_head *head)
114{
115 struct rpc_cred *cred;
116
117 while (!hlist_empty(head)) {
118 cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
119 hlist_del_init(&cred->cr_hash);
120 put_rpccred(cred);
121 }
122}
123
124/*
125 * Clear the RPC credential cache, and delete those credentials
126 * that are not referenced.
127 */
128void
129rpcauth_free_credcache(struct rpc_auth *auth)
130{
131 struct rpc_cred_cache *cache = auth->au_credcache;
132 HLIST_HEAD(free);
133 struct hlist_node *pos, *next;
134 struct rpc_cred *cred;
135 int i;
136
137 spin_lock(&rpc_credcache_lock);
138 for (i = 0; i < RPC_CREDCACHE_NR; i++) {
139 hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
140 cred = hlist_entry(pos, struct rpc_cred, cr_hash);
141 __hlist_del(&cred->cr_hash);
142 hlist_add_head(&cred->cr_hash, &free);
143 }
144 }
145 spin_unlock(&rpc_credcache_lock);
146 rpcauth_destroy_credlist(&free);
147}
148
149static void
150rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free)
151{
152 if (atomic_read(&cred->cr_count) != 1)
153 return;
154 if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire))
155 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
156 if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) {
157 __hlist_del(&cred->cr_hash);
158 hlist_add_head(&cred->cr_hash, free);
159 }
160}
161
162/*
163 * Remove stale credentials. Avoid sleeping inside the loop.
164 */
165static void
166rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free)
167{
168 struct rpc_cred_cache *cache = auth->au_credcache;
169 struct hlist_node *pos, *next;
170 struct rpc_cred *cred;
171 int i;
172
173 dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth);
174 for (i = 0; i < RPC_CREDCACHE_NR; i++) {
175 hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
176 cred = hlist_entry(pos, struct rpc_cred, cr_hash);
177 rpcauth_prune_expired(auth, cred, free);
178 }
179 }
180 cache->nextgc = jiffies + cache->expire;
181}
182
183/*
184 * Look up a process' credentials in the authentication cache
185 */
186struct rpc_cred *
187rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
188 int taskflags)
189{
190 struct rpc_cred_cache *cache = auth->au_credcache;
191 HLIST_HEAD(free);
192 struct hlist_node *pos, *next;
193 struct rpc_cred *new = NULL,
194 *cred = NULL;
195 int nr = 0;
196
197 if (!(taskflags & RPC_TASK_ROOTCREDS))
198 nr = acred->uid & RPC_CREDCACHE_MASK;
199retry:
200 spin_lock(&rpc_credcache_lock);
201 if (time_before(cache->nextgc, jiffies))
202 rpcauth_gc_credcache(auth, &free);
203 hlist_for_each_safe(pos, next, &cache->hashtable[nr]) {
204 struct rpc_cred *entry;
205 entry = hlist_entry(pos, struct rpc_cred, cr_hash);
206 if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
207 hlist_del(&entry->cr_hash);
208 cred = entry;
209 break;
210 }
211 rpcauth_prune_expired(auth, entry, &free);
212 }
213 if (new) {
214 if (cred)
215 hlist_add_head(&new->cr_hash, &free);
216 else
217 cred = new;
218 }
219 if (cred) {
220 hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]);
221 get_rpccred(cred);
222 }
223 spin_unlock(&rpc_credcache_lock);
224
225 rpcauth_destroy_credlist(&free);
226
227 if (!cred) {
228 new = auth->au_ops->crcreate(auth, acred, taskflags);
229 if (!IS_ERR(new)) {
230#ifdef RPC_DEBUG
231 new->cr_magic = RPCAUTH_CRED_MAGIC;
232#endif
233 goto retry;
234 } else
235 cred = new;
236 }
237
238 return (struct rpc_cred *) cred;
239}
240
241struct rpc_cred *
242rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
243{
244 struct auth_cred acred = {
245 .uid = current->fsuid,
246 .gid = current->fsgid,
247 .group_info = current->group_info,
248 };
249 struct rpc_cred *ret;
250
251 dprintk("RPC: looking up %s cred\n",
252 auth->au_ops->au_name);
253 get_group_info(acred.group_info);
254 ret = auth->au_ops->lookup_cred(auth, &acred, taskflags);
255 put_group_info(acred.group_info);
256 return ret;
257}
258
259struct rpc_cred *
260rpcauth_bindcred(struct rpc_task *task)
261{
262 struct rpc_auth *auth = task->tk_auth;
263 struct auth_cred acred = {
264 .uid = current->fsuid,
265 .gid = current->fsgid,
266 .group_info = current->group_info,
267 };
268 struct rpc_cred *ret;
269
270 dprintk("RPC: %4d looking up %s cred\n",
271 task->tk_pid, task->tk_auth->au_ops->au_name);
272 get_group_info(acred.group_info);
273 ret = auth->au_ops->lookup_cred(auth, &acred, task->tk_flags);
274 if (!IS_ERR(ret))
275 task->tk_msg.rpc_cred = ret;
276 else
277 task->tk_status = PTR_ERR(ret);
278 put_group_info(acred.group_info);
279 return ret;
280}
281
282void
283rpcauth_holdcred(struct rpc_task *task)
284{
285 dprintk("RPC: %4d holding %s cred %p\n",
286 task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
287 if (task->tk_msg.rpc_cred)
288 get_rpccred(task->tk_msg.rpc_cred);
289}
290
291void
292put_rpccred(struct rpc_cred *cred)
293{
294 cred->cr_expire = jiffies;
295 if (!atomic_dec_and_test(&cred->cr_count))
296 return;
297 cred->cr_ops->crdestroy(cred);
298}
299
300void
301rpcauth_unbindcred(struct rpc_task *task)
302{
303 struct rpc_auth *auth = task->tk_auth;
304 struct rpc_cred *cred = task->tk_msg.rpc_cred;
305
306 dprintk("RPC: %4d releasing %s cred %p\n",
307 task->tk_pid, auth->au_ops->au_name, cred);
308
309 put_rpccred(cred);
310 task->tk_msg.rpc_cred = NULL;
311}
312
313u32 *
314rpcauth_marshcred(struct rpc_task *task, u32 *p)
315{
316 struct rpc_auth *auth = task->tk_auth;
317 struct rpc_cred *cred = task->tk_msg.rpc_cred;
318
319 dprintk("RPC: %4d marshaling %s cred %p\n",
320 task->tk_pid, auth->au_ops->au_name, cred);
321 return cred->cr_ops->crmarshal(task, p);
322}
323
324u32 *
325rpcauth_checkverf(struct rpc_task *task, u32 *p)
326{
327 struct rpc_auth *auth = task->tk_auth;
328 struct rpc_cred *cred = task->tk_msg.rpc_cred;
329
330 dprintk("RPC: %4d validating %s cred %p\n",
331 task->tk_pid, auth->au_ops->au_name, cred);
332 return cred->cr_ops->crvalidate(task, p);
333}
334
335int
336rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
337 u32 *data, void *obj)
338{
339 struct rpc_cred *cred = task->tk_msg.rpc_cred;
340
341 dprintk("RPC: %4d using %s cred %p to wrap rpc data\n",
342 task->tk_pid, cred->cr_ops->cr_name, cred);
343 if (cred->cr_ops->crwrap_req)
344 return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
345 /* By default, we encode the arguments normally. */
346 return encode(rqstp, data, obj);
347}
348
349int
350rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
351 u32 *data, void *obj)
352{
353 struct rpc_cred *cred = task->tk_msg.rpc_cred;
354
355 dprintk("RPC: %4d using %s cred %p to unwrap rpc data\n",
356 task->tk_pid, cred->cr_ops->cr_name, cred);
357 if (cred->cr_ops->crunwrap_resp)
358 return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
359 data, obj);
360 /* By default, we decode the arguments normally. */
361 return decode(rqstp, data, obj);
362}
363
364int
365rpcauth_refreshcred(struct rpc_task *task)
366{
367 struct rpc_auth *auth = task->tk_auth;
368 struct rpc_cred *cred = task->tk_msg.rpc_cred;
369 int err;
370
371 dprintk("RPC: %4d refreshing %s cred %p\n",
372 task->tk_pid, auth->au_ops->au_name, cred);
373 err = cred->cr_ops->crrefresh(task);
374 if (err < 0)
375 task->tk_status = err;
376 return err;
377}
378
379void
380rpcauth_invalcred(struct rpc_task *task)
381{
382 dprintk("RPC: %4d invalidating %s cred %p\n",
383 task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
384 spin_lock(&rpc_credcache_lock);
385 if (task->tk_msg.rpc_cred)
386 task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
387 spin_unlock(&rpc_credcache_lock);
388}
389
390int
391rpcauth_uptodatecred(struct rpc_task *task)
392{
393 return !(task->tk_msg.rpc_cred) ||
394 (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
395}
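
The auth_flavors[] table at the top of auth.c is the hook that lets additional security flavors plug into this generic layer: a flavor module fills in a struct rpc_authops, calls rpcauth_register() at load time and rpcauth_unregister() on unload, exactly as the RPCSEC_GSS module further down does in init_rpcsec_gss()/exit_rpcsec_gss(). Below is a minimal sketch of that pattern; "myauth", its flavor slot and the stub create method are illustrative only and not part of this patch.

/* Sketch only: a hypothetical flavor module plugging into auth.c above. */
#include <linux/module.h>
#include <linux/sunrpc/auth.h>

static struct rpc_auth *myauth_create(struct rpc_clnt *clnt,
				      rpc_authflavor_t flavor)
{
	return NULL;	/* a real flavor allocates and fills its rpc_auth here */
}

static struct rpc_authops myauth_ops = {
	.owner		= THIS_MODULE,
	.au_flavor	= RPC_AUTH_MAXFLAVOR - 1,	/* illustrative slot */
	.create		= myauth_create,
};

static int __init init_myauth(void)
{
	/* -EINVAL for a bad flavor number, -EPERM if the slot is already taken */
	return rpcauth_register(&myauth_ops);
}

static void __exit exit_myauth(void)
{
	rpcauth_unregister(&myauth_ops);
}

MODULE_LICENSE("GPL");
module_init(init_myauth)
module_exit(exit_myauth)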
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
new file mode 100644
index 000000000000..fe1b874084bc
--- /dev/null
+++ b/net/sunrpc/auth_gss/Makefile
@@ -0,0 +1,18 @@
1#
2# Makefile for Linux kernel rpcsec_gss implementation
3#
4
5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
6
7auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
8 gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o
9
10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o
14
15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
16
17rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \
18 gss_spkm3_token.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
new file mode 100644
index 000000000000..a33b627cbef4
--- /dev/null
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -0,0 +1,1152 @@
1/*
2 * linux/net/sunrpc/auth_gss.c
3 *
4 * RPCSEC_GSS client authentication.
5 *
6 * Copyright (c) 2000 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Dug Song <dugsong@monkey.org>
10 * Andy Adamson <andros@umich.edu>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
27 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * $Id$
38 */
39
40
41#include <linux/module.h>
42#include <linux/init.h>
43#include <linux/types.h>
44#include <linux/slab.h>
45#include <linux/socket.h>
46#include <linux/in.h>
47#include <linux/sched.h>
48#include <linux/sunrpc/clnt.h>
49#include <linux/sunrpc/auth.h>
50#include <linux/sunrpc/auth_gss.h>
51#include <linux/sunrpc/svcauth_gss.h>
52#include <linux/sunrpc/gss_err.h>
53#include <linux/workqueue.h>
54#include <linux/sunrpc/rpc_pipe_fs.h>
55#include <linux/sunrpc/gss_api.h>
56#include <asm/uaccess.h>
57
58static struct rpc_authops authgss_ops;
59
60static struct rpc_credops gss_credops;
61
62#ifdef RPC_DEBUG
63# define RPCDBG_FACILITY RPCDBG_AUTH
64#endif
65
66#define NFS_NGROUPS 16
67
68#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */
69#define GSS_CRED_SLACK 1024 /* XXX: unused */
70/* length of a krb5 verifier (48), plus data added before arguments when
71 * using integrity (two 4-byte integers): */
72#define GSS_VERF_SLACK 56
73
74/* XXX this define must match the gssd define
75* as it is passed to gssd to signal the use of
76* machine creds should be part of the shared rpc interface */
77
78#define CA_RUN_AS_MACHINE 0x00000200
79
80/* dump the buffer in `emacs-hexl' style */
81#define isprint(c) ((c > 0x1f) && (c < 0x7f))
82
83static DEFINE_RWLOCK(gss_ctx_lock);
84
85struct gss_auth {
86 struct rpc_auth rpc_auth;
87 struct gss_api_mech *mech;
88 enum rpc_gss_svc service;
89 struct list_head upcalls;
90 struct rpc_clnt *client;
91 struct dentry *dentry;
92 char path[48];
93 spinlock_t lock;
94};
95
96static void gss_destroy_ctx(struct gss_cl_ctx *);
97static struct rpc_pipe_ops gss_upcall_ops;
98
99void
100print_hexl(u32 *p, u_int length, u_int offset)
101{
102 u_int i, j, jm;
103 u8 c, *cp;
104
105 dprintk("RPC: print_hexl: length %d\n",length);
106 dprintk("\n");
107 cp = (u8 *) p;
108
109 for (i = 0; i < length; i += 0x10) {
110 dprintk(" %04x: ", (u_int)(i + offset));
111 jm = length - i;
112 jm = jm > 16 ? 16 : jm;
113
114 for (j = 0; j < jm; j++) {
115 if ((j % 2) == 1)
116 dprintk("%02x ", (u_int)cp[i+j]);
117 else
118 dprintk("%02x", (u_int)cp[i+j]);
119 }
120 for (; j < 16; j++) {
121 if ((j % 2) == 1)
122 dprintk(" ");
123 else
124 dprintk(" ");
125 }
126 dprintk(" ");
127
128 for (j = 0; j < jm; j++) {
129 c = cp[i+j];
130 c = isprint(c) ? c : '.';
131 dprintk("%c", c);
132 }
133 dprintk("\n");
134 }
135}
136
137EXPORT_SYMBOL(print_hexl);
138
139static inline struct gss_cl_ctx *
140gss_get_ctx(struct gss_cl_ctx *ctx)
141{
142 atomic_inc(&ctx->count);
143 return ctx;
144}
145
146static inline void
147gss_put_ctx(struct gss_cl_ctx *ctx)
148{
149 if (atomic_dec_and_test(&ctx->count))
150 gss_destroy_ctx(ctx);
151}
152
153static void
154gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
155{
156 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
157 struct gss_cl_ctx *old;
158 write_lock(&gss_ctx_lock);
159 old = gss_cred->gc_ctx;
160 gss_cred->gc_ctx = ctx;
161 cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
162 write_unlock(&gss_ctx_lock);
163 if (old)
164 gss_put_ctx(old);
165}
166
167static int
168gss_cred_is_uptodate_ctx(struct rpc_cred *cred)
169{
170 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
171 int res = 0;
172
173 read_lock(&gss_ctx_lock);
174 if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx)
175 res = 1;
176 read_unlock(&gss_ctx_lock);
177 return res;
178}
179
180static const void *
181simple_get_bytes(const void *p, const void *end, void *res, size_t len)
182{
183 const void *q = (const void *)((const char *)p + len);
184 if (unlikely(q > end || q < p))
185 return ERR_PTR(-EFAULT);
186 memcpy(res, p, len);
187 return q;
188}
189
190static inline const void *
191simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
192{
193 const void *q;
194 unsigned int len;
195
196 p = simple_get_bytes(p, end, &len, sizeof(len));
197 if (IS_ERR(p))
198 return p;
199 q = (const void *)((const char *)p + len);
200 if (unlikely(q > end || q < p))
201 return ERR_PTR(-EFAULT);
202 dest->data = kmalloc(len, GFP_KERNEL);
203 if (unlikely(dest->data == NULL))
204 return ERR_PTR(-ENOMEM);
205 dest->len = len;
206 memcpy(dest->data, p, len);
207 return q;
208}
209
210static struct gss_cl_ctx *
211gss_cred_get_ctx(struct rpc_cred *cred)
212{
213 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
214 struct gss_cl_ctx *ctx = NULL;
215
216 read_lock(&gss_ctx_lock);
217 if (gss_cred->gc_ctx)
218 ctx = gss_get_ctx(gss_cred->gc_ctx);
219 read_unlock(&gss_ctx_lock);
220 return ctx;
221}
222
223static struct gss_cl_ctx *
224gss_alloc_context(void)
225{
226 struct gss_cl_ctx *ctx;
227
228 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
229 if (ctx != NULL) {
230 memset(ctx, 0, sizeof(*ctx));
231 ctx->gc_proc = RPC_GSS_PROC_DATA;
232 ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */
233 spin_lock_init(&ctx->gc_seq_lock);
234 atomic_set(&ctx->count,1);
235 }
236 return ctx;
237}
238
239#define GSSD_MIN_TIMEOUT (60 * 60)
240static const void *
241gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct gss_api_mech *gm)
242{
243 const void *q;
244 unsigned int seclen;
245 unsigned int timeout;
246 u32 window_size;
247 int ret;
248
249 /* First unsigned int gives the lifetime (in seconds) of the cred */
250 p = simple_get_bytes(p, end, &timeout, sizeof(timeout));
251 if (IS_ERR(p))
252 goto err;
253 if (timeout == 0)
254 timeout = GSSD_MIN_TIMEOUT;
255 ctx->gc_expiry = jiffies + (unsigned long)timeout * HZ * 3 / 4;
256 /* Sequence number window. Determines the maximum number of simultaneous requests */
257 p = simple_get_bytes(p, end, &window_size, sizeof(window_size));
258 if (IS_ERR(p))
259 goto err;
260 ctx->gc_win = window_size;
261 /* gssd signals an error by passing ctx->gc_win = 0: */
262 if (ctx->gc_win == 0) {
263 /* in which case, p points to an error code which we ignore */
264 p = ERR_PTR(-EACCES);
265 goto err;
266 }
267 /* copy the opaque wire context */
268 p = simple_get_netobj(p, end, &ctx->gc_wire_ctx);
269 if (IS_ERR(p))
270 goto err;
271 /* import the opaque security context */
272 p = simple_get_bytes(p, end, &seclen, sizeof(seclen));
273 if (IS_ERR(p))
274 goto err;
275 q = (const void *)((const char *)p + seclen);
276 if (unlikely(q > end || q < p)) {
277 p = ERR_PTR(-EFAULT);
278 goto err;
279 }
280 ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx);
281 if (ret < 0) {
282 p = ERR_PTR(ret);
283 goto err;
284 }
285 return q;
286err:
287 dprintk("RPC: gss_fill_context returning %ld\n", -PTR_ERR(p));
288 return p;
289}
290
291
292struct gss_upcall_msg {
293 atomic_t count;
294 uid_t uid;
295 struct rpc_pipe_msg msg;
296 struct list_head list;
297 struct gss_auth *auth;
298 struct rpc_wait_queue rpc_waitqueue;
299 wait_queue_head_t waitqueue;
300 struct gss_cl_ctx *ctx;
301};
302
303static void
304gss_release_msg(struct gss_upcall_msg *gss_msg)
305{
306 if (!atomic_dec_and_test(&gss_msg->count))
307 return;
308 BUG_ON(!list_empty(&gss_msg->list));
309 if (gss_msg->ctx != NULL)
310 gss_put_ctx(gss_msg->ctx);
311 kfree(gss_msg);
312}
313
314static struct gss_upcall_msg *
315__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid)
316{
317 struct gss_upcall_msg *pos;
318 list_for_each_entry(pos, &gss_auth->upcalls, list) {
319 if (pos->uid != uid)
320 continue;
321 atomic_inc(&pos->count);
322 dprintk("RPC: gss_find_upcall found msg %p\n", pos);
323 return pos;
324 }
325 dprintk("RPC: gss_find_upcall found nothing\n");
326 return NULL;
327}
328
329/* Try to add an upcall to the pipefs queue.
330 * If an upcall owned by our uid already exists, then we return a reference
331 * to that upcall instead of adding the new upcall.
332 */
333static inline struct gss_upcall_msg *
334gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg)
335{
336 struct gss_upcall_msg *old;
337
338 spin_lock(&gss_auth->lock);
339 old = __gss_find_upcall(gss_auth, gss_msg->uid);
340 if (old == NULL) {
341 atomic_inc(&gss_msg->count);
342 list_add(&gss_msg->list, &gss_auth->upcalls);
343 } else
344 gss_msg = old;
345 spin_unlock(&gss_auth->lock);
346 return gss_msg;
347}
348
349static void
350__gss_unhash_msg(struct gss_upcall_msg *gss_msg)
351{
352 if (list_empty(&gss_msg->list))
353 return;
354 list_del_init(&gss_msg->list);
355 rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
356 wake_up_all(&gss_msg->waitqueue);
357 atomic_dec(&gss_msg->count);
358}
359
360static void
361gss_unhash_msg(struct gss_upcall_msg *gss_msg)
362{
363 struct gss_auth *gss_auth = gss_msg->auth;
364
365 spin_lock(&gss_auth->lock);
366 __gss_unhash_msg(gss_msg);
367 spin_unlock(&gss_auth->lock);
368}
369
370static void
371gss_upcall_callback(struct rpc_task *task)
372{
373 struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
374 struct gss_cred, gc_base);
375 struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
376
377 BUG_ON(gss_msg == NULL);
378 if (gss_msg->ctx)
379 gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx));
380 else
381 task->tk_status = gss_msg->msg.errno;
382 spin_lock(&gss_msg->auth->lock);
383 gss_cred->gc_upcall = NULL;
384 rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
385 spin_unlock(&gss_msg->auth->lock);
386 gss_release_msg(gss_msg);
387}
388
389static inline struct gss_upcall_msg *
390gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid)
391{
392 struct gss_upcall_msg *gss_msg;
393
394 gss_msg = kmalloc(sizeof(*gss_msg), GFP_KERNEL);
395 if (gss_msg != NULL) {
396 memset(gss_msg, 0, sizeof(*gss_msg));
397 INIT_LIST_HEAD(&gss_msg->list);
398 rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
399 init_waitqueue_head(&gss_msg->waitqueue);
400 atomic_set(&gss_msg->count, 1);
401 gss_msg->msg.data = &gss_msg->uid;
402 gss_msg->msg.len = sizeof(gss_msg->uid);
403 gss_msg->uid = uid;
404 gss_msg->auth = gss_auth;
405 }
406 return gss_msg;
407}
408
409static struct gss_upcall_msg *
410gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred)
411{
412 struct gss_upcall_msg *gss_new, *gss_msg;
413
414 gss_new = gss_alloc_msg(gss_auth, cred->cr_uid);
415 if (gss_new == NULL)
416 return ERR_PTR(-ENOMEM);
417 gss_msg = gss_add_msg(gss_auth, gss_new);
418 if (gss_msg == gss_new) {
419 int res = rpc_queue_upcall(gss_auth->dentry->d_inode, &gss_new->msg);
420 if (res) {
421 gss_unhash_msg(gss_new);
422 gss_msg = ERR_PTR(res);
423 }
424 } else
425 gss_release_msg(gss_new);
426 return gss_msg;
427}
428
429static inline int
430gss_refresh_upcall(struct rpc_task *task)
431{
432 struct rpc_cred *cred = task->tk_msg.rpc_cred;
433 struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth,
434 struct gss_auth, rpc_auth);
435 struct gss_cred *gss_cred = container_of(cred,
436 struct gss_cred, gc_base);
437 struct gss_upcall_msg *gss_msg;
438 int err = 0;
439
440 dprintk("RPC: %4u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid);
441 gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
442 if (IS_ERR(gss_msg)) {
443 err = PTR_ERR(gss_msg);
444 goto out;
445 }
446 spin_lock(&gss_auth->lock);
447 if (gss_cred->gc_upcall != NULL)
448 rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL);
449 else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
450 task->tk_timeout = 0;
451 gss_cred->gc_upcall = gss_msg;
452 /* gss_upcall_callback will release the reference to gss_upcall_msg */
453 atomic_inc(&gss_msg->count);
454 rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL);
455 } else
456 err = gss_msg->msg.errno;
457 spin_unlock(&gss_auth->lock);
458 gss_release_msg(gss_msg);
459out:
460 dprintk("RPC: %4u gss_refresh_upcall for uid %u result %d\n", task->tk_pid,
461 cred->cr_uid, err);
462 return err;
463}
464
465static inline int
466gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
467{
468 struct rpc_cred *cred = &gss_cred->gc_base;
469 struct gss_upcall_msg *gss_msg;
470 DEFINE_WAIT(wait);
471 int err = 0;
472
473 dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid);
474 gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
475 if (IS_ERR(gss_msg)) {
476 err = PTR_ERR(gss_msg);
477 goto out;
478 }
479 for (;;) {
480 prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
481 spin_lock(&gss_auth->lock);
482 if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
483 spin_unlock(&gss_auth->lock);
484 break;
485 }
486 spin_unlock(&gss_auth->lock);
487 if (signalled()) {
488 err = -ERESTARTSYS;
489 goto out_intr;
490 }
491 schedule();
492 }
493 if (gss_msg->ctx)
494 gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx));
495 else
496 err = gss_msg->msg.errno;
497out_intr:
498 finish_wait(&gss_msg->waitqueue, &wait);
499 gss_release_msg(gss_msg);
500out:
501 dprintk("RPC: gss_create_upcall for uid %u result %d\n", cred->cr_uid, err);
502 return err;
503}
504
505static ssize_t
506gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
507 char __user *dst, size_t buflen)
508{
509 char *data = (char *)msg->data + msg->copied;
510 ssize_t mlen = msg->len;
511 ssize_t left;
512
513 if (mlen > buflen)
514 mlen = buflen;
515 left = copy_to_user(dst, data, mlen);
516 if (left < 0) {
517 msg->errno = left;
518 return left;
519 }
520 mlen -= left;
521 msg->copied += mlen;
522 msg->errno = 0;
523 return mlen;
524}
525
526#define MSG_BUF_MAXSIZE 1024
527
528static ssize_t
529gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
530{
531 const void *p, *end;
532 void *buf;
533 struct rpc_clnt *clnt;
534 struct gss_auth *gss_auth;
535 struct rpc_cred *cred;
536 struct gss_upcall_msg *gss_msg;
537 struct gss_cl_ctx *ctx;
538 uid_t uid;
539 int err = -EFBIG;
540
541 if (mlen > MSG_BUF_MAXSIZE)
542 goto out;
543 err = -ENOMEM;
544 buf = kmalloc(mlen, GFP_KERNEL);
545 if (!buf)
546 goto out;
547
548 clnt = RPC_I(filp->f_dentry->d_inode)->private;
549 err = -EFAULT;
550 if (copy_from_user(buf, src, mlen))
551 goto err;
552
553 end = (const void *)((char *)buf + mlen);
554 p = simple_get_bytes(buf, end, &uid, sizeof(uid));
555 if (IS_ERR(p)) {
556 err = PTR_ERR(p);
557 goto err;
558 }
559
560 err = -ENOMEM;
561 ctx = gss_alloc_context();
562 if (ctx == NULL)
563 goto err;
564 err = 0;
565 gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth);
566 p = gss_fill_context(p, end, ctx, gss_auth->mech);
567 if (IS_ERR(p)) {
568 err = PTR_ERR(p);
569 if (err != -EACCES)
570 goto err_put_ctx;
571 }
572 spin_lock(&gss_auth->lock);
573 gss_msg = __gss_find_upcall(gss_auth, uid);
574 if (gss_msg) {
575 if (err == 0 && gss_msg->ctx == NULL)
576 gss_msg->ctx = gss_get_ctx(ctx);
577 gss_msg->msg.errno = err;
578 __gss_unhash_msg(gss_msg);
579 spin_unlock(&gss_auth->lock);
580 gss_release_msg(gss_msg);
581 } else {
582 struct auth_cred acred = { .uid = uid };
583 spin_unlock(&gss_auth->lock);
584 cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, 0);
585 if (IS_ERR(cred)) {
586 err = PTR_ERR(cred);
587 goto err_put_ctx;
588 }
589 gss_cred_set_ctx(cred, gss_get_ctx(ctx));
590 }
591 gss_put_ctx(ctx);
592 kfree(buf);
593 dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen);
594 return mlen;
595err_put_ctx:
596 gss_put_ctx(ctx);
597err:
598 kfree(buf);
599out:
600 dprintk("RPC: gss_pipe_downcall returning %d\n", err);
601 return err;
602}
603
604static void
605gss_pipe_release(struct inode *inode)
606{
607 struct rpc_inode *rpci = RPC_I(inode);
608 struct rpc_clnt *clnt;
609 struct rpc_auth *auth;
610 struct gss_auth *gss_auth;
611
612 clnt = rpci->private;
613 auth = clnt->cl_auth;
614 gss_auth = container_of(auth, struct gss_auth, rpc_auth);
615 spin_lock(&gss_auth->lock);
616 while (!list_empty(&gss_auth->upcalls)) {
617 struct gss_upcall_msg *gss_msg;
618
619 gss_msg = list_entry(gss_auth->upcalls.next,
620 struct gss_upcall_msg, list);
621 gss_msg->msg.errno = -EPIPE;
622 atomic_inc(&gss_msg->count);
623 __gss_unhash_msg(gss_msg);
624 spin_unlock(&gss_auth->lock);
625 gss_release_msg(gss_msg);
626 spin_lock(&gss_auth->lock);
627 }
628 spin_unlock(&gss_auth->lock);
629}
630
631static void
632gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
633{
634 struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
635 static unsigned long ratelimit;
636
637 if (msg->errno < 0) {
638 dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n",
639 gss_msg);
640 atomic_inc(&gss_msg->count);
641 gss_unhash_msg(gss_msg);
642 if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) {
643 unsigned long now = jiffies;
644 if (time_after(now, ratelimit)) {
645 printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
646 "Please check user daemon is running!\n");
647 ratelimit = now + 15*HZ;
648 }
649 }
650 gss_release_msg(gss_msg);
651 }
652}
653
654/*
655 * NOTE: we have the opportunity to use different
656 * parameters based on the input flavor (which must be a pseudoflavor)
657 */
658static struct rpc_auth *
659gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
660{
661 struct gss_auth *gss_auth;
662 struct rpc_auth * auth;
663
664 dprintk("RPC: creating GSS authenticator for client %p\n",clnt);
665
666 if (!try_module_get(THIS_MODULE))
667 return NULL;
668 if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
669 goto out_dec;
670 gss_auth->client = clnt;
671 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
672 if (!gss_auth->mech) {
673 printk(KERN_WARNING "%s: Pseudoflavor %d not found!",
674 __FUNCTION__, flavor);
675 goto err_free;
676 }
677 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
678 /* FIXME: Will go away once privacy support is merged in */
679 if (gss_auth->service == RPC_GSS_SVC_PRIVACY)
680 gss_auth->service = RPC_GSS_SVC_INTEGRITY;
681 INIT_LIST_HEAD(&gss_auth->upcalls);
682 spin_lock_init(&gss_auth->lock);
683 auth = &gss_auth->rpc_auth;
684 auth->au_cslack = GSS_CRED_SLACK >> 2;
685 auth->au_rslack = GSS_VERF_SLACK >> 2;
686 auth->au_ops = &authgss_ops;
687 auth->au_flavor = flavor;
688 atomic_set(&auth->au_count, 1);
689
690 if (rpcauth_init_credcache(auth, GSS_CRED_EXPIRE) < 0)
691 goto err_put_mech;
692
693 snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
694 clnt->cl_pathname,
695 gss_auth->mech->gm_name);
696 gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
697 if (IS_ERR(gss_auth->dentry))
698 goto err_put_mech;
699
700 return auth;
701err_put_mech:
702 gss_mech_put(gss_auth->mech);
703err_free:
704 kfree(gss_auth);
705out_dec:
706 module_put(THIS_MODULE);
707 return NULL;
708}
709
710static void
711gss_destroy(struct rpc_auth *auth)
712{
713 struct gss_auth *gss_auth;
714
715 dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
716 auth, auth->au_flavor);
717
718 gss_auth = container_of(auth, struct gss_auth, rpc_auth);
719 rpc_unlink(gss_auth->path);
720 gss_mech_put(gss_auth->mech);
721
722 rpcauth_free_credcache(auth);
723 kfree(gss_auth);
724 module_put(THIS_MODULE);
725}
726
727/* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure
728 * to create a new cred or context, so they check that things have been
729 * allocated before freeing them. */
730static void
731gss_destroy_ctx(struct gss_cl_ctx *ctx)
732{
733 dprintk("RPC: gss_destroy_ctx\n");
734
735 if (ctx->gc_gss_ctx)
736 gss_delete_sec_context(&ctx->gc_gss_ctx);
737
738 kfree(ctx->gc_wire_ctx.data);
739 kfree(ctx);
740}
741
742static void
743gss_destroy_cred(struct rpc_cred *rc)
744{
745 struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base);
746
747 dprintk("RPC: gss_destroy_cred \n");
748
749 if (cred->gc_ctx)
750 gss_put_ctx(cred->gc_ctx);
751 kfree(cred);
752}
753
754/*
755 * Lookup RPCSEC_GSS cred for the current process
756 */
757static struct rpc_cred *
758gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
759{
760 return rpcauth_lookup_credcache(auth, acred, taskflags);
761}
762
763static struct rpc_cred *
764gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
765{
766 struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth);
767 struct gss_cred *cred = NULL;
768 int err = -ENOMEM;
769
770 dprintk("RPC: gss_create_cred for uid %d, flavor %d\n",
771 acred->uid, auth->au_flavor);
772
773 if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
774 goto out_err;
775
776 memset(cred, 0, sizeof(*cred));
777 atomic_set(&cred->gc_count, 1);
778 cred->gc_uid = acred->uid;
779 /*
780 * Note: in order to force a call to call_refresh(), we deliberately
781 * fail to flag the credential as RPCAUTH_CRED_UPTODATE.
782 */
783 cred->gc_flags = 0;
784 cred->gc_base.cr_ops = &gss_credops;
785 cred->gc_service = gss_auth->service;
786 err = gss_create_upcall(gss_auth, cred);
787 if (err < 0)
788 goto out_err;
789
790 return &cred->gc_base;
791
792out_err:
793 dprintk("RPC: gss_create_cred failed with error %d\n", err);
794 if (cred) gss_destroy_cred(&cred->gc_base);
795 return ERR_PTR(err);
796}
797
798static int
799gss_match(struct auth_cred *acred, struct rpc_cred *rc, int taskflags)
800{
801 struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
802
803 /* Don't match with creds that have expired. */
804 if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
805 return 0;
806 return (rc->cr_uid == acred->uid);
807}
808
809/*
810* Marshal credentials.
811* Maybe we should keep a cached credential for performance reasons.
812*/
813static u32 *
814gss_marshal(struct rpc_task *task, u32 *p)
815{
816 struct rpc_cred *cred = task->tk_msg.rpc_cred;
817 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
818 gc_base);
819 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
820 u32 *cred_len;
821 struct rpc_rqst *req = task->tk_rqstp;
822 u32 maj_stat = 0;
823 struct xdr_netobj mic;
824 struct kvec iov;
825 struct xdr_buf verf_buf;
826
827 dprintk("RPC: %4u gss_marshal\n", task->tk_pid);
828
829 *p++ = htonl(RPC_AUTH_GSS);
830 cred_len = p++;
831
832 spin_lock(&ctx->gc_seq_lock);
833 req->rq_seqno = ctx->gc_seq++;
834 spin_unlock(&ctx->gc_seq_lock);
835
836 *p++ = htonl((u32) RPC_GSS_VERSION);
837 *p++ = htonl((u32) ctx->gc_proc);
838 *p++ = htonl((u32) req->rq_seqno);
839 *p++ = htonl((u32) gss_cred->gc_service);
840 p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
841 *cred_len = htonl((p - (cred_len + 1)) << 2);
842
843 /* We compute the checksum for the verifier over the xdr-encoded bytes
844 * starting with the xid and ending at the end of the credential: */
845 iov.iov_base = req->rq_snd_buf.head[0].iov_base;
846 if (task->tk_client->cl_xprt->stream)
847 /* See clnt.c:call_header() */
848 iov.iov_base += 4;
849 iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
850 xdr_buf_from_iov(&iov, &verf_buf);
851
852 /* set verifier flavor*/
853 *p++ = htonl(RPC_AUTH_GSS);
854
855 mic.data = (u8 *)(p + 1);
856 maj_stat = gss_get_mic(ctx->gc_gss_ctx,
857 GSS_C_QOP_DEFAULT,
858 &verf_buf, &mic);
859 if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
860 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
861 } else if (maj_stat != 0) {
862 printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
863 goto out_put_ctx;
864 }
865 p = xdr_encode_opaque(p, NULL, mic.len);
866 gss_put_ctx(ctx);
867 return p;
868out_put_ctx:
869 gss_put_ctx(ctx);
870 return NULL;
871}
872
873/*
874* Refresh credentials. XXX - finish
875*/
876static int
877gss_refresh(struct rpc_task *task)
878{
879
880 if (!gss_cred_is_uptodate_ctx(task->tk_msg.rpc_cred))
881 return gss_refresh_upcall(task);
882 return 0;
883}
884
885static u32 *
886gss_validate(struct rpc_task *task, u32 *p)
887{
888 struct rpc_cred *cred = task->tk_msg.rpc_cred;
889 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
890 gc_base);
891 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
892 u32 seq, qop_state;
893 struct kvec iov;
894 struct xdr_buf verf_buf;
895 struct xdr_netobj mic;
896 u32 flav,len;
897 u32 maj_stat;
898
899 dprintk("RPC: %4u gss_validate\n", task->tk_pid);
900
901 flav = ntohl(*p++);
902 if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
903 goto out_bad;
904 if (flav != RPC_AUTH_GSS)
905 goto out_bad;
906 seq = htonl(task->tk_rqstp->rq_seqno);
907 iov.iov_base = &seq;
908 iov.iov_len = sizeof(seq);
909 xdr_buf_from_iov(&iov, &verf_buf);
910 mic.data = (u8 *)p;
911 mic.len = len;
912
913 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
914 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
915 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
916 if (maj_stat)
917 goto out_bad;
918 switch (gss_cred->gc_service) {
919 case RPC_GSS_SVC_NONE:
920 /* verifier data, flavor, length: */
921 task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2;
922 break;
923 case RPC_GSS_SVC_INTEGRITY:
924 /* verifier data, flavor, length, length, sequence number: */
925 task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4;
926 break;
927 case RPC_GSS_SVC_PRIVACY:
928 goto out_bad;
929 }
930 gss_put_ctx(ctx);
931 dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n",
932 task->tk_pid);
933 return p + XDR_QUADLEN(len);
934out_bad:
935 gss_put_ctx(ctx);
936 dprintk("RPC: %4u gss_validate failed.\n", task->tk_pid);
937 return NULL;
938}
939
940static inline int
941gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
942 kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
943{
944 struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
945 struct xdr_buf integ_buf;
946 u32 *integ_len = NULL;
947 struct xdr_netobj mic;
948 u32 offset, *q;
949 struct kvec *iov;
950 u32 maj_stat = 0;
951 int status = -EIO;
952
953 integ_len = p++;
954 offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
955 *p++ = htonl(rqstp->rq_seqno);
956
957 status = encode(rqstp, p, obj);
958 if (status)
959 return status;
960
961 if (xdr_buf_subsegment(snd_buf, &integ_buf,
962 offset, snd_buf->len - offset))
963 return status;
964 *integ_len = htonl(integ_buf.len);
965
966 /* guess whether we're in the head or the tail: */
967 if (snd_buf->page_len || snd_buf->tail[0].iov_len)
968 iov = snd_buf->tail;
969 else
970 iov = snd_buf->head;
971 p = iov->iov_base + iov->iov_len;
972 mic.data = (u8 *)(p + 1);
973
974 maj_stat = gss_get_mic(ctx->gc_gss_ctx,
975 GSS_C_QOP_DEFAULT, &integ_buf, &mic);
976 status = -EIO; /* XXX? */
977 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
978 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
979 else if (maj_stat)
980 return status;
981 q = xdr_encode_opaque(p, NULL, mic.len);
982
983 offset = (u8 *)q - (u8 *)p;
984 iov->iov_len += offset;
985 snd_buf->len += offset;
986 return 0;
987}
988
989static int
990gss_wrap_req(struct rpc_task *task,
991 kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
992{
993 struct rpc_cred *cred = task->tk_msg.rpc_cred;
994 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
995 gc_base);
996 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
997 int status = -EIO;
998
999 dprintk("RPC: %4u gss_wrap_req\n", task->tk_pid);
1000 if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
1001 /* The spec seems a little ambiguous here, but I think that not
1002 * wrapping context destruction requests makes the most sense.
1003 */
1004 status = encode(rqstp, p, obj);
1005 goto out;
1006 }
1007 switch (gss_cred->gc_service) {
1008 case RPC_GSS_SVC_NONE:
1009 status = encode(rqstp, p, obj);
1010 break;
1011 case RPC_GSS_SVC_INTEGRITY:
1012 status = gss_wrap_req_integ(cred, ctx, encode,
1013 rqstp, p, obj);
1014 break;
1015 case RPC_GSS_SVC_PRIVACY:
1016 break;
1017 }
1018out:
1019 gss_put_ctx(ctx);
1020 dprintk("RPC: %4u gss_wrap_req returning %d\n", task->tk_pid, status);
1021 return status;
1022}
1023
1024static inline int
1025gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
1026 struct rpc_rqst *rqstp, u32 **p)
1027{
1028 struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
1029 struct xdr_buf integ_buf;
1030 struct xdr_netobj mic;
1031 u32 data_offset, mic_offset;
1032 u32 integ_len;
1033 u32 maj_stat;
1034 int status = -EIO;
1035
1036 integ_len = ntohl(*(*p)++);
1037 if (integ_len & 3)
1038 return status;
1039 data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
1040 mic_offset = integ_len + data_offset;
1041 if (mic_offset > rcv_buf->len)
1042 return status;
1043 if (ntohl(*(*p)++) != rqstp->rq_seqno)
1044 return status;
1045
1046 if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
1047 mic_offset - data_offset))
1048 return status;
1049
1050 if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
1051 return status;
1052
1053 maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
1054 &mic, NULL);
1055 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
1056 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
1057 if (maj_stat != GSS_S_COMPLETE)
1058 return status;
1059 return 0;
1060}
1061
1062static int
1063gss_unwrap_resp(struct rpc_task *task,
1064 kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
1065{
1066 struct rpc_cred *cred = task->tk_msg.rpc_cred;
1067 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
1068 gc_base);
1069 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
1070 int status = -EIO;
1071
1072 if (ctx->gc_proc != RPC_GSS_PROC_DATA)
1073 goto out_decode;
1074 switch (gss_cred->gc_service) {
1075 case RPC_GSS_SVC_NONE:
1076 break;
1077 case RPC_GSS_SVC_INTEGRITY:
1078 status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
1079 if (status)
1080 goto out;
1081 break;
1082 case RPC_GSS_SVC_PRIVACY:
1083 break;
1084 }
1085out_decode:
1086 status = decode(rqstp, p, obj);
1087out:
1088 gss_put_ctx(ctx);
1089 dprintk("RPC: %4u gss_unwrap_resp returning %d\n", task->tk_pid,
1090 status);
1091 return status;
1092}
1093
1094static struct rpc_authops authgss_ops = {
1095 .owner = THIS_MODULE,
1096 .au_flavor = RPC_AUTH_GSS,
1097#ifdef RPC_DEBUG
1098 .au_name = "RPCSEC_GSS",
1099#endif
1100 .create = gss_create,
1101 .destroy = gss_destroy,
1102 .lookup_cred = gss_lookup_cred,
1103 .crcreate = gss_create_cred
1104};
1105
1106static struct rpc_credops gss_credops = {
1107 .cr_name = "AUTH_GSS",
1108 .crdestroy = gss_destroy_cred,
1109 .crmatch = gss_match,
1110 .crmarshal = gss_marshal,
1111 .crrefresh = gss_refresh,
1112 .crvalidate = gss_validate,
1113 .crwrap_req = gss_wrap_req,
1114 .crunwrap_resp = gss_unwrap_resp,
1115};
1116
1117static struct rpc_pipe_ops gss_upcall_ops = {
1118 .upcall = gss_pipe_upcall,
1119 .downcall = gss_pipe_downcall,
1120 .destroy_msg = gss_pipe_destroy_msg,
1121 .release_pipe = gss_pipe_release,
1122};
1123
1124/*
1125 * Initialize RPCSEC_GSS module
1126 */
1127static int __init init_rpcsec_gss(void)
1128{
1129 int err = 0;
1130
1131 err = rpcauth_register(&authgss_ops);
1132 if (err)
1133 goto out;
1134 err = gss_svc_init();
1135 if (err)
1136 goto out_unregister;
1137 return 0;
1138out_unregister:
1139 rpcauth_unregister(&authgss_ops);
1140out:
1141 return err;
1142}
1143
1144static void __exit exit_rpcsec_gss(void)
1145{
1146 gss_svc_shutdown();
1147 rpcauth_unregister(&authgss_ops);
1148}
1149
1150MODULE_LICENSE("GPL");
1151module_init(init_rpcsec_gss)
1152module_exit(exit_rpcsec_gss)
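
The pipefs upcall above is only half of the protocol; the other half is the reply that gssd writes back down the pipe, which gss_pipe_downcall() and gss_fill_context() parse field by field. The sketch below restates that layout from the producer's side purely for illustration: the helper and its name are hypothetical, every fixed-size field is in host byte order, and each variable-length blob is preceded by a 4-byte length, exactly as simple_get_bytes()/simple_get_netobj() expect.

/* Hypothetical builder for the downcall message gss_pipe_downcall()
 * consumes; shown only to document the field order parsed above. */
#include <linux/types.h>
#include <linux/string.h>

static char *sketch_gssd_reply(char *p, uid_t uid, u32 lifetime, u32 window,
			       const void *wire_ctx, u32 wire_len,
			       const void *sec_ctx, u32 sec_len)
{
	memcpy(p, &uid, sizeof(uid)); p += sizeof(uid);			/* which cred to update */
	memcpy(p, &lifetime, sizeof(lifetime)); p += sizeof(lifetime);	/* seconds; 0 means GSSD_MIN_TIMEOUT */
	memcpy(p, &window, sizeof(window)); p += sizeof(window);	/* seq. window; 0 means gssd reports an error */
	memcpy(p, &wire_len, sizeof(wire_len)); p += sizeof(wire_len);
	memcpy(p, wire_ctx, wire_len); p += wire_len;			/* becomes ctx->gc_wire_ctx */
	memcpy(p, &sec_len, sizeof(sec_len)); p += sizeof(sec_len);
	memcpy(p, sec_ctx, sec_len); p += sec_len;			/* handed to gss_import_sec_context() */
	return p;	/* the whole message must fit in MSG_BUF_MAXSIZE (1024) bytes */
}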
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
new file mode 100644
index 000000000000..826df44e7fca
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -0,0 +1,235 @@
1/*
2 * linux/net/sunrpc/gss_generic_token.c
3 *
4 * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c
5 *
6 * Copyright (c) 2000 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Andy Adamson <andros@umich.edu>
10 */
11
12/*
13 * Copyright 1993 by OpenVision Technologies, Inc.
14 *
15 * Permission to use, copy, modify, distribute, and sell this software
16 * and its documentation for any purpose is hereby granted without fee,
17 * provided that the above copyright notice appears in all copies and
18 * that both that copyright notice and this permission notice appear in
19 * supporting documentation, and that the name of OpenVision not be used
20 * in advertising or publicity pertaining to distribution of the software
21 * without specific, written prior permission. OpenVision makes no
22 * representations about the suitability of this software for any
23 * purpose. It is provided "as is" without express or implied warranty.
24 *
25 * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
26 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
27 * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
28 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
29 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
30 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
31 * PERFORMANCE OF THIS SOFTWARE.
32 */
33
34#include <linux/types.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37#include <linux/string.h>
38#include <linux/sunrpc/sched.h>
39#include <linux/sunrpc/gss_asn1.h>
40
41
42#ifdef RPC_DEBUG
43# define RPCDBG_FACILITY RPCDBG_AUTH
44#endif
45
46
47/* TWRITE_STR from gssapiP_generic.h */
48#define TWRITE_STR(ptr, str, len) \
49 memcpy((ptr), (char *) (str), (len)); \
50 (ptr) += (len);
51
52/* XXXX this code currently makes the assumption that a mech oid will
53 never be longer than 127 bytes. This assumption is not inherent in
54 the interfaces, so the code can be fixed if the OSI namespace
55 balloons unexpectedly. */
56
57/* Each token looks like this:
58
590x60 tag for APPLICATION 0, SEQUENCE
60 (constructed, definite-length)
61 <length> possible multiple bytes, need to parse/generate
62 0x06 tag for OBJECT IDENTIFIER
63 <moid_length> compile-time constant string (assume 1 byte)
64 <moid_bytes> compile-time constant string
65 <inner_bytes> the ANY containing the application token
66 bytes 0,1 are the token type
67 bytes 2,n are the token data
68
69For the purposes of this abstraction, the token "header" consists of
70the sequence tag and length octets, the mech OID DER encoding, and the
71first two inner bytes, which indicate the token type. The token
72"body" consists of everything else.
73
74*/
75
76static int
77der_length_size( int length)
78{
79 if (length < (1<<7))
80 return(1);
81 else if (length < (1<<8))
82 return(2);
83#if (SIZEOF_INT == 2)
84 else
85 return(3);
86#else
87 else if (length < (1<<16))
88 return(3);
89 else if (length < (1<<24))
90 return(4);
91 else
92 return(5);
93#endif
94}
95
96static void
97der_write_length(unsigned char **buf, int length)
98{
99 if (length < (1<<7)) {
100 *(*buf)++ = (unsigned char) length;
101 } else {
102 *(*buf)++ = (unsigned char) (der_length_size(length)+127);
103#if (SIZEOF_INT > 2)
104 if (length >= (1<<24))
105 *(*buf)++ = (unsigned char) (length>>24);
106 if (length >= (1<<16))
107 *(*buf)++ = (unsigned char) ((length>>16)&0xff);
108#endif
109 if (length >= (1<<8))
110 *(*buf)++ = (unsigned char) ((length>>8)&0xff);
111 *(*buf)++ = (unsigned char) (length&0xff);
112 }
113}
114
115/* returns decoded length, or < 0 on failure. Advances buf and
116 decrements bufsize */
117
118static int
119der_read_length(unsigned char **buf, int *bufsize)
120{
121 unsigned char sf;
122 int ret;
123
124 if (*bufsize < 1)
125 return(-1);
126 sf = *(*buf)++;
127 (*bufsize)--;
128 if (sf & 0x80) {
129 if ((sf &= 0x7f) > ((*bufsize)-1))
130 return(-1);
131 if (sf > SIZEOF_INT)
132 return (-1);
133 ret = 0;
134 for (; sf; sf--) {
135 ret = (ret<<8) + (*(*buf)++);
136 (*bufsize)--;
137 }
138 } else {
139 ret = sf;
140 }
141
142 return(ret);
143}
144
145/* returns the length of a token, given the mech oid and the body size */
146
147int
148g_token_size(struct xdr_netobj *mech, unsigned int body_size)
149{
150 /* set body_size to sequence contents size */
151 body_size += 4 + (int) mech->len; /* NEED overflow check */
152 return(1 + der_length_size(body_size) + body_size);
153}
154
155EXPORT_SYMBOL(g_token_size);
156
157/* fills in a buffer with the token header. The buffer is assumed to
158 be the right size. buf is advanced past the token header */
159
160void
161g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf)
162{
163 *(*buf)++ = 0x60;
164 der_write_length(buf, 4 + mech->len + body_size);
165 *(*buf)++ = 0x06;
166 *(*buf)++ = (unsigned char) mech->len;
167 TWRITE_STR(*buf, mech->data, ((int) mech->len));
168}
169
170EXPORT_SYMBOL(g_make_token_header);
171
172/*
173 * Given a buffer containing a token, reads and verifies the token,
174 * leaving buf advanced past the token header, and setting body_size
175 * to the number of remaining bytes. Returns 0 on success,
176 * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the
177 * mechanism in the token does not match the mech argument. buf and
178 * *body_size are left unmodified on error.
179 */
180u32
181g_verify_token_header(struct xdr_netobj *mech, int *body_size,
182 unsigned char **buf_in, int toksize)
183{
184 unsigned char *buf = *buf_in;
185 int seqsize;
186 struct xdr_netobj toid;
187 int ret = 0;
188
189 if ((toksize-=1) < 0)
190 return(G_BAD_TOK_HEADER);
191 if (*buf++ != 0x60)
192 return(G_BAD_TOK_HEADER);
193
194 if ((seqsize = der_read_length(&buf, &toksize)) < 0)
195 return(G_BAD_TOK_HEADER);
196
197 if (seqsize != toksize)
198 return(G_BAD_TOK_HEADER);
199
200 if ((toksize-=1) < 0)
201 return(G_BAD_TOK_HEADER);
202 if (*buf++ != 0x06)
203 return(G_BAD_TOK_HEADER);
204
205 if ((toksize-=1) < 0)
206 return(G_BAD_TOK_HEADER);
207 toid.len = *buf++;
208
209 if ((toksize-=toid.len) < 0)
210 return(G_BAD_TOK_HEADER);
211 toid.data = buf;
212 buf+=toid.len;
213
214 if (! g_OID_equal(&toid, mech))
215 ret = G_WRONG_MECH;
216
217 /* G_WRONG_MECH is not returned immediately because it's more important
218 to return G_BAD_TOK_HEADER if the token header is in fact bad */
219
220 if ((toksize-=2) < 0)
221 return(G_BAD_TOK_HEADER);
222
223 if (ret)
224 return(ret);
225
226 if (!ret) {
227 *buf_in = buf;
228 *body_size = toksize;
229 }
230
231 return(ret);
232}
233
234EXPORT_SYMBOL(g_verify_token_header);
235
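As a concrete illustration of what g_make_token_header() above emits, take the Kerberos 5 mechanism OID 1.2.840.113554.1.2.2 (its 9-byte DER encoding is quoted below; the OID constant itself belongs to the krb5 mech code, not to this file) and a 100-byte token body:

/* Illustrative only, not part of this patch. */
static void example_token_header(void)
{
	static u8 krb5_oid_der[9] = "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02";
	struct xdr_netobj krb5_oid = { .len = 9, .data = krb5_oid_der };
	unsigned char hdr[16], *p = hdr;

	g_make_token_header(&krb5_oid, 100, &p);
	/*
	 * hdr now holds 60 71 06 09 2a 86 48 86 f7 12 01 02 02 and
	 * p - hdr == 13: the 0x60 tag, the single-byte length 0x71
	 * (4 + mech->len + body_size = 113), then the OID TLV.  The
	 * caller appends the two token-type bytes and the body, so
	 * g_token_size(&krb5_oid, 100) == 1 + 1 + 113 == 115 bytes.
	 * Lengths of 128 or more take der_write_length()'s long form,
	 * e.g. 300 is emitted as 82 01 2c.
	 */
}
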
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
new file mode 100644
index 000000000000..24c21f2a33a7
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -0,0 +1,209 @@
1/*
2 * linux/net/sunrpc/gss_krb5_crypto.c
3 *
4 * Copyright (c) 2000 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 * Bruce Fields <bfields@umich.edu>
9 */
10
11/*
12 * Copyright (C) 1998 by the FundsXpress, INC.
13 *
14 * All rights reserved.
15 *
16 * Export of this software from the United States of America may require
17 * a specific license from the United States Government. It is the
18 * responsibility of any person or organization contemplating export to
19 * obtain such a license before exporting.
20 *
21 * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
22 * distribute this software and its documentation for any purpose and
23 * without fee is hereby granted, provided that the above copyright
24 * notice appear in all copies and that both that copyright notice and
25 * this permission notice appear in supporting documentation, and that
26 * the name of FundsXpress. not be used in advertising or publicity pertaining
27 * to distribution of the software without specific, written prior
28 * permission. FundsXpress makes no representations about the suitability of
29 * this software for any purpose. It is provided "as is" without express
30 * or implied warranty.
31 *
32 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
33 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
34 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
35 */
36
37#include <linux/types.h>
38#include <linux/mm.h>
39#include <linux/slab.h>
40#include <asm/scatterlist.h>
41#include <linux/crypto.h>
42#include <linux/highmem.h>
43#include <linux/pagemap.h>
44#include <linux/sunrpc/gss_krb5.h>
45
46#ifdef RPC_DEBUG
47# define RPCDBG_FACILITY RPCDBG_AUTH
48#endif
49
50u32
51krb5_encrypt(
52 struct crypto_tfm *tfm,
53 void * iv,
54 void * in,
55 void * out,
56 int length)
57{
58 u32 ret = -EINVAL;
59 struct scatterlist sg[1];
60 u8 local_iv[16] = {0};
61
62 dprintk("RPC: krb5_encrypt: input data:\n");
63 print_hexl((u32 *)in, length, 0);
64
65 if (length % crypto_tfm_alg_blocksize(tfm) != 0)
66 goto out;
67
68 if (crypto_tfm_alg_ivsize(tfm) > 16) {
69 dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n",
70 crypto_tfm_alg_ivsize(tfm));
71 goto out;
72 }
73
74 if (iv)
75 memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm));
76
77 memcpy(out, in, length);
78 sg[0].page = virt_to_page(out);
79 sg[0].offset = offset_in_page(out);
80 sg[0].length = length;
81
82 ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv);
83
84 dprintk("RPC: krb5_encrypt: output data:\n");
85 print_hexl((u32 *)out, length, 0);
86out:
87 dprintk("RPC: krb5_encrypt returns %d\n",ret);
88 return(ret);
89}
90
91EXPORT_SYMBOL(krb5_encrypt);
92
93u32
94krb5_decrypt(
95 struct crypto_tfm *tfm,
96 void * iv,
97 void * in,
98 void * out,
99 int length)
100{
101 u32 ret = -EINVAL;
102 struct scatterlist sg[1];
103 u8 local_iv[16] = {0};
104
105 dprintk("RPC: krb5_decrypt: input data:\n");
106 print_hexl((u32 *)in, length, 0);
107
108 if (length % crypto_tfm_alg_blocksize(tfm) != 0)
109 goto out;
110
111 if (crypto_tfm_alg_ivsize(tfm) > 16) {
112 dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
113 crypto_tfm_alg_ivsize(tfm));
114 goto out;
115 }
116 if (iv)
117 memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm));
118
119 memcpy(out, in, length);
120 sg[0].page = virt_to_page(out);
121 sg[0].offset = offset_in_page(out);
122 sg[0].length = length;
123
124 ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv);
125
126 dprintk("RPC: krb5_decrypt: output_data:\n");
127 print_hexl((u32 *)out, length, 0);
128out:
129 dprintk("RPC: gss_k5decrypt returns %d\n",ret);
130 return(ret);
131}
132
133EXPORT_SYMBOL(krb5_decrypt);
134
135static void
136buf_to_sg(struct scatterlist *sg, char *ptr, int len) {
137 sg->page = virt_to_page(ptr);
138 sg->offset = offset_in_page(ptr);
139 sg->length = len;
140}
141
142/* checksum the plaintext data and hdrlen bytes of the token header */
143s32
144make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
145 struct xdr_netobj *cksum)
146{
147 char *cksumname;
148 struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */
149 struct scatterlist sg[1];
150 u32 code = GSS_S_FAILURE;
151 int len, thislen, offset;
152 int i;
153
154 switch (cksumtype) {
155 case CKSUMTYPE_RSA_MD5:
156 cksumname = "md5";
157 break;
158 default:
159 dprintk("RPC: krb5_make_checksum:"
160 " unsupported checksum %d\n", cksumtype);
161 goto out;
162 }
163 if (!(tfm = crypto_alloc_tfm(cksumname, 0)))
164 goto out;
165 cksum->len = crypto_tfm_alg_digestsize(tfm);
166 if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL)
167 goto out;
168
169 crypto_digest_init(tfm);
170 buf_to_sg(sg, header, hdrlen);
171 crypto_digest_update(tfm, sg, 1);
172 if (body->head[0].iov_len) {
173 buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len);
174 crypto_digest_update(tfm, sg, 1);
175 }
176
177 len = body->page_len;
178 if (len != 0) {
179 offset = body->page_base & (PAGE_CACHE_SIZE - 1);
180 i = body->page_base >> PAGE_CACHE_SHIFT;
181 thislen = PAGE_CACHE_SIZE - offset;
182 do {
183 if (thislen > len)
184 thislen = len;
185 sg->page = body->pages[i];
186 sg->offset = offset;
187 sg->length = thislen;
188 kmap(sg->page); /* XXX kmap_atomic? */
189 crypto_digest_update(tfm, sg, 1);
190 kunmap(sg->page);
191 len -= thislen;
192 i++;
193 offset = 0;
194 thislen = PAGE_CACHE_SIZE;
195 } while(len != 0);
196 }
197 if (body->tail[0].iov_len) {
198 buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len);
199 crypto_digest_update(tfm, sg, 1);
200 }
201 crypto_digest_final(tfm, cksum->data);
202 code = 0;
203out:
204 if (tfm)
205 crypto_free_tfm(tfm);
206 return code;
207}
208
209EXPORT_SYMBOL(make_checksum);
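
make_checksum() above has to digest an RPC message that is scattered across an xdr_buf: the token header first, then the head iovec, then each page fragment, then the tail iovec. A rough user-space model of that traversal is sketched below; digest_update() just counts bytes where the kernel calls crypto_digest_update(), and MODEL_PAGE_SIZE and the model_* names are made-up stand-ins for illustration, not kernel API.

#include <stddef.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE 4096            /* stand-in for PAGE_CACHE_SIZE */

struct model_kvec { const char *iov_base; size_t iov_len; };

struct model_xdr_buf {
        struct model_kvec head;         /* first iovec */
        const char **pages;             /* page-sized buffers */
        size_t page_base, page_len;     /* offset into and length of page data */
        struct model_kvec tail;         /* last iovec */
};

/* stand-in for crypto_digest_update(): just count the bytes fed in */
static void digest_update(size_t *total, const char *data, size_t len)
{
        (void)data;
        *total += len;
}

static size_t walk_xdr_buf(const char *hdr, size_t hdrlen,
                           const struct model_xdr_buf *body)
{
        size_t total = 0;
        size_t len = body->page_len;
        size_t offset = body->page_base & (MODEL_PAGE_SIZE - 1);
        size_t i = body->page_base / MODEL_PAGE_SIZE;
        size_t thislen = MODEL_PAGE_SIZE - offset;

        digest_update(&total, hdr, hdrlen);             /* token header */
        if (body->head.iov_len)
                digest_update(&total, body->head.iov_base, body->head.iov_len);
        while (len != 0) {                              /* page fragments */
                if (thislen > len)
                        thislen = len;
                digest_update(&total, body->pages[i] + offset, thislen);
                len -= thislen;
                i++;
                offset = 0;
                thislen = MODEL_PAGE_SIZE;
        }
        if (body->tail.iov_len)
                digest_update(&total, body->tail.iov_base, body->tail.iov_len);
        return total;
}

int main(void)
{
        static const char page0[MODEL_PAGE_SIZE], page1[MODEL_PAGE_SIZE];
        const char *pages[] = { page0, page1 };
        struct model_xdr_buf buf = {
                .head = { "rpc-head", 8 },
                .pages = pages, .page_base = 100, .page_len = 5000,
                .tail = { "tail", 4 },
        };

        printf("digested %zu bytes\n", walk_xdr_buf("hdr", 3, &buf));
        return 0;
}
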
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
new file mode 100644
index 000000000000..cf726510df8e
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -0,0 +1,275 @@
1/*
2 * linux/net/sunrpc/gss_krb5_mech.c
3 *
4 * Copyright (c) 2001 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 * J. Bruce Fields <bfields@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#include <linux/module.h>
38#include <linux/init.h>
39#include <linux/types.h>
40#include <linux/slab.h>
41#include <linux/sunrpc/auth.h>
42#include <linux/in.h>
43#include <linux/sunrpc/gss_krb5.h>
44#include <linux/sunrpc/xdr.h>
45#include <linux/crypto.h>
46
47#ifdef RPC_DEBUG
48# define RPCDBG_FACILITY RPCDBG_AUTH
49#endif
50
51static const void *
52simple_get_bytes(const void *p, const void *end, void *res, int len)
53{
54 const void *q = (const void *)((const char *)p + len);
55 if (unlikely(q > end || q < p))
56 return ERR_PTR(-EFAULT);
57 memcpy(res, p, len);
58 return q;
59}
60
61static const void *
62simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
63{
64 const void *q;
65 unsigned int len;
66
67 p = simple_get_bytes(p, end, &len, sizeof(len));
68 if (IS_ERR(p))
69 return p;
70 q = (const void *)((const char *)p + len);
71 if (unlikely(q > end || q < p))
72 return ERR_PTR(-EFAULT);
73 res->data = kmalloc(len, GFP_KERNEL);
74 if (unlikely(res->data == NULL))
75 return ERR_PTR(-ENOMEM);
76 memcpy(res->data, p, len);
77 res->len = len;
78 return q;
79}
80
81static inline const void *
82get_key(const void *p, const void *end, struct crypto_tfm **res)
83{
84 struct xdr_netobj key;
85 int alg, alg_mode;
86 char *alg_name;
87
88 p = simple_get_bytes(p, end, &alg, sizeof(alg));
89 if (IS_ERR(p))
90 goto out_err;
91 p = simple_get_netobj(p, end, &key);
92 if (IS_ERR(p))
93 goto out_err;
94
95 switch (alg) {
96 case ENCTYPE_DES_CBC_RAW:
97 alg_name = "des";
98 alg_mode = CRYPTO_TFM_MODE_CBC;
99 break;
100 default:
101 dprintk("RPC: get_key: unsupported algorithm %d\n", alg);
102 goto out_err_free_key;
103 }
104 if (!(*res = crypto_alloc_tfm(alg_name, alg_mode)))
105 goto out_err_free_key;
106 if (crypto_cipher_setkey(*res, key.data, key.len))
107 goto out_err_free_tfm;
108
109 kfree(key.data);
110 return p;
111
112out_err_free_tfm:
113 crypto_free_tfm(*res);
114out_err_free_key:
115 kfree(key.data);
116 p = ERR_PTR(-EINVAL);
117out_err:
118 return p;
119}
120
121static int
122gss_import_sec_context_kerberos(const void *p,
123 size_t len,
124 struct gss_ctx *ctx_id)
125{
126 const void *end = (const void *)((const char *)p + len);
127 struct krb5_ctx *ctx;
128
129 if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)))
130 goto out_err;
131 memset(ctx, 0, sizeof(*ctx));
132
133 p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
134 if (IS_ERR(p))
135 goto out_err_free_ctx;
136 p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init));
137 if (IS_ERR(p))
138 goto out_err_free_ctx;
139 p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed));
140 if (IS_ERR(p))
141 goto out_err_free_ctx;
142 p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg));
143 if (IS_ERR(p))
144 goto out_err_free_ctx;
145 p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg));
146 if (IS_ERR(p))
147 goto out_err_free_ctx;
148 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
149 if (IS_ERR(p))
150 goto out_err_free_ctx;
151 p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
152 if (IS_ERR(p))
153 goto out_err_free_ctx;
154 p = simple_get_netobj(p, end, &ctx->mech_used);
155 if (IS_ERR(p))
156 goto out_err_free_ctx;
157 p = get_key(p, end, &ctx->enc);
158 if (IS_ERR(p))
159 goto out_err_free_mech;
160 p = get_key(p, end, &ctx->seq);
161 if (IS_ERR(p))
162 goto out_err_free_key1;
163 if (p != end) {
164 p = ERR_PTR(-EFAULT);
165 goto out_err_free_key2;
166 }
167
168 ctx_id->internal_ctx_id = ctx;
169 dprintk("RPC: Successfully imported new context.\n");
170 return 0;
171
172out_err_free_key2:
173 crypto_free_tfm(ctx->seq);
174out_err_free_key1:
175 crypto_free_tfm(ctx->enc);
176out_err_free_mech:
177 kfree(ctx->mech_used.data);
178out_err_free_ctx:
179 kfree(ctx);
180out_err:
181 return PTR_ERR(p);
182}
183
184static void
185gss_delete_sec_context_kerberos(void *internal_ctx) {
186 struct krb5_ctx *kctx = internal_ctx;
187
188 if (kctx->seq)
189 crypto_free_tfm(kctx->seq);
190 if (kctx->enc)
191 crypto_free_tfm(kctx->enc);
192 if (kctx->mech_used.data)
193 kfree(kctx->mech_used.data);
194 kfree(kctx);
195}
196
197static u32
198gss_verify_mic_kerberos(struct gss_ctx *ctx,
199 struct xdr_buf *message,
200 struct xdr_netobj *mic_token,
201 u32 *qstate) {
202 u32 maj_stat = 0;
203 int qop_state;
204 struct krb5_ctx *kctx = ctx->internal_ctx_id;
205
206 maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state,
207 KG_TOK_MIC_MSG);
208 if (!maj_stat && qop_state)
209 *qstate = qop_state;
210
211 dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat);
212 return maj_stat;
213}
214
215static u32
216gss_get_mic_kerberos(struct gss_ctx *ctx,
217 u32 qop,
218 struct xdr_buf *message,
219 struct xdr_netobj *mic_token) {
220 u32 err = 0;
221 struct krb5_ctx *kctx = ctx->internal_ctx_id;
222
223 err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG);
224
225 dprintk("RPC: gss_get_mic_kerberos returning %d\n",err);
226
227 return err;
228}
229
230static struct gss_api_ops gss_kerberos_ops = {
231 .gss_import_sec_context = gss_import_sec_context_kerberos,
232 .gss_get_mic = gss_get_mic_kerberos,
233 .gss_verify_mic = gss_verify_mic_kerberos,
234 .gss_delete_sec_context = gss_delete_sec_context_kerberos,
235};
236
237static struct pf_desc gss_kerberos_pfs[] = {
238 [0] = {
239 .pseudoflavor = RPC_AUTH_GSS_KRB5,
240 .service = RPC_GSS_SVC_NONE,
241 .name = "krb5",
242 },
243 [1] = {
244 .pseudoflavor = RPC_AUTH_GSS_KRB5I,
245 .service = RPC_GSS_SVC_INTEGRITY,
246 .name = "krb5i",
247 },
248};
249
250static struct gss_api_mech gss_kerberos_mech = {
251 .gm_name = "krb5",
252 .gm_owner = THIS_MODULE,
253 .gm_ops = &gss_kerberos_ops,
254 .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
255 .gm_pfs = gss_kerberos_pfs,
256};
257
258static int __init init_kerberos_module(void)
259{
260 int status;
261
262 status = gss_mech_register(&gss_kerberos_mech);
263 if (status)
264 printk("Failed to register kerberos gss mechanism!\n");
265 return status;
266}
267
268static void __exit cleanup_kerberos_module(void)
269{
270 gss_mech_unregister(&gss_kerberos_mech);
271}
272
273MODULE_LICENSE("GPL");
274module_init(init_kerberos_module);
275module_exit(cleanup_kerberos_module);
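
The context blob that gss_import_sec_context_kerberos() parses is a flat buffer written by user space: fixed-size fields are copied verbatim by simple_get_bytes(), and each variable-length field (mech_used and the two keys) is a native unsigned length followed by that many bytes, consumed by simple_get_netobj(). The standalone sketch below models only the netobj part; get_netobj() and struct netobj are illustrative names, the kernel code kmallocs and copies where this sketch only points into the buffer, and the demo assumes the host matches the writer's int size and endianness.

#include <stdio.h>
#include <string.h>

struct netobj { unsigned int len; const unsigned char *data; };

static const unsigned char *
get_netobj(const unsigned char *p, const unsigned char *end, struct netobj *res)
{
        unsigned int len;

        if ((size_t)(end - p) < sizeof(len))
                return NULL;            /* kernel code returns ERR_PTR(-EFAULT) */
        memcpy(&len, p, sizeof(len));
        p += sizeof(len);
        if ((size_t)(end - p) < len)
                return NULL;
        res->len = len;
        res->data = p;                  /* kernel code kmallocs and memcpys */
        return p + len;
}

int main(void)
{
        /* length 4 followed by the bytes "krb5", little-endian host assumed */
        unsigned char blob[] = { 4, 0, 0, 0, 'k', 'r', 'b', '5' };
        struct netobj o;

        if (get_netobj(blob, blob + sizeof(blob), &o))
                printf("netobj: %u bytes, \"%.*s\"\n",
                       o.len, (int)o.len, (const char *)o.data);
        return 0;
}
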
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
new file mode 100644
index 000000000000..afeeb8715a77
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -0,0 +1,176 @@
1/*
2 * linux/net/sunrpc/gss_krb5_seal.c
3 *
4 * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
5 *
6 * Copyright (c) 2000 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Andy Adamson <andros@umich.edu>
10 * J. Bruce Fields <bfields@umich.edu>
11 */
12
13/*
14 * Copyright 1993 by OpenVision Technologies, Inc.
15 *
16 * Permission to use, copy, modify, distribute, and sell this software
17 * and its documentation for any purpose is hereby granted without fee,
18 * provided that the above copyright notice appears in all copies and
19 * that both that copyright notice and this permission notice appear in
20 * supporting documentation, and that the name of OpenVision not be used
21 * in advertising or publicity pertaining to distribution of the software
22 * without specific, written prior permission. OpenVision makes no
23 * representations about the suitability of this software for any
24 * purpose. It is provided "as is" without express or implied warranty.
25 *
26 * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
27 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
28 * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
29 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
30 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
31 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
32 * PERFORMANCE OF THIS SOFTWARE.
33 */
34
35/*
36 * Copyright (C) 1998 by the FundsXpress, INC.
37 *
38 * All rights reserved.
39 *
40 * Export of this software from the United States of America may require
41 * a specific license from the United States Government. It is the
42 * responsibility of any person or organization contemplating export to
43 * obtain such a license before exporting.
44 *
45 * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
46 * distribute this software and its documentation for any purpose and
47 * without fee is hereby granted, provided that the above copyright
48 * notice appear in all copies and that both that copyright notice and
49 * this permission notice appear in supporting documentation, and that
50 * the name of FundsXpress. not be used in advertising or publicity pertaining
51 * to distribution of the software without specific, written prior
52 * permission. FundsXpress makes no representations about the suitability of
53 * this software for any purpose. It is provided "as is" without express
54 * or implied warranty.
55 *
56 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
58 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
59 */
60
61#include <linux/types.h>
62#include <linux/slab.h>
63#include <linux/jiffies.h>
64#include <linux/sunrpc/gss_krb5.h>
65#include <linux/random.h>
66#include <asm/scatterlist.h>
67#include <linux/crypto.h>
68
69#ifdef RPC_DEBUG
70# define RPCDBG_FACILITY RPCDBG_AUTH
71#endif
72
73static inline int
74gss_krb5_padding(int blocksize, int length) {
75 /* Most of the code is block-size independent but in practice we
76 * use only 8: */
77 BUG_ON(blocksize != 8);
78 return 8 - (length & 7);
79}
80
81u32
82krb5_make_token(struct krb5_ctx *ctx, int qop_req,
83 struct xdr_buf *text, struct xdr_netobj *token,
84 int toktype)
85{
86 s32 checksum_type;
87 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
88 int blocksize = 0, tmsglen;
89 unsigned char *ptr, *krb5_hdr, *msg_start;
90 s32 now;
91
92 dprintk("RPC: gss_krb5_seal\n");
93
94 now = get_seconds();
95
96 if (qop_req != 0)
97 goto out_err;
98
99 switch (ctx->signalg) {
100 case SGN_ALG_DES_MAC_MD5:
101 checksum_type = CKSUMTYPE_RSA_MD5;
102 break;
103 default:
104 dprintk("RPC: gss_krb5_seal: ctx->signalg %d not"
105 " supported\n", ctx->signalg);
106 goto out_err;
107 }
108 if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) {
109 dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n",
110 ctx->sealalg);
111 goto out_err;
112 }
113
114 if (toktype == KG_TOK_WRAP_MSG) {
115 blocksize = crypto_tfm_alg_blocksize(ctx->enc);
116 tmsglen = blocksize + text->len
117 + gss_krb5_padding(blocksize, blocksize + text->len);
118 } else {
119 tmsglen = 0;
120 }
121
122 token->len = g_token_size(&ctx->mech_used, 22 + tmsglen);
123
124 ptr = token->data;
125 g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr);
126
127 *ptr++ = (unsigned char) ((toktype>>8)&0xff);
128 *ptr++ = (unsigned char) (toktype&0xff);
129
130 /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
131 krb5_hdr = ptr - 2;
132 msg_start = krb5_hdr + 24;
133
134 *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
135 memset(krb5_hdr + 4, 0xff, 4);
136 if (toktype == KG_TOK_WRAP_MSG)
137 *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg);
138
139 if (toktype == KG_TOK_WRAP_MSG) {
140 /* XXX removing support for now */
141 goto out_err;
142 } else { /* Sign only. */
143 if (make_checksum(checksum_type, krb5_hdr, 8, text,
144 &md5cksum))
145 goto out_err;
146 }
147
148 switch (ctx->signalg) {
149 case SGN_ALG_DES_MAC_MD5:
150 if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
151 md5cksum.data, md5cksum.len))
152 goto out_err;
153 memcpy(krb5_hdr + 16,
154 md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
155 KRB5_CKSUM_LENGTH);
156
157 dprintk("RPC: make_seal_token: cksum data: \n");
158 print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
159 break;
160 default:
161 BUG();
162 }
163
164 kfree(md5cksum.data);
165
166 if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
167 ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
168 goto out_err;
169
170 ctx->seq_send++;
171
172 return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
173out_err:
174 if (md5cksum.data) kfree(md5cksum.data);
175 return GSS_S_FAILURE;
176}
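
krb5_make_token() lays down a 24-byte token header at krb5_hdr in the layout of RFC 1964 section 1.2.1: TOK_ID, SGN_ALG, four filler/SEAL_ALG bytes, the encrypted sequence number, and the truncated checksum. The sketch below reproduces just that byte layout in standalone C; fill_mic_header() is an illustrative name, not a kernel function, and the MIC TOK_ID and SGN_ALG values are those given in RFC 1964.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>          /* htons */

/*
 * 24-byte header as built by krb5_make_token() above:
 *   0..1   TOK_ID    (0x01 0x01 for a MIC token, per RFC 1964)
 *   2..3   SGN_ALG   in network byte order
 *   4..7   filler    0xff (bytes 4..5 carry SEAL_ALG in wrap tokens)
 *   8..15  SND_SEQ   the encrypted sequence number block
 *  16..23  SGN_CKSUM the truncated, encrypted checksum
 */
static void fill_mic_header(unsigned char hdr[24], uint16_t toktype,
                            uint16_t signalg, const unsigned char seq[8],
                            const unsigned char cksum[8])
{
        uint16_t alg = htons(signalg);

        hdr[0] = (toktype >> 8) & 0xff;
        hdr[1] = toktype & 0xff;
        memcpy(hdr + 2, &alg, 2);
        memset(hdr + 4, 0xff, 4);
        memcpy(hdr + 8, seq, 8);
        memcpy(hdr + 16, cksum, 8);
}

int main(void)
{
        unsigned char seq[8] = { 0 }, cksum[8] = { 0 }, hdr[24];
        int i;

        fill_mic_header(hdr, 0x0101, 0x0000 /* DES MAC MD5 */, seq, cksum);
        for (i = 0; i < 24; i++)
                printf("%02x%c", hdr[i], i == 23 ? '\n' : ' ');
        return 0;
}
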
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
new file mode 100644
index 000000000000..c53ead39118d
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -0,0 +1,88 @@
1/*
2 * linux/net/sunrpc/gss_krb5_seqnum.c
3 *
4 * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c
5 *
6 * Copyright (c) 2000 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Andy Adamson <andros@umich.edu>
10 */
11
12/*
13 * Copyright 1993 by OpenVision Technologies, Inc.
14 *
15 * Permission to use, copy, modify, distribute, and sell this software
16 * and its documentation for any purpose is hereby granted without fee,
17 * provided that the above copyright notice appears in all copies and
18 * that both that copyright notice and this permission notice appear in
19 * supporting documentation, and that the name of OpenVision not be used
20 * in advertising or publicity pertaining to distribution of the software
21 * without specific, written prior permission. OpenVision makes no
22 * representations about the suitability of this software for any
23 * purpose. It is provided "as is" without express or implied warranty.
24 *
25 * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
26 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
27 * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
28 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
29 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
30 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
31 * PERFORMANCE OF THIS SOFTWARE.
32 */
33
34#include <linux/types.h>
35#include <linux/slab.h>
36#include <linux/sunrpc/gss_krb5.h>
37#include <linux/crypto.h>
38
39#ifdef RPC_DEBUG
40# define RPCDBG_FACILITY RPCDBG_AUTH
41#endif
42
43s32
44krb5_make_seq_num(struct crypto_tfm *key,
45 int direction,
46 s32 seqnum,
47 unsigned char *cksum, unsigned char *buf)
48{
49 unsigned char plain[8];
50
51 plain[0] = (unsigned char) (seqnum & 0xff);
52 plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
53 plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
54 plain[3] = (unsigned char) ((seqnum >> 24) & 0xff);
55
56 plain[4] = direction;
57 plain[5] = direction;
58 plain[6] = direction;
59 plain[7] = direction;
60
61 return krb5_encrypt(key, cksum, plain, buf, 8);
62}
63
64s32
65krb5_get_seq_num(struct crypto_tfm *key,
66 unsigned char *cksum,
67 unsigned char *buf,
68 int *direction, s32 * seqnum)
69{
70 s32 code;
71 unsigned char plain[8];
72
73 dprintk("RPC: krb5_get_seq_num:\n");
74
75 if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
76 return code;
77
78 if ((plain[4] != plain[5]) || (plain[4] != plain[6])
79 || (plain[4] != plain[7]))
80 return (s32)KG_BAD_SEQ;
81
82 *direction = plain[4];
83
84 *seqnum = ((plain[0]) |
85 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
86
87 return (0);
88}
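
The 8-byte plaintext that krb5_make_seq_num() hands to krb5_encrypt() is simply the 32-bit sequence number, least significant byte first, followed by four copies of the direction byte; krb5_get_seq_num() reverses this and rejects blocks whose direction bytes disagree. A standalone sketch of that packing and unpacking (illustrative helper names, no crypto):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Pack a sequence number the way krb5_make_seq_num() does before
 * encryption: 32-bit seqnum, low byte first, then the direction byte
 * repeated four times. */
static void pack_seq_plaintext(int32_t seqnum, unsigned char direction,
                               unsigned char plain[8])
{
        plain[0] = seqnum & 0xff;
        plain[1] = (seqnum >> 8) & 0xff;
        plain[2] = (seqnum >> 16) & 0xff;
        plain[3] = (seqnum >> 24) & 0xff;
        memset(plain + 4, direction, 4);
}

/* Reverse of the above; returns -1 when the direction bytes disagree,
 * mirroring the KG_BAD_SEQ check in krb5_get_seq_num(). */
static int unpack_seq_plaintext(const unsigned char plain[8],
                                unsigned char *direction, int32_t *seqnum)
{
        if (plain[4] != plain[5] || plain[4] != plain[6] || plain[4] != plain[7])
                return -1;
        *direction = plain[4];
        *seqnum = (int32_t)((uint32_t)plain[0] | ((uint32_t)plain[1] << 8) |
                            ((uint32_t)plain[2] << 16) | ((uint32_t)plain[3] << 24));
        return 0;
}

int main(void)
{
        unsigned char buf[8], dir;
        int32_t seq;

        pack_seq_plaintext(42, 0xff, buf);  /* 0xff: direction of acceptor-sent tokens */
        if (unpack_seq_plaintext(buf, &dir, &seq) == 0)
                printf("direction=0x%02x seqnum=%d\n", dir, seq);
        return 0;
}
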
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
new file mode 100644
index 000000000000..8767fc53183d
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -0,0 +1,202 @@
1/*
2 * linux/net/sunrpc/gss_krb5_unseal.c
3 *
4 * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
5 *
6 * Copyright (c) 2000 The Regents of the University of Michigan.
7 * All rights reserved.
8 *
9 * Andy Adamson <andros@umich.edu>
10 */
11
12/*
13 * Copyright 1993 by OpenVision Technologies, Inc.
14 *
15 * Permission to use, copy, modify, distribute, and sell this software
16 * and its documentation for any purpose is hereby granted without fee,
17 * provided that the above copyright notice appears in all copies and
18 * that both that copyright notice and this permission notice appear in
19 * supporting documentation, and that the name of OpenVision not be used
20 * in advertising or publicity pertaining to distribution of the software
21 * without specific, written prior permission. OpenVision makes no
22 * representations about the suitability of this software for any
23 * purpose. It is provided "as is" without express or implied warranty.
24 *
25 * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
26 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
27 * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
28 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
29 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
30 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
31 * PERFORMANCE OF THIS SOFTWARE.
32 */
33
34/*
35 * Copyright (C) 1998 by the FundsXpress, INC.
36 *
37 * All rights reserved.
38 *
39 * Export of this software from the United States of America may require
40 * a specific license from the United States Government. It is the
41 * responsibility of any person or organization contemplating export to
42 * obtain such a license before exporting.
43 *
44 * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
45 * distribute this software and its documentation for any purpose and
46 * without fee is hereby granted, provided that the above copyright
47 * notice appear in all copies and that both that copyright notice and
48 * this permission notice appear in supporting documentation, and that
49 * the name of FundsXpress. not be used in advertising or publicity pertaining
50 * to distribution of the software without specific, written prior
51 * permission. FundsXpress makes no representations about the suitability of
52 * this software for any purpose. It is provided "as is" without express
53 * or implied warranty.
54 *
55 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
56 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
57 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
58 */
59
60#include <linux/types.h>
61#include <linux/slab.h>
62#include <linux/jiffies.h>
63#include <linux/sunrpc/gss_krb5.h>
64#include <linux/crypto.h>
65
66#ifdef RPC_DEBUG
67# define RPCDBG_FACILITY RPCDBG_AUTH
68#endif
69
70
71/* message_buffer is an input if toktype is MIC and an output if it is WRAP:
72 * If toktype is MIC: read_token is a mic token, and message_buffer is the
73 * data that the mic was supposedly taken over.
74 * If toktype is WRAP: read_token is a wrap token, and message_buffer is used
75 * to return the decrypted data.
76 */
77
78/* XXX will need to change prototype and/or just split into a separate function
79 * when we add privacy (because read_token will be in pages too). */
80u32
81krb5_read_token(struct krb5_ctx *ctx,
82 struct xdr_netobj *read_token,
83 struct xdr_buf *message_buffer,
84 int *qop_state, int toktype)
85{
86 int signalg;
87 int sealalg;
88 s32 checksum_type;
89 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
90 s32 now;
91 int direction;
92 s32 seqnum;
93 unsigned char *ptr = (unsigned char *)read_token->data;
94 int bodysize;
95 u32 ret = GSS_S_DEFECTIVE_TOKEN;
96
97 dprintk("RPC: krb5_read_token\n");
98
99 if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
100 read_token->len))
101 goto out;
102
103 if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff)))
104 goto out;
105
106 /* XXX sanity-check bodysize?? */
107
108 if (toktype == KG_TOK_WRAP_MSG) {
109 /* XXX gone */
110 goto out;
111 }
112
113 /* get the sign and seal algorithms */
114
115 signalg = ptr[0] + (ptr[1] << 8);
116 sealalg = ptr[2] + (ptr[3] << 8);
117
118 /* Sanity checks */
119
120 if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
121 goto out;
122
123 if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) ||
124 ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff)))
125 goto out;
126
127 /* in the current spec, there is only one valid seal algorithm per
128 key type, so a simple comparison is ok */
129
130 if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg))
131 goto out;
132
133 /* there are several mappings of seal algorithms to sign algorithms,
134 but few enough that we can try them all. */
135
136 if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
137 (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
138 (ctx->sealalg == SEAL_ALG_DES3KD &&
139 signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
140 goto out;
141
142 /* compute the checksum of the message */
143
144 /* initialize the cksum */
145 switch (signalg) {
146 case SGN_ALG_DES_MAC_MD5:
147 checksum_type = CKSUMTYPE_RSA_MD5;
148 break;
149 default:
150 ret = GSS_S_DEFECTIVE_TOKEN;
151 goto out;
152 }
153
154 switch (signalg) {
155 case SGN_ALG_DES_MAC_MD5:
156 ret = make_checksum(checksum_type, ptr - 2, 8,
157 message_buffer, &md5cksum);
158 if (ret)
159 goto out;
160
161 ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data,
162 md5cksum.data, 16);
163 if (ret)
164 goto out;
165
166 if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
167 ret = GSS_S_BAD_SIG;
168 goto out;
169 }
170 break;
171 default:
172 ret = GSS_S_DEFECTIVE_TOKEN;
173 goto out;
174 }
175
176 /* it got through unscathed. Make sure the context is unexpired */
177
178 if (qop_state)
179 *qop_state = GSS_C_QOP_DEFAULT;
180
181 now = get_seconds();
182
183 ret = GSS_S_CONTEXT_EXPIRED;
184 if (now > ctx->endtime)
185 goto out;
186
187 /* do sequencing checks */
188
189 ret = GSS_S_BAD_SIG;
190 if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction,
191 &seqnum)))
192 goto out;
193
194 if ((ctx->initiate && direction != 0xff) ||
195 (!ctx->initiate && direction != 0))
196 goto out;
197
198 ret = GSS_S_COMPLETE;
199out:
200 if (md5cksum.data) kfree(md5cksum.data);
201 return ret;
202}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
new file mode 100644
index 000000000000..9dfb68377d69
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -0,0 +1,301 @@
1/*
2 * linux/net/sunrpc/gss_mech_switch.c
3 *
4 * Copyright (c) 2001 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * J. Bruce Fields <bfields@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/socket.h>
39#include <linux/module.h>
40#include <linux/sunrpc/msg_prot.h>
41#include <linux/sunrpc/gss_asn1.h>
42#include <linux/sunrpc/auth_gss.h>
43#include <linux/sunrpc/svcauth_gss.h>
44#include <linux/sunrpc/gss_err.h>
45#include <linux/sunrpc/sched.h>
46#include <linux/sunrpc/gss_api.h>
47#include <linux/sunrpc/clnt.h>
48
49#ifdef RPC_DEBUG
50# define RPCDBG_FACILITY RPCDBG_AUTH
51#endif
52
53static LIST_HEAD(registered_mechs);
54static DEFINE_SPINLOCK(registered_mechs_lock);
55
56static void
57gss_mech_free(struct gss_api_mech *gm)
58{
59 struct pf_desc *pf;
60 int i;
61
62 for (i = 0; i < gm->gm_pf_num; i++) {
63 pf = &gm->gm_pfs[i];
64 if (pf->auth_domain_name)
65 kfree(pf->auth_domain_name);
66 pf->auth_domain_name = NULL;
67 }
68}
69
70static inline char *
71make_auth_domain_name(char *name)
72{
73 static char *prefix = "gss/";
74 char *new;
75
76 new = kmalloc(strlen(name) + strlen(prefix) + 1, GFP_KERNEL);
77 if (new) {
78 strcpy(new, prefix);
79 strcat(new, name);
80 }
81 return new;
82}
83
84static int
85gss_mech_svc_setup(struct gss_api_mech *gm)
86{
87 struct pf_desc *pf;
88 int i, status;
89
90 for (i = 0; i < gm->gm_pf_num; i++) {
91 pf = &gm->gm_pfs[i];
92 pf->auth_domain_name = make_auth_domain_name(pf->name);
93 status = -ENOMEM;
94 if (pf->auth_domain_name == NULL)
95 goto out;
96 status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor,
97 pf->auth_domain_name);
98 if (status)
99 goto out;
100 }
101 return 0;
102out:
103 gss_mech_free(gm);
104 return status;
105}
106
107int
108gss_mech_register(struct gss_api_mech *gm)
109{
110 int status;
111
112 status = gss_mech_svc_setup(gm);
113 if (status)
114 return status;
115 spin_lock(&registered_mechs_lock);
116 list_add(&gm->gm_list, &registered_mechs);
117 spin_unlock(&registered_mechs_lock);
118 dprintk("RPC: registered gss mechanism %s\n", gm->gm_name);
119 return 0;
120}
121
122EXPORT_SYMBOL(gss_mech_register);
123
124void
125gss_mech_unregister(struct gss_api_mech *gm)
126{
127 spin_lock(&registered_mechs_lock);
128 list_del(&gm->gm_list);
129 spin_unlock(&registered_mechs_lock);
130 dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name);
131 gss_mech_free(gm);
132}
133
134EXPORT_SYMBOL(gss_mech_unregister);
135
136struct gss_api_mech *
137gss_mech_get(struct gss_api_mech *gm)
138{
139 __module_get(gm->gm_owner);
140 return gm;
141}
142
143EXPORT_SYMBOL(gss_mech_get);
144
145struct gss_api_mech *
146gss_mech_get_by_name(const char *name)
147{
148 struct gss_api_mech *pos, *gm = NULL;
149
150 spin_lock(&registered_mechs_lock);
151 list_for_each_entry(pos, &registered_mechs, gm_list) {
152 if (0 == strcmp(name, pos->gm_name)) {
153 if (try_module_get(pos->gm_owner))
154 gm = pos;
155 break;
156 }
157 }
158 spin_unlock(&registered_mechs_lock);
159 return gm;
160
161}
162
163EXPORT_SYMBOL(gss_mech_get_by_name);
164
165static inline int
166mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
167{
168 int i;
169
170 for (i = 0; i < gm->gm_pf_num; i++) {
171 if (gm->gm_pfs[i].pseudoflavor == pseudoflavor)
172 return 1;
173 }
174 return 0;
175}
176
177struct gss_api_mech *
178gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
179{
180 struct gss_api_mech *pos, *gm = NULL;
181
182 spin_lock(&registered_mechs_lock);
183 list_for_each_entry(pos, &registered_mechs, gm_list) {
184 if (!mech_supports_pseudoflavor(pos, pseudoflavor)) {
185 /* no reference was taken on pos, so there is nothing to put */
186 continue;
187 }
188 if (try_module_get(pos->gm_owner))
189 gm = pos;
190 break;
191 }
192 spin_unlock(&registered_mechs_lock);
193 return gm;
194}
195
196EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor);
197
198u32
199gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
200{
201 int i;
202
203 for (i = 0; i < gm->gm_pf_num; i++) {
204 if (gm->gm_pfs[i].pseudoflavor == pseudoflavor)
205 return gm->gm_pfs[i].service;
206 }
207 return 0;
208}
209
210EXPORT_SYMBOL(gss_pseudoflavor_to_service);
211
212char *
213gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
214{
215 int i;
216
217 for (i = 0; i < gm->gm_pf_num; i++) {
218 if (gm->gm_pfs[i].service == service)
219 return gm->gm_pfs[i].auth_domain_name;
220 }
221 return NULL;
222}
223
224EXPORT_SYMBOL(gss_service_to_auth_domain_name);
225
226void
227gss_mech_put(struct gss_api_mech * gm)
228{
229 module_put(gm->gm_owner);
230}
231
232EXPORT_SYMBOL(gss_mech_put);
233
234/* The mech could probably be determined from the token instead, but it's just
235 * as easy for now to pass it in. */
236int
237gss_import_sec_context(const void *input_token, size_t bufsize,
238 struct gss_api_mech *mech,
239 struct gss_ctx **ctx_id)
240{
241 if (!(*ctx_id = kmalloc(sizeof(**ctx_id), GFP_KERNEL)))
242 return GSS_S_FAILURE;
243 memset(*ctx_id, 0, sizeof(**ctx_id));
244 (*ctx_id)->mech_type = gss_mech_get(mech);
245
246 return mech->gm_ops
247 ->gss_import_sec_context(input_token, bufsize, *ctx_id);
248}
249
250/* gss_get_mic: compute a mic over message and return mic_token. */
251
252u32
253gss_get_mic(struct gss_ctx *context_handle,
254 u32 qop,
255 struct xdr_buf *message,
256 struct xdr_netobj *mic_token)
257{
258 return context_handle->mech_type->gm_ops
259 ->gss_get_mic(context_handle,
260 qop,
261 message,
262 mic_token);
263}
264
265/* gss_verify_mic: check whether the provided mic_token verifies message. */
266
267u32
268gss_verify_mic(struct gss_ctx *context_handle,
269 struct xdr_buf *message,
270 struct xdr_netobj *mic_token,
271 u32 *qstate)
272{
273 return context_handle->mech_type->gm_ops
274 ->gss_verify_mic(context_handle,
275 message,
276 mic_token,
277 qstate);
278}
279
280/* gss_delete_sec_context: free all resources associated with context_handle.
281 * Note this differs from the RFC 2744-specified prototype in that we don't
282 * bother returning an output token, since it would never be used anyway. */
283
284u32
285gss_delete_sec_context(struct gss_ctx **context_handle)
286{
287 dprintk("RPC: gss_delete_sec_context deleting %p\n",
288 *context_handle);
289
290 if (!*context_handle)
291 return(GSS_S_NO_CONTEXT);
292 if ((*context_handle)->internal_ctx_id != 0)
293 (*context_handle)->mech_type->gm_ops
294 ->gss_delete_sec_context((*context_handle)
295 ->internal_ctx_id);
296 if ((*context_handle)->mech_type)
297 gss_mech_put((*context_handle)->mech_type);
298 kfree(*context_handle);
299 *context_handle=NULL;
300 return GSS_S_COMPLETE;
301}
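
Each mechanism hands the switch a table of pf_desc entries; the switch derives a server-side auth domain name by prefixing the entry name with "gss/" and answers pseudoflavor lookups by scanning that table. The user-space sketch below models those two helpers; struct pf_desc_model and the numeric values in it are stand-ins for illustration, not the kernel's definitions.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pf_desc_model {
        unsigned int pseudoflavor;
        unsigned int service;
        const char *name;
};

/* a mock per-mechanism table in the shape of gss_kerberos_pfs above */
static const struct pf_desc_model krb5_pfs[] = {
        { 390003, 1 /* none */,      "krb5"  },
        { 390004, 2 /* integrity */, "krb5i" },
};

/* same linear scan as gss_pseudoflavor_to_service() */
static unsigned int pseudoflavor_to_service(const struct pf_desc_model *pfs,
                                            size_t n, unsigned int pf)
{
        size_t i;

        for (i = 0; i < n; i++)
                if (pfs[i].pseudoflavor == pf)
                        return pfs[i].service;
        return 0;
}

/* same "gss/" prefixing as make_auth_domain_name() above */
static char *make_domain_name(const char *name)
{
        const char *prefix = "gss/";
        char *new = malloc(strlen(prefix) + strlen(name) + 1);

        if (new) {
                strcpy(new, prefix);
                strcat(new, name);
        }
        return new;
}

int main(void)
{
        char *domain = make_domain_name("krb5");

        printf("service=%u domain=%s\n",
               pseudoflavor_to_service(krb5_pfs, 2, 390004),
               domain ? domain : "(alloc failed)");
        free(domain);
        return 0;
}
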
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
new file mode 100644
index 000000000000..dad05994c3eb
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -0,0 +1,300 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_mech.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 * J. Bruce Fields <bfields@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#include <linux/module.h>
38#include <linux/init.h>
39#include <linux/types.h>
40#include <linux/slab.h>
41#include <linux/sunrpc/auth.h>
42#include <linux/in.h>
43#include <linux/sunrpc/svcauth_gss.h>
44#include <linux/sunrpc/gss_spkm3.h>
45#include <linux/sunrpc/xdr.h>
46#include <linux/crypto.h>
47
48#ifdef RPC_DEBUG
49# define RPCDBG_FACILITY RPCDBG_AUTH
50#endif
51
52static const void *
53simple_get_bytes(const void *p, const void *end, void *res, int len)
54{
55 const void *q = (const void *)((const char *)p + len);
56 if (unlikely(q > end || q < p))
57 return ERR_PTR(-EFAULT);
58 memcpy(res, p, len);
59 return q;
60}
61
62static const void *
63simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
64{
65 const void *q;
66 unsigned int len;
67 p = simple_get_bytes(p, end, &len, sizeof(len));
68 if (IS_ERR(p))
69 return p;
70 res->len = len;
71 if (len == 0) {
72 res->data = NULL;
73 return p;
74 }
75 q = (const void *)((const char *)p + len);
76 if (unlikely(q > end || q < p))
77 return ERR_PTR(-EFAULT);
78 res->data = kmalloc(len, GFP_KERNEL);
79 if (unlikely(res->data == NULL))
80 return ERR_PTR(-ENOMEM);
81 memcpy(res->data, p, len);
82 return q;
83}
84
85static inline const void *
86get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
87{
88 struct xdr_netobj key = { 0 };
89 int alg_mode,setkey = 0;
90 char *alg_name;
91
92 p = simple_get_bytes(p, end, resalg, sizeof(*resalg));
93 if (IS_ERR(p))
94 goto out_err;
95 p = simple_get_netobj(p, end, &key);
96 if (IS_ERR(p))
97 goto out_err;
98
99 switch (*resalg) {
100 case NID_des_cbc:
101 alg_name = "des";
102 alg_mode = CRYPTO_TFM_MODE_CBC;
103 setkey = 1;
104 break;
105 case NID_md5:
106 if (key.len == 0) {
107 dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
108 }
109 alg_name = "md5";
110 alg_mode = 0;
111 setkey = 0;
112 break;
113 default:
114 dprintk("RPC: SPKM3 get_key: unsupported algorithm %d\n", *resalg);
115 goto out_err_free_key;
116 }
117 if (!(*res = crypto_alloc_tfm(alg_name, alg_mode)))
118 goto out_err_free_key;
119 if (setkey) {
120 if (crypto_cipher_setkey(*res, key.data, key.len))
121 goto out_err_free_tfm;
122 }
123
124 if(key.len > 0)
125 kfree(key.data);
126 return p;
127
128out_err_free_tfm:
129 crypto_free_tfm(*res);
130out_err_free_key:
131 if(key.len > 0)
132 kfree(key.data);
133 p = ERR_PTR(-EINVAL);
134out_err:
135 return p;
136}
137
138static int
139gss_import_sec_context_spkm3(const void *p, size_t len,
140 struct gss_ctx *ctx_id)
141{
142 const void *end = (const void *)((const char *)p + len);
143 struct spkm3_ctx *ctx;
144
145 if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL)))
146 goto out_err;
147 memset(ctx, 0, sizeof(*ctx));
148
149 p = simple_get_netobj(p, end, &ctx->ctx_id);
150 if (IS_ERR(p))
151 goto out_err_free_ctx;
152
153 p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop));
154 if (IS_ERR(p))
155 goto out_err_free_ctx_id;
156
157 p = simple_get_netobj(p, end, &ctx->mech_used);
158 if (IS_ERR(p))
159 goto out_err_free_mech;
160
161 p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
162 if (IS_ERR(p))
163 goto out_err_free_mech;
164
165 p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags));
166 if (IS_ERR(p))
167 goto out_err_free_mech;
168
169 p = simple_get_netobj(p, end, &ctx->share_key);
170 if (IS_ERR(p))
171 goto out_err_free_s_key;
172
173 p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg);
174 if (IS_ERR(p))
175 goto out_err_free_s_key;
176
177 p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg);
178 if (IS_ERR(p))
179 goto out_err_free_key1;
180
181 p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg));
182 if (IS_ERR(p))
183 goto out_err_free_key2;
184
185 p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg));
186 if (IS_ERR(p))
187 goto out_err_free_key2;
188
189 if (p != end)
190 goto out_err_free_key2;
191
192 ctx_id->internal_ctx_id = ctx;
193
194 dprintk("Successfully imported new spkm context.\n");
195 return 0;
196
197out_err_free_key2:
198 crypto_free_tfm(ctx->derived_integ_key);
199out_err_free_key1:
200 crypto_free_tfm(ctx->derived_conf_key);
201out_err_free_s_key:
202 kfree(ctx->share_key.data);
203out_err_free_mech:
204 kfree(ctx->mech_used.data);
205out_err_free_ctx_id:
206 kfree(ctx->ctx_id.data);
207out_err_free_ctx:
208 kfree(ctx);
209out_err:
210 return PTR_ERR(p);
211}
212
213static void
214gss_delete_sec_context_spkm3(void *internal_ctx) {
215 struct spkm3_ctx *sctx = internal_ctx;
216
217 if(sctx->derived_integ_key)
218 crypto_free_tfm(sctx->derived_integ_key);
219 if(sctx->derived_conf_key)
220 crypto_free_tfm(sctx->derived_conf_key);
221 if(sctx->share_key.data)
222 kfree(sctx->share_key.data);
223 if(sctx->mech_used.data)
224 kfree(sctx->mech_used.data);
225 kfree(sctx);
226}
227
228static u32
229gss_verify_mic_spkm3(struct gss_ctx *ctx,
230 struct xdr_buf *signbuf,
231 struct xdr_netobj *checksum,
232 u32 *qstate) {
233 u32 maj_stat = 0;
234 int qop_state = 0;
235 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
236
237 dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n");
238 maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state,
239 SPKM_MIC_TOK);
240
241 if (!maj_stat && qop_state)
242 *qstate = qop_state;
243
244 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
245 return maj_stat;
246}
247
248static u32
249gss_get_mic_spkm3(struct gss_ctx *ctx,
250 u32 qop,
251 struct xdr_buf *message_buffer,
252 struct xdr_netobj *message_token) {
253 u32 err = 0;
254 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
255
256 dprintk("RPC: gss_get_mic_spkm3\n");
257
258 err = spkm3_make_token(sctx, qop, message_buffer,
259 message_token, SPKM_MIC_TOK);
260 return err;
261}
262
263static struct gss_api_ops gss_spkm3_ops = {
264 .gss_import_sec_context = gss_import_sec_context_spkm3,
265 .gss_get_mic = gss_get_mic_spkm3,
266 .gss_verify_mic = gss_verify_mic_spkm3,
267 .gss_delete_sec_context = gss_delete_sec_context_spkm3,
268};
269
270static struct pf_desc gss_spkm3_pfs[] = {
271 {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"},
272 {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
273};
274
275static struct gss_api_mech gss_spkm3_mech = {
276 .gm_name = "spkm3",
277 .gm_owner = THIS_MODULE,
278 .gm_ops = &gss_spkm3_ops,
279 .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs),
280 .gm_pfs = gss_spkm3_pfs,
281};
282
283static int __init init_spkm3_module(void)
284{
285 int status;
286
287 status = gss_mech_register(&gss_spkm3_mech);
288 if (status)
289 printk("Failed to register spkm3 gss mechanism!\n");
290 return status;
291}
292
293static void __exit cleanup_spkm3_module(void)
294{
295 gss_mech_unregister(&gss_spkm3_mech);
296}
297
298MODULE_LICENSE("GPL");
299module_init(init_spkm3_module);
300module_exit(cleanup_spkm3_module);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
new file mode 100644
index 000000000000..25339868d462
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -0,0 +1,132 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_seal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/random.h>
41#include <linux/crypto.h>
42
43#ifdef RPC_DEBUG
44# define RPCDBG_FACILITY RPCDBG_AUTH
45#endif
46
47/*
48 * spkm3_make_token()
49 *
50 * Only SPKM_MIC_TOK with md5 intg-alg is supported
51 */
52
53u32
54spkm3_make_token(struct spkm3_ctx *ctx, int qop_req,
55 struct xdr_buf * text, struct xdr_netobj * token,
56 int toktype)
57{
58 s32 checksum_type;
59 char tokhdrbuf[25];
60 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
61 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
62 int tmsglen, tokenlen = 0;
63 unsigned char *ptr;
64 s32 now;
65 int ctxelen = 0, ctxzbit = 0;
66 int md5elen = 0, md5zbit = 0;
67
68 dprintk("RPC: spkm3_make_token\n");
69
70 now = jiffies;
71 if (qop_req != 0)
72 goto out_err;
73
74 if (ctx->ctx_id.len != 16) {
75 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
76 ctx->ctx_id.len);
77 goto out_err;
78 }
79
80 switch (ctx->intg_alg) {
81 case NID_md5:
82 checksum_type = CKSUMTYPE_RSA_MD5;
83 break;
84 default:
85 dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not"
86 " supported\n", ctx->intg_alg);
87 goto out_err;
88 }
89 /* XXX since we don't support WRAP, perhaps we don't care... */
90 if (ctx->conf_alg != NID_cast5_cbc) {
91 dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n",
92 ctx->conf_alg);
93 goto out_err;
94 }
95
96 if (toktype == SPKM_MIC_TOK) {
97 tmsglen = 0;
98 /* Calculate checksum over the mic-header */
99 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
100 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
101 ctxelen, ctxzbit);
102
103 if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len,
104 text, &md5cksum))
105 goto out_err;
106
107 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
108 tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1;
109
110 /* Create token header using generic routines */
111 token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen);
112
113 ptr = token->data;
114 g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr);
115
116 spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
117 } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
118 dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n");
119 goto out_err;
120 }
121 kfree(md5cksum.data);
122
123 /* XXX need to implement sequence numbers, and ctx->expired */
124
125 return GSS_S_COMPLETE;
126out_err:
127 if (md5cksum.data)
128 kfree(md5cksum.data);
129 token->data = NULL;
130 token->len = 0;
131 return GSS_S_FAILURE;
132}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
new file mode 100644
index 000000000000..46c08a0710f6
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -0,0 +1,266 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_token.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/random.h>
41#include <linux/crypto.h>
42
43#ifdef RPC_DEBUG
44# define RPCDBG_FACILITY RPCDBG_AUTH
45#endif
46
47/*
48 * asn1_bitstring_len()
49 *
50 * calculate the asn1 bitstring length of the xdr_netobject
51 */
52void
53asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
54{
55 int i, zbit = 0,elen = in->len;
56 char *ptr;
57
58 ptr = &in->data[in->len -1];
59
60 /* count trailing 0's */
61 for(i = in->len; i > 0; i--) {
62 if (*ptr == 0) {
63 ptr--;
64 elen--;
65 } else
66 break;
67 }
68
69 /* count number of 0 bits in final octet */
70 ptr = &in->data[elen - 1];
71 for(i = 0; i < 8; i++) {
72 short mask = 0x01;
73
74 if (!((mask << i) & *ptr))
75 zbit++;
76 else
77 break;
78 }
79 *enclen = elen;
80 *zerobits = zbit;
81}
82
83/*
84 * decode_asn1_bitstring()
85 *
86 * decode a bitstring into a buffer of the expected length.
87 * enclen = bit string length
88 * explen = expected length (define in rfc)
89 */
90int
91decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
92{
93 if (!(out->data = kmalloc(explen,GFP_KERNEL)))
94 return 0;
95 out->len = explen;
96 memset(out->data, 0, explen);
97 memcpy(out->data, in, enclen);
98 return 1;
99}
100
101/*
102 * SPKMInnerContextToken choice SPKM_MIC asn1 token layout
103 *
104 * contextid is always 16 bytes plain data. max asn1 bitstring len = 17.
105 *
106 * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum)
107 *
108 * pos value
109 * ----------
110 * [0] a4 SPKM-MIC tag
111 * [1] ?? innertoken length (max 44)
112 *
113 *
114 * tok_hdr piece of checksum data starts here
115 *
116 * the maximum mic-header len = 2 + 23 = 25
117 * mic-header
118 * ----------
119 * [2] 30 SEQUENCE tag
120 * [3] ?? mic-header length: (max 23) = TokenID + ContextID
121 *
122 * TokenID - all fields constant and can be hardcoded
123 * -------
124 * [4] 02 Type 2
125 * [5] 02 Length 2
126 * [6][7] 01 01 TokenID (SPKM_MIC_TOK)
127 *
128 * ContextID - encoded length not constant, calculated
129 * ---------
130 * [8] 03 Type 3
131 * [9] ?? encoded length
132 * [10] ?? ctxzbit
133 * [11] contextid
134 *
135 * mic_header piece of checksum data ends here.
136 *
137 * int-cksum - encoded length not constant, calculated
138 * ---------
139 * [??] 03 Type 3
140 * [??] ?? encoded length
141 * [??] ?? md5zbit
142 * [??] int-cksum (NID_md5 = 16)
143 *
144 * maximum SPKM-MIC innercontext token length =
145 * 10 + encoded contextid_size(17 max) + 2 + encoded
146 * cksum_size (17 max for NID_md5) = 46
147 */
148
149/*
150 * spkm3_mic_header()
151 *
152 * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
153 * elen: 16 byte context id asn1 bitstring encoded length
154 */
155void
156spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
157{
158 char *hptr = *hdrbuf;
159 char *top = *hdrbuf;
160
161 *(u8 *)hptr++ = 0x30;
162 *(u8 *)hptr++ = elen + 7; /* on the wire header length */
163
164 /* tokenid */
165 *(u8 *)hptr++ = 0x02;
166 *(u8 *)hptr++ = 0x02;
167 *(u8 *)hptr++ = 0x01;
168 *(u8 *)hptr++ = 0x01;
169
170 /* contextid */
171 *(u8 *)hptr++ = 0x03;
172 *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */
173 *(u8 *)hptr++ = zbit;
174 memcpy(hptr, ctxdata, elen);
175 hptr += elen;
176 *hdrlen = hptr - top;
177}
178
179/*
180 * spkm3_mic_innercontext_token()
181 *
182 * *tokp points to the beginning of the SPKM_MIC token described
183 * in rfc 2025, section 3.2.1:
184 *
185 */
186void
187spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
188{
189 unsigned char *ict = *tokp;
190
191 *(u8 *)ict++ = 0xa4;
192 *(u8 *)ict++ = toklen - 2;
193 memcpy(ict, mic_hdr->data, mic_hdr->len);
194 ict += mic_hdr->len;
195
196 *(u8 *)ict++ = 0x03;
197 *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */
198 *(u8 *)ict++ = md5zbit;
199 memcpy(ict, md5cksum->data, md5elen);
200}
201
202u32
203spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum)
204{
205 struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL};
206 unsigned char *ptr = *tokp;
207 int ctxelen;
208 u32 ret = GSS_S_DEFECTIVE_TOKEN;
209
210 /* spkm3 innercontext token preamble */
211 if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) {
212 dprintk("RPC: BAD SPKM ictoken preamble\n");
213 goto out;
214 }
215
216 *mic_hdrlen = ptr[3];
217
218 /* token type */
219 if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) {
220 dprintk("RPC: BAD asn1 SPKM3 token type\n");
221 goto out;
222 }
223
224 /* only support SPKM_MIC_TOK */
225 if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
226 dprintk("RPC: ERROR unsupported SPKM3 token \n");
227 goto out;
228 }
229
230 /* contextid */
231 if (ptr[8] != 0x03) {
232 dprintk("RPC: BAD SPKM3 asn1 context-id type\n");
233 goto out;
234 }
235
236 ctxelen = ptr[9];
237 if (ctxelen > 17) { /* length includes asn1 zbit octet */
238 dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen);
239 goto out;
240 }
241
242 /* ignore ptr[10] */
243
244 if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16))
245 goto out;
246
247 /*
248 * in the current implementation: the optional int-alg is not present
249 * so the default int-alg (md5) is used the optional snd-seq field is
250 * also not present
251 */
252
253 if (*mic_hdrlen != 6 + ctxelen) {
254 dprintk("RPC: BAD SPKM_MIC_TOK header len %d: we only support default int-alg (should be absent) and do not support snd-seq\n", *mic_hdrlen);
255 goto out;
256 }
257 /* checksum */
258 *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */
259
260 ret = GSS_S_COMPLETE;
261out:
262 if (spkm3_ctx_id.data)
263 kfree(spkm3_ctx_id.data);
264 return ret;
265}
266
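
asn1_bitstring_len() above drops trailing zero octets from the 16-byte context id and counts the unused low-order zero bits of the last remaining octet, which becomes the zbit octet of the encoded bitstring. A standalone sketch of the same calculation, with a worked example (bitstring_len() is an illustrative name, not kernel code):

#include <stdio.h>

/* Same trailing-zero trimming as asn1_bitstring_len(): "enclen" is the
 * length with trailing zero octets dropped, "zerobits" the number of
 * low-order zero bits in the last remaining octet. */
static void bitstring_len(const unsigned char *data, int len,
                          int *enclen, int *zerobits)
{
        int elen = len, zbit = 0, i;

        while (elen > 0 && data[elen - 1] == 0)
                elen--;
        if (elen > 0)
                for (i = 0; i < 8; i++) {
                        if ((0x01 << i) & data[elen - 1])
                                break;
                        zbit++;
                }
        *enclen = elen;
        *zerobits = zbit;
}

int main(void)
{
        /* 16-byte context id whose last two octets are zero and whose last
         * non-zero octet is 0x40, i.e. six low-order zero bits */
        unsigned char ctxid[16] = { 0xde, 0xad, 0xbe, 0xef, 1, 2, 3, 4,
                                    5, 6, 7, 8, 9, 0x40, 0, 0 };
        int enclen, zerobits;

        bitstring_len(ctxid, 16, &enclen, &zerobits);
        printf("enclen=%d zerobits=%d\n", enclen, zerobits);    /* 14 and 6 */
        return 0;
}
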
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
new file mode 100644
index 000000000000..65ce81bf0bc4
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -0,0 +1,128 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_unseal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/crypto.h>
41
42#ifdef RPC_DEBUG
43# define RPCDBG_FACILITY RPCDBG_AUTH
44#endif
45
46/*
47 * spkm3_read_token()
48 *
49 * only SPKM_MIC_TOK with md5 intg-alg is supported
50 */
51u32
52spkm3_read_token(struct spkm3_ctx *ctx,
53 struct xdr_netobj *read_token, /* checksum */
54 struct xdr_buf *message_buffer, /* signbuf */
55 int *qop_state, int toktype)
56{
57 s32 code;
58 struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
59 struct xdr_netobj md5cksum = {.len = 0, .data = NULL};
60 unsigned char *ptr = (unsigned char *)read_token->data;
61 unsigned char *cksum;
62 int bodysize, md5elen;
63 int mic_hdrlen;
64 u32 ret = GSS_S_DEFECTIVE_TOKEN;
65
66 dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len);
67
68 if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
69 &bodysize, &ptr, read_token->len))
70 goto out;
71
72 /* decode the token */
73
74 if (toktype == SPKM_MIC_TOK) {
75
76 if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
77 goto out;
78
79 if (*cksum++ != 0x03) {
80 dprintk("RPC: spkm3_read_token BAD checksum type\n");
81 goto out;
82 }
83 md5elen = *cksum++;
84 cksum++; /* move past the zbit */
85
86 if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
87 goto out;
88
89 /* HARD CODED FOR MD5 */
90
91 /* compute the checksum of the message.
92 * ptr + 2 = start of header piece of checksum
93 * mic_hdrlen + 2 = length of header piece of checksum
94 */
95 ret = GSS_S_DEFECTIVE_TOKEN;
96 code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2,
97 mic_hdrlen + 2,
98 message_buffer, &md5cksum);
99
100 if (code)
101 goto out;
102
103 dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n",
104 wire_cksum.len);
105 dprintk(" md5cksum.data\n");
106 print_hexl((u32 *) md5cksum.data, 16, 0);
107 dprintk(" cksum.data:\n");
108 print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0);
109
110 ret = GSS_S_BAD_SIG;
111 code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
112 if (code)
113 goto out;
114
115 } else {
116 dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype);
117 goto out;
118 }
119
120 /* XXX: need to add expiration and sequencing */
121 ret = GSS_S_COMPLETE;
122out:
123 if (md5cksum.data)
124 kfree(md5cksum.data);
125 if (wire_cksum.data)
126 kfree(wire_cksum.data);
127 return ret;
128}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
new file mode 100644
index 000000000000..5c8fe3bfc494
--- /dev/null
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -0,0 +1,1080 @@
1/*
2 * Neil Brown <neilb@cse.unsw.edu.au>
3 * J. Bruce Fields <bfields@umich.edu>
4 * Andy Adamson <andros@umich.edu>
5 * Dug Song <dugsong@monkey.org>
6 *
7 * RPCSEC_GSS server authentication.
8 * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078
9 * (gssapi)
10 *
11 * The RPCSEC_GSS protocol involves three stages:
12 * 1/ context creation
13 * 2/ data exchange
14 * 3/ context destruction
15 *
16 * Context creation is handled largely by upcalls to user-space.
17 * In particular, GSS_Accept_sec_context is handled by an upcall
18 * Data exchange is handled entirely within the kernel
19 * In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel.
20 * Context destruction is handled in-kernel
21 * GSS_Delete_sec_context is in-kernel
22 *
23 * Context creation is initiated by an RPCSEC_GSS_INIT request arriving.
24 * The context handle and gss_token are used as a key into the rpcsec_init cache.
25 * The content of this cache includes some of the outputs of GSS_Accept_sec_context,
26 * being major_status, minor_status, context_handle, reply_token.
27 * These are sent back to the client.
28 * Sequence window management is handled by the kernel. The window size is
29 * currently a compile-time constant.
30 *
31 * When user-space is happy that a context is established, it places an entry
32 * in the rpcsec_context cache. The key for this cache is the context_handle.
33 * The content includes:
34 * uid/gidlist - for determining access rights
35 * mechanism type
36 * mechanism specific information, such as a key
37 *
38 */
39
40#include <linux/types.h>
41#include <linux/module.h>
42#include <linux/pagemap.h>
43
44#include <linux/sunrpc/auth_gss.h>
45#include <linux/sunrpc/svcauth.h>
46#include <linux/sunrpc/gss_err.h>
47#include <linux/sunrpc/svcauth.h>
48#include <linux/sunrpc/svcauth_gss.h>
49#include <linux/sunrpc/cache.h>
50
51#ifdef RPC_DEBUG
52# define RPCDBG_FACILITY RPCDBG_AUTH
53#endif
54
55/* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests
56 * into replies.
57 *
58 * Key is context handle (\x if empty) and gss_token.
59 * Content is major_status minor_status (integers) context_handle, reply_token.
60 *
61 */
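/*
 * Illustrative exchange over this cache's channel file (all values are
 * made up; quoted words are hex-encoded as by qword_addhex/qword_get):
 *
 *	upcall, written by rsi_request():
 *	    \x0a0b0c0d \x6082...
 *	downcall, parsed by rsi_parse():
 *	    \x0a0b0c0d \x6082... 1113078061 0 0 \x01020304 \x6082...
 *	    (in_handle in_token expiry major minor out_handle out_token)
 */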
62
63static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b)
64{
65 return a->len == b->len && 0 == memcmp(a->data, b->data, a->len);
66}
67
68#define RSI_HASHBITS 6
69#define RSI_HASHMAX (1<<RSI_HASHBITS)
70#define RSI_HASHMASK (RSI_HASHMAX-1)
71
72struct rsi {
73 struct cache_head h;
74 struct xdr_netobj in_handle, in_token;
75 struct xdr_netobj out_handle, out_token;
76 int major_status, minor_status;
77};
78
79static struct cache_head *rsi_table[RSI_HASHMAX];
80static struct cache_detail rsi_cache;
81static struct rsi *rsi_lookup(struct rsi *item, int set);
82
83static void rsi_free(struct rsi *rsii)
84{
85 kfree(rsii->in_handle.data);
86 kfree(rsii->in_token.data);
87 kfree(rsii->out_handle.data);
88 kfree(rsii->out_token.data);
89}
90
91static void rsi_put(struct cache_head *item, struct cache_detail *cd)
92{
93 struct rsi *rsii = container_of(item, struct rsi, h);
94 if (cache_put(item, cd)) {
95 rsi_free(rsii);
96 kfree(rsii);
97 }
98}
99
100static inline int rsi_hash(struct rsi *item)
101{
102 return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS)
103 ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS);
104}
105
106static inline int rsi_match(struct rsi *item, struct rsi *tmp)
107{
108 return netobj_equal(&item->in_handle, &tmp->in_handle)
109 && netobj_equal(&item->in_token, &tmp->in_token);
110}
111
112static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len)
113{
114 dst->len = len;
115 dst->data = (len ? kmalloc(len, GFP_KERNEL) : NULL);
116 if (dst->data)
117 memcpy(dst->data, src, len);
118 if (len && !dst->data)
119 return -ENOMEM;
120 return 0;
121}
122
123static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src)
124{
125 return dup_to_netobj(dst, src->data, src->len);
126}
127
128static inline void rsi_init(struct rsi *new, struct rsi *item)
129{
130 new->out_handle.data = NULL;
131 new->out_handle.len = 0;
132 new->out_token.data = NULL;
133 new->out_token.len = 0;
134 new->in_handle.len = item->in_handle.len;
135 item->in_handle.len = 0;
136 new->in_token.len = item->in_token.len;
137 item->in_token.len = 0;
138 new->in_handle.data = item->in_handle.data;
139 item->in_handle.data = NULL;
140 new->in_token.data = item->in_token.data;
141 item->in_token.data = NULL;
142}
143
144static inline void rsi_update(struct rsi *new, struct rsi *item)
145{
146 BUG_ON(new->out_handle.data || new->out_token.data);
147 new->out_handle.len = item->out_handle.len;
148 item->out_handle.len = 0;
149 new->out_token.len = item->out_token.len;
150 item->out_token.len = 0;
151 new->out_handle.data = item->out_handle.data;
152 item->out_handle.data = NULL;
153 new->out_token.data = item->out_token.data;
154 item->out_token.data = NULL;
155
156 new->major_status = item->major_status;
157 new->minor_status = item->minor_status;
158}
159
160static void rsi_request(struct cache_detail *cd,
161 struct cache_head *h,
162 char **bpp, int *blen)
163{
164 struct rsi *rsii = container_of(h, struct rsi, h);
165
166 qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len);
167 qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len);
168 (*bpp)[-1] = '\n';
169}
170
171
172static int rsi_parse(struct cache_detail *cd,
173 char *mesg, int mlen)
174{
175 /* context token expiry major minor context token */
176 char *buf = mesg;
177 char *ep;
178 int len;
179 struct rsi rsii, *rsip = NULL;
180 time_t expiry;
181 int status = -EINVAL;
182
183 memset(&rsii, 0, sizeof(rsii));
184 /* handle */
185 len = qword_get(&mesg, buf, mlen);
186 if (len < 0)
187 goto out;
188 status = -ENOMEM;
189 if (dup_to_netobj(&rsii.in_handle, buf, len))
190 goto out;
191
192 /* token */
193 len = qword_get(&mesg, buf, mlen);
194 status = -EINVAL;
195 if (len < 0)
196 goto out;
197 status = -ENOMEM;
198 if (dup_to_netobj(&rsii.in_token, buf, len))
199 goto out;
200
201 rsii.h.flags = 0;
202 /* expiry */
203 expiry = get_expiry(&mesg);
204 status = -EINVAL;
205 if (expiry == 0)
206 goto out;
207
208 /* major/minor */
209 len = qword_get(&mesg, buf, mlen);
210 if (len < 0)
211 goto out;
212 if (len == 0) {
213 goto out;
214 } else {
215 rsii.major_status = simple_strtoul(buf, &ep, 10);
216 if (*ep)
217 goto out;
218 len = qword_get(&mesg, buf, mlen);
219 if (len <= 0)
220 goto out;
221 rsii.minor_status = simple_strtoul(buf, &ep, 10);
222 if (*ep)
223 goto out;
224
225 /* out_handle */
226 len = qword_get(&mesg, buf, mlen);
227 if (len < 0)
228 goto out;
229 status = -ENOMEM;
230 if (dup_to_netobj(&rsii.out_handle, buf, len))
231 goto out;
232
233 /* out_token */
234 len = qword_get(&mesg, buf, mlen);
235 status = -EINVAL;
236 if (len < 0)
237 goto out;
238 status = -ENOMEM;
239 if (dup_to_netobj(&rsii.out_token, buf, len))
240 goto out;
241 }
242 rsii.h.expiry_time = expiry;
243 rsip = rsi_lookup(&rsii, 1);
244 status = 0;
245out:
246 rsi_free(&rsii);
247 if (rsip)
248 rsi_put(&rsip->h, &rsi_cache);
249 return status;
250}
251
252static struct cache_detail rsi_cache = {
253 .hash_size = RSI_HASHMAX,
254 .hash_table = rsi_table,
255 .name = "auth.rpcsec.init",
256 .cache_put = rsi_put,
257 .cache_request = rsi_request,
258 .cache_parse = rsi_parse,
259};
260
261static DefineSimpleCacheLookup(rsi, 0)
262
263/*
264 * The rpcsec_context cache is used to store a context that is
265 * used in data exchange.
266 * The key is a context handle. The content is:
267 * uid, gidlist, mechanism, service-set, mech-specific-data
268 */
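/*
 * Illustrative downcall line for this cache (values made up; the mech
 * name must match a registered gss_api_mech, e.g. "krb5"):
 *
 *	\x01020304 1113078061 500 500 1 501 krb5 \x6082...
 *	(handle expiry uid gid ngroups gids... mechname mechdata)
 *
 * A line carrying only the handle and expiry appears to create a
 * negative entry (get_int() on the missing uid returns -ENOENT below).
 */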
269
270#define RSC_HASHBITS 10
271#define RSC_HASHMAX (1<<RSC_HASHBITS)
272#define RSC_HASHMASK (RSC_HASHMAX-1)
273
274#define GSS_SEQ_WIN 128
275
276struct gss_svc_seq_data {
277 /* highest seq number seen so far: */
278 int sd_max;
279 /* for i such that sd_max-GSS_SEQ_WIN < i <= sd_max, the i-th bit of
280 * sd_win is nonzero iff sequence number i has been seen already: */
281 unsigned long sd_win[GSS_SEQ_WIN/BITS_PER_LONG];
282 spinlock_t sd_lock;
283};
284
285struct rsc {
286 struct cache_head h;
287 struct xdr_netobj handle;
288 struct svc_cred cred;
289 struct gss_svc_seq_data seqdata;
290 struct gss_ctx *mechctx;
291};
292
293static struct cache_head *rsc_table[RSC_HASHMAX];
294static struct cache_detail rsc_cache;
295static struct rsc *rsc_lookup(struct rsc *item, int set);
296
297static void rsc_free(struct rsc *rsci)
298{
299 kfree(rsci->handle.data);
300 if (rsci->mechctx)
301 gss_delete_sec_context(&rsci->mechctx);
302 if (rsci->cred.cr_group_info)
303 put_group_info(rsci->cred.cr_group_info);
304}
305
306static void rsc_put(struct cache_head *item, struct cache_detail *cd)
307{
308 struct rsc *rsci = container_of(item, struct rsc, h);
309
310 if (cache_put(item, cd)) {
311 rsc_free(rsci);
312 kfree(rsci);
313 }
314}
315
316static inline int
317rsc_hash(struct rsc *rsci)
318{
319 return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS);
320}
321
322static inline int
323rsc_match(struct rsc *new, struct rsc *tmp)
324{
325 return netobj_equal(&new->handle, &tmp->handle);
326}
327
328static inline void
329rsc_init(struct rsc *new, struct rsc *tmp)
330{
331 new->handle.len = tmp->handle.len;
332 tmp->handle.len = 0;
333 new->handle.data = tmp->handle.data;
334 tmp->handle.data = NULL;
335 new->mechctx = NULL;
336 new->cred.cr_group_info = NULL;
337}
338
339static inline void
340rsc_update(struct rsc *new, struct rsc *tmp)
341{
342 new->mechctx = tmp->mechctx;
343 tmp->mechctx = NULL;
344 memset(&new->seqdata, 0, sizeof(new->seqdata));
345 spin_lock_init(&new->seqdata.sd_lock);
346 new->cred = tmp->cred;
347 tmp->cred.cr_group_info = NULL;
348}
349
350static int rsc_parse(struct cache_detail *cd,
351 char *mesg, int mlen)
352{
353 /* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */
354 char *buf = mesg;
355 int len, rv;
356 struct rsc rsci, *rscp = NULL;
357 time_t expiry;
358 int status = -EINVAL;
359
360 memset(&rsci, 0, sizeof(rsci));
361 /* context handle */
362 len = qword_get(&mesg, buf, mlen);
363 if (len < 0) goto out;
364 status = -ENOMEM;
365 if (dup_to_netobj(&rsci.handle, buf, len))
366 goto out;
367
368 rsci.h.flags = 0;
369 /* expiry */
370 expiry = get_expiry(&mesg);
371 status = -EINVAL;
372 if (expiry == 0)
373 goto out;
374
375 /* uid, or NEGATIVE */
376 rv = get_int(&mesg, &rsci.cred.cr_uid);
377 if (rv == -EINVAL)
378 goto out;
379 if (rv == -ENOENT)
380 set_bit(CACHE_NEGATIVE, &rsci.h.flags);
381 else {
382 int N, i;
383 struct gss_api_mech *gm;
384
385 /* gid */
386 if (get_int(&mesg, &rsci.cred.cr_gid))
387 goto out;
388
389 /* number of additional gid's */
390 if (get_int(&mesg, &N))
391 goto out;
392 status = -ENOMEM;
393 rsci.cred.cr_group_info = groups_alloc(N);
394 if (rsci.cred.cr_group_info == NULL)
395 goto out;
396
397 /* gid's */
398 status = -EINVAL;
399 for (i=0; i<N; i++) {
400 gid_t gid;
401 if (get_int(&mesg, &gid))
402 goto out;
403 GROUP_AT(rsci.cred.cr_group_info, i) = gid;
404 }
405
406 /* mech name */
407 len = qword_get(&mesg, buf, mlen);
408 if (len < 0)
409 goto out;
410 gm = gss_mech_get_by_name(buf);
411 status = -EOPNOTSUPP;
412 if (!gm)
413 goto out;
414
415 status = -EINVAL;
416 /* mech-specific data: */
417 len = qword_get(&mesg, buf, mlen);
418 if (len < 0) {
419 gss_mech_put(gm);
420 goto out;
421 }
422 if (gss_import_sec_context(buf, len, gm, &rsci.mechctx)) {
423 gss_mech_put(gm);
424 goto out;
425 }
426 gss_mech_put(gm);
427 }
428 rsci.h.expiry_time = expiry;
429 rscp = rsc_lookup(&rsci, 1);
430 status = 0;
431out:
432 rsc_free(&rsci);
433 if (rscp)
434 rsc_put(&rscp->h, &rsc_cache);
435 return status;
436}
437
438static struct cache_detail rsc_cache = {
439 .hash_size = RSC_HASHMAX,
440 .hash_table = rsc_table,
441 .name = "auth.rpcsec.context",
442 .cache_put = rsc_put,
443 .cache_parse = rsc_parse,
444};
445
446static DefineSimpleCacheLookup(rsc, 0);
447
448static struct rsc *
449gss_svc_searchbyctx(struct xdr_netobj *handle)
450{
451 struct rsc rsci;
452 struct rsc *found;
453
454 memset(&rsci, 0, sizeof(rsci));
455 if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
456 return NULL;
457 found = rsc_lookup(&rsci, 0);
458 rsc_free(&rsci);
459 if (!found)
460 return NULL;
461 if (cache_check(&rsc_cache, &found->h, NULL))
462 return NULL;
463 return found;
464}
465
466/* Implements sequence number algorithm as specified in RFC 2203. */
467static int
468gss_check_seq_num(struct rsc *rsci, int seq_num)
469{
470 struct gss_svc_seq_data *sd = &rsci->seqdata;
471
472 spin_lock(&sd->sd_lock);
473 if (seq_num > sd->sd_max) {
474 if (seq_num >= sd->sd_max + GSS_SEQ_WIN) {
475 memset(sd->sd_win,0,sizeof(sd->sd_win));
476 sd->sd_max = seq_num;
477 } else while (sd->sd_max < seq_num) {
478 sd->sd_max++;
479 __clear_bit(sd->sd_max % GSS_SEQ_WIN, sd->sd_win);
480 }
481 __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win);
482 goto ok;
483 } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) {
484 goto drop;
485 }
486 /* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */
487 if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win))
488 goto drop;
489ok:
490 spin_unlock(&sd->sd_lock);
491 return 1;
492drop:
493 spin_unlock(&sd->sd_lock);
494 return 0;
495}
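/*
 * Worked example (GSS_SEQ_WIN == 128, derived from the code above):
 * with sd_max == 200, seq 150 lies inside the window and its bit is
 * still clear, so it is accepted and marked seen; a later replay of
 * 150 then hits __test_and_set_bit() and is dropped.  Any seq <= 72
 * (sd_max - GSS_SEQ_WIN) is dropped as too old, while seq 400
 * (>= sd_max + GSS_SEQ_WIN) clears the whole window and becomes the
 * new sd_max.
 */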
496
497static inline u32 round_up_to_quad(u32 i)
498{
499 return (i + 3 ) & ~3;
500}
501
502static inline int
503svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o)
504{
505 int l;
506
507 if (argv->iov_len < 4)
508 return -1;
509 o->len = ntohl(svc_getu32(argv));
510 l = round_up_to_quad(o->len);
511 if (argv->iov_len < l)
512 return -1;
513 o->data = argv->iov_base;
514 argv->iov_base += l;
515 argv->iov_len -= l;
516 return 0;
517}
518
519static inline int
520svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
521{
522 u32 *p;
523
524 if (resv->iov_len + 4 > PAGE_SIZE)
525 return -1;
526 svc_putu32(resv, htonl(o->len));
527 p = resv->iov_base + resv->iov_len;
528 resv->iov_len += round_up_to_quad(o->len);
529 if (resv->iov_len > PAGE_SIZE)
530 return -1;
531 memcpy(p, o->data, o->len);
532 memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len);
533 return 0;
534}
535
536/* Verify the checksum on the header and return SVC_OK on success.
537 * Otherwise, return SVC_DROP (in the case of a bad sequence number)
538 * or return SVC_DENIED and indicate error in authp.
539 */
540static int
541gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
542 u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp)
543{
544 struct gss_ctx *ctx_id = rsci->mechctx;
545 struct xdr_buf rpchdr;
546 struct xdr_netobj checksum;
547 u32 flavor = 0;
548 struct kvec *argv = &rqstp->rq_arg.head[0];
549 struct kvec iov;
550
551 /* data to compute the checksum over: */
552 iov.iov_base = rpcstart;
553 iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart;
554 xdr_buf_from_iov(&iov, &rpchdr);
555
556 *authp = rpc_autherr_badverf;
557 if (argv->iov_len < 4)
558 return SVC_DENIED;
559 flavor = ntohl(svc_getu32(argv));
560 if (flavor != RPC_AUTH_GSS)
561 return SVC_DENIED;
562 if (svc_safe_getnetobj(argv, &checksum))
563 return SVC_DENIED;
564
565 if (rqstp->rq_deferred) /* skip verification of revisited request */
566 return SVC_OK;
567 if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL)
568 != GSS_S_COMPLETE) {
569 *authp = rpcsec_gsserr_credproblem;
570 return SVC_DENIED;
571 }
572
573 if (gc->gc_seq > MAXSEQ) {
574 dprintk("RPC: svcauth_gss: discarding request with large sequence number %d\n",
575 gc->gc_seq);
576 *authp = rpcsec_gsserr_ctxproblem;
577 return SVC_DENIED;
578 }
579 if (!gss_check_seq_num(rsci, gc->gc_seq)) {
580 dprintk("RPC: svcauth_gss: discarding request with old sequence number %d\n",
581 gc->gc_seq);
582 return SVC_DROP;
583 }
584 return SVC_OK;
585}
586
587static int
588gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
589{
590 u32 xdr_seq;
591 u32 maj_stat;
592 struct xdr_buf verf_data;
593 struct xdr_netobj mic;
594 u32 *p;
595 struct kvec iov;
596
597 svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS));
598 xdr_seq = htonl(seq);
599
600 iov.iov_base = &xdr_seq;
601 iov.iov_len = sizeof(xdr_seq);
602 xdr_buf_from_iov(&iov, &verf_data);
603 p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
604 mic.data = (u8 *)(p + 1);
605 maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic);
606 if (maj_stat != GSS_S_COMPLETE)
607 return -1;
608 *p++ = htonl(mic.len);
609 memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len);
610 p += XDR_QUADLEN(mic.len);
611 if (!xdr_ressize_check(rqstp, p))
612 return -1;
613 return 0;
614}
615
616struct gss_domain {
617 struct auth_domain h;
618 u32 pseudoflavor;
619};
620
621static struct auth_domain *
622find_gss_auth_domain(struct gss_ctx *ctx, u32 svc)
623{
624 char *name;
625
626 name = gss_service_to_auth_domain_name(ctx->mech_type, svc);
627 if (!name)
628 return NULL;
629 return auth_domain_find(name);
630}
631
632int
633svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
634{
635 struct gss_domain *new;
636 struct auth_domain *test;
637 int stat = -ENOMEM;
638
639 new = kmalloc(sizeof(*new), GFP_KERNEL);
640 if (!new)
641 goto out;
642 cache_init(&new->h.h);
643 new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL);
644 if (!new->h.name)
645 goto out_free_dom;
646 strcpy(new->h.name, name);
647 new->h.flavour = RPC_AUTH_GSS;
648 new->pseudoflavor = pseudoflavor;
649 new->h.h.expiry_time = NEVER;
650
651 test = auth_domain_lookup(&new->h, 1);
652 if (test == &new->h) {
653 BUG_ON(atomic_dec_and_test(&new->h.h.refcnt));
654 } else { /* XXX Duplicate registration? */
655 auth_domain_put(&new->h);
656 goto out;
657 }
658 return 0;
659
660out_free_dom:
661 kfree(new);
662out:
663 return stat;
664}
665
666EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
667
668static inline int
669read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
670{
671 u32 raw;
672 int status;
673
674 status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
675 if (status)
676 return status;
677 *obj = ntohl(raw);
678 return 0;
679}
680
681/* It would be nice if this bit of code could be shared with the client.
682 * Obstacles:
683 * The client shouldn't malloc(), would have to pass in own memory.
684 * The server uses base of head iovec as read pointer, while the
685 * client uses separate pointer. */
686static int
687unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
688{
689 int stat = -EINVAL;
690 u32 integ_len, maj_stat;
691 struct xdr_netobj mic;
692 struct xdr_buf integ_buf;
693
694 integ_len = ntohl(svc_getu32(&buf->head[0]));
695 if (integ_len & 3)
696 goto out;
697 if (integ_len > buf->len)
698 goto out;
699 if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
700 BUG();
701 /* copy out mic... */
702 if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
703 BUG();
704 if (mic.len > RPC_MAX_AUTH_SIZE)
705 goto out;
706 mic.data = kmalloc(mic.len, GFP_KERNEL);
707 if (!mic.data)
708 goto out;
709 if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len))
710 goto out;
711 maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL);
712 if (maj_stat != GSS_S_COMPLETE)
713 goto out;
714 if (ntohl(svc_getu32(&buf->head[0])) != seq)
715 goto out;
716 stat = 0;
717out:
718 return stat;
719}
720
721struct gss_svc_data {
722 /* decoded gss client cred: */
723 struct rpc_gss_wire_cred clcred;
724 /* pointer to the beginning of the procedure-specific results,
725 * which may be encrypted/checksummed in svcauth_gss_release: */
726 u32 *body_start;
727 struct rsc *rsci;
728};
729
730static int
731svcauth_gss_set_client(struct svc_rqst *rqstp)
732{
733 struct gss_svc_data *svcdata = rqstp->rq_auth_data;
734 struct rsc *rsci = svcdata->rsci;
735 struct rpc_gss_wire_cred *gc = &svcdata->clcred;
736
737 rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
738 if (rqstp->rq_client == NULL)
739 return SVC_DENIED;
740 return SVC_OK;
741}
742
743/*
744 * Accept an rpcsec packet.
745 * If context establishment, punt to user space
746 * If data exchange, verify/decrypt
747 * If context destruction, handle here
748 * In the context establishment and destruction case we encode
749 * response here and return SVC_COMPLETE.
750 */
751static int
752svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
753{
754 struct kvec *argv = &rqstp->rq_arg.head[0];
755 struct kvec *resv = &rqstp->rq_res.head[0];
756 u32 crlen;
757 struct xdr_netobj tmpobj;
758 struct gss_svc_data *svcdata = rqstp->rq_auth_data;
759 struct rpc_gss_wire_cred *gc;
760 struct rsc *rsci = NULL;
761 struct rsi *rsip, rsikey;
762 u32 *rpcstart;
763 u32 *reject_stat = resv->iov_base + resv->iov_len;
764 int ret;
765
766 dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",argv->iov_len);
767
768 *authp = rpc_autherr_badcred;
769 if (!svcdata)
770 svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
771 if (!svcdata)
772 goto auth_err;
773 rqstp->rq_auth_data = svcdata;
774 svcdata->body_start = NULL;
775 svcdata->rsci = NULL;
776 gc = &svcdata->clcred;
777
778 /* start of rpc packet is 7 u32's back from here:
779 * xid direction rpcversion prog vers proc flavour
780 */
781 rpcstart = argv->iov_base;
782 rpcstart -= 7;
783
784 /* credential is:
785 * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle
787 * at least 5 u32s, and is preceded by length, so that makes 6.
787 */
788
789 if (argv->iov_len < 5 * 4)
790 goto auth_err;
791 crlen = ntohl(svc_getu32(argv));
792 if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION)
793 goto auth_err;
794 gc->gc_proc = ntohl(svc_getu32(argv));
795 gc->gc_seq = ntohl(svc_getu32(argv));
796 gc->gc_svc = ntohl(svc_getu32(argv));
797 if (svc_safe_getnetobj(argv, &gc->gc_ctx))
798 goto auth_err;
799 if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4)
800 goto auth_err;
801
802 if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
803 goto auth_err;
804
805 /*
806 * We've successfully parsed the credential. Let's check out the
807 * verifier. An AUTH_NULL verifier is allowed (and required) for
808 * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for
809 * PROC_DATA and PROC_DESTROY.
810 *
811 * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length).
812 * AUTH_RPCSEC_GSS verifier is:
813 * 6 (AUTH_RPCSEC_GSS), length, checksum.
814 * checksum is calculated over rpcheader from xid up to here.
815 */
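/*
 * Illustrative DATA-request credential and verifier as parsed by this
 * function (field values are made up; numeric constants per rfc2203):
 *
 *	cred:  len=28  1 (vers)  0 (PROC_DATA)  42 (seq)  1 (SVC_NONE)
 *	       len=8 <8-byte context handle>
 *	verf:  6 (RPC_AUTH_GSS)  len=16 <16-byte MIC over the rpc header>
 *
 * so crlen == 5 * 4 + round_up_to_quad(8) == 28, matching the length
 * check performed above.
 */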
816 *authp = rpc_autherr_badverf;
817 switch (gc->gc_proc) {
818 case RPC_GSS_PROC_INIT:
819 case RPC_GSS_PROC_CONTINUE_INIT:
820 if (argv->iov_len < 2 * 4)
821 goto auth_err;
822 if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL)
823 goto auth_err;
824 if (ntohl(svc_getu32(argv)) != 0)
825 goto auth_err;
826 break;
827 case RPC_GSS_PROC_DATA:
828 case RPC_GSS_PROC_DESTROY:
829 *authp = rpcsec_gsserr_credproblem;
830 rsci = gss_svc_searchbyctx(&gc->gc_ctx);
831 if (!rsci)
832 goto auth_err;
833 switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) {
834 case SVC_OK:
835 break;
836 case SVC_DENIED:
837 goto auth_err;
838 case SVC_DROP:
839 goto drop;
840 }
841 break;
842 default:
843 *authp = rpc_autherr_rejectedcred;
844 goto auth_err;
845 }
846
847 /* now act upon the command: */
848 switch (gc->gc_proc) {
849 case RPC_GSS_PROC_INIT:
850 case RPC_GSS_PROC_CONTINUE_INIT:
851 *authp = rpc_autherr_badcred;
852 if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
853 goto auth_err;
854 memset(&rsikey, 0, sizeof(rsikey));
855 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
856 goto drop;
857 *authp = rpc_autherr_badverf;
858 if (svc_safe_getnetobj(argv, &tmpobj)) {
859 kfree(rsikey.in_handle.data);
860 goto auth_err;
861 }
862 if (dup_netobj(&rsikey.in_token, &tmpobj)) {
863 kfree(rsikey.in_handle.data);
864 goto drop;
865 }
866
867 rsip = rsi_lookup(&rsikey, 0);
868 rsi_free(&rsikey);
869 if (!rsip) {
870 goto drop;
871 }
872 switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
873 case -EAGAIN:
874 goto drop;
875 case -ENOENT:
876 goto drop;
877 case 0:
878 rsci = gss_svc_searchbyctx(&rsip->out_handle);
879 if (!rsci) {
880 goto drop;
881 }
882 if (gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN))
883 goto drop;
884 if (resv->iov_len + 4 > PAGE_SIZE)
885 goto drop;
886 svc_putu32(resv, rpc_success);
887 if (svc_safe_putnetobj(resv, &rsip->out_handle))
888 goto drop;
889 if (resv->iov_len + 3 * 4 > PAGE_SIZE)
890 goto drop;
891 svc_putu32(resv, htonl(rsip->major_status));
892 svc_putu32(resv, htonl(rsip->minor_status));
893 svc_putu32(resv, htonl(GSS_SEQ_WIN));
894 if (svc_safe_putnetobj(resv, &rsip->out_token))
895 goto drop;
896 rqstp->rq_client = NULL;
897 }
898 goto complete;
899 case RPC_GSS_PROC_DESTROY:
900 set_bit(CACHE_NEGATIVE, &rsci->h.flags);
901 if (resv->iov_len + 4 > PAGE_SIZE)
902 goto drop;
903 svc_putu32(resv, rpc_success);
904 goto complete;
905 case RPC_GSS_PROC_DATA:
906 *authp = rpcsec_gsserr_ctxproblem;
907 if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
908 goto auth_err;
909 rqstp->rq_cred = rsci->cred;
910 get_group_info(rsci->cred.cr_group_info);
911 *authp = rpc_autherr_badcred;
912 switch (gc->gc_svc) {
913 case RPC_GSS_SVC_NONE:
914 break;
915 case RPC_GSS_SVC_INTEGRITY:
916 if (unwrap_integ_data(&rqstp->rq_arg,
917 gc->gc_seq, rsci->mechctx))
918 goto auth_err;
919 /* placeholders for length and seq. number: */
920 svcdata->body_start = resv->iov_base + resv->iov_len;
921 svc_putu32(resv, 0);
922 svc_putu32(resv, 0);
923 break;
924 case RPC_GSS_SVC_PRIVACY:
925 /* currently unsupported */
926 default:
927 goto auth_err;
928 }
929 svcdata->rsci = rsci;
930 cache_get(&rsci->h);
931 ret = SVC_OK;
932 goto out;
933 }
934auth_err:
935 /* Restore write pointer to original value: */
936 xdr_ressize_check(rqstp, reject_stat);
937 ret = SVC_DENIED;
938 goto out;
939complete:
940 ret = SVC_COMPLETE;
941 goto out;
942drop:
943 ret = SVC_DROP;
944out:
945 if (rsci)
946 rsc_put(&rsci->h, &rsc_cache);
947 return ret;
948}
949
950static int
951svcauth_gss_release(struct svc_rqst *rqstp)
952{
953 struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
954 struct rpc_gss_wire_cred *gc = &gsd->clcred;
955 struct xdr_buf *resbuf = &rqstp->rq_res;
956 struct xdr_buf integ_buf;
957 struct xdr_netobj mic;
958 struct kvec *resv;
959 u32 *p;
960 int integ_offset, integ_len;
961 int stat = -EINVAL;
962
963 if (gc->gc_proc != RPC_GSS_PROC_DATA)
964 goto out;
965 /* Release can be called twice, but we only wrap once. */
966 if (gsd->body_start == NULL)
967 goto out;
968 /* normally not set till svc_send, but we need it here: */
969 resbuf->len = resbuf->head[0].iov_len
970 + resbuf->page_len + resbuf->tail[0].iov_len;
971 switch (gc->gc_svc) {
972 case RPC_GSS_SVC_NONE:
973 break;
974 case RPC_GSS_SVC_INTEGRITY:
975 p = gsd->body_start;
976 gsd->body_start = NULL;
977 /* move accept_stat to right place: */
978 memcpy(p, p + 2, 4);
979 /* don't wrap in failure case: */
980 /* Note: counting on not getting here if call was not even
981 * accepted! */
982 if (*p != rpc_success) {
983 resbuf->head[0].iov_len -= 2 * 4;
984 goto out;
985 }
986 p++;
987 integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
988 integ_len = resbuf->len - integ_offset;
989 BUG_ON(integ_len % 4);
990 *p++ = htonl(integ_len);
991 *p++ = htonl(gc->gc_seq);
992 if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
993 integ_len))
994 BUG();
995 if (resbuf->page_len == 0
996 && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE
997 < PAGE_SIZE) {
998 BUG_ON(resbuf->tail[0].iov_len);
999 /* Use head for everything */
1000 resv = &resbuf->head[0];
1001 } else if (resbuf->tail[0].iov_base == NULL) {
1002 /* copied from nfsd4_encode_read */
1003 svc_take_page(rqstp);
1004 resbuf->tail[0].iov_base = page_address(rqstp
1005 ->rq_respages[rqstp->rq_resused-1]);
1006 rqstp->rq_restailpage = rqstp->rq_resused-1;
1007 resbuf->tail[0].iov_len = 0;
1008 resv = &resbuf->tail[0];
1009 } else {
1010 resv = &resbuf->tail[0];
1011 }
1012 mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
1013 if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic))
1014 goto out_err;
1015 svc_putu32(resv, htonl(mic.len));
1016 memset(mic.data + mic.len, 0,
1017 round_up_to_quad(mic.len) - mic.len);
1018 resv->iov_len += XDR_QUADLEN(mic.len) << 2;
1019 /* not strictly required: */
1020 resbuf->len += XDR_QUADLEN(mic.len) << 2;
1021 BUG_ON(resv->iov_len > PAGE_SIZE);
1022 break;
1023 case RPC_GSS_SVC_PRIVACY:
1024 default:
1025 goto out_err;
1026 }
1027
1028out:
1029 stat = 0;
1030out_err:
1031 if (rqstp->rq_client)
1032 auth_domain_put(rqstp->rq_client);
1033 rqstp->rq_client = NULL;
1034 if (rqstp->rq_cred.cr_group_info)
1035 put_group_info(rqstp->rq_cred.cr_group_info);
1036 rqstp->rq_cred.cr_group_info = NULL;
1037 if (gsd->rsci)
1038 rsc_put(&gsd->rsci->h, &rsc_cache);
1039 gsd->rsci = NULL;
1040
1041 return stat;
1042}
1043
1044static void
1045svcauth_gss_domain_release(struct auth_domain *dom)
1046{
1047 struct gss_domain *gd = container_of(dom, struct gss_domain, h);
1048
1049 kfree(dom->name);
1050 kfree(gd);
1051}
1052
1053static struct auth_ops svcauthops_gss = {
1054 .name = "rpcsec_gss",
1055 .owner = THIS_MODULE,
1056 .flavour = RPC_AUTH_GSS,
1057 .accept = svcauth_gss_accept,
1058 .release = svcauth_gss_release,
1059 .domain_release = svcauth_gss_domain_release,
1060 .set_client = svcauth_gss_set_client,
1061};
1062
1063int
1064gss_svc_init(void)
1065{
1066 int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
1067 if (rv == 0) {
1068 cache_register(&rsc_cache);
1069 cache_register(&rsi_cache);
1070 }
1071 return rv;
1072}
1073
1074void
1075gss_svc_shutdown(void)
1076{
1077 cache_unregister(&rsc_cache);
1078 cache_unregister(&rsi_cache);
1079 svc_auth_unregister(RPC_AUTH_GSS);
1080}
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
new file mode 100644
index 000000000000..9b72d3abf823
--- /dev/null
+++ b/net/sunrpc/auth_null.c
@@ -0,0 +1,143 @@
1/*
2 * linux/net/sunrpc/auth_null.c
3 *
4 * AUTH_NULL authentication. Really :-)
5 *
6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/types.h>
10#include <linux/socket.h>
11#include <linux/module.h>
12#include <linux/in.h>
13#include <linux/utsname.h>
14#include <linux/sunrpc/clnt.h>
15#include <linux/sched.h>
16
17#ifdef RPC_DEBUG
18# define RPCDBG_FACILITY RPCDBG_AUTH
19#endif
20
21static struct rpc_auth null_auth;
22static struct rpc_cred null_cred;
23
24static struct rpc_auth *
25nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
26{
27 atomic_inc(&null_auth.au_count);
28 return &null_auth;
29}
30
31static void
32nul_destroy(struct rpc_auth *auth)
33{
34}
35
36/*
37 * Lookup NULL creds for current process
38 */
39static struct rpc_cred *
40nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
41{
42 return get_rpccred(&null_cred);
43}
44
45/*
46 * Destroy cred handle.
47 */
48static void
49nul_destroy_cred(struct rpc_cred *cred)
50{
51}
52
53/*
54 * Match cred handle against current process
55 */
56static int
57nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
58{
59 return 1;
60}
61
62/*
63 * Marshal credential.
64 */
65static u32 *
66nul_marshal(struct rpc_task *task, u32 *p)
67{
68 *p++ = htonl(RPC_AUTH_NULL);
69 *p++ = 0;
70 *p++ = htonl(RPC_AUTH_NULL);
71 *p++ = 0;
72
73 return p;
74}
75
76/*
77 * Refresh credential. This is a no-op for AUTH_NULL
78 */
79static int
80nul_refresh(struct rpc_task *task)
81{
82 task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
83 return 0;
84}
85
86static u32 *
87nul_validate(struct rpc_task *task, u32 *p)
88{
89 rpc_authflavor_t flavor;
90 u32 size;
91
92 flavor = ntohl(*p++);
93 if (flavor != RPC_AUTH_NULL) {
94 printk("RPC: bad verf flavor: %u\n", flavor);
95 return NULL;
96 }
97
98 size = ntohl(*p++);
99 if (size != 0) {
100 printk("RPC: bad verf size: %u\n", size);
101 return NULL;
102 }
103
104 return p;
105}
106
107struct rpc_authops authnull_ops = {
108 .owner = THIS_MODULE,
109 .au_flavor = RPC_AUTH_NULL,
110#ifdef RPC_DEBUG
111 .au_name = "NULL",
112#endif
113 .create = nul_create,
114 .destroy = nul_destroy,
115 .lookup_cred = nul_lookup_cred,
116};
117
118static
119struct rpc_auth null_auth = {
120 .au_cslack = 4,
121 .au_rslack = 2,
122 .au_ops = &authnull_ops,
123};
124
125static
126struct rpc_credops null_credops = {
127 .cr_name = "AUTH_NULL",
128 .crdestroy = nul_destroy_cred,
129 .crmatch = nul_match,
130 .crmarshal = nul_marshal,
131 .crrefresh = nul_refresh,
132 .crvalidate = nul_validate,
133};
134
135static
136struct rpc_cred null_cred = {
137 .cr_ops = &null_credops,
138 .cr_count = ATOMIC_INIT(1),
139 .cr_flags = RPCAUTH_CRED_UPTODATE,
140#ifdef RPC_DEBUG
141 .cr_magic = RPCAUTH_CRED_MAGIC,
142#endif
143};
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
new file mode 100644
index 000000000000..4ff297a9b15b
--- /dev/null
+++ b/net/sunrpc/auth_unix.c
@@ -0,0 +1,242 @@
1/*
2 * linux/net/sunrpc/auth_unix.c
3 *
4 * UNIX-style authentication; no AUTH_SHORT support
5 *
6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/types.h>
10#include <linux/sched.h>
11#include <linux/module.h>
12#include <linux/socket.h>
13#include <linux/in.h>
14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/auth.h>
16
17#define NFS_NGROUPS 16
18
19struct unx_cred {
20 struct rpc_cred uc_base;
21 gid_t uc_gid;
22 gid_t uc_gids[NFS_NGROUPS];
23};
24#define uc_uid uc_base.cr_uid
25#define uc_count uc_base.cr_count
26#define uc_flags uc_base.cr_flags
27#define uc_expire uc_base.cr_expire
28
29#define UNX_CRED_EXPIRE (60 * HZ)
30
31#define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2))
32
33#ifdef RPC_DEBUG
34# define RPCDBG_FACILITY RPCDBG_AUTH
35#endif
36
37static struct rpc_auth unix_auth;
38static struct rpc_cred_cache unix_cred_cache;
39static struct rpc_credops unix_credops;
40
41static struct rpc_auth *
42unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
43{
44 dprintk("RPC: creating UNIX authenticator for client %p\n", clnt);
45 if (atomic_inc_return(&unix_auth.au_count) == 0)
46 unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1);
47 return &unix_auth;
48}
49
50static void
51unx_destroy(struct rpc_auth *auth)
52{
53 dprintk("RPC: destroying UNIX authenticator %p\n", auth);
54 rpcauth_free_credcache(auth);
55}
56
57/*
58 * Lookup AUTH_UNIX creds for current process
59 */
60static struct rpc_cred *
61unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
62{
63 return rpcauth_lookup_credcache(auth, acred, flags);
64}
65
66static struct rpc_cred *
67unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
68{
69 struct unx_cred *cred;
70 int i;
71
72 dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
73 acred->uid, acred->gid);
74
75 if (!(cred = (struct unx_cred *) kmalloc(sizeof(*cred), GFP_KERNEL)))
76 return ERR_PTR(-ENOMEM);
77
78 atomic_set(&cred->uc_count, 1);
79 cred->uc_flags = RPCAUTH_CRED_UPTODATE;
80 if (flags & RPC_TASK_ROOTCREDS) {
81 cred->uc_uid = 0;
82 cred->uc_gid = 0;
83 cred->uc_gids[0] = NOGROUP;
84 } else {
85 int groups = acred->group_info->ngroups;
86 if (groups > NFS_NGROUPS)
87 groups = NFS_NGROUPS;
88
89 cred->uc_uid = acred->uid;
90 cred->uc_gid = acred->gid;
91 for (i = 0; i < groups; i++)
92 cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
93 if (i < NFS_NGROUPS)
94 cred->uc_gids[i] = NOGROUP;
95 }
96 cred->uc_base.cr_ops = &unix_credops;
97
98 return (struct rpc_cred *) cred;
99}
100
101static void
102unx_destroy_cred(struct rpc_cred *cred)
103{
104 kfree(cred);
105}
106
107/*
108 * Match credentials against current process creds.
109 * The root_override argument takes care of cases where the caller may
110 * request root creds (e.g. for NFS swapping).
111 */
112static int
113unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int taskflags)
114{
115 struct unx_cred *cred = (struct unx_cred *) rcred;
116 int i;
117
118 if (!(taskflags & RPC_TASK_ROOTCREDS)) {
119 int groups;
120
121 if (cred->uc_uid != acred->uid
122 || cred->uc_gid != acred->gid)
123 return 0;
124
125 groups = acred->group_info->ngroups;
126 if (groups > NFS_NGROUPS)
127 groups = NFS_NGROUPS;
128 for (i = 0; i < groups ; i++)
129 if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i))
130 return 0;
131 return 1;
132 }
133 return (cred->uc_uid == 0
134 && cred->uc_gid == 0
135 && cred->uc_gids[0] == (gid_t) NOGROUP);
136}
137
138/*
139 * Marshal credentials.
140 * Maybe we should keep a cached credential for performance reasons.
141 */
142static u32 *
143unx_marshal(struct rpc_task *task, u32 *p)
144{
145 struct rpc_clnt *clnt = task->tk_client;
146 struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred;
147 u32 *base, *hold;
148 int i;
149
150 *p++ = htonl(RPC_AUTH_UNIX);
151 base = p++;
152 *p++ = htonl(jiffies/HZ);
153
154 /*
155 * Copy the UTS nodename captured when the client was created.
156 */
157 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
158
159 *p++ = htonl((u32) cred->uc_uid);
160 *p++ = htonl((u32) cred->uc_gid);
161 hold = p++;
162 for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
163 *p++ = htonl((u32) cred->uc_gids[i]);
164 *hold = htonl(p - hold - 1); /* gid array length */
165 *base = htonl((p - base - 1) << 2); /* cred length */
166
167 *p++ = htonl(RPC_AUTH_NULL);
168 *p++ = htonl(0);
169
170 return p;
171}
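/*
 * Illustrative result of the marshalling above (values are made up):
 * for uid 500, gid 500, one supplementary gid 501 and the 8-byte
 * nodename "client01" the words are
 *
 *	AUTH_UNIX, body-len=32, stamp,
 *	8, "client01", 500, 500, 1, 501,	(credential body, 8 words)
 *	AUTH_NULL, 0				(null verifier)
 *
 * The value written back through "base" counts everything after the
 * length word itself, in bytes (8 words == 32 here); "hold" receives
 * the number of supplementary gids actually emitted.
 */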
172
173/*
174 * Refresh credentials. This is a no-op for AUTH_UNIX
175 */
176static int
177unx_refresh(struct rpc_task *task)
178{
179 task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
180 return 0;
181}
182
183static u32 *
184unx_validate(struct rpc_task *task, u32 *p)
185{
186 rpc_authflavor_t flavor;
187 u32 size;
188
189 flavor = ntohl(*p++);
190 if (flavor != RPC_AUTH_NULL &&
191 flavor != RPC_AUTH_UNIX &&
192 flavor != RPC_AUTH_SHORT) {
193 printk("RPC: bad verf flavor: %u\n", flavor);
194 return NULL;
195 }
196
197 size = ntohl(*p++);
198 if (size > RPC_MAX_AUTH_SIZE) {
199 printk("RPC: giant verf size: %u\n", size);
200 return NULL;
201 }
202 task->tk_auth->au_rslack = (size >> 2) + 2;
203 p += (size >> 2);
204
205 return p;
206}
207
208struct rpc_authops authunix_ops = {
209 .owner = THIS_MODULE,
210 .au_flavor = RPC_AUTH_UNIX,
211#ifdef RPC_DEBUG
212 .au_name = "UNIX",
213#endif
214 .create = unx_create,
215 .destroy = unx_destroy,
216 .lookup_cred = unx_lookup_cred,
217 .crcreate = unx_create_cred,
218};
219
220static
221struct rpc_cred_cache unix_cred_cache = {
222 .expire = UNX_CRED_EXPIRE,
223};
224
225static
226struct rpc_auth unix_auth = {
227 .au_cslack = UNX_WRITESLACK,
228 .au_rslack = 2, /* assume AUTH_NULL verf */
229 .au_ops = &authunix_ops,
230 .au_count = ATOMIC_INIT(0),
231 .au_credcache = &unix_cred_cache,
232};
233
234static
235struct rpc_credops unix_credops = {
236 .cr_name = "AUTH_UNIX",
237 .crdestroy = unx_destroy_cred,
238 .crmatch = unx_match,
239 .crmarshal = unx_marshal,
240 .crrefresh = unx_refresh,
241 .crvalidate = unx_validate,
242};
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
new file mode 100644
index 000000000000..900f5bc7e336
--- /dev/null
+++ b/net/sunrpc/cache.c
@@ -0,0 +1,1189 @@
1/*
2 * net/sunrpc/cache.c
3 *
4 * Generic code for various authentication-related caches
5 * used by sunrpc clients and servers.
6 *
7 * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
8 *
9 * Released under terms in GPL version 2. See COPYING.
10 *
11 */
12
13#include <linux/types.h>
14#include <linux/fs.h>
15#include <linux/file.h>
16#include <linux/slab.h>
17#include <linux/signal.h>
18#include <linux/sched.h>
19#include <linux/kmod.h>
20#include <linux/list.h>
21#include <linux/module.h>
22#include <linux/ctype.h>
23#include <asm/uaccess.h>
24#include <linux/poll.h>
25#include <linux/seq_file.h>
26#include <linux/proc_fs.h>
27#include <linux/net.h>
28#include <linux/workqueue.h>
29#include <asm/ioctls.h>
30#include <linux/sunrpc/types.h>
31#include <linux/sunrpc/cache.h>
32#include <linux/sunrpc/stats.h>
33
34#define RPCDBG_FACILITY RPCDBG_CACHE
35
36static void cache_defer_req(struct cache_req *req, struct cache_head *item);
37static void cache_revisit_request(struct cache_head *item);
38
39void cache_init(struct cache_head *h)
40{
41 time_t now = get_seconds();
42 h->next = NULL;
43 h->flags = 0;
44 atomic_set(&h->refcnt, 1);
45 h->expiry_time = now + CACHE_NEW_EXPIRY;
46 h->last_refresh = now;
47}
48
49
50static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
51/*
52 * This is the generic cache management routine for all
53 * the authentication caches.
54 * It checks the currency of a cache item and will (later)
55 * initiate an upcall to fill it if needed.
56 *
57 *
58 * Returns 0 if the cache_head can be used; otherwise it cache_puts the
59 * entry and returns -EAGAIN if an upcall is pending, or
60 * -ENOENT if the cache entry was negative
61 */
62int cache_check(struct cache_detail *detail,
63 struct cache_head *h, struct cache_req *rqstp)
64{
65 int rv;
66 long refresh_age, age;
67
68 /* First decide return status as best we can */
69 if (!test_bit(CACHE_VALID, &h->flags) ||
70 h->expiry_time < get_seconds())
71 rv = -EAGAIN;
72 else if (detail->flush_time > h->last_refresh)
73 rv = -EAGAIN;
74 else {
75 /* entry is valid */
76 if (test_bit(CACHE_NEGATIVE, &h->flags))
77 rv = -ENOENT;
78 else rv = 0;
79 }
80
81 /* now see if we want to start an upcall */
82 refresh_age = (h->expiry_time - h->last_refresh);
83 age = get_seconds() - h->last_refresh;
84
85 if (rqstp == NULL) {
86 if (rv == -EAGAIN)
87 rv = -ENOENT;
88 } else if (rv == -EAGAIN || age > refresh_age/2) {
89 dprintk("Want update, refage=%ld, age=%ld\n", refresh_age, age);
90 if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
91 switch (cache_make_upcall(detail, h)) {
92 case -EINVAL:
93 clear_bit(CACHE_PENDING, &h->flags);
94 if (rv == -EAGAIN) {
95 set_bit(CACHE_NEGATIVE, &h->flags);
96 cache_fresh(detail, h, get_seconds()+CACHE_NEW_EXPIRY);
97 rv = -ENOENT;
98 }
99 break;
100
101 case -EAGAIN:
102 clear_bit(CACHE_PENDING, &h->flags);
103 cache_revisit_request(h);
104 break;
105 }
106 }
107 }
108
109 if (rv == -EAGAIN)
110 cache_defer_req(rqstp, h);
111
112 if (rv && h)
113 detail->cache_put(h, detail);
114 return rv;
115}
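/*
 * Hypothetical caller sketch (not part of this file; it mirrors the
 * way svcauth_gss.c uses cache_check() with rqstp->rq_chandle):
 *
 *	item = my_lookup(&key, 0);		// takes a reference
 *	if (!item)
 *		return my_drop_request();
 *	switch (cache_check(&my_cache, &item->h, &rqstp->rq_chandle)) {
 *	case 0:					// valid; we still hold the ref
 *		my_use(item);
 *		my_cache.cache_put(&item->h, &my_cache);
 *		break;
 *	case -EAGAIN:				// upcall pending, request deferred
 *	case -ENOENT:				// negative entry
 *		return my_drop_request();	// reference already dropped
 *	}
 */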
116
117static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
118
119void cache_fresh(struct cache_detail *detail,
120 struct cache_head *head, time_t expiry)
121{
122
123 head->expiry_time = expiry;
124 head->last_refresh = get_seconds();
125 if (!test_and_set_bit(CACHE_VALID, &head->flags))
126 cache_revisit_request(head);
127 if (test_and_clear_bit(CACHE_PENDING, &head->flags))
128 queue_loose(detail, head);
129}
130
131/*
132 * caches need to be periodically cleaned.
133 * For this we maintain a list of cache_detail and
134 * a current pointer into that list and into the table
135 * for that entry.
136 *
137 * Each time clean_cache is called it finds the next non-empty entry
138 * in the current table and walks the list in that entry
139 * looking for entries that can be removed.
140 *
141 * An entry gets removed if:
142 * - The expiry is before current time
143 * - The last_refresh time is before the flush_time for that cache
144 *
145 * later we might drop old entries with non-NEVER expiry if that table
146 * is getting 'full' for some definition of 'full'
147 *
148 * The question of "how often to scan a table" is an interesting one
149 * and is answered in part by the use of the "nextcheck" field in the
150 * cache_detail.
151 * When a scan of a table begins, the nextcheck field is set to a time
152 * that is well into the future.
153 * While scanning, if an expiry time is found that is earlier than the
154 * current nextcheck time, nextcheck is set to that expiry time.
155 * If the flush_time is ever set to a time earlier than the nextcheck
156 * time, the nextcheck time is then set to that flush_time.
157 *
158 * A table is then only scanned if the current time is at least
159 * the nextcheck time.
160 *
161 */
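/*
 * Example (derived from cache_clean() below): when a scan of a table
 * starts at time t, nextcheck is first pushed out to t + 30*60; if the
 * earliest expiry met while walking a bucket is t + 300, nextcheck is
 * pulled back to t + 301, so the table is rescanned then rather than
 * half an hour later.  cache_purge() forces nextcheck back to the
 * current time so that a flush happens promptly.
 */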
162
163static LIST_HEAD(cache_list);
164static DEFINE_SPINLOCK(cache_list_lock);
165static struct cache_detail *current_detail;
166static int current_index;
167
168static struct file_operations cache_file_operations;
169static struct file_operations content_file_operations;
170static struct file_operations cache_flush_operations;
171
172static void do_cache_clean(void *data);
173static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL);
174
175void cache_register(struct cache_detail *cd)
176{
177 cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc);
178 if (cd->proc_ent) {
179 struct proc_dir_entry *p;
180 cd->proc_ent->owner = THIS_MODULE;
181 cd->channel_ent = cd->content_ent = NULL;
182
183 p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR,
184 cd->proc_ent);
185 cd->flush_ent = p;
186 if (p) {
187 p->proc_fops = &cache_flush_operations;
188 p->owner = THIS_MODULE;
189 p->data = cd;
190 }
191
192 if (cd->cache_request || cd->cache_parse) {
193 p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR,
194 cd->proc_ent);
195 cd->channel_ent = p;
196 if (p) {
197 p->proc_fops = &cache_file_operations;
198 p->owner = THIS_MODULE;
199 p->data = cd;
200 }
201 }
202 if (cd->cache_show) {
203 p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR,
204 cd->proc_ent);
205 cd->content_ent = p;
206 if (p) {
207 p->proc_fops = &content_file_operations;
208 p->owner = THIS_MODULE;
209 p->data = cd;
210 }
211 }
212 }
213 rwlock_init(&cd->hash_lock);
214 INIT_LIST_HEAD(&cd->queue);
215 spin_lock(&cache_list_lock);
216 cd->nextcheck = 0;
217 cd->entries = 0;
218 atomic_set(&cd->readers, 0);
219 cd->last_close = 0;
220 cd->last_warn = -1;
221 list_add(&cd->others, &cache_list);
222 spin_unlock(&cache_list_lock);
223
224 /* start the cleaning process */
225 schedule_work(&cache_cleaner);
226}
227
228int cache_unregister(struct cache_detail *cd)
229{
230 cache_purge(cd);
231 spin_lock(&cache_list_lock);
232 write_lock(&cd->hash_lock);
233 if (cd->entries || atomic_read(&cd->inuse)) {
234 write_unlock(&cd->hash_lock);
235 spin_unlock(&cache_list_lock);
236 return -EBUSY;
237 }
238 if (current_detail == cd)
239 current_detail = NULL;
240 list_del_init(&cd->others);
241 write_unlock(&cd->hash_lock);
242 spin_unlock(&cache_list_lock);
243 if (cd->proc_ent) {
244 if (cd->flush_ent)
245 remove_proc_entry("flush", cd->proc_ent);
246 if (cd->channel_ent)
247 remove_proc_entry("channel", cd->proc_ent);
248 if (cd->content_ent)
249 remove_proc_entry("content", cd->proc_ent);
250
251 cd->proc_ent = NULL;
252 remove_proc_entry(cd->name, proc_net_rpc);
253 }
254 if (list_empty(&cache_list)) {
255 /* module must be being unloaded, so it's safe to kill the worker */
256 cancel_delayed_work(&cache_cleaner);
257 flush_scheduled_work();
258 }
259 return 0;
260}
261
262/* clean cache tries to find something to clean
263 * and cleans it.
264 * It returns 1 if it cleaned something,
265 * 0 if it didn't find anything this time
266 * -1 if it fell off the end of the list.
267 */
268static int cache_clean(void)
269{
270 int rv = 0;
271 struct list_head *next;
272
273 spin_lock(&cache_list_lock);
274
275 /* find a suitable table if we don't already have one */
276 while (current_detail == NULL ||
277 current_index >= current_detail->hash_size) {
278 if (current_detail)
279 next = current_detail->others.next;
280 else
281 next = cache_list.next;
282 if (next == &cache_list) {
283 current_detail = NULL;
284 spin_unlock(&cache_list_lock);
285 return -1;
286 }
287 current_detail = list_entry(next, struct cache_detail, others);
288 if (current_detail->nextcheck > get_seconds())
289 current_index = current_detail->hash_size;
290 else {
291 current_index = 0;
292 current_detail->nextcheck = get_seconds()+30*60;
293 }
294 }
295
296 /* find a non-empty bucket in the table */
297 while (current_detail &&
298 current_index < current_detail->hash_size &&
299 current_detail->hash_table[current_index] == NULL)
300 current_index++;
301
302 /* find a cleanable entry in the bucket and clean it, or set to next bucket */
303
304 if (current_detail && current_index < current_detail->hash_size) {
305 struct cache_head *ch, **cp;
306 struct cache_detail *d;
307
308 write_lock(&current_detail->hash_lock);
309
310 /* Ok, now to clean this strand */
311
312 cp = & current_detail->hash_table[current_index];
313 ch = *cp;
314 for (; ch; cp= & ch->next, ch= *cp) {
315 if (current_detail->nextcheck > ch->expiry_time)
316 current_detail->nextcheck = ch->expiry_time+1;
317 if (ch->expiry_time >= get_seconds()
318 && ch->last_refresh >= current_detail->flush_time
319 )
320 continue;
321 if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
322 queue_loose(current_detail, ch);
323
324 if (atomic_read(&ch->refcnt) == 1)
325 break;
326 }
327 if (ch) {
328 *cp = ch->next;
329 ch->next = NULL;
330 current_detail->entries--;
331 rv = 1;
332 }
333 write_unlock(&current_detail->hash_lock);
334 d = current_detail;
335 if (!ch)
336 current_index ++;
337 spin_unlock(&cache_list_lock);
338 if (ch)
339 d->cache_put(ch, d);
340 } else
341 spin_unlock(&cache_list_lock);
342
343 return rv;
344}
345
346/*
347 * We want to regularly clean the cache, so we need to schedule some work ...
348 */
349static void do_cache_clean(void *data)
350{
351 int delay = 5;
352 if (cache_clean() == -1)
353 delay = 30*HZ;
354
355 if (list_empty(&cache_list))
356 delay = 0;
357
358 if (delay)
359 schedule_delayed_work(&cache_cleaner, delay);
360}
361
362
363/*
364 * Clean all caches promptly. This just calls cache_clean
365 * repeatedly until we are sure that every cache has had a chance to
366 * be fully cleaned
367 */
368void cache_flush(void)
369{
370 while (cache_clean() != -1)
371 cond_resched();
372 while (cache_clean() != -1)
373 cond_resched();
374}
375
376void cache_purge(struct cache_detail *detail)
377{
378 detail->flush_time = LONG_MAX;
379 detail->nextcheck = get_seconds();
380 cache_flush();
381 detail->flush_time = 1;
382}
383
384
385
386/*
387 * Deferral and Revisiting of Requests.
388 *
389 * If a cache lookup finds a pending entry, we
390 * need to defer the request and revisit it later.
391 * All deferred requests are stored in a hash table,
392 * indexed by "struct cache_head *".
393 * As it may be wasteful to store a whole request
394 * structure, we allow the request to provide a
395 * deferred form, which must contain a
396 * 'struct cache_deferred_req'
397 * This cache_deferred_req contains a method to allow
398 * it to be revisited when cache info is available
399 */
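/*
 * Hypothetical provider of the deferred form (a sketch only; the names
 * below are made up and not part of this file; the owner of the request
 * sets creq.defer = my_defer and dreq.owner before use):
 *
 *	struct my_pending_req {
 *		struct cache_req		creq;	// handed to cache_check()
 *		struct cache_deferred_req	dreq;	// returned from ->defer()
 *		// ...state needed to retry the request later...
 *	};
 *
 *	static void my_revisit(struct cache_deferred_req *dreq, int too_many)
 *	{
 *		struct my_pending_req *r =
 *			container_of(dreq, struct my_pending_req, dreq);
 *		// requeue r for processing, or discard it if too_many is set
 *	}
 *
 *	static struct cache_deferred_req *my_defer(struct cache_req *creq)
 *	{
 *		struct my_pending_req *r =
 *			container_of(creq, struct my_pending_req, creq);
 *		r->dreq.revisit = my_revisit;
 *		return &r->dreq;	// or NULL if it cannot be deferred
 *	}
 *
 * cache_defer_req() below fills in dreq->item and dreq->recv_time and
 * later invokes dreq->revisit() - with too_many == 0 once the entry
 * becomes valid, or with too_many == 1 if the deferred request is
 * dropped to make room.
 */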
400
401#define DFR_HASHSIZE (PAGE_SIZE/sizeof(struct list_head))
402#define DFR_HASH(item) ((((long)item)>>4 ^ (((long)item)>>13)) % DFR_HASHSIZE)
403
404#define DFR_MAX 300 /* ??? */
405
406static DEFINE_SPINLOCK(cache_defer_lock);
407static LIST_HEAD(cache_defer_list);
408static struct list_head cache_defer_hash[DFR_HASHSIZE];
409static int cache_defer_cnt;
410
411static void cache_defer_req(struct cache_req *req, struct cache_head *item)
412{
413 struct cache_deferred_req *dreq;
414 int hash = DFR_HASH(item);
415
416 dreq = req->defer(req);
417 if (dreq == NULL)
418 return;
419
420 dreq->item = item;
421 dreq->recv_time = get_seconds();
422
423 spin_lock(&cache_defer_lock);
424
425 list_add(&dreq->recent, &cache_defer_list);
426
427 if (cache_defer_hash[hash].next == NULL)
428 INIT_LIST_HEAD(&cache_defer_hash[hash]);
429 list_add(&dreq->hash, &cache_defer_hash[hash]);
430
431 /* it is in, now maybe clean up */
432 dreq = NULL;
433 if (++cache_defer_cnt > DFR_MAX) {
434 /* too much in the cache, randomly drop
435 * first or last
436 */
437 if (net_random()&1)
438 dreq = list_entry(cache_defer_list.next,
439 struct cache_deferred_req,
440 recent);
441 else
442 dreq = list_entry(cache_defer_list.prev,
443 struct cache_deferred_req,
444 recent);
445 list_del(&dreq->recent);
446 list_del(&dreq->hash);
447 cache_defer_cnt--;
448 }
449 spin_unlock(&cache_defer_lock);
450
451 if (dreq) {
452 /* there was one too many */
453 dreq->revisit(dreq, 1);
454 }
455 if (test_bit(CACHE_VALID, &item->flags)) {
456 /* must have just been validated... */
457 cache_revisit_request(item);
458 }
459}
460
461static void cache_revisit_request(struct cache_head *item)
462{
463 struct cache_deferred_req *dreq;
464 struct list_head pending;
465
466 struct list_head *lp;
467 int hash = DFR_HASH(item);
468
469 INIT_LIST_HEAD(&pending);
470 spin_lock(&cache_defer_lock);
471
472 lp = cache_defer_hash[hash].next;
473 if (lp) {
474 while (lp != &cache_defer_hash[hash]) {
475 dreq = list_entry(lp, struct cache_deferred_req, hash);
476 lp = lp->next;
477 if (dreq->item == item) {
478 list_del(&dreq->hash);
479 list_move(&dreq->recent, &pending);
480 cache_defer_cnt--;
481 }
482 }
483 }
484 spin_unlock(&cache_defer_lock);
485
486 while (!list_empty(&pending)) {
487 dreq = list_entry(pending.next, struct cache_deferred_req, recent);
488 list_del_init(&dreq->recent);
489 dreq->revisit(dreq, 0);
490 }
491}
492
493void cache_clean_deferred(void *owner)
494{
495 struct cache_deferred_req *dreq, *tmp;
496 struct list_head pending;
497
498
499 INIT_LIST_HEAD(&pending);
500 spin_lock(&cache_defer_lock);
501
502 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
503 if (dreq->owner == owner) {
504 list_del(&dreq->hash);
505 list_move(&dreq->recent, &pending);
506 cache_defer_cnt--;
507 }
508 }
509 spin_unlock(&cache_defer_lock);
510
511 while (!list_empty(&pending)) {
512 dreq = list_entry(pending.next, struct cache_deferred_req, recent);
513 list_del_init(&dreq->recent);
514 dreq->revisit(dreq, 1);
515 }
516}
517
518/*
519 * communicate with user-space
520 *
521 * We have a magic /proc file - /proc/sunrpc/cache
522 * On read, you get a full request, or block
523 * On write, an update request is processed
524 * Poll works if anything to read, and always allows write
525 *
526 * Implemented by linked list of requests. Each open file has
527 * a ->private that also exists in this list. New requests are added
528 * to the end and may wake up any preceding readers.
529 * New readers are added to the head. If, on read, an item is found with
530 * CACHE_UPCALLING clear, we free it from the list.
531 *
532 */
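By way of illustration only (the channel path and the line formats are specific to each cache and not defined here), a user-space helper typically sits in a loop reading one request per read() and answering with a single write():

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[8192];
	const char *reply = "key 1100000000 content\n";	/* format is up to cache_parse */
	ssize_t n;
	int fd;

	fd = open("/proc/sunrpc/cache", O_RDWR);	/* hypothetical channel file */
	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {	/* one whole request */
		buf[n] = '\0';
		/* ... parse buf, look up the answer ... */
		if (write(fd, reply, strlen(reply)) < 0)
			perror("write");
	}
	close(fd);
	return 0;
}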
533
534static DEFINE_SPINLOCK(queue_lock);
535static DECLARE_MUTEX(queue_io_sem);
536
537struct cache_queue {
538 struct list_head list;
539 int reader; /* if 0, then request */
540};
541struct cache_request {
542 struct cache_queue q;
543 struct cache_head *item;
544 char * buf;
545 int len;
546 int readers;
547};
548struct cache_reader {
549 struct cache_queue q;
550 int offset; /* if non-0, we have a refcnt on next request */
551};
552
553static ssize_t
554cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
555{
556 struct cache_reader *rp = filp->private_data;
557 struct cache_request *rq;
558 struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
559 int err;
560
561 if (count == 0)
562 return 0;
563
564 down(&queue_io_sem); /* protect against multiple concurrent
565 * readers on this file */
566 again:
567 spin_lock(&queue_lock);
568 /* need to find next request */
569 while (rp->q.list.next != &cd->queue &&
570 list_entry(rp->q.list.next, struct cache_queue, list)
571 ->reader) {
572 struct list_head *next = rp->q.list.next;
573 list_move(&rp->q.list, next);
574 }
575 if (rp->q.list.next == &cd->queue) {
576 spin_unlock(&queue_lock);
577 up(&queue_io_sem);
578 if (rp->offset)
579 BUG();
580 return 0;
581 }
582 rq = container_of(rp->q.list.next, struct cache_request, q.list);
583 if (rq->q.reader) BUG();
584 if (rp->offset == 0)
585 rq->readers++;
586 spin_unlock(&queue_lock);
587
588 if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
589 err = -EAGAIN;
590 spin_lock(&queue_lock);
591 list_move(&rp->q.list, &rq->q.list);
592 spin_unlock(&queue_lock);
593 } else {
594 if (rp->offset + count > rq->len)
595 count = rq->len - rp->offset;
596 err = -EFAULT;
597 if (copy_to_user(buf, rq->buf + rp->offset, count))
598 goto out;
599 rp->offset += count;
600 if (rp->offset >= rq->len) {
601 rp->offset = 0;
602 spin_lock(&queue_lock);
603 list_move(&rp->q.list, &rq->q.list);
604 spin_unlock(&queue_lock);
605 }
606 err = 0;
607 }
608 out:
609 if (rp->offset == 0) {
610 /* need to release rq */
611 spin_lock(&queue_lock);
612 rq->readers--;
613 if (rq->readers == 0 &&
614 !test_bit(CACHE_PENDING, &rq->item->flags)) {
615 list_del(&rq->q.list);
616 spin_unlock(&queue_lock);
617 cd->cache_put(rq->item, cd);
618 kfree(rq->buf);
619 kfree(rq);
620 } else
621 spin_unlock(&queue_lock);
622 }
623 if (err == -EAGAIN)
624 goto again;
625 up(&queue_io_sem);
626 return err ? err : count;
627}
628
629static char write_buf[8192]; /* protected by queue_io_sem */
630
631static ssize_t
632cache_write(struct file *filp, const char __user *buf, size_t count,
633 loff_t *ppos)
634{
635 int err;
636 struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
637
638 if (count == 0)
639 return 0;
640 if (count >= sizeof(write_buf))
641 return -EINVAL;
642
643 down(&queue_io_sem);
644
645 if (copy_from_user(write_buf, buf, count)) {
646 up(&queue_io_sem);
647 return -EFAULT;
648 }
649 write_buf[count] = '\0';
650 if (cd->cache_parse)
651 err = cd->cache_parse(cd, write_buf, count);
652 else
653 err = -EINVAL;
654
655 up(&queue_io_sem);
656 return err ? err : count;
657}
658
659static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
660
661static unsigned int
662cache_poll(struct file *filp, poll_table *wait)
663{
664 unsigned int mask;
665 struct cache_reader *rp = filp->private_data;
666 struct cache_queue *cq;
667 struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
668
669 poll_wait(filp, &queue_wait, wait);
670
671	/* always allow write */
672	mask = POLLOUT | POLLWRNORM;
673
674 if (!rp)
675 return mask;
676
677 spin_lock(&queue_lock);
678
679 for (cq= &rp->q; &cq->list != &cd->queue;
680 cq = list_entry(cq->list.next, struct cache_queue, list))
681 if (!cq->reader) {
682 mask |= POLLIN | POLLRDNORM;
683 break;
684 }
685 spin_unlock(&queue_lock);
686 return mask;
687}
688
689static int
690cache_ioctl(struct inode *ino, struct file *filp,
691 unsigned int cmd, unsigned long arg)
692{
693 int len = 0;
694 struct cache_reader *rp = filp->private_data;
695 struct cache_queue *cq;
696 struct cache_detail *cd = PDE(ino)->data;
697
698 if (cmd != FIONREAD || !rp)
699 return -EINVAL;
700
701 spin_lock(&queue_lock);
702
703 /* only find the length remaining in current request,
704 * or the length of the next request
705 */
706 for (cq= &rp->q; &cq->list != &cd->queue;
707 cq = list_entry(cq->list.next, struct cache_queue, list))
708 if (!cq->reader) {
709 struct cache_request *cr =
710 container_of(cq, struct cache_request, q);
711 len = cr->len - rp->offset;
712 break;
713 }
714 spin_unlock(&queue_lock);
715
716 return put_user(len, (int __user *)arg);
717}
718
719static int
720cache_open(struct inode *inode, struct file *filp)
721{
722 struct cache_reader *rp = NULL;
723
724 nonseekable_open(inode, filp);
725 if (filp->f_mode & FMODE_READ) {
726 struct cache_detail *cd = PDE(inode)->data;
727
728 rp = kmalloc(sizeof(*rp), GFP_KERNEL);
729 if (!rp)
730 return -ENOMEM;
731 rp->offset = 0;
732 rp->q.reader = 1;
733 atomic_inc(&cd->readers);
734 spin_lock(&queue_lock);
735 list_add(&rp->q.list, &cd->queue);
736 spin_unlock(&queue_lock);
737 }
738 filp->private_data = rp;
739 return 0;
740}
741
742static int
743cache_release(struct inode *inode, struct file *filp)
744{
745 struct cache_reader *rp = filp->private_data;
746 struct cache_detail *cd = PDE(inode)->data;
747
748 if (rp) {
749 spin_lock(&queue_lock);
750 if (rp->offset) {
751 struct cache_queue *cq;
752 for (cq= &rp->q; &cq->list != &cd->queue;
753 cq = list_entry(cq->list.next, struct cache_queue, list))
754 if (!cq->reader) {
755 container_of(cq, struct cache_request, q)
756 ->readers--;
757 break;
758 }
759 rp->offset = 0;
760 }
761 list_del(&rp->q.list);
762 spin_unlock(&queue_lock);
763
764 filp->private_data = NULL;
765 kfree(rp);
766
767 cd->last_close = get_seconds();
768 atomic_dec(&cd->readers);
769 }
770 return 0;
771}
772
773
774
775static struct file_operations cache_file_operations = {
776 .owner = THIS_MODULE,
777 .llseek = no_llseek,
778 .read = cache_read,
779 .write = cache_write,
780 .poll = cache_poll,
781 .ioctl = cache_ioctl, /* for FIONREAD */
782 .open = cache_open,
783 .release = cache_release,
784};
785
786
787static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
788{
789 struct cache_queue *cq;
790 spin_lock(&queue_lock);
791 list_for_each_entry(cq, &detail->queue, list)
792 if (!cq->reader) {
793 struct cache_request *cr = container_of(cq, struct cache_request, q);
794 if (cr->item != ch)
795 continue;
796 if (cr->readers != 0)
797 break;
798 list_del(&cr->q.list);
799 spin_unlock(&queue_lock);
800 detail->cache_put(cr->item, detail);
801 kfree(cr->buf);
802 kfree(cr);
803 return;
804 }
805 spin_unlock(&queue_lock);
806}
807
808/*
809 * Support routines for text-based upcalls.
810 * Fields are separated by spaces.
811 * Fields are either mangled to quote space, tab, newline and slosh with a slosh,
812 * or hexified with a leading \x
813 * Record is terminated with newline.
814 *
815 */
816
817void qword_add(char **bpp, int *lp, char *str)
818{
819 char *bp = *bpp;
820 int len = *lp;
821 char c;
822
823 if (len < 0) return;
824
825 while ((c=*str++) && len)
826 switch(c) {
827 case ' ':
828 case '\t':
829 case '\n':
830 case '\\':
831 if (len >= 4) {
832 *bp++ = '\\';
833 *bp++ = '0' + ((c & 0300)>>6);
834 *bp++ = '0' + ((c & 0070)>>3);
835 *bp++ = '0' + ((c & 0007)>>0);
836 }
837 len -= 4;
838 break;
839 default:
840 *bp++ = c;
841 len--;
842 }
843 if (c || len <1) len = -1;
844 else {
845 *bp++ = ' ';
846 len--;
847 }
848 *bpp = bp;
849 *lp = len;
850}
851
852void qword_addhex(char **bpp, int *lp, char *buf, int blen)
853{
854 char *bp = *bpp;
855 int len = *lp;
856
857 if (len < 0) return;
858
859 if (len > 2) {
860 *bp++ = '\\';
861 *bp++ = 'x';
862 len -= 2;
863 while (blen && len >= 2) {
864 unsigned char c = *buf++;
865 *bp++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
866 *bp++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
867 len -= 2;
868 blen--;
869 }
870 }
871 if (blen || len<1) len = -1;
872 else {
873 *bp++ = ' ';
874 len--;
875 }
876 *bpp = bp;
877 *lp = len;
878}
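A worked example of the quoting rules above (illustrative fragment; the buffer and cookie are made up): a key containing a space comes out octal-quoted, binary data comes out hexified, and each field is followed by a separating space.

	char line[128], *bp = line;
	int len = sizeof(line);
	unsigned char cookie[4] = { 0xde, 0xad, 0xbe, 0xef };

	qword_add(&bp, &len, "net tools");		/* appends "net\040tools " */
	qword_addhex(&bp, &len, (char *)cookie, 4);	/* appends "\xdeadbeef " */
	if (len > 0) {
		*bp++ = '\n';				/* terminate the record */
		len--;
	}
	/* a negative len after any call means the line overflowed */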
879
880static void warn_no_listener(struct cache_detail *detail)
881{
882 if (detail->last_warn != detail->last_close) {
883 detail->last_warn = detail->last_close;
884 if (detail->warn_no_listener)
885 detail->warn_no_listener(detail);
886 }
887}
888
889/*
890 * register an upcall request to user-space.
891 * Each request is at most one page long.
892 */
893static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h)
894{
895
896 char *buf;
897 struct cache_request *crq;
898 char *bp;
899 int len;
900
901 if (detail->cache_request == NULL)
902 return -EINVAL;
903
904 if (atomic_read(&detail->readers) == 0 &&
905 detail->last_close < get_seconds() - 30) {
906 warn_no_listener(detail);
907 return -EINVAL;
908 }
909
910 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
911 if (!buf)
912 return -EAGAIN;
913
914 crq = kmalloc(sizeof (*crq), GFP_KERNEL);
915 if (!crq) {
916 kfree(buf);
917 return -EAGAIN;
918 }
919
920 bp = buf; len = PAGE_SIZE;
921
922 detail->cache_request(detail, h, &bp, &len);
923
924 if (len < 0) {
925 kfree(buf);
926 kfree(crq);
927 return -EAGAIN;
928 }
929 crq->q.reader = 0;
930 crq->item = cache_get(h);
931 crq->buf = buf;
932 crq->len = PAGE_SIZE - len;
933 crq->readers = 0;
934 spin_lock(&queue_lock);
935 list_add_tail(&crq->q.list, &detail->queue);
936 spin_unlock(&queue_lock);
937 wake_up(&queue_wait);
938 return 0;
939}
940
941/*
942 * parse a message from user-space and pass it
943 * to an appropriate cache
944 * Messages are, like requests, separated into fields by
945 * spaces and dequoted from \xHEXSTRING or embedded \nnn octal
946 *
947 * Message is
948 * reply cachename expiry key ... content....
949 *
950 * key and content are both parsed by cache
951 */
952
953#define isodigit(c) (isdigit(c) && c <= '7')
954int qword_get(char **bpp, char *dest, int bufsize)
955{
956 /* return bytes copied, or -1 on error */
957 char *bp = *bpp;
958 int len = 0;
959
960 while (*bp == ' ') bp++;
961
962 if (bp[0] == '\\' && bp[1] == 'x') {
963 /* HEX STRING */
964 bp += 2;
965 while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) {
966 int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
967 bp++;
968 byte <<= 4;
969 byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
970 *dest++ = byte;
971 bp++;
972 len++;
973 }
974 } else {
975 /* text with \nnn octal quoting */
976 while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) {
977 if (*bp == '\\' &&
978 isodigit(bp[1]) && (bp[1] <= '3') &&
979 isodigit(bp[2]) &&
980 isodigit(bp[3])) {
981 int byte = (*++bp -'0');
982 bp++;
983 byte = (byte << 3) | (*bp++ - '0');
984 byte = (byte << 3) | (*bp++ - '0');
985 *dest++ = byte;
986 len++;
987 } else {
988 *dest++ = *bp++;
989 len++;
990 }
991 }
992 }
993
994 if (*bp != ' ' && *bp != '\n' && *bp != '\0')
995 return -1;
996 while (*bp == ' ') bp++;
997 *bpp = bp;
998 *dest = '\0';
999 return len;
1000}
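And the matching decode direction, again as a sketch with error checks omitted: pulling the two fields of the example line back out. qword_get() returns the number of bytes copied into dest (or -1 on a malformed field) and leaves *bpp pointing past the field's trailing separator.

	char *bp = line;	/* e.g. "net\040tools \xdeadbeef \n" from the earlier sketch */
	char name[32];
	char cookie[32];
	int n;

	n = qword_get(&bp, name, sizeof(name));		/* n == 9, name == "net tools" */
	n = qword_get(&bp, cookie, sizeof(cookie));	/* n == 4, raw bytes de ad be ef */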
1001
1002
1003/*
1004 * support /proc/sunrpc/cache/$CACHENAME/content
1005 * as a seqfile.
1006 * We call ->cache_show passing NULL for the item to
1007 * get a header, then pass each real item in the cache
1008 */
1009
1010struct handle {
1011 struct cache_detail *cd;
1012};
1013
1014static void *c_start(struct seq_file *m, loff_t *pos)
1015{
1016 loff_t n = *pos;
1017 unsigned hash, entry;
1018 struct cache_head *ch;
1019 struct cache_detail *cd = ((struct handle*)m->private)->cd;
1020
1021
1022 read_lock(&cd->hash_lock);
1023 if (!n--)
1024 return SEQ_START_TOKEN;
1025 hash = n >> 32;
1026 entry = n & ((1LL<<32) - 1);
1027
1028 for (ch=cd->hash_table[hash]; ch; ch=ch->next)
1029 if (!entry--)
1030 return ch;
1031 n &= ~((1LL<<32) - 1);
1032 do {
1033 hash++;
1034 n += 1LL<<32;
1035 } while(hash < cd->hash_size &&
1036 cd->hash_table[hash]==NULL);
1037 if (hash >= cd->hash_size)
1038 return NULL;
1039 *pos = n+1;
1040 return cd->hash_table[hash];
1041}
1042
1043static void *c_next(struct seq_file *m, void *p, loff_t *pos)
1044{
1045 struct cache_head *ch = p;
1046 int hash = (*pos >> 32);
1047 struct cache_detail *cd = ((struct handle*)m->private)->cd;
1048
1049 if (p == SEQ_START_TOKEN)
1050 hash = 0;
1051 else if (ch->next == NULL) {
1052 hash++;
1053 *pos += 1LL<<32;
1054 } else {
1055 ++*pos;
1056 return ch->next;
1057 }
1058 *pos &= ~((1LL<<32) - 1);
1059 while (hash < cd->hash_size &&
1060 cd->hash_table[hash] == NULL) {
1061 hash++;
1062 *pos += 1LL<<32;
1063 }
1064 if (hash >= cd->hash_size)
1065 return NULL;
1066 ++*pos;
1067 return cd->hash_table[hash];
1068}
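A note on the iterator above: the 64-bit seq_file position packs the hash bucket into its upper 32 bits and the index within that bucket's chain into its low 32 bits, with position 0 reserved for the header token. Schematically (not literal code from this file):

	/* schematic: how the iterator splits the seq_file position */
	unsigned hash  = *pos >> 32;			/* hash bucket to resume in */
	unsigned entry = *pos & ((1LL << 32) - 1);	/* entry index within that chain */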
1069
1070static void c_stop(struct seq_file *m, void *p)
1071{
1072 struct cache_detail *cd = ((struct handle*)m->private)->cd;
1073 read_unlock(&cd->hash_lock);
1074}
1075
1076static int c_show(struct seq_file *m, void *p)
1077{
1078 struct cache_head *cp = p;
1079 struct cache_detail *cd = ((struct handle*)m->private)->cd;
1080
1081 if (p == SEQ_START_TOKEN)
1082 return cd->cache_show(m, cd, NULL);
1083
1084 ifdebug(CACHE)
1085 seq_printf(m, "# expiry=%ld refcnt=%d\n",
1086 cp->expiry_time, atomic_read(&cp->refcnt));
1087 cache_get(cp);
1088 if (cache_check(cd, cp, NULL))
1089 /* cache_check does a cache_put on failure */
1090 seq_printf(m, "# ");
1091 else
1092 cache_put(cp, cd);
1093
1094 return cd->cache_show(m, cd, cp);
1095}
1096
1097static struct seq_operations cache_content_op = {
1098 .start = c_start,
1099 .next = c_next,
1100 .stop = c_stop,
1101 .show = c_show,
1102};
1103
1104static int content_open(struct inode *inode, struct file *file)
1105{
1106 int res;
1107 struct handle *han;
1108 struct cache_detail *cd = PDE(inode)->data;
1109
1110 han = kmalloc(sizeof(*han), GFP_KERNEL);
1111 if (han == NULL)
1112 return -ENOMEM;
1113
1114 han->cd = cd;
1115
1116 res = seq_open(file, &cache_content_op);
1117 if (res)
1118 kfree(han);
1119 else
1120 ((struct seq_file *)file->private_data)->private = han;
1121
1122 return res;
1123}
1124static int content_release(struct inode *inode, struct file *file)
1125{
1126 struct seq_file *m = (struct seq_file *)file->private_data;
1127 struct handle *han = m->private;
1128 kfree(han);
1129 m->private = NULL;
1130 return seq_release(inode, file);
1131}
1132
1133static struct file_operations content_file_operations = {
1134 .open = content_open,
1135 .read = seq_read,
1136 .llseek = seq_lseek,
1137 .release = content_release,
1138};
1139
1140static ssize_t read_flush(struct file *file, char __user *buf,
1141 size_t count, loff_t *ppos)
1142{
1143 struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
1144 char tbuf[20];
1145 unsigned long p = *ppos;
1146 int len;
1147
1148 sprintf(tbuf, "%lu\n", cd->flush_time);
1149 len = strlen(tbuf);
1150 if (p >= len)
1151 return 0;
1152 len -= p;
1153 if (len > count) len = count;
1154 if (copy_to_user(buf, (void*)(tbuf+p), len))
1155 len = -EFAULT;
1156 else
1157 *ppos += len;
1158 return len;
1159}
1160
1161static ssize_t write_flush(struct file * file, const char __user * buf,
1162 size_t count, loff_t *ppos)
1163{
1164 struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data;
1165 char tbuf[20];
1166 char *ep;
1167 long flushtime;
1168 if (*ppos || count > sizeof(tbuf)-1)
1169 return -EINVAL;
1170 if (copy_from_user(tbuf, buf, count))
1171 return -EFAULT;
1172 tbuf[count] = 0;
1173 flushtime = simple_strtoul(tbuf, &ep, 0);
1174 if (*ep && *ep != '\n')
1175 return -EINVAL;
1176
1177 cd->flush_time = flushtime;
1178 cd->nextcheck = get_seconds();
1179 cache_flush();
1180
1181 *ppos += count;
1182 return count;
1183}
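Putting the flush file's read and write halves together, a user-space sketch (the path is illustrative; each cache registers its own flush file): writing a time in seconds updates flush_time, resets nextcheck and triggers an immediate cache_flush() pass.

	/* sketch: nudge a cache to expire entries, from user space */
	char cmd[32];
	int fd = open("/proc/sunrpc/<cachename>/flush", O_WRONLY);	/* illustrative path */

	if (fd >= 0) {
		int len = sprintf(cmd, "%lu\n", (unsigned long)time(NULL));
		write(fd, cmd, len);
		close(fd);
	}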
1184
1185static struct file_operations cache_flush_operations = {
1186 .open = nonseekable_open,
1187 .read = read_flush,
1188 .write = write_flush,
1189};
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
new file mode 100644
index 000000000000..02bc029d46fe
--- /dev/null
+++ b/net/sunrpc/clnt.c
@@ -0,0 +1,1085 @@
1/*
2 * linux/net/sunrpc/clnt.c
3 *
4 * This file contains the high-level RPC interface.
5 * It is modeled as a finite state machine to support both synchronous
6 * and asynchronous requests.
7 *
8 * - RPC header generation and argument serialization.
9 * - Credential refresh.
10 * - TCP connect handling.
11 * - Retry of operation when it is suspected the operation failed because
12 * of uid squashing on the server, or when the credentials were stale
13 * and need to be refreshed, or when a packet was damaged in transit.
14 * This may have to be moved to the VFS layer.
15 *
16 * NB: BSD uses a more intelligent approach to guessing when a request
17 * or reply has been lost by keeping the RTO estimate for each procedure.
18 * We currently make do with a constant timeout value.
19 *
20 * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
21 * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
22 */
23
24#include <asm/system.h>
25
26#include <linux/module.h>
27#include <linux/types.h>
28#include <linux/mm.h>
29#include <linux/slab.h>
30#include <linux/in.h>
31#include <linux/utsname.h>
32
33#include <linux/sunrpc/clnt.h>
34#include <linux/workqueue.h>
35#include <linux/sunrpc/rpc_pipe_fs.h>
36
37#include <linux/nfs.h>
38
39
40#define RPC_SLACK_SPACE (1024) /* total overkill */
41
42#ifdef RPC_DEBUG
43# define RPCDBG_FACILITY RPCDBG_CALL
44#endif
45
46static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
47
48
49static void call_start(struct rpc_task *task);
50static void call_reserve(struct rpc_task *task);
51static void call_reserveresult(struct rpc_task *task);
52static void call_allocate(struct rpc_task *task);
53static void call_encode(struct rpc_task *task);
54static void call_decode(struct rpc_task *task);
55static void call_bind(struct rpc_task *task);
56static void call_transmit(struct rpc_task *task);
57static void call_status(struct rpc_task *task);
58static void call_refresh(struct rpc_task *task);
59static void call_refreshresult(struct rpc_task *task);
60static void call_timeout(struct rpc_task *task);
61static void call_connect(struct rpc_task *task);
62static void call_connect_status(struct rpc_task *task);
63static u32 * call_header(struct rpc_task *task);
64static u32 * call_verify(struct rpc_task *task);
65
66
67static int
68rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
69{
70 static uint32_t clntid;
71 int error;
72
73 if (dir_name == NULL)
74 return 0;
75 for (;;) {
76 snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
77 "%s/clnt%x", dir_name,
78 (unsigned int)clntid++);
79 clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0';
80 clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt);
81 if (!IS_ERR(clnt->cl_dentry))
82 return 0;
83 error = PTR_ERR(clnt->cl_dentry);
84 if (error != -EEXIST) {
85 printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
86 clnt->cl_pathname, error);
87 return error;
88 }
89 }
90}
91
92/*
93 * Create an RPC client
94 * FIXME: This should also take a flags argument (as in task->tk_flags).
95 * It's called (among others) from pmap_create_client, which may in
96 * turn be called by an async task. In this case, rpciod should not be
97 * made to sleep too long.
98 */
99struct rpc_clnt *
100rpc_create_client(struct rpc_xprt *xprt, char *servname,
101 struct rpc_program *program, u32 vers,
102 rpc_authflavor_t flavor)
103{
104 struct rpc_version *version;
105 struct rpc_clnt *clnt = NULL;
106 int err;
107 int len;
108
109 dprintk("RPC: creating %s client for %s (xprt %p)\n",
110 program->name, servname, xprt);
111
112 err = -EINVAL;
113 if (!xprt)
114 goto out_err;
115 if (vers >= program->nrvers || !(version = program->version[vers]))
116 goto out_err;
117
118 err = -ENOMEM;
119 clnt = (struct rpc_clnt *) kmalloc(sizeof(*clnt), GFP_KERNEL);
120 if (!clnt)
121 goto out_err;
122 memset(clnt, 0, sizeof(*clnt));
123 atomic_set(&clnt->cl_users, 0);
124 atomic_set(&clnt->cl_count, 1);
125 clnt->cl_parent = clnt;
126
127 clnt->cl_server = clnt->cl_inline_name;
128 len = strlen(servname) + 1;
129 if (len > sizeof(clnt->cl_inline_name)) {
130 char *buf = kmalloc(len, GFP_KERNEL);
131 if (buf != 0)
132 clnt->cl_server = buf;
133 else
134 len = sizeof(clnt->cl_inline_name);
135 }
136 strlcpy(clnt->cl_server, servname, len);
137
138 clnt->cl_xprt = xprt;
139 clnt->cl_procinfo = version->procs;
140 clnt->cl_maxproc = version->nrprocs;
141 clnt->cl_protname = program->name;
142 clnt->cl_pmap = &clnt->cl_pmap_default;
143 clnt->cl_port = xprt->addr.sin_port;
144 clnt->cl_prog = program->number;
145 clnt->cl_vers = version->number;
146 clnt->cl_prot = xprt->prot;
147 clnt->cl_stats = program->stats;
148 rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
149
150 if (!clnt->cl_port)
151 clnt->cl_autobind = 1;
152
153 clnt->cl_rtt = &clnt->cl_rtt_default;
154 rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
155
156 err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
157 if (err < 0)
158 goto out_no_path;
159
160 err = -ENOMEM;
161 if (!rpcauth_create(flavor, clnt)) {
162 printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
163 flavor);
164 goto out_no_auth;
165 }
166
167 /* save the nodename */
168 clnt->cl_nodelen = strlen(system_utsname.nodename);
169 if (clnt->cl_nodelen > UNX_MAXNODENAME)
170 clnt->cl_nodelen = UNX_MAXNODENAME;
171 memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
172 return clnt;
173
174out_no_auth:
175 rpc_rmdir(clnt->cl_pathname);
176out_no_path:
177 if (clnt->cl_server != clnt->cl_inline_name)
178 kfree(clnt->cl_server);
179 kfree(clnt);
180out_err:
181 return ERR_PTR(err);
182}
183
184/*
185 * This function clones the RPC client structure. It allows us to share the
186 * same transport while varying parameters such as the authentication
187 * flavour.
188 */
189struct rpc_clnt *
190rpc_clone_client(struct rpc_clnt *clnt)
191{
192 struct rpc_clnt *new;
193
194 new = (struct rpc_clnt *)kmalloc(sizeof(*new), GFP_KERNEL);
195 if (!new)
196 goto out_no_clnt;
197 memcpy(new, clnt, sizeof(*new));
198 atomic_set(&new->cl_count, 1);
199 atomic_set(&new->cl_users, 0);
200 new->cl_parent = clnt;
201 atomic_inc(&clnt->cl_count);
202 /* Duplicate portmapper */
203 rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
204 /* Turn off autobind on clones */
205 new->cl_autobind = 0;
206 new->cl_oneshot = 0;
207 new->cl_dead = 0;
208 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
209 if (new->cl_auth)
210 atomic_inc(&new->cl_auth->au_count);
211 return new;
212out_no_clnt:
213 printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
214 return ERR_PTR(-ENOMEM);
215}
216
217/*
218 * Properly shut down an RPC client, terminating all outstanding
219 * requests. Note that we must be certain that cl_oneshot and
220 * cl_dead are cleared, or else the client would be destroyed
221 * when the last task releases it.
222 */
223int
224rpc_shutdown_client(struct rpc_clnt *clnt)
225{
226 dprintk("RPC: shutting down %s client for %s, tasks=%d\n",
227 clnt->cl_protname, clnt->cl_server,
228 atomic_read(&clnt->cl_users));
229
230 while (atomic_read(&clnt->cl_users) > 0) {
231 /* Don't let rpc_release_client destroy us */
232 clnt->cl_oneshot = 0;
233 clnt->cl_dead = 0;
234 rpc_killall_tasks(clnt);
235 sleep_on_timeout(&destroy_wait, 1*HZ);
236 }
237
238 if (atomic_read(&clnt->cl_users) < 0) {
239 printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n",
240 clnt, atomic_read(&clnt->cl_users));
241#ifdef RPC_DEBUG
242 rpc_show_tasks();
243#endif
244 BUG();
245 }
246
247 return rpc_destroy_client(clnt);
248}
249
250/*
251 * Delete an RPC client
252 */
253int
254rpc_destroy_client(struct rpc_clnt *clnt)
255{
256 if (!atomic_dec_and_test(&clnt->cl_count))
257 return 1;
258 BUG_ON(atomic_read(&clnt->cl_users) != 0);
259
260 dprintk("RPC: destroying %s client for %s\n",
261 clnt->cl_protname, clnt->cl_server);
262 if (clnt->cl_auth) {
263 rpcauth_destroy(clnt->cl_auth);
264 clnt->cl_auth = NULL;
265 }
266 if (clnt->cl_parent != clnt) {
267 rpc_destroy_client(clnt->cl_parent);
268 goto out_free;
269 }
270 if (clnt->cl_pathname[0])
271 rpc_rmdir(clnt->cl_pathname);
272 if (clnt->cl_xprt) {
273 xprt_destroy(clnt->cl_xprt);
274 clnt->cl_xprt = NULL;
275 }
276 if (clnt->cl_server != clnt->cl_inline_name)
277 kfree(clnt->cl_server);
278out_free:
279 kfree(clnt);
280 return 0;
281}
282
283/*
284 * Release an RPC client
285 */
286void
287rpc_release_client(struct rpc_clnt *clnt)
288{
289 dprintk("RPC: rpc_release_client(%p, %d)\n",
290 clnt, atomic_read(&clnt->cl_users));
291
292 if (!atomic_dec_and_test(&clnt->cl_users))
293 return;
294 wake_up(&destroy_wait);
295 if (clnt->cl_oneshot || clnt->cl_dead)
296 rpc_destroy_client(clnt);
297}
298
299/*
300 * Default callback for async RPC calls
301 */
302static void
303rpc_default_callback(struct rpc_task *task)
304{
305}
306
307/*
308 * Export the signal mask handling for asynchronous code that
309 * sleeps on RPC calls
310 */
311
312void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
313{
314 unsigned long sigallow = sigmask(SIGKILL);
315 unsigned long irqflags;
316
317 /* Turn off various signals */
318 if (clnt->cl_intr) {
319 struct k_sigaction *action = current->sighand->action;
320 if (action[SIGINT-1].sa.sa_handler == SIG_DFL)
321 sigallow |= sigmask(SIGINT);
322 if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL)
323 sigallow |= sigmask(SIGQUIT);
324 }
325 spin_lock_irqsave(&current->sighand->siglock, irqflags);
326 *oldset = current->blocked;
327 siginitsetinv(&current->blocked, sigallow & ~oldset->sig[0]);
328 recalc_sigpending();
329 spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
330}
331
332void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
333{
334 unsigned long irqflags;
335
336 spin_lock_irqsave(&current->sighand->siglock, irqflags);
337 current->blocked = *oldset;
338 recalc_sigpending();
339 spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
340}
341
342/*
343 * New rpc_call implementation
344 */
345int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
346{
347 struct rpc_task *task;
348 sigset_t oldset;
349 int status;
350
351 /* If this client is slain all further I/O fails */
352 if (clnt->cl_dead)
353 return -EIO;
354
355 BUG_ON(flags & RPC_TASK_ASYNC);
356
357 rpc_clnt_sigmask(clnt, &oldset);
358
359 status = -ENOMEM;
360 task = rpc_new_task(clnt, NULL, flags);
361 if (task == NULL)
362 goto out;
363
364 rpc_call_setup(task, msg, 0);
365
366 /* Set up the call info struct and execute the task */
367 if (task->tk_status == 0)
368 status = rpc_execute(task);
369 else {
370 status = task->tk_status;
371 rpc_release_task(task);
372 }
373
374out:
375 rpc_clnt_sigunmask(clnt, &oldset);
376
377 return status;
378}
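To make the synchronous path above concrete, a hedged caller-side sketch (the procedure table, argument and result types are hypothetical placeholders; real users such as NFS define their own rpc_procinfo entries):

	struct my_args args = { /* ... */ };
	struct my_res  res;
	struct rpc_message msg = {
		.rpc_proc = &my_program_procedures[MY_PROC],	/* hypothetical */
		.rpc_argp = &args,
		.rpc_resp = &res,
		.rpc_cred = NULL,	/* NULL: rpc_call_setup() binds a cred for us */
	};
	int status;

	status = rpc_call_sync(clnt, &msg, 0);	/* runs the FSM: reserve, encode,
						 * bind/connect, transmit, decode */
	if (status < 0)
		printk(KERN_INFO "my_call failed: %d\n", status);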
379
380/*
381 * New rpc_call implementation
382 */
383int
384rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
385 rpc_action callback, void *data)
386{
387 struct rpc_task *task;
388 sigset_t oldset;
389 int status;
390
391 /* If this client is slain all further I/O fails */
392 if (clnt->cl_dead)
393 return -EIO;
394
395 flags |= RPC_TASK_ASYNC;
396
397 rpc_clnt_sigmask(clnt, &oldset);
398
399 /* Create/initialize a new RPC task */
400 if (!callback)
401 callback = rpc_default_callback;
402 status = -ENOMEM;
403 if (!(task = rpc_new_task(clnt, callback, flags)))
404 goto out;
405 task->tk_calldata = data;
406
407 rpc_call_setup(task, msg, 0);
408
409 /* Set up the call info struct and execute the task */
410 status = task->tk_status;
411 if (status == 0)
412 rpc_execute(task);
413 else
414 rpc_release_task(task);
415
416out:
417 rpc_clnt_sigunmask(clnt, &oldset);
418
419 return status;
420}
421
422
423void
424rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
425{
426 task->tk_msg = *msg;
427 task->tk_flags |= flags;
428 /* Bind the user cred */
429 if (task->tk_msg.rpc_cred != NULL)
430 rpcauth_holdcred(task);
431 else
432 rpcauth_bindcred(task);
433
434 if (task->tk_status == 0)
435 task->tk_action = call_start;
436 else
437 task->tk_action = NULL;
438}
439
440void
441rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
442{
443 struct rpc_xprt *xprt = clnt->cl_xprt;
444
445 xprt->sndsize = 0;
446 if (sndsize)
447 xprt->sndsize = sndsize + RPC_SLACK_SPACE;
448 xprt->rcvsize = 0;
449 if (rcvsize)
450 xprt->rcvsize = rcvsize + RPC_SLACK_SPACE;
451 if (xprt_connected(xprt))
452 xprt_sock_setbufsize(xprt);
453}
454
455/*
456 * Return size of largest payload RPC client can support, in bytes
457 *
458 * For stream transports, this is one RPC record fragment (see RFC
459 * 1831), as we don't support multi-record requests yet. For datagram
460 * transports, this is the size of an IP packet minus the IP, UDP, and
461 * RPC header sizes.
462 */
463size_t rpc_max_payload(struct rpc_clnt *clnt)
464{
465 return clnt->cl_xprt->max_payload;
466}
467EXPORT_SYMBOL(rpc_max_payload);
468
469/*
470 * Restart an (async) RPC call. Usually called from within the
471 * exit handler.
472 */
473void
474rpc_restart_call(struct rpc_task *task)
475{
476 if (RPC_ASSASSINATED(task))
477 return;
478
479 task->tk_action = call_start;
480}
481
482/*
483 * 0. Initial state
484 *
485 * Other FSM states can be visited zero or more times, but
486 * this state is visited exactly once for each RPC.
487 */
488static void
489call_start(struct rpc_task *task)
490{
491 struct rpc_clnt *clnt = task->tk_client;
492
493 dprintk("RPC: %4d call_start %s%d proc %d (%s)\n", task->tk_pid,
494 clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc->p_proc,
495 (RPC_IS_ASYNC(task) ? "async" : "sync"));
496
497 /* Increment call count */
498 task->tk_msg.rpc_proc->p_count++;
499 clnt->cl_stats->rpccnt++;
500 task->tk_action = call_reserve;
501}
502
503/*
504 * 1. Reserve an RPC call slot
505 */
506static void
507call_reserve(struct rpc_task *task)
508{
509 dprintk("RPC: %4d call_reserve\n", task->tk_pid);
510
511 if (!rpcauth_uptodatecred(task)) {
512 task->tk_action = call_refresh;
513 return;
514 }
515
516 task->tk_status = 0;
517 task->tk_action = call_reserveresult;
518 xprt_reserve(task);
519}
520
521/*
522 * 1b. Grok the result of xprt_reserve()
523 */
524static void
525call_reserveresult(struct rpc_task *task)
526{
527 int status = task->tk_status;
528
529 dprintk("RPC: %4d call_reserveresult (status %d)\n",
530 task->tk_pid, task->tk_status);
531
532 /*
533 * After a call to xprt_reserve(), we must have either
534 * a request slot or else an error status.
535 */
536 task->tk_status = 0;
537 if (status >= 0) {
538 if (task->tk_rqstp) {
539 task->tk_action = call_allocate;
540 return;
541 }
542
543 printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
544 __FUNCTION__, status);
545 rpc_exit(task, -EIO);
546 return;
547 }
548
549 /*
550 * Even though there was an error, we may have acquired
551 * a request slot somehow. Make sure not to leak it.
552 */
553 if (task->tk_rqstp) {
554 printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
555 __FUNCTION__, status);
556 xprt_release(task);
557 }
558
559 switch (status) {
560 case -EAGAIN: /* woken up; retry */
561 task->tk_action = call_reserve;
562 return;
563 case -EIO: /* probably a shutdown */
564 break;
565 default:
566 printk(KERN_ERR "%s: unrecognized error %d, exiting\n",
567 __FUNCTION__, status);
568 break;
569 }
570 rpc_exit(task, status);
571}
572
573/*
574 * 2. Allocate the buffer. For details, see sched.c:rpc_malloc.
575 * (Note: buffer memory is freed in rpc_task_release).
576 */
577static void
578call_allocate(struct rpc_task *task)
579{
580 unsigned int bufsiz;
581
582 dprintk("RPC: %4d call_allocate (status %d)\n",
583 task->tk_pid, task->tk_status);
584 task->tk_action = call_bind;
585 if (task->tk_buffer)
586 return;
587
588 /* FIXME: compute buffer requirements more exactly using
589 * auth->au_wslack */
590 bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;
591
592 if (rpc_malloc(task, bufsiz << 1) != NULL)
593 return;
594 printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task);
595
596 if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) {
597 xprt_release(task);
598 task->tk_action = call_reserve;
599 rpc_delay(task, HZ>>4);
600 return;
601 }
602
603 rpc_exit(task, -ERESTARTSYS);
604}
605
606/*
607 * 3. Encode arguments of an RPC call
608 */
609static void
610call_encode(struct rpc_task *task)
611{
612 struct rpc_clnt *clnt = task->tk_client;
613 struct rpc_rqst *req = task->tk_rqstp;
614 struct xdr_buf *sndbuf = &req->rq_snd_buf;
615 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
616 unsigned int bufsiz;
617 kxdrproc_t encode;
618 int status;
619 u32 *p;
620
621 dprintk("RPC: %4d call_encode (status %d)\n",
622 task->tk_pid, task->tk_status);
623
624 /* Default buffer setup */
625 bufsiz = task->tk_bufsize >> 1;
626 sndbuf->head[0].iov_base = (void *)task->tk_buffer;
627 sndbuf->head[0].iov_len = bufsiz;
628 sndbuf->tail[0].iov_len = 0;
629 sndbuf->page_len = 0;
630 sndbuf->len = 0;
631 sndbuf->buflen = bufsiz;
632 rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz);
633 rcvbuf->head[0].iov_len = bufsiz;
634 rcvbuf->tail[0].iov_len = 0;
635 rcvbuf->page_len = 0;
636 rcvbuf->len = 0;
637 rcvbuf->buflen = bufsiz;
638
639 /* Encode header and provided arguments */
640 encode = task->tk_msg.rpc_proc->p_encode;
641 if (!(p = call_header(task))) {
642 printk(KERN_INFO "RPC: call_header failed, exit EIO\n");
643 rpc_exit(task, -EIO);
644 return;
645 }
646 if (encode && (status = rpcauth_wrap_req(task, encode, req, p,
647 task->tk_msg.rpc_argp)) < 0) {
648 printk(KERN_WARNING "%s: can't encode arguments: %d\n",
649 clnt->cl_protname, -status);
650 rpc_exit(task, status);
651 }
652}
653
654/*
655 * 4. Get the server port number if not yet set
656 */
657static void
658call_bind(struct rpc_task *task)
659{
660 struct rpc_clnt *clnt = task->tk_client;
661 struct rpc_xprt *xprt = clnt->cl_xprt;
662
663 dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid,
664 xprt, (xprt_connected(xprt) ? "is" : "is not"));
665
666 task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect;
667
668 if (!clnt->cl_port) {
669 task->tk_action = call_connect;
670 task->tk_timeout = RPC_CONNECT_TIMEOUT;
671 rpc_getport(task, clnt);
672 }
673}
674
675/*
676 * 4a. Connect to the RPC server (TCP case)
677 */
678static void
679call_connect(struct rpc_task *task)
680{
681 struct rpc_clnt *clnt = task->tk_client;
682
683 dprintk("RPC: %4d call_connect status %d\n",
684 task->tk_pid, task->tk_status);
685
686 if (xprt_connected(clnt->cl_xprt)) {
687 task->tk_action = call_transmit;
688 return;
689 }
690 task->tk_action = call_connect_status;
691 if (task->tk_status < 0)
692 return;
693 xprt_connect(task);
694}
695
696/*
697 * 4b. Sort out connect result
698 */
699static void
700call_connect_status(struct rpc_task *task)
701{
702 struct rpc_clnt *clnt = task->tk_client;
703 int status = task->tk_status;
704
705 task->tk_status = 0;
706 if (status >= 0) {
707 clnt->cl_stats->netreconn++;
708 task->tk_action = call_transmit;
709 return;
710 }
711
712 /* Something failed: we may have to rebind */
713 if (clnt->cl_autobind)
714 clnt->cl_port = 0;
715 switch (status) {
716 case -ENOTCONN:
717 case -ETIMEDOUT:
718 case -EAGAIN:
719 task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect;
720 break;
721 default:
722 rpc_exit(task, -EIO);
723 }
724}
725
726/*
727 * 5. Transmit the RPC request, and wait for reply
728 */
729static void
730call_transmit(struct rpc_task *task)
731{
732 dprintk("RPC: %4d call_transmit (status %d)\n",
733 task->tk_pid, task->tk_status);
734
735 task->tk_action = call_status;
736 if (task->tk_status < 0)
737 return;
738 task->tk_status = xprt_prepare_transmit(task);
739 if (task->tk_status != 0)
740 return;
741 /* Encode here so that rpcsec_gss can use correct sequence number. */
742 if (!task->tk_rqstp->rq_bytes_sent)
743 call_encode(task);
744 if (task->tk_status < 0)
745 return;
746 xprt_transmit(task);
747 if (task->tk_status < 0)
748 return;
749 if (!task->tk_msg.rpc_proc->p_decode) {
750 task->tk_action = NULL;
751 rpc_wake_up_task(task);
752 }
753}
754
755/*
756 * 6. Sort out the RPC call status
757 */
758static void
759call_status(struct rpc_task *task)
760{
761 struct rpc_clnt *clnt = task->tk_client;
762 struct rpc_rqst *req = task->tk_rqstp;
763 int status;
764
765 if (req->rq_received > 0 && !req->rq_bytes_sent)
766 task->tk_status = req->rq_received;
767
768 dprintk("RPC: %4d call_status (status %d)\n",
769 task->tk_pid, task->tk_status);
770
771 status = task->tk_status;
772 if (status >= 0) {
773 task->tk_action = call_decode;
774 return;
775 }
776
777 task->tk_status = 0;
778 switch(status) {
779 case -ETIMEDOUT:
780 task->tk_action = call_timeout;
781 break;
782 case -ECONNREFUSED:
783 case -ENOTCONN:
784 req->rq_bytes_sent = 0;
785 if (clnt->cl_autobind)
786 clnt->cl_port = 0;
787 task->tk_action = call_bind;
788 break;
789 case -EAGAIN:
790 task->tk_action = call_transmit;
791 break;
792 case -EIO:
793 /* shutdown or soft timeout */
794 rpc_exit(task, status);
795 break;
796 default:
797 if (clnt->cl_chatty)
798 printk("%s: RPC call returned error %d\n",
799 clnt->cl_protname, -status);
800 rpc_exit(task, status);
801 break;
802 }
803}
804
805/*
806 * 6a. Handle RPC timeout
807 * We do not release the request slot, so we keep using the
808 * same XID for all retransmits.
809 */
810static void
811call_timeout(struct rpc_task *task)
812{
813 struct rpc_clnt *clnt = task->tk_client;
814
815 if (xprt_adjust_timeout(task->tk_rqstp) == 0) {
816 dprintk("RPC: %4d call_timeout (minor)\n", task->tk_pid);
817 goto retry;
818 }
819
820 dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
821 if (RPC_IS_SOFT(task)) {
822 if (clnt->cl_chatty)
823 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
824 clnt->cl_protname, clnt->cl_server);
825 rpc_exit(task, -EIO);
826 return;
827 }
828
829 if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
830 task->tk_flags |= RPC_CALL_MAJORSEEN;
831 printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
832 clnt->cl_protname, clnt->cl_server);
833 }
834 if (clnt->cl_autobind)
835 clnt->cl_port = 0;
836
837retry:
838 clnt->cl_stats->rpcretrans++;
839 task->tk_action = call_bind;
840 task->tk_status = 0;
841}
842
843/*
844 * 7. Decode the RPC reply
845 */
846static void
847call_decode(struct rpc_task *task)
848{
849 struct rpc_clnt *clnt = task->tk_client;
850 struct rpc_rqst *req = task->tk_rqstp;
851 kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode;
852 u32 *p;
853
854 dprintk("RPC: %4d call_decode (status %d)\n",
855 task->tk_pid, task->tk_status);
856
857 if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) {
858 printk(KERN_NOTICE "%s: server %s OK\n",
859 clnt->cl_protname, clnt->cl_server);
860 task->tk_flags &= ~RPC_CALL_MAJORSEEN;
861 }
862
863 if (task->tk_status < 12) {
864 if (!RPC_IS_SOFT(task)) {
865 task->tk_action = call_bind;
866 clnt->cl_stats->rpcretrans++;
867 goto out_retry;
868 }
869 printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
870 clnt->cl_protname, task->tk_status);
871 rpc_exit(task, -EIO);
872 return;
873 }
874
875 req->rq_rcv_buf.len = req->rq_private_buf.len;
876
877 /* Check that the softirq receive buffer is valid */
878 WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
879 sizeof(req->rq_rcv_buf)) != 0);
880
881 /* Verify the RPC header */
882 if (!(p = call_verify(task))) {
883 if (task->tk_action == NULL)
884 return;
885 goto out_retry;
886 }
887
888 task->tk_action = NULL;
889
890 if (decode)
891 task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
892 task->tk_msg.rpc_resp);
893 dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
894 task->tk_status);
895 return;
896out_retry:
897 req->rq_received = req->rq_private_buf.len = 0;
898 task->tk_status = 0;
899}
900
901/*
902 * 8. Refresh the credentials if rejected by the server
903 */
904static void
905call_refresh(struct rpc_task *task)
906{
907 dprintk("RPC: %4d call_refresh\n", task->tk_pid);
908
909 xprt_release(task); /* Must do to obtain new XID */
910 task->tk_action = call_refreshresult;
911 task->tk_status = 0;
912 task->tk_client->cl_stats->rpcauthrefresh++;
913 rpcauth_refreshcred(task);
914}
915
916/*
917 * 8a. Process the results of a credential refresh
918 */
919static void
920call_refreshresult(struct rpc_task *task)
921{
922 int status = task->tk_status;
923 dprintk("RPC: %4d call_refreshresult (status %d)\n",
924 task->tk_pid, task->tk_status);
925
926 task->tk_status = 0;
927 task->tk_action = call_reserve;
928 if (status >= 0 && rpcauth_uptodatecred(task))
929 return;
930 if (status == -EACCES) {
931 rpc_exit(task, -EACCES);
932 return;
933 }
934 task->tk_action = call_refresh;
935 if (status != -ETIMEDOUT)
936 rpc_delay(task, 3*HZ);
937 return;
938}
939
940/*
941 * Call header serialization
942 */
943static u32 *
944call_header(struct rpc_task *task)
945{
946 struct rpc_clnt *clnt = task->tk_client;
947 struct rpc_xprt *xprt = clnt->cl_xprt;
948 struct rpc_rqst *req = task->tk_rqstp;
949 u32 *p = req->rq_svec[0].iov_base;
950
951 /* FIXME: check buffer size? */
952 if (xprt->stream)
953 *p++ = 0; /* fill in later */
954 *p++ = req->rq_xid; /* XID */
955 *p++ = htonl(RPC_CALL); /* CALL */
956 *p++ = htonl(RPC_VERSION); /* RPC version */
957 *p++ = htonl(clnt->cl_prog); /* program number */
958 *p++ = htonl(clnt->cl_vers); /* program version */
959 *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */
960 return rpcauth_marshcred(task, p);
961}
962
963/*
964 * Reply header verification
965 */
966static u32 *
967call_verify(struct rpc_task *task)
968{
969 struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
970 int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
971 u32 *p = iov->iov_base, n;
972 int error = -EACCES;
973
974 if ((len -= 3) < 0)
975 goto out_overflow;
976 p += 1; /* skip XID */
977
978 if ((n = ntohl(*p++)) != RPC_REPLY) {
979 printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
980 goto out_retry;
981 }
982 if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
983 if (--len < 0)
984 goto out_overflow;
985 switch ((n = ntohl(*p++))) {
986 case RPC_AUTH_ERROR:
987 break;
988 case RPC_MISMATCH:
989 printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__);
990 goto out_eio;
991 default:
992 printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
993 goto out_eio;
994 }
995 if (--len < 0)
996 goto out_overflow;
997 switch ((n = ntohl(*p++))) {
998 case RPC_AUTH_REJECTEDCRED:
999 case RPC_AUTH_REJECTEDVERF:
1000 case RPCSEC_GSS_CREDPROBLEM:
1001 case RPCSEC_GSS_CTXPROBLEM:
1002 if (!task->tk_cred_retry)
1003 break;
1004 task->tk_cred_retry--;
1005 dprintk("RPC: %4d call_verify: retry stale creds\n",
1006 task->tk_pid);
1007 rpcauth_invalcred(task);
1008 task->tk_action = call_refresh;
1009 return NULL;
1010 case RPC_AUTH_BADCRED:
1011 case RPC_AUTH_BADVERF:
1012 /* possibly garbled cred/verf? */
1013 if (!task->tk_garb_retry)
1014 break;
1015 task->tk_garb_retry--;
1016 dprintk("RPC: %4d call_verify: retry garbled creds\n",
1017 task->tk_pid);
1018 task->tk_action = call_bind;
1019 return NULL;
1020 case RPC_AUTH_TOOWEAK:
1021 printk(KERN_NOTICE "call_verify: server requires stronger "
1022 "authentication.\n");
1023 break;
1024 default:
1025 printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
1026 error = -EIO;
1027 }
1028 dprintk("RPC: %4d call_verify: call rejected %d\n",
1029 task->tk_pid, n);
1030 goto out_err;
1031 }
1032 if (!(p = rpcauth_checkverf(task, p))) {
1033 printk(KERN_WARNING "call_verify: auth check failed\n");
1034 goto out_retry; /* bad verifier, retry */
1035 }
1036 len = p - (u32 *)iov->iov_base - 1;
1037 if (len < 0)
1038 goto out_overflow;
1039 switch ((n = ntohl(*p++))) {
1040 case RPC_SUCCESS:
1041 return p;
1042 case RPC_PROG_UNAVAIL:
1043 printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n",
1044 (unsigned int)task->tk_client->cl_prog,
1045 task->tk_client->cl_server);
1046 goto out_eio;
1047 case RPC_PROG_MISMATCH:
1048 printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n",
1049 (unsigned int)task->tk_client->cl_prog,
1050 (unsigned int)task->tk_client->cl_vers,
1051 task->tk_client->cl_server);
1052 goto out_eio;
1053 case RPC_PROC_UNAVAIL:
1054 printk(KERN_WARNING "RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n",
1055 task->tk_msg.rpc_proc,
1056 task->tk_client->cl_prog,
1057 task->tk_client->cl_vers,
1058 task->tk_client->cl_server);
1059 goto out_eio;
1060 case RPC_GARBAGE_ARGS:
1061 dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
1062 break; /* retry */
1063 default:
1064 printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
1065 /* Also retry */
1066 }
1067
1068out_retry:
1069 task->tk_client->cl_stats->rpcgarbage++;
1070 if (task->tk_garb_retry) {
1071 task->tk_garb_retry--;
1072 dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
1073 task->tk_action = call_bind;
1074 return NULL;
1075 }
1076 printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
1077out_eio:
1078 error = -EIO;
1079out_err:
1080 rpc_exit(task, error);
1081 return NULL;
1082out_overflow:
1083 printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
1084 goto out_retry;
1085}
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
new file mode 100644
index 000000000000..d0b1d2c34a4d
--- /dev/null
+++ b/net/sunrpc/pmap_clnt.c
@@ -0,0 +1,298 @@
1/*
2 * linux/net/sunrpc/pmap_clnt.c
3 *
4 * Portmapper client.
5 *
6 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/config.h>
10#include <linux/types.h>
11#include <linux/socket.h>
12#include <linux/kernel.h>
13#include <linux/errno.h>
14#include <linux/uio.h>
15#include <linux/in.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/sunrpc/xprt.h>
18#include <linux/sunrpc/sched.h>
19
20#ifdef RPC_DEBUG
21# define RPCDBG_FACILITY RPCDBG_PMAP
22#endif
23
24#define PMAP_SET 1
25#define PMAP_UNSET 2
26#define PMAP_GETPORT 3
27
28static struct rpc_procinfo pmap_procedures[];
29static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int);
30static void pmap_getport_done(struct rpc_task *);
31static struct rpc_program pmap_program;
32static DEFINE_SPINLOCK(pmap_lock);
33
34/*
35 * Obtain the port for a given RPC service on a given host. This one can
36 * be called for an ongoing RPC request.
37 */
38void
39rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
40{
41 struct rpc_portmap *map = clnt->cl_pmap;
42 struct sockaddr_in *sap = &clnt->cl_xprt->addr;
43 struct rpc_message msg = {
44 .rpc_proc = &pmap_procedures[PMAP_GETPORT],
45 .rpc_argp = map,
46 .rpc_resp = &clnt->cl_port,
47 .rpc_cred = NULL
48 };
49 struct rpc_clnt *pmap_clnt;
50 struct rpc_task *child;
51
52 dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n",
53 task->tk_pid, clnt->cl_server,
54 map->pm_prog, map->pm_vers, map->pm_prot);
55
56 spin_lock(&pmap_lock);
57 if (map->pm_binding) {
58 rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL);
59 spin_unlock(&pmap_lock);
60 return;
61 }
62 map->pm_binding = 1;
63 spin_unlock(&pmap_lock);
64
65 pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot);
66 if (IS_ERR(pmap_clnt)) {
67 task->tk_status = PTR_ERR(pmap_clnt);
68 goto bailout;
69 }
70 task->tk_status = 0;
71
72 /*
73 * Note: rpc_new_child will release client after a failure.
74 */
75 if (!(child = rpc_new_child(pmap_clnt, task)))
76 goto bailout;
77
78 /* Setup the call info struct */
79 rpc_call_setup(child, &msg, 0);
80
81 /* ... and run the child task */
82 rpc_run_child(task, child, pmap_getport_done);
83 return;
84
85bailout:
86 spin_lock(&pmap_lock);
87 map->pm_binding = 0;
88 rpc_wake_up(&map->pm_bindwait);
89 spin_unlock(&pmap_lock);
90 task->tk_status = -EIO;
91 task->tk_action = NULL;
92}
93
94#ifdef CONFIG_ROOT_NFS
95int
96rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
97{
98 struct rpc_portmap map = {
99 .pm_prog = prog,
100 .pm_vers = vers,
101 .pm_prot = prot,
102 .pm_port = 0
103 };
104 struct rpc_clnt *pmap_clnt;
105 char hostname[32];
106 int status;
107
108 dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n",
109 NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
110
111 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
112 pmap_clnt = pmap_create(hostname, sin, prot);
113 if (IS_ERR(pmap_clnt))
114 return PTR_ERR(pmap_clnt);
115
116 /* Setup the call info struct */
117 status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0);
118
119 if (status >= 0) {
120 if (map.pm_port != 0)
121 return map.pm_port;
122 status = -EACCES;
123 }
124 return status;
125}
126#endif
127
128static void
129pmap_getport_done(struct rpc_task *task)
130{
131 struct rpc_clnt *clnt = task->tk_client;
132 struct rpc_portmap *map = clnt->cl_pmap;
133
134 dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
135 task->tk_pid, task->tk_status, clnt->cl_port);
136 if (task->tk_status < 0) {
137 /* Make the calling task exit with an error */
138 task->tk_action = NULL;
139 } else if (clnt->cl_port == 0) {
140 /* Program not registered */
141 task->tk_status = -EACCES;
142 task->tk_action = NULL;
143 } else {
144 /* byte-swap port number first */
145 clnt->cl_port = htons(clnt->cl_port);
146 clnt->cl_xprt->addr.sin_port = clnt->cl_port;
147 }
148 spin_lock(&pmap_lock);
149 map->pm_binding = 0;
150 rpc_wake_up(&map->pm_bindwait);
151 spin_unlock(&pmap_lock);
152}
153
154/*
155 * Set or unset a port registration with the local portmapper.
156 * port == 0 means unregister, port != 0 means register.
157 */
158int
159rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
160{
161 struct sockaddr_in sin;
162 struct rpc_portmap map;
163 struct rpc_clnt *pmap_clnt;
164 int error = 0;
165
166 dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
167 prog, vers, prot, port);
168
169 sin.sin_family = AF_INET;
170 sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
171 pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP);
172 if (IS_ERR(pmap_clnt)) {
173 error = PTR_ERR(pmap_clnt);
174 dprintk("RPC: couldn't create pmap client. Error = %d\n", error);
175 return error;
176 }
177
178 map.pm_prog = prog;
179 map.pm_vers = vers;
180 map.pm_prot = prot;
181 map.pm_port = port;
182
183 error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET,
184 &map, okay, 0);
185
186 if (error < 0) {
187 printk(KERN_WARNING
188 "RPC: failed to contact portmap (errno %d).\n",
189 error);
190 }
191 dprintk("RPC: registration status %d/%d\n", error, *okay);
192
193 /* Client deleted automatically because cl_oneshot == 1 */
194 return error;
195}
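A hedged example of the registration call above, as a service's startup path might use it (program number 100003 and port 2049 are the well-known NFS values; whether a particular service registers exactly this way is not shown by this patch):

	int okay = 0;
	int err;

	/* advertise NFSv3 over UDP on port 2049 with the local portmapper */
	err = rpc_register(100003, 3, IPPROTO_UDP, 2049, &okay);
	if (err < 0 || !okay)
		printk(KERN_WARNING "service: portmap registration failed\n");

	/* passing port == 0 would unregister the same (prog, vers, prot) tuple */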
196
197static struct rpc_clnt *
198pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto)
199{
200 struct rpc_xprt *xprt;
201 struct rpc_clnt *clnt;
202
203 /* printk("pmap: create xprt\n"); */
204 xprt = xprt_create_proto(proto, srvaddr, NULL);
205 if (IS_ERR(xprt))
206 return (struct rpc_clnt *)xprt;
207 xprt->addr.sin_port = htons(RPC_PMAP_PORT);
208
209 /* printk("pmap: create clnt\n"); */
210 clnt = rpc_create_client(xprt, hostname,
211 &pmap_program, RPC_PMAP_VERSION,
212 RPC_AUTH_UNIX);
213 if (IS_ERR(clnt)) {
214 xprt_destroy(xprt);
215 } else {
216 clnt->cl_softrtry = 1;
217 clnt->cl_chatty = 1;
218 clnt->cl_oneshot = 1;
219 }
220 return clnt;
221}
222
223/*
224 * XDR encode/decode functions for PMAP
225 */
226static int
227xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map)
228{
229 dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n",
230 map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
231 *p++ = htonl(map->pm_prog);
232 *p++ = htonl(map->pm_vers);
233 *p++ = htonl(map->pm_prot);
234 *p++ = htonl(map->pm_port);
235
236 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
237 return 0;
238}
239
240static int
241xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
242{
243 *portp = (unsigned short) ntohl(*p++);
244 return 0;
245}
246
247static int
248xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
249{
250 *boolp = (unsigned int) ntohl(*p++);
251 return 0;
252}
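To illustrate the encoding above: a GETPORT query is just the four fields of the mapping as big-endian 32-bit words, and the reply is a single word.

	/* Worked example: GETPORT of NFSv3 over UDP on the wire.
	 * xdr_encode_mapping() emits four big-endian words:
	 *	0x000186a3	prog = 100003 (NFS)
	 *	0x00000003	vers = 3
	 *	0x00000011	prot = 17 (IPPROTO_UDP)
	 *	0x00000000	port = 0 (not used by GETPORT)
	 * xdr_decode_port() then reads back one word, e.g.
	 *	0x00000801	->  pm_port = 2049
	 */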
253
254static struct rpc_procinfo pmap_procedures[] = {
255[PMAP_SET] = {
256 .p_proc = PMAP_SET,
257 .p_encode = (kxdrproc_t) xdr_encode_mapping,
258 .p_decode = (kxdrproc_t) xdr_decode_bool,
259 .p_bufsiz = 4,
260 .p_count = 1,
261 },
262[PMAP_UNSET] = {
263 .p_proc = PMAP_UNSET,
264 .p_encode = (kxdrproc_t) xdr_encode_mapping,
265 .p_decode = (kxdrproc_t) xdr_decode_bool,
266 .p_bufsiz = 4,
267 .p_count = 1,
268 },
269[PMAP_GETPORT] = {
270 .p_proc = PMAP_GETPORT,
271 .p_encode = (kxdrproc_t) xdr_encode_mapping,
272 .p_decode = (kxdrproc_t) xdr_decode_port,
273 .p_bufsiz = 4,
274 .p_count = 1,
275 },
276};
277
278static struct rpc_version pmap_version2 = {
279 .number = 2,
280 .nrprocs = 4,
281 .procs = pmap_procedures
282};
283
284static struct rpc_version * pmap_version[] = {
285 NULL,
286 NULL,
287 &pmap_version2
288};
289
290static struct rpc_stat pmap_stats;
291
292static struct rpc_program pmap_program = {
293 .name = "portmap",
294 .number = RPC_PMAP_PROGRAM,
295 .nrvers = ARRAY_SIZE(pmap_version),
296 .version = pmap_version,
297 .stats = &pmap_stats,
298};
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
new file mode 100644
index 000000000000..554f224c0445
--- /dev/null
+++ b/net/sunrpc/rpc_pipe.c
@@ -0,0 +1,838 @@
1/*
2 * net/sunrpc/rpc_pipe.c
3 *
4 * Userland/kernel interface for rpcauth_gss.
5 * Code shamelessly plagiarized from fs/nfsd/nfsctl.c
6 * and fs/driverfs/inode.c
7 *
8 * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no>
9 *
10 */
11#include <linux/config.h>
12#include <linux/module.h>
13#include <linux/slab.h>
14#include <linux/string.h>
15#include <linux/pagemap.h>
16#include <linux/mount.h>
17#include <linux/namei.h>
18#include <linux/dnotify.h>
19#include <linux/kernel.h>
20
21#include <asm/ioctls.h>
22#include <linux/fs.h>
23#include <linux/poll.h>
24#include <linux/wait.h>
25#include <linux/seq_file.h>
26
27#include <linux/sunrpc/clnt.h>
28#include <linux/workqueue.h>
29#include <linux/sunrpc/rpc_pipe_fs.h>
30
31static struct vfsmount *rpc_mount;
32static int rpc_mount_count;
33
34static struct file_system_type rpc_pipe_fs_type;
35
36
37static kmem_cache_t *rpc_inode_cachep;
38
39#define RPC_UPCALL_TIMEOUT (30*HZ)
40
41static void
42__rpc_purge_upcall(struct inode *inode, int err)
43{
44 struct rpc_inode *rpci = RPC_I(inode);
45 struct rpc_pipe_msg *msg;
46
47 while (!list_empty(&rpci->pipe)) {
48 msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list);
49 list_del_init(&msg->list);
50 msg->errno = err;
51 rpci->ops->destroy_msg(msg);
52 }
53 while (!list_empty(&rpci->in_upcall)) {
54		msg = list_entry(rpci->in_upcall.next, struct rpc_pipe_msg, list);
55 list_del_init(&msg->list);
56 msg->errno = err;
57 rpci->ops->destroy_msg(msg);
58 }
59 rpci->pipelen = 0;
60 wake_up(&rpci->waitq);
61}
62
63static void
64rpc_timeout_upcall_queue(void *data)
65{
66 struct rpc_inode *rpci = (struct rpc_inode *)data;
67 struct inode *inode = &rpci->vfs_inode;
68
69 down(&inode->i_sem);
70 if (rpci->nreaders == 0 && !list_empty(&rpci->pipe))
71 __rpc_purge_upcall(inode, -ETIMEDOUT);
72 up(&inode->i_sem);
73}
74
75int
76rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
77{
78 struct rpc_inode *rpci = RPC_I(inode);
79 int res = 0;
80
81 down(&inode->i_sem);
82 if (rpci->nreaders) {
83 list_add_tail(&msg->list, &rpci->pipe);
84 rpci->pipelen += msg->len;
85 } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
86 if (list_empty(&rpci->pipe))
87 schedule_delayed_work(&rpci->queue_timeout,
88 RPC_UPCALL_TIMEOUT);
89 list_add_tail(&msg->list, &rpci->pipe);
90 rpci->pipelen += msg->len;
91 } else
92 res = -EPIPE;
93 up(&inode->i_sem);
94 wake_up(&rpci->waitq);
95 return res;
96}
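An illustrative caller sketch (hypothetical, not from the sunrpc sources): queueing a message on a pipe previously created with rpc_mkpipe(). The payload fields msg->data and msg->len are assumed to be declared in struct rpc_pipe_msg in rpc_pipe_fs.h; every example_* name is made up.

static int example_send_upcall(struct dentry *pipe_dentry, void *payload, size_t len)
{
	struct rpc_pipe_msg *msg;
	int err;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (msg == NULL)
		return -ENOMEM;
	memset(msg, 0, sizeof(*msg));
	msg->data = payload;	/* assumed payload pointer field */
	msg->len = len;

	/* Returns -EPIPE when no reader is attached and the pipe was not
	 * created with RPC_PIPE_WAIT_FOR_OPEN; the message was not queued
	 * in that case, so free it here. */
	err = rpc_queue_upcall(pipe_dentry->d_inode, msg);
	if (err < 0)
		kfree(msg);
	return err;
}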
97
98static void
99rpc_close_pipes(struct inode *inode)
100{
101 struct rpc_inode *rpci = RPC_I(inode);
102
103 cancel_delayed_work(&rpci->queue_timeout);
104 flush_scheduled_work();
105 down(&inode->i_sem);
106 if (rpci->ops != NULL) {
107 rpci->nreaders = 0;
108 __rpc_purge_upcall(inode, -EPIPE);
109 rpci->nwriters = 0;
110 if (rpci->ops->release_pipe)
111 rpci->ops->release_pipe(inode);
112 rpci->ops = NULL;
113 }
114 up(&inode->i_sem);
115}
116
117static inline void
118rpc_inode_setowner(struct inode *inode, void *private)
119{
120 RPC_I(inode)->private = private;
121}
122
123static struct inode *
124rpc_alloc_inode(struct super_block *sb)
125{
126 struct rpc_inode *rpci;
127 rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL);
128 if (!rpci)
129 return NULL;
130 return &rpci->vfs_inode;
131}
132
133static void
134rpc_destroy_inode(struct inode *inode)
135{
136 kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
137}
138
139static int
140rpc_pipe_open(struct inode *inode, struct file *filp)
141{
142 struct rpc_inode *rpci = RPC_I(inode);
143 int res = -ENXIO;
144
145 down(&inode->i_sem);
146 if (rpci->ops != NULL) {
147 if (filp->f_mode & FMODE_READ)
148 rpci->nreaders ++;
149 if (filp->f_mode & FMODE_WRITE)
150 rpci->nwriters ++;
151 res = 0;
152 }
153 up(&inode->i_sem);
154 return res;
155}
156
157static int
158rpc_pipe_release(struct inode *inode, struct file *filp)
159{
160 struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
161 struct rpc_pipe_msg *msg;
162
163 down(&inode->i_sem);
164 if (rpci->ops == NULL)
165 goto out;
166 msg = (struct rpc_pipe_msg *)filp->private_data;
167 if (msg != NULL) {
168 msg->errno = -EPIPE;
169 list_del_init(&msg->list);
170 rpci->ops->destroy_msg(msg);
171 }
172 if (filp->f_mode & FMODE_WRITE)
173 rpci->nwriters --;
174 if (filp->f_mode & FMODE_READ)
175 rpci->nreaders --;
176 if (!rpci->nreaders)
177 __rpc_purge_upcall(inode, -EPIPE);
178 if (rpci->ops->release_pipe)
179 rpci->ops->release_pipe(inode);
180out:
181 up(&inode->i_sem);
182 return 0;
183}
184
185static ssize_t
186rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
187{
188 struct inode *inode = filp->f_dentry->d_inode;
189 struct rpc_inode *rpci = RPC_I(inode);
190 struct rpc_pipe_msg *msg;
191 int res = 0;
192
193 down(&inode->i_sem);
194 if (rpci->ops == NULL) {
195 res = -EPIPE;
196 goto out_unlock;
197 }
198 msg = filp->private_data;
199 if (msg == NULL) {
200 if (!list_empty(&rpci->pipe)) {
201 msg = list_entry(rpci->pipe.next,
202 struct rpc_pipe_msg,
203 list);
204 list_move(&msg->list, &rpci->in_upcall);
205 rpci->pipelen -= msg->len;
206 filp->private_data = msg;
207 msg->copied = 0;
208 }
209 if (msg == NULL)
210 goto out_unlock;
211 }
212 /* NOTE: it is up to the callback to update msg->copied */
213 res = rpci->ops->upcall(filp, msg, buf, len);
214 if (res < 0 || msg->len == msg->copied) {
215 filp->private_data = NULL;
216 list_del_init(&msg->list);
217 rpci->ops->destroy_msg(msg);
218 }
219out_unlock:
220 up(&inode->i_sem);
221 return res;
222}
223
224static ssize_t
225rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset)
226{
227 struct inode *inode = filp->f_dentry->d_inode;
228 struct rpc_inode *rpci = RPC_I(inode);
229 int res;
230
231 down(&inode->i_sem);
232 res = -EPIPE;
233 if (rpci->ops != NULL)
234 res = rpci->ops->downcall(filp, buf, len);
235 up(&inode->i_sem);
236 return res;
237}
238
239static unsigned int
240rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
241{
242 struct rpc_inode *rpci;
243 unsigned int mask = 0;
244
245 rpci = RPC_I(filp->f_dentry->d_inode);
246 poll_wait(filp, &rpci->waitq, wait);
247
248 mask = POLLOUT | POLLWRNORM;
249 if (rpci->ops == NULL)
250 mask |= POLLERR | POLLHUP;
251 if (!list_empty(&rpci->pipe))
252 mask |= POLLIN | POLLRDNORM;
253 return mask;
254}
255
256static int
257rpc_pipe_ioctl(struct inode *ino, struct file *filp,
258 unsigned int cmd, unsigned long arg)
259{
260 struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode);
261 int len;
262
263 switch (cmd) {
264 case FIONREAD:
265 if (rpci->ops == NULL)
266 return -EPIPE;
267 len = rpci->pipelen;
268 if (filp->private_data) {
269 struct rpc_pipe_msg *msg;
270 msg = (struct rpc_pipe_msg *)filp->private_data;
271 len += msg->len - msg->copied;
272 }
273 return put_user(len, (int __user *)arg);
274 default:
275 return -EINVAL;
276 }
277}
278
279static struct file_operations rpc_pipe_fops = {
280 .owner = THIS_MODULE,
281 .llseek = no_llseek,
282 .read = rpc_pipe_read,
283 .write = rpc_pipe_write,
284 .poll = rpc_pipe_poll,
285 .ioctl = rpc_pipe_ioctl,
286 .open = rpc_pipe_open,
287 .release = rpc_pipe_release,
288};
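Seen from the other end, a user-space daemon consumes upcalls by reading the pipe node and answers them by writing a downcall back; poll() reports POLLIN while a message is queued and POLLERR|POLLHUP once the kernel side has closed the pipe. A hypothetical user-space sketch, with an illustrative mount point and path:

#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <poll.h>

int main(void)
{
	char buf[2048];
	struct pollfd pfd;
	int fd = open("/var/lib/nfs/rpc_pipefs/example/pipe", O_RDWR);

	if (fd < 0)
		return 1;
	pfd.fd = fd;
	pfd.events = POLLIN;
	while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		ssize_t n = read(fd, buf, sizeof(buf));	/* one upcall message */
		if (n <= 0)
			break;
		/* A real daemon would parse the upcall here; writing the raw
		 * bytes back is only meant to show the downcall direction. */
		if (write(fd, buf, n) < 0)
			break;
	}
	close(fd);
	return 0;
}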
289
290static int
291rpc_show_info(struct seq_file *m, void *v)
292{
293 struct rpc_clnt *clnt = m->private;
294
295 seq_printf(m, "RPC server: %s\n", clnt->cl_server);
296 seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
297 clnt->cl_prog, clnt->cl_vers);
298 seq_printf(m, "address: %u.%u.%u.%u\n",
299 NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr));
300 seq_printf(m, "protocol: %s\n",
301 clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
302 return 0;
303}
304
305static int
306rpc_info_open(struct inode *inode, struct file *file)
307{
308 struct rpc_clnt *clnt;
309 int ret = single_open(file, rpc_show_info, NULL);
310
311 if (!ret) {
312 struct seq_file *m = file->private_data;
313 down(&inode->i_sem);
314 clnt = RPC_I(inode)->private;
315 if (clnt) {
316 atomic_inc(&clnt->cl_users);
317 m->private = clnt;
318 } else {
319 single_release(inode, file);
320 ret = -EINVAL;
321 }
322 up(&inode->i_sem);
323 }
324 return ret;
325}
326
327static int
328rpc_info_release(struct inode *inode, struct file *file)
329{
330 struct seq_file *m = file->private_data;
331 struct rpc_clnt *clnt = (struct rpc_clnt *)m->private;
332
333 if (clnt)
334 rpc_release_client(clnt);
335 return single_release(inode, file);
336}
337
338static struct file_operations rpc_info_operations = {
339 .owner = THIS_MODULE,
340 .open = rpc_info_open,
341 .read = seq_read,
342 .llseek = seq_lseek,
343 .release = rpc_info_release,
344};
345
346
347/*
348 * Pre-created contents of the rpc_pipefs top-level directory.
349 */
350enum {
351 RPCAUTH_Root = 1,
352 RPCAUTH_lockd,
353 RPCAUTH_mount,
354 RPCAUTH_nfs,
355 RPCAUTH_portmap,
356 RPCAUTH_statd,
357 RPCAUTH_RootEOF
358};
359
360/*
361 * Description of fs contents.
362 */
363struct rpc_filelist {
364 char *name;
365 struct file_operations *i_fop;
366 int mode;
367};
368
369static struct rpc_filelist files[] = {
370 [RPCAUTH_lockd] = {
371 .name = "lockd",
372 .mode = S_IFDIR | S_IRUGO | S_IXUGO,
373 },
374 [RPCAUTH_mount] = {
375 .name = "mount",
376 .mode = S_IFDIR | S_IRUGO | S_IXUGO,
377 },
378 [RPCAUTH_nfs] = {
379 .name = "nfs",
380 .mode = S_IFDIR | S_IRUGO | S_IXUGO,
381 },
382 [RPCAUTH_portmap] = {
383 .name = "portmap",
384 .mode = S_IFDIR | S_IRUGO | S_IXUGO,
385 },
386 [RPCAUTH_statd] = {
387 .name = "statd",
388 .mode = S_IFDIR | S_IRUGO | S_IXUGO,
389 },
390};
391
392enum {
393 RPCAUTH_info = 2,
394 RPCAUTH_EOF
395};
396
397static struct rpc_filelist authfiles[] = {
398 [RPCAUTH_info] = {
399 .name = "info",
400 .i_fop = &rpc_info_operations,
401 .mode = S_IFREG | S_IRUSR,
402 },
403};
404
405static int
406rpc_get_mount(void)
407{
408 return simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
409}
410
411static void
412rpc_put_mount(void)
413{
414 simple_release_fs(&rpc_mount, &rpc_mount_count);
415}
416
417static int
418rpc_lookup_parent(char *path, struct nameidata *nd)
419{
420 if (path[0] == '\0')
421 return -ENOENT;
422 if (rpc_get_mount()) {
423 printk(KERN_WARNING "%s: %s failed to mount "
424 "pseudofilesystem \n", __FILE__, __FUNCTION__);
425 return -ENODEV;
426 }
427 nd->mnt = mntget(rpc_mount);
428 nd->dentry = dget(rpc_mount->mnt_root);
429 nd->last_type = LAST_ROOT;
430 nd->flags = LOOKUP_PARENT;
431 nd->depth = 0;
432
433 if (path_walk(path, nd)) {
434 printk(KERN_WARNING "%s: %s failed to find path %s\n",
435 __FILE__, __FUNCTION__, path);
436 rpc_put_mount();
437 return -ENOENT;
438 }
439 return 0;
440}
441
442static void
443rpc_release_path(struct nameidata *nd)
444{
445 path_release(nd);
446 rpc_put_mount();
447}
448
449static struct inode *
450rpc_get_inode(struct super_block *sb, int mode)
451{
452 struct inode *inode = new_inode(sb);
453 if (!inode)
454 return NULL;
455 inode->i_mode = mode;
456 inode->i_uid = inode->i_gid = 0;
457 inode->i_blksize = PAGE_CACHE_SIZE;
458 inode->i_blocks = 0;
459 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
460 switch(mode & S_IFMT) {
461 case S_IFDIR:
462 inode->i_fop = &simple_dir_operations;
463 inode->i_op = &simple_dir_inode_operations;
464 inode->i_nlink++;
465 default:
466 break;
467 }
468 return inode;
469}
470
471/*
472 * FIXME: This probably has races.
473 */
474static void
475rpc_depopulate(struct dentry *parent)
476{
477 struct inode *dir = parent->d_inode;
478 struct list_head *pos, *next;
479 struct dentry *dentry, *dvec[10];
480 int n = 0;
481
482 down(&dir->i_sem);
483repeat:
484 spin_lock(&dcache_lock);
485 list_for_each_safe(pos, next, &parent->d_subdirs) {
486 dentry = list_entry(pos, struct dentry, d_child);
487 spin_lock(&dentry->d_lock);
488 if (!d_unhashed(dentry)) {
489 dget_locked(dentry);
490 __d_drop(dentry);
491 spin_unlock(&dentry->d_lock);
492 dvec[n++] = dentry;
493 if (n == ARRAY_SIZE(dvec))
494 break;
495 } else
496 spin_unlock(&dentry->d_lock);
497 }
498 spin_unlock(&dcache_lock);
499 if (n) {
500 do {
501 dentry = dvec[--n];
502 if (dentry->d_inode) {
503 rpc_close_pipes(dentry->d_inode);
504 rpc_inode_setowner(dentry->d_inode, NULL);
505 simple_unlink(dir, dentry);
506 }
507 dput(dentry);
508 } while (n);
509 goto repeat;
510 }
511 up(&dir->i_sem);
512}
513
514static int
515rpc_populate(struct dentry *parent,
516 struct rpc_filelist *files,
517 int start, int eof)
518{
519 struct inode *inode, *dir = parent->d_inode;
520 void *private = RPC_I(dir)->private;
521 struct dentry *dentry;
522 int mode, i;
523
524 down(&dir->i_sem);
525 for (i = start; i < eof; i++) {
526 dentry = d_alloc_name(parent, files[i].name);
527 if (!dentry)
528 goto out_bad;
529 mode = files[i].mode;
530 inode = rpc_get_inode(dir->i_sb, mode);
531 if (!inode) {
532 dput(dentry);
533 goto out_bad;
534 }
535 inode->i_ino = i;
536 if (files[i].i_fop)
537 inode->i_fop = files[i].i_fop;
538 if (private)
539 rpc_inode_setowner(inode, private);
540 if (S_ISDIR(mode))
541 dir->i_nlink++;
542 d_add(dentry, inode);
543 }
544 up(&dir->i_sem);
545 return 0;
546out_bad:
547 up(&dir->i_sem);
548 printk(KERN_WARNING "%s: %s failed to populate directory %s\n",
549 __FILE__, __FUNCTION__, parent->d_name.name);
550 return -ENOMEM;
551}
552
553static int
554__rpc_mkdir(struct inode *dir, struct dentry *dentry)
555{
556 struct inode *inode;
557
558 inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR);
559 if (!inode)
560 goto out_err;
561 inode->i_ino = iunique(dir->i_sb, 100);
562 d_instantiate(dentry, inode);
563 dir->i_nlink++;
564 inode_dir_notify(dir, DN_CREATE);
565 rpc_get_mount();
566 return 0;
567out_err:
568 printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
569 __FILE__, __FUNCTION__, dentry->d_name.name);
570 return -ENOMEM;
571}
572
573static int
574__rpc_rmdir(struct inode *dir, struct dentry *dentry)
575{
576 int error;
577
578 shrink_dcache_parent(dentry);
579 if (dentry->d_inode) {
580 rpc_close_pipes(dentry->d_inode);
581 rpc_inode_setowner(dentry->d_inode, NULL);
582 }
583 if ((error = simple_rmdir(dir, dentry)) != 0)
584 return error;
585 if (!error) {
586 inode_dir_notify(dir, DN_DELETE);
587 d_drop(dentry);
588 rpc_put_mount();
589 }
590 return 0;
591}
592
593static struct dentry *
594rpc_lookup_negative(char *path, struct nameidata *nd)
595{
596 struct dentry *dentry;
597 struct inode *dir;
598 int error;
599
600 if ((error = rpc_lookup_parent(path, nd)) != 0)
601 return ERR_PTR(error);
602 dir = nd->dentry->d_inode;
603 down(&dir->i_sem);
604 dentry = lookup_hash(&nd->last, nd->dentry);
605 if (IS_ERR(dentry))
606 goto out_err;
607 if (dentry->d_inode) {
608 dput(dentry);
609 dentry = ERR_PTR(-EEXIST);
610 goto out_err;
611 }
612 return dentry;
613out_err:
614 up(&dir->i_sem);
615 rpc_release_path(nd);
616 return dentry;
617}
618
619
620struct dentry *
621rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
622{
623 struct nameidata nd;
624 struct dentry *dentry;
625 struct inode *dir;
626 int error;
627
628 dentry = rpc_lookup_negative(path, &nd);
629 if (IS_ERR(dentry))
630 return dentry;
631 dir = nd.dentry->d_inode;
632 if ((error = __rpc_mkdir(dir, dentry)) != 0)
633 goto err_dput;
634 RPC_I(dentry->d_inode)->private = rpc_client;
635 error = rpc_populate(dentry, authfiles,
636 RPCAUTH_info, RPCAUTH_EOF);
637 if (error)
638 goto err_depopulate;
639out:
640 up(&dir->i_sem);
641 rpc_release_path(&nd);
642 return dentry;
643err_depopulate:
644 rpc_depopulate(dentry);
645 __rpc_rmdir(dir, dentry);
646err_dput:
647 dput(dentry);
648 printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n",
649 __FILE__, __FUNCTION__, path, error);
650 dentry = ERR_PTR(error);
651 goto out;
652}
653
654int
655rpc_rmdir(char *path)
656{
657 struct nameidata nd;
658 struct dentry *dentry;
659 struct inode *dir;
660 int error;
661
662 if ((error = rpc_lookup_parent(path, &nd)) != 0)
663 return error;
664 dir = nd.dentry->d_inode;
665 down(&dir->i_sem);
666 dentry = lookup_hash(&nd.last, nd.dentry);
667 if (IS_ERR(dentry)) {
668 error = PTR_ERR(dentry);
669 goto out_release;
670 }
671 rpc_depopulate(dentry);
672 error = __rpc_rmdir(dir, dentry);
673 dput(dentry);
674out_release:
675 up(&dir->i_sem);
676 rpc_release_path(&nd);
677 return error;
678}
679
680struct dentry *
681rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
682{
683 struct nameidata nd;
684 struct dentry *dentry;
685 struct inode *dir, *inode;
686 struct rpc_inode *rpci;
687
688 dentry = rpc_lookup_negative(path, &nd);
689 if (IS_ERR(dentry))
690 return dentry;
691 dir = nd.dentry->d_inode;
692 inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR);
693 if (!inode)
694 goto err_dput;
695 inode->i_ino = iunique(dir->i_sb, 100);
696 inode->i_fop = &rpc_pipe_fops;
697 d_instantiate(dentry, inode);
698 rpci = RPC_I(inode);
699 rpci->private = private;
700 rpci->flags = flags;
701 rpci->ops = ops;
702 inode_dir_notify(dir, DN_CREATE);
703out:
704 up(&dir->i_sem);
705 rpc_release_path(&nd);
706 return dentry;
707err_dput:
708 dput(dentry);
709 dentry = ERR_PTR(-ENOMEM);
710 printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n",
711 __FILE__, __FUNCTION__, path, -ENOMEM);
712 goto out;
713}
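An illustrative sketch (hypothetical, not from the sunrpc sources) of how a subsystem might create its own pipe below a directory made with rpc_mkdir(). The handler prototypes are inferred from the call sites in rpc_pipe_read() and rpc_pipe_write() above, copy_to_user() is assumed available via asm/uaccess.h, and every example_* name as well as the path is illustrative.

static ssize_t example_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
				   char __user *dst, size_t buflen)
{
	size_t left = msg->len - msg->copied;
	size_t count = buflen < left ? buflen : left;

	/* msg->data is assumed to hold the payload (see rpc_pipe_fs.h); the
	 * callback must update msg->copied, as noted in rpc_pipe_read(). */
	if (copy_to_user(dst, (char *)msg->data + msg->copied, count))
		return -EFAULT;
	msg->copied += count;
	return count;
}

static ssize_t example_pipe_downcall(struct file *filp, const char __user *src,
				     size_t len)
{
	/* A real handler parses the user-space reply; just consume it here. */
	return len;
}

static void example_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
	kfree(msg);
}

static struct rpc_pipe_ops example_pipe_ops = {
	.upcall		= example_pipe_upcall,
	.downcall	= example_pipe_downcall,
	.destroy_msg	= example_pipe_destroy_msg,
};

static struct dentry *example_create_pipe(struct rpc_clnt *clnt)
{
	/* RPC_PIPE_WAIT_FOR_OPEN keeps upcalls queued for up to
	 * RPC_UPCALL_TIMEOUT even before a reader opens the pipe. */
	return rpc_mkpipe("example/pipe", clnt, &example_pipe_ops,
			  RPC_PIPE_WAIT_FOR_OPEN);
}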
714
715int
716rpc_unlink(char *path)
717{
718 struct nameidata nd;
719 struct dentry *dentry;
720 struct inode *dir;
721 int error;
722
723 if ((error = rpc_lookup_parent(path, &nd)) != 0)
724 return error;
725 dir = nd.dentry->d_inode;
726 down(&dir->i_sem);
727 dentry = lookup_hash(&nd.last, nd.dentry);
728 if (IS_ERR(dentry)) {
729 error = PTR_ERR(dentry);
730 goto out_release;
731 }
732 d_drop(dentry);
733 if (dentry->d_inode) {
734 rpc_close_pipes(dentry->d_inode);
735 rpc_inode_setowner(dentry->d_inode, NULL);
736 error = simple_unlink(dir, dentry);
737 }
738 dput(dentry);
739 inode_dir_notify(dir, DN_DELETE);
740out_release:
741 up(&dir->i_sem);
742 rpc_release_path(&nd);
743 return error;
744}
745
746/*
747 * populate the filesystem
748 */
749static struct super_operations s_ops = {
750 .alloc_inode = rpc_alloc_inode,
751 .destroy_inode = rpc_destroy_inode,
752 .statfs = simple_statfs,
753};
754
755#define RPCAUTH_GSSMAGIC 0x67596969
756
757static int
758rpc_fill_super(struct super_block *sb, void *data, int silent)
759{
760 struct inode *inode;
761 struct dentry *root;
762
763 sb->s_blocksize = PAGE_CACHE_SIZE;
764 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
765 sb->s_magic = RPCAUTH_GSSMAGIC;
766 sb->s_op = &s_ops;
767 sb->s_time_gran = 1;
768
769 inode = rpc_get_inode(sb, S_IFDIR | 0755);
770 if (!inode)
771 return -ENOMEM;
772 root = d_alloc_root(inode);
773 if (!root) {
774 iput(inode);
775 return -ENOMEM;
776 }
777 if (rpc_populate(root, files, RPCAUTH_Root + 1, RPCAUTH_RootEOF))
778 goto out;
779 sb->s_root = root;
780 return 0;
781out:
782 d_genocide(root);
783 dput(root);
784 return -ENOMEM;
785}
786
787static struct super_block *
788rpc_get_sb(struct file_system_type *fs_type,
789 int flags, const char *dev_name, void *data)
790{
791 return get_sb_single(fs_type, flags, data, rpc_fill_super);
792}
793
794static struct file_system_type rpc_pipe_fs_type = {
795 .owner = THIS_MODULE,
796 .name = "rpc_pipefs",
797 .get_sb = rpc_get_sb,
798 .kill_sb = kill_litter_super,
799};
800
801static void
802init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
803{
804 struct rpc_inode *rpci = (struct rpc_inode *) foo;
805
806 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
807 SLAB_CTOR_CONSTRUCTOR) {
808 inode_init_once(&rpci->vfs_inode);
809 rpci->private = NULL;
810 rpci->nreaders = 0;
811 rpci->nwriters = 0;
812 INIT_LIST_HEAD(&rpci->in_upcall);
813 INIT_LIST_HEAD(&rpci->pipe);
814 rpci->pipelen = 0;
815 init_waitqueue_head(&rpci->waitq);
816 INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci);
817 rpci->ops = NULL;
818 }
819}
820
821int register_rpc_pipefs(void)
822{
823 rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
824 sizeof(struct rpc_inode),
825 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
826 init_once, NULL);
827 if (!rpc_inode_cachep)
828 return -ENOMEM;
829 register_filesystem(&rpc_pipe_fs_type);
830 return 0;
831}
832
833void unregister_rpc_pipefs(void)
834{
835 if (kmem_cache_destroy(rpc_inode_cachep))
836 printk(KERN_WARNING "RPC: unable to free inode cache\n");
837 unregister_filesystem(&rpc_pipe_fs_type);
838}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
new file mode 100644
index 000000000000..c06614d0e31d
--- /dev/null
+++ b/net/sunrpc/sched.c
@@ -0,0 +1,1119 @@
1/*
2 * linux/net/sunrpc/sched.c
3 *
4 * Scheduling for synchronous and asynchronous RPC requests.
5 *
6 * Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de>
7 *
8 * TCP NFS related read + write fixes
9 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
10 */
11
12#include <linux/module.h>
13
14#include <linux/sched.h>
15#include <linux/interrupt.h>
16#include <linux/slab.h>
17#include <linux/mempool.h>
18#include <linux/smp.h>
19#include <linux/smp_lock.h>
20#include <linux/spinlock.h>
21
22#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/xprt.h>
24
25#ifdef RPC_DEBUG
26#define RPCDBG_FACILITY RPCDBG_SCHED
27#define RPC_TASK_MAGIC_ID 0xf00baa
28static int rpc_task_id;
29#endif
30
31/*
32 * RPC slabs and memory pools
33 */
34#define RPC_BUFFER_MAXSIZE (2048)
35#define RPC_BUFFER_POOLSIZE (8)
36#define RPC_TASK_POOLSIZE (8)
37static kmem_cache_t *rpc_task_slabp;
38static kmem_cache_t *rpc_buffer_slabp;
39static mempool_t *rpc_task_mempool;
40static mempool_t *rpc_buffer_mempool;
41
42static void __rpc_default_timer(struct rpc_task *task);
43static void rpciod_killall(void);
44static void rpc_free(struct rpc_task *task);
45
46static void rpc_async_schedule(void *);
47
48/*
49 * RPC tasks that create another task (e.g. for contacting the portmapper)
50 * will wait on this queue for their child's completion
51 */
52static RPC_WAITQ(childq, "childq");
53
54/*
55 * RPC tasks sit here while waiting for conditions to improve.
56 */
57static RPC_WAITQ(delay_queue, "delayq");
58
59/*
60 * All RPC tasks are linked into this list
61 */
62static LIST_HEAD(all_tasks);
63
64/*
65 * rpciod-related stuff
66 */
67static DECLARE_MUTEX(rpciod_sema);
68static unsigned int rpciod_users;
69static struct workqueue_struct *rpciod_workqueue;
70
71/*
72 * Spinlock for other critical sections of code.
73 */
74static DEFINE_SPINLOCK(rpc_sched_lock);
75
76/*
77 * Disable the timer for a given RPC task. Should be called with
78 * queue->lock and bh_disabled in order to avoid races within
79 * rpc_run_timer().
80 */
81static inline void
82__rpc_disable_timer(struct rpc_task *task)
83{
84 dprintk("RPC: %4d disabling timer\n", task->tk_pid);
85 task->tk_timeout_fn = NULL;
86 task->tk_timeout = 0;
87}
88
89/*
90 * Run a timeout function.
91 * We use the callback in order to allow __rpc_wake_up_task()
92 * and friends to disable the timer synchronously on SMP systems
93 * without calling del_timer_sync(). The latter could cause a
94 * deadlock if called while we're holding spinlocks...
95 */
96static void rpc_run_timer(struct rpc_task *task)
97{
98 void (*callback)(struct rpc_task *);
99
100 callback = task->tk_timeout_fn;
101 task->tk_timeout_fn = NULL;
102 if (callback && RPC_IS_QUEUED(task)) {
103 dprintk("RPC: %4d running timer\n", task->tk_pid);
104 callback(task);
105 }
106 smp_mb__before_clear_bit();
107 clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
108 smp_mb__after_clear_bit();
109}
110
111/*
112 * Set up a timer for the current task.
113 */
114static inline void
115__rpc_add_timer(struct rpc_task *task, rpc_action timer)
116{
117 if (!task->tk_timeout)
118 return;
119
120 dprintk("RPC: %4d setting alarm for %lu ms\n",
121 task->tk_pid, task->tk_timeout * 1000 / HZ);
122
123 if (timer)
124 task->tk_timeout_fn = timer;
125 else
126 task->tk_timeout_fn = __rpc_default_timer;
127 set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
128 mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
129}
130
131/*
132 * Delete any timer for the current task. Because we use del_timer_sync(),
133 * this function should never be called while holding queue->lock.
134 */
135static void
136rpc_delete_timer(struct rpc_task *task)
137{
138 if (RPC_IS_QUEUED(task))
139 return;
140 if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
141 del_singleshot_timer_sync(&task->tk_timer);
142 dprintk("RPC: %4d deleting timer\n", task->tk_pid);
143 }
144}
145
146/*
147 * Add new request to a priority queue.
148 */
149static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task)
150{
151 struct list_head *q;
152 struct rpc_task *t;
153
154 INIT_LIST_HEAD(&task->u.tk_wait.links);
155 q = &queue->tasks[task->tk_priority];
156 if (unlikely(task->tk_priority > queue->maxpriority))
157 q = &queue->tasks[queue->maxpriority];
158 list_for_each_entry(t, q, u.tk_wait.list) {
159 if (t->tk_cookie == task->tk_cookie) {
160 list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
161 return;
162 }
163 }
164 list_add_tail(&task->u.tk_wait.list, q);
165}
166
167/*
168 * Add new request to wait queue.
169 *
170 * Swapper tasks always get inserted at the head of the queue.
171 * This should avoid many nasty memory deadlocks and hopefully
172 * improve overall performance.
173 * Everyone else gets appended to the queue to ensure proper FIFO behavior.
174 */
175static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
176{
177 BUG_ON (RPC_IS_QUEUED(task));
178
179 if (RPC_IS_PRIORITY(queue))
180 __rpc_add_wait_queue_priority(queue, task);
181 else if (RPC_IS_SWAPPER(task))
182 list_add(&task->u.tk_wait.list, &queue->tasks[0]);
183 else
184 list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
185 task->u.tk_wait.rpc_waitq = queue;
186 rpc_set_queued(task);
187
188 dprintk("RPC: %4d added to queue %p \"%s\"\n",
189 task->tk_pid, queue, rpc_qname(queue));
190}
191
192/*
193 * Remove request from a priority queue.
194 */
195static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
196{
197 struct rpc_task *t;
198
199 if (!list_empty(&task->u.tk_wait.links)) {
200 t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
201 list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
202 list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
203 }
204 list_del(&task->u.tk_wait.list);
205}
206
207/*
208 * Remove request from queue.
209 * Note: must be called with spin lock held.
210 */
211static void __rpc_remove_wait_queue(struct rpc_task *task)
212{
213 struct rpc_wait_queue *queue;
214 queue = task->u.tk_wait.rpc_waitq;
215
216 if (RPC_IS_PRIORITY(queue))
217 __rpc_remove_wait_queue_priority(task);
218 else
219 list_del(&task->u.tk_wait.list);
220 dprintk("RPC: %4d removed from queue %p \"%s\"\n",
221 task->tk_pid, queue, rpc_qname(queue));
222}
223
224static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
225{
226 queue->priority = priority;
227 queue->count = 1 << (priority * 2);
228}
229
230static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie)
231{
232 queue->cookie = cookie;
233 queue->nr = RPC_BATCH_COUNT;
234}
235
236static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
237{
238 rpc_set_waitqueue_priority(queue, queue->maxpriority);
239 rpc_set_waitqueue_cookie(queue, 0);
240}
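A rough worked example of the two counters set above, assuming the conventional priority values 0..2: a queue serving priority p is allowed 1 << (p * 2) cookie batches before __rpc_wake_up_next_priority() (below) looks at the other priority lists, i.e. 1 batch at priority 0, 4 at priority 1 and 16 at priority 2, and each batch hands out at most RPC_BATCH_COUNT tasks that share the same tk_cookie.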
241
242static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio)
243{
244 int i;
245
246 spin_lock_init(&queue->lock);
247 for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
248 INIT_LIST_HEAD(&queue->tasks[i]);
249 queue->maxpriority = maxprio;
250 rpc_reset_waitqueue_priority(queue);
251#ifdef RPC_DEBUG
252 queue->name = qname;
253#endif
254}
255
256void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
257{
258 __rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH);
259}
260
261void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
262{
263 __rpc_init_priority_wait_queue(queue, qname, 0);
264}
265EXPORT_SYMBOL(rpc_init_wait_queue);
266
267/*
268 * Make an RPC task runnable.
269 *
270 * Note: If the task is ASYNC, this must be called with
271 * the spinlock held to protect the wait queue operation.
272 */
273static void rpc_make_runnable(struct rpc_task *task)
274{
275 int do_ret;
276
277 BUG_ON(task->tk_timeout_fn);
278 do_ret = rpc_test_and_set_running(task);
279 rpc_clear_queued(task);
280 if (do_ret)
281 return;
282 if (RPC_IS_ASYNC(task)) {
283 int status;
284
285 INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
286 status = queue_work(task->tk_workqueue, &task->u.tk_work);
287 if (status < 0) {
288 printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
289 task->tk_status = status;
290 return;
291 }
292 } else
293 wake_up(&task->u.tk_wait.waitq);
294}
295
296/*
297 * Place a newly initialized task on the workqueue.
298 */
299static inline void
300rpc_schedule_run(struct rpc_task *task)
301{
302 /* Don't run a child twice! */
303 if (RPC_IS_ACTIVATED(task))
304 return;
305 task->tk_active = 1;
306 rpc_make_runnable(task);
307}
308
309/*
310 * Prepare for sleeping on a wait queue.
311 * By always appending tasks to the list we ensure FIFO behavior.
312 * NB: An RPC task will only receive interrupt-driven events as long
313 * as it's on a wait queue.
314 */
315static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
316 rpc_action action, rpc_action timer)
317{
318 dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
319 rpc_qname(q), jiffies);
320
321 if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
322 printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
323 return;
324 }
325
326 /* Mark the task as being activated if so needed */
327 if (!RPC_IS_ACTIVATED(task))
328 task->tk_active = 1;
329
330 __rpc_add_wait_queue(q, task);
331
332 BUG_ON(task->tk_callback != NULL);
333 task->tk_callback = action;
334 __rpc_add_timer(task, timer);
335}
336
337void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
338 rpc_action action, rpc_action timer)
339{
340 /*
341 * Protect the queue operations.
342 */
343 spin_lock_bh(&q->lock);
344 __rpc_sleep_on(q, task, action, timer);
345 spin_unlock_bh(&q->lock);
346}
347
348/**
349 * __rpc_do_wake_up_task - wake up a single rpc_task
350 * @task: task to be woken up
351 *
352 * Caller must hold queue->lock, and have cleared the task queued flag.
353 */
354static void __rpc_do_wake_up_task(struct rpc_task *task)
355{
356 dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);
357
358#ifdef RPC_DEBUG
359 BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
360#endif
361 /* Has the task been executed yet? If not, we cannot wake it up! */
362 if (!RPC_IS_ACTIVATED(task)) {
363 printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
364 return;
365 }
366
367 __rpc_disable_timer(task);
368 __rpc_remove_wait_queue(task);
369
370 rpc_make_runnable(task);
371
372 dprintk("RPC: __rpc_wake_up_task done\n");
373}
374
375/*
376 * Wake up the specified task
377 */
378static void __rpc_wake_up_task(struct rpc_task *task)
379{
380 if (rpc_start_wakeup(task)) {
381 if (RPC_IS_QUEUED(task))
382 __rpc_do_wake_up_task(task);
383 rpc_finish_wakeup(task);
384 }
385}
386
387/*
388 * Default timeout handler if none specified by user
389 */
390static void
391__rpc_default_timer(struct rpc_task *task)
392{
393 dprintk("RPC: %d timeout (default timer)\n", task->tk_pid);
394 task->tk_status = -ETIMEDOUT;
395 rpc_wake_up_task(task);
396}
397
398/*
399 * Wake up the specified task
400 */
401void rpc_wake_up_task(struct rpc_task *task)
402{
403 if (rpc_start_wakeup(task)) {
404 if (RPC_IS_QUEUED(task)) {
405 struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
406
407 spin_lock_bh(&queue->lock);
408 __rpc_do_wake_up_task(task);
409 spin_unlock_bh(&queue->lock);
410 }
411 rpc_finish_wakeup(task);
412 }
413}
414
415/*
416 * Wake up the next task on a priority queue.
417 */
418static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
419{
420 struct list_head *q;
421 struct rpc_task *task;
422
423 /*
424 * Service a batch of tasks from a single cookie.
425 */
426 q = &queue->tasks[queue->priority];
427 if (!list_empty(q)) {
428 task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
429 if (queue->cookie == task->tk_cookie) {
430 if (--queue->nr)
431 goto out;
432 list_move_tail(&task->u.tk_wait.list, q);
433 }
434 /*
435 * Check if we need to switch queues.
436 */
437 if (--queue->count)
438 goto new_cookie;
439 }
440
441 /*
442 * Service the next queue.
443 */
444 do {
445 if (q == &queue->tasks[0])
446 q = &queue->tasks[queue->maxpriority];
447 else
448 q = q - 1;
449 if (!list_empty(q)) {
450 task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
451 goto new_queue;
452 }
453 } while (q != &queue->tasks[queue->priority]);
454
455 rpc_reset_waitqueue_priority(queue);
456 return NULL;
457
458new_queue:
459 rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
460new_cookie:
461 rpc_set_waitqueue_cookie(queue, task->tk_cookie);
462out:
463 __rpc_wake_up_task(task);
464 return task;
465}
466
467/*
468 * Wake up the next task on the wait queue.
469 */
470struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
471{
472 struct rpc_task *task = NULL;
473
474 dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
475 spin_lock_bh(&queue->lock);
476 if (RPC_IS_PRIORITY(queue))
477 task = __rpc_wake_up_next_priority(queue);
478 else {
479 task_for_first(task, &queue->tasks[0])
480 __rpc_wake_up_task(task);
481 }
482 spin_unlock_bh(&queue->lock);
483
484 return task;
485}
486
487/**
488 * rpc_wake_up - wake up all rpc_tasks
489 * @queue: rpc_wait_queue on which the tasks are sleeping
490 *
491 * Grabs queue->lock
492 */
493void rpc_wake_up(struct rpc_wait_queue *queue)
494{
495 struct rpc_task *task;
496
497 struct list_head *head;
498 spin_lock_bh(&queue->lock);
499 head = &queue->tasks[queue->maxpriority];
500 for (;;) {
501 while (!list_empty(head)) {
502 task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
503 __rpc_wake_up_task(task);
504 }
505 if (head == &queue->tasks[0])
506 break;
507 head--;
508 }
509 spin_unlock_bh(&queue->lock);
510}
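An illustrative sketch (hypothetical, not from the sunrpc sources) of the sleep/wake pairing: a resource parks tasks on a private wait queue while it is busy and wakes them all when it becomes free. Locking around the busy flag is omitted; a real user would serialize this with its own spinlock.

struct example_resource {
	int			busy;
	struct rpc_wait_queue	waitq;
};

static void example_resource_init(struct example_resource *res)
{
	res->busy = 0;
	rpc_init_wait_queue(&res->waitq, "example_resq");
}

/* Called from a task's tk_action, so the task is already active. */
static int example_resource_acquire(struct example_resource *res, struct rpc_task *task)
{
	if (!res->busy) {
		res->busy = 1;
		return 1;		/* got it */
	}
	task->tk_timeout = 0;		/* no timer: sleep until woken */
	rpc_sleep_on(&res->waitq, task, NULL, NULL);
	return 0;			/* task re-runs its state when woken */
}

static void example_resource_release(struct example_resource *res)
{
	res->busy = 0;
	rpc_wake_up(&res->waitq);	/* let every waiter retry */
}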
511
512/**
513 * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
514 * @queue: rpc_wait_queue on which the tasks are sleeping
515 * @status: status value to set
516 *
517 * Grabs queue->lock
518 */
519void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
520{
521 struct list_head *head;
522 struct rpc_task *task;
523
524 spin_lock_bh(&queue->lock);
525 head = &queue->tasks[queue->maxpriority];
526 for (;;) {
527 while (!list_empty(head)) {
528 task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
529 task->tk_status = status;
530 __rpc_wake_up_task(task);
531 }
532 if (head == &queue->tasks[0])
533 break;
534 head--;
535 }
536 spin_unlock_bh(&queue->lock);
537}
538
539/*
540 * Run a task at a later time
541 */
542static void __rpc_atrun(struct rpc_task *);
543void
544rpc_delay(struct rpc_task *task, unsigned long delay)
545{
546 task->tk_timeout = delay;
547 rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
548}
549
550static void
551__rpc_atrun(struct rpc_task *task)
552{
553 task->tk_status = 0;
554 rpc_wake_up_task(task);
555}
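A small illustrative tk_action sketch (hypothetical): when a state sees a temporary failure it can re-arm itself and park the task on delay_queue with rpc_delay(); __rpc_atrun() clears tk_status and wakes the task once the delay expires.

static void example_call_reserve(struct rpc_task *task)
{
	/* Assume an earlier step left its result in tk_status (illustrative). */
	if (task->tk_status == -EAGAIN) {
		task->tk_action = example_call_reserve;	/* try again later */
		rpc_delay(task, 5 * HZ);		/* back off for 5 seconds */
		return;
	}
	task->tk_action = NULL;		/* finished: __rpc_execute() falls through */
}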
556
557/*
558 * This is the RPC `scheduler' (or rather, the finite state machine).
559 */
560static int __rpc_execute(struct rpc_task *task)
561{
562 int status = 0;
563
564 dprintk("RPC: %4d rpc_execute flgs %x\n",
565 task->tk_pid, task->tk_flags);
566
567 BUG_ON(RPC_IS_QUEUED(task));
568
569 restarted:
570 while (1) {
571 /*
572 * Garbage collection of pending timers...
573 */
574 rpc_delete_timer(task);
575
576 /*
577 * Execute any pending callback.
578 */
579 if (RPC_DO_CALLBACK(task)) {
580 /* Define a callback save pointer */
581 void (*save_callback)(struct rpc_task *);
582
583 /*
584 * If a callback exists, save it, reset it,
585 * call it.
586 * The save is needed to stop from resetting
587 * another callback set within the callback handler
588 * - Dave
589 */
590 save_callback=task->tk_callback;
591 task->tk_callback=NULL;
592 lock_kernel();
593 save_callback(task);
594 unlock_kernel();
595 }
596
597 /*
598 * Perform the next FSM step.
599 * tk_action may be NULL when the task has been killed
600 * by someone else.
601 */
602 if (!RPC_IS_QUEUED(task)) {
603 if (!task->tk_action)
604 break;
605 lock_kernel();
606 task->tk_action(task);
607 unlock_kernel();
608 }
609
610 /*
611 * Lockless check for whether task is sleeping or not.
612 */
613 if (!RPC_IS_QUEUED(task))
614 continue;
615 rpc_clear_running(task);
616 if (RPC_IS_ASYNC(task)) {
617 /* Careful! we may have raced... */
618 if (RPC_IS_QUEUED(task))
619 return 0;
620 if (rpc_test_and_set_running(task))
621 return 0;
622 continue;
623 }
624
625 /* sync task: sleep here */
626 dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
627 if (RPC_TASK_UNINTERRUPTIBLE(task)) {
628 __wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
629 } else {
630 __wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
631 /*
632 * When a sync task receives a signal, it exits with
633 * -ERESTARTSYS. In order to catch any callbacks that
634 * clean up after sleeping on some queue, we don't
635 * break the loop here, but go around once more.
636 */
637 if (status == -ERESTARTSYS) {
638 dprintk("RPC: %4d got signal\n", task->tk_pid);
639 task->tk_flags |= RPC_TASK_KILLED;
640 rpc_exit(task, -ERESTARTSYS);
641 rpc_wake_up_task(task);
642 }
643 }
644 rpc_set_running(task);
645 dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
646 }
647
648 if (task->tk_exit) {
649 lock_kernel();
650 task->tk_exit(task);
651 unlock_kernel();
652 /* If tk_action is non-null, the user wants us to restart */
653 if (task->tk_action) {
654 if (!RPC_ASSASSINATED(task)) {
655 /* Release RPC slot and buffer memory */
656 if (task->tk_rqstp)
657 xprt_release(task);
658 rpc_free(task);
659 goto restarted;
660 }
661 printk(KERN_ERR "RPC: dead task tries to walk away.\n");
662 }
663 }
664
665 dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
666 status = task->tk_status;
667
668 /* Release all resources associated with the task */
669 rpc_release_task(task);
670 return status;
671}
672
673/*
674 * User-visible entry point to the scheduler.
675 *
676 * This may be called recursively if e.g. an async NFS task updates
677 * the attributes and finds that dirty pages must be flushed.
678 * NOTE: Upon exit of this function the task is guaranteed to be
679 * released. In particular note that tk_release() will have
680 * been called, so your task memory may have been freed.
681 */
682int
683rpc_execute(struct rpc_task *task)
684{
685 BUG_ON(task->tk_active);
686
687 task->tk_active = 1;
688 rpc_set_running(task);
689 return __rpc_execute(task);
690}
691
692static void rpc_async_schedule(void *arg)
693{
694 __rpc_execute((struct rpc_task *)arg);
695}
696
697/*
698 * Allocate memory for RPC purposes.
699 *
700 * We try to ensure that some NFS reads and writes can always proceed
701 * by using a mempool when allocating 'small' buffers.
702 * In order to avoid memory starvation triggering more writebacks of
703 * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
704 */
705void *
706rpc_malloc(struct rpc_task *task, size_t size)
707{
708 int gfp;
709
710 if (task->tk_flags & RPC_TASK_SWAPPER)
711 gfp = GFP_ATOMIC;
712 else
713 gfp = GFP_NOFS;
714
715 if (size > RPC_BUFFER_MAXSIZE) {
716 task->tk_buffer = kmalloc(size, gfp);
717 if (task->tk_buffer)
718 task->tk_bufsize = size;
719 } else {
720 task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
721 if (task->tk_buffer)
722 task->tk_bufsize = RPC_BUFFER_MAXSIZE;
723 }
724 return task->tk_buffer;
725}
726
727static void
728rpc_free(struct rpc_task *task)
729{
730 if (task->tk_buffer) {
731 if (task->tk_bufsize == RPC_BUFFER_MAXSIZE)
732 mempool_free(task->tk_buffer, rpc_buffer_mempool);
733 else
734 kfree(task->tk_buffer);
735 task->tk_buffer = NULL;
736 task->tk_bufsize = 0;
737 }
738}
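An illustrative sketch (hypothetical) of the buffer helpers above, as a call-setup step might use them: small requests come from the mempool, larger ones fall back to kmalloc(), and rpc_free() returns whichever was used.

static int example_allocate_call_buffer(struct rpc_task *task, size_t callsize, size_t replysize)
{
	/* Combined XDR scratch space for the call and the reply. */
	if (rpc_malloc(task, callsize + replysize) == NULL)
		return -ENOMEM;
	/* The buffer stays in task->tk_buffer until rpc_free() or
	 * rpc_release_task() tears the task down. */
	return 0;
}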
739
740/*
741 * Creation and deletion of RPC task structures
742 */
743void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags)
744{
745 memset(task, 0, sizeof(*task));
746 init_timer(&task->tk_timer);
747 task->tk_timer.data = (unsigned long) task;
748 task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
749 task->tk_client = clnt;
750 task->tk_flags = flags;
751 task->tk_exit = callback;
752
753 /* Initialize retry counters */
754 task->tk_garb_retry = 2;
755 task->tk_cred_retry = 2;
756
757 task->tk_priority = RPC_PRIORITY_NORMAL;
758 task->tk_cookie = (unsigned long)current;
759
760 /* Initialize workqueue for async tasks */
761 task->tk_workqueue = rpciod_workqueue;
762 if (!RPC_IS_ASYNC(task))
763 init_waitqueue_head(&task->u.tk_wait.waitq);
764
765 if (clnt) {
766 atomic_inc(&clnt->cl_users);
767 if (clnt->cl_softrtry)
768 task->tk_flags |= RPC_TASK_SOFT;
769 if (!clnt->cl_intr)
770 task->tk_flags |= RPC_TASK_NOINTR;
771 }
772
773#ifdef RPC_DEBUG
774 task->tk_magic = RPC_TASK_MAGIC_ID;
775 task->tk_pid = rpc_task_id++;
776#endif
777 /* Add to global list of all tasks */
778 spin_lock(&rpc_sched_lock);
779 list_add_tail(&task->tk_task, &all_tasks);
780 spin_unlock(&rpc_sched_lock);
781
782 dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
783 current->pid);
784}
785
786static struct rpc_task *
787rpc_alloc_task(void)
788{
789 return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
790}
791
792static void
793rpc_default_free_task(struct rpc_task *task)
794{
795 dprintk("RPC: %4d freeing task\n", task->tk_pid);
796 mempool_free(task, rpc_task_mempool);
797}
798
799/*
800 * Create a new task for the specified client. We have to
801 * clean up after an allocation failure, as the client may
802 * have specified "oneshot".
803 */
804struct rpc_task *
805rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
806{
807 struct rpc_task *task;
808
809 task = rpc_alloc_task();
810 if (!task)
811 goto cleanup;
812
813 rpc_init_task(task, clnt, callback, flags);
814
815 /* Replace tk_release */
816 task->tk_release = rpc_default_free_task;
817
818 dprintk("RPC: %4d allocated task\n", task->tk_pid);
819 task->tk_flags |= RPC_TASK_DYNAMIC;
820out:
821 return task;
822
823cleanup:
824 /* Check whether to release the client */
825 if (clnt) {
826 printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
827 atomic_read(&clnt->cl_users), clnt->cl_oneshot);
828 atomic_inc(&clnt->cl_users); /* pretend we were used ... */
829 rpc_release_client(clnt);
830 }
831 goto out;
832}
833
834void rpc_release_task(struct rpc_task *task)
835{
836 dprintk("RPC: %4d release task\n", task->tk_pid);
837
838#ifdef RPC_DEBUG
839 BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
840#endif
841
842 /* Remove from global task list */
843 spin_lock(&rpc_sched_lock);
844 list_del(&task->tk_task);
845 spin_unlock(&rpc_sched_lock);
846
847 BUG_ON (RPC_IS_QUEUED(task));
848 task->tk_active = 0;
849
850 /* Synchronously delete any running timer */
851 rpc_delete_timer(task);
852
853 /* Release resources */
854 if (task->tk_rqstp)
855 xprt_release(task);
856 if (task->tk_msg.rpc_cred)
857 rpcauth_unbindcred(task);
858 rpc_free(task);
859 if (task->tk_client) {
860 rpc_release_client(task->tk_client);
861 task->tk_client = NULL;
862 }
863
864#ifdef RPC_DEBUG
865 task->tk_magic = 0;
866#endif
867 if (task->tk_release)
868 task->tk_release(task);
869}
870
871/**
872 * rpc_find_parent - find the parent of a child task.
873 * @child: child task
874 *
875 * Checks that the parent task is still sleeping on the
876 * queue 'childq'. If so returns a pointer to the parent.
877 * Upon failure returns NULL.
878 *
879 * Caller must hold childq.lock
880 */
881static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
882{
883 struct rpc_task *task, *parent;
884 struct list_head *le;
885
886 parent = (struct rpc_task *) child->tk_calldata;
887 task_for_each(task, le, &childq.tasks[0])
888 if (task == parent)
889 return parent;
890
891 return NULL;
892}
893
894static void rpc_child_exit(struct rpc_task *child)
895{
896 struct rpc_task *parent;
897
898 spin_lock_bh(&childq.lock);
899 if ((parent = rpc_find_parent(child)) != NULL) {
900 parent->tk_status = child->tk_status;
901 __rpc_wake_up_task(parent);
902 }
903 spin_unlock_bh(&childq.lock);
904}
905
906/*
907 * Note: rpc_new_task releases the client after a failure.
908 */
909struct rpc_task *
910rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
911{
912 struct rpc_task *task;
913
914 task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
915 if (!task)
916 goto fail;
917 task->tk_exit = rpc_child_exit;
918 task->tk_calldata = parent;
919 return task;
920
921fail:
922 parent->tk_status = -ENOMEM;
923 return NULL;
924}
925
926void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
927{
928 spin_lock_bh(&childq.lock);
929 /* N.B. Is it possible for the child to have already finished? */
930 __rpc_sleep_on(&childq, task, func, NULL);
931 rpc_schedule_run(child);
932 spin_unlock_bh(&childq.lock);
933}
934
935/*
936 * Kill all tasks for the given client.
937 * XXX: kill their descendants as well?
938 */
939void rpc_killall_tasks(struct rpc_clnt *clnt)
940{
941 struct rpc_task *rovr;
942 struct list_head *le;
943
944 dprintk("RPC: killing all tasks for client %p\n", clnt);
945
946 /*
947 * Spin lock all_tasks to prevent changes...
948 */
949 spin_lock(&rpc_sched_lock);
950 alltask_for_each(rovr, le, &all_tasks) {
951 if (! RPC_IS_ACTIVATED(rovr))
952 continue;
953 if (!clnt || rovr->tk_client == clnt) {
954 rovr->tk_flags |= RPC_TASK_KILLED;
955 rpc_exit(rovr, -EIO);
956 rpc_wake_up_task(rovr);
957 }
958 }
959 spin_unlock(&rpc_sched_lock);
960}
961
962static DECLARE_MUTEX_LOCKED(rpciod_running);
963
964static void rpciod_killall(void)
965{
966 unsigned long flags;
967
968 while (!list_empty(&all_tasks)) {
969 clear_thread_flag(TIF_SIGPENDING);
970 rpc_killall_tasks(NULL);
971 flush_workqueue(rpciod_workqueue);
972 if (!list_empty(&all_tasks)) {
973 dprintk("rpciod_killall: waiting for tasks to exit\n");
974 yield();
975 }
976 }
977
978 spin_lock_irqsave(&current->sighand->siglock, flags);
979 recalc_sigpending();
980 spin_unlock_irqrestore(&current->sighand->siglock, flags);
981}
982
983/*
984 * Start up the rpciod process if it's not already running.
985 */
986int
987rpciod_up(void)
988{
989 struct workqueue_struct *wq;
990 int error = 0;
991
992 down(&rpciod_sema);
993 dprintk("rpciod_up: users %d\n", rpciod_users);
994 rpciod_users++;
995 if (rpciod_workqueue)
996 goto out;
997 /*
998	 * If there's no workqueue yet, we should be the first user.
999 */
1000 if (rpciod_users > 1)
1001 printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users);
1002 /*
1003	 * Create the rpciod workqueue.
1004 */
1005 error = -ENOMEM;
1006 wq = create_workqueue("rpciod");
1007 if (wq == NULL) {
1008 printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
1009 rpciod_users--;
1010 goto out;
1011 }
1012 rpciod_workqueue = wq;
1013 error = 0;
1014out:
1015 up(&rpciod_sema);
1016 return error;
1017}
1018
1019void
1020rpciod_down(void)
1021{
1022 down(&rpciod_sema);
1023	dprintk("rpciod_down: users %d\n", rpciod_users);
1024 if (rpciod_users) {
1025 if (--rpciod_users)
1026 goto out;
1027 } else
1028 printk(KERN_WARNING "rpciod_down: no users??\n");
1029
1030 if (!rpciod_workqueue) {
1031 dprintk("rpciod_down: Nothing to do!\n");
1032 goto out;
1033 }
1034 rpciod_killall();
1035
1036 destroy_workqueue(rpciod_workqueue);
1037 rpciod_workqueue = NULL;
1038 out:
1039 up(&rpciod_sema);
1040}
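An illustrative sketch (hypothetical): a user of the async scheduler brackets its lifetime with rpciod_up()/rpciod_down() so the shared rpciod workqueue exists for as long as its tasks may run.

static int example_client_start(void)
{
	int err = rpciod_up();

	if (err)
		return err;
	/* create the rpc_clnt and async tasks here */
	return 0;
}

static void example_client_stop(void)
{
	/* kill and drain outstanding tasks first, then drop the reference */
	rpciod_down();
}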
1041
1042#ifdef RPC_DEBUG
1043void rpc_show_tasks(void)
1044{
1045 struct list_head *le;
1046 struct rpc_task *t;
1047
1048 spin_lock(&rpc_sched_lock);
1049 if (list_empty(&all_tasks)) {
1050 spin_unlock(&rpc_sched_lock);
1051 return;
1052 }
1053 printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
1054 "-rpcwait -action- --exit--\n");
1055 alltask_for_each(t, le, &all_tasks) {
1056 const char *rpc_waitq = "none";
1057
1058 if (RPC_IS_QUEUED(t))
1059 rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
1060
1061 printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
1062 t->tk_pid,
1063 (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
1064 t->tk_flags, t->tk_status,
1065 t->tk_client,
1066 (t->tk_client ? t->tk_client->cl_prog : 0),
1067 t->tk_rqstp, t->tk_timeout,
1068 rpc_waitq,
1069 t->tk_action, t->tk_exit);
1070 }
1071 spin_unlock(&rpc_sched_lock);
1072}
1073#endif
1074
1075void
1076rpc_destroy_mempool(void)
1077{
1078 if (rpc_buffer_mempool)
1079 mempool_destroy(rpc_buffer_mempool);
1080 if (rpc_task_mempool)
1081 mempool_destroy(rpc_task_mempool);
1082 if (rpc_task_slabp && kmem_cache_destroy(rpc_task_slabp))
1083 printk(KERN_INFO "rpc_task: not all structures were freed\n");
1084 if (rpc_buffer_slabp && kmem_cache_destroy(rpc_buffer_slabp))
1085 printk(KERN_INFO "rpc_buffers: not all structures were freed\n");
1086}
1087
1088int
1089rpc_init_mempool(void)
1090{
1091 rpc_task_slabp = kmem_cache_create("rpc_tasks",
1092 sizeof(struct rpc_task),
1093 0, SLAB_HWCACHE_ALIGN,
1094 NULL, NULL);
1095 if (!rpc_task_slabp)
1096 goto err_nomem;
1097 rpc_buffer_slabp = kmem_cache_create("rpc_buffers",
1098 RPC_BUFFER_MAXSIZE,
1099 0, SLAB_HWCACHE_ALIGN,
1100 NULL, NULL);
1101 if (!rpc_buffer_slabp)
1102 goto err_nomem;
1103 rpc_task_mempool = mempool_create(RPC_TASK_POOLSIZE,
1104 mempool_alloc_slab,
1105 mempool_free_slab,
1106 rpc_task_slabp);
1107 if (!rpc_task_mempool)
1108 goto err_nomem;
1109 rpc_buffer_mempool = mempool_create(RPC_BUFFER_POOLSIZE,
1110 mempool_alloc_slab,
1111 mempool_free_slab,
1112 rpc_buffer_slabp);
1113 if (!rpc_buffer_mempool)
1114 goto err_nomem;
1115 return 0;
1116err_nomem:
1117 rpc_destroy_mempool();
1118 return -ENOMEM;
1119}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
new file mode 100644
index 000000000000..9b67dc19944c
--- /dev/null
+++ b/net/sunrpc/stats.c
@@ -0,0 +1,175 @@
1/*
2 * linux/net/sunrpc/stats.c
3 *
4 * procfs-based user access to generic RPC statistics. The stats files
5 * reside in /proc/net/rpc.
6 *
7 * The read routines assume that the buffer passed in is just big enough.
8 * If you implement an RPC service that has its own stats routine which
9 * appends the generic RPC stats, make sure you don't exceed the PAGE_SIZE
10 * limit.
11 *
12 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
13 */
14
15#include <linux/module.h>
16
17#include <linux/init.h>
18#include <linux/kernel.h>
19#include <linux/sched.h>
20#include <linux/proc_fs.h>
21#include <linux/seq_file.h>
22#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/svcsock.h>
24
25#define RPCDBG_FACILITY RPCDBG_MISC
26
27struct proc_dir_entry *proc_net_rpc = NULL;
28
29/*
30 * Get RPC client stats
31 */
32static int rpc_proc_show(struct seq_file *seq, void *v) {
33 const struct rpc_stat *statp = seq->private;
34 const struct rpc_program *prog = statp->program;
35 int i, j;
36
37 seq_printf(seq,
38 "net %d %d %d %d\n",
39 statp->netcnt,
40 statp->netudpcnt,
41 statp->nettcpcnt,
42 statp->nettcpconn);
43 seq_printf(seq,
44 "rpc %d %d %d\n",
45 statp->rpccnt,
46 statp->rpcretrans,
47 statp->rpcauthrefresh);
48
49 for (i = 0; i < prog->nrvers; i++) {
50 const struct rpc_version *vers = prog->version[i];
51 if (!vers)
52 continue;
53 seq_printf(seq, "proc%d %d",
54 vers->number, vers->nrprocs);
55 for (j = 0; j < vers->nrprocs; j++)
56 seq_printf(seq, " %d",
57 vers->procs[j].p_count);
58 seq_putc(seq, '\n');
59 }
60 return 0;
61}
62
63static int rpc_proc_open(struct inode *inode, struct file *file)
64{
65 return single_open(file, rpc_proc_show, PDE(inode)->data);
66}
67
68static struct file_operations rpc_proc_fops = {
69 .owner = THIS_MODULE,
70 .open = rpc_proc_open,
71 .read = seq_read,
72 .llseek = seq_lseek,
73 .release = single_release,
74};
75
76/*
77 * Get RPC server stats
78 */
79void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
80 const struct svc_program *prog = statp->program;
81 const struct svc_procedure *proc;
82 const struct svc_version *vers;
83 int i, j;
84
85 seq_printf(seq,
86 "net %d %d %d %d\n",
87 statp->netcnt,
88 statp->netudpcnt,
89 statp->nettcpcnt,
90 statp->nettcpconn);
91 seq_printf(seq,
92 "rpc %d %d %d %d %d\n",
93 statp->rpccnt,
94 statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
95 statp->rpcbadfmt,
96 statp->rpcbadauth,
97 statp->rpcbadclnt);
98
99 for (i = 0; i < prog->pg_nvers; i++) {
100 if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
101 continue;
102 seq_printf(seq, "proc%d %d", i, vers->vs_nproc);
103 for (j = 0; j < vers->vs_nproc; j++, proc++)
104 seq_printf(seq, " %d", proc->pc_count);
105 seq_putc(seq, '\n');
106 }
107}
108
109/*
110 * Register/unregister RPC proc files
111 */
112static inline struct proc_dir_entry *
113do_register(const char *name, void *data, struct file_operations *fops)
114{
115 struct proc_dir_entry *ent;
116
117 rpc_proc_init();
118 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
119
120 ent = create_proc_entry(name, 0, proc_net_rpc);
121 if (ent) {
122 ent->proc_fops = fops;
123 ent->data = data;
124 }
125 return ent;
126}
127
128struct proc_dir_entry *
129rpc_proc_register(struct rpc_stat *statp)
130{
131 return do_register(statp->program->name, statp, &rpc_proc_fops);
132}
133
134void
135rpc_proc_unregister(const char *name)
136{
137 remove_proc_entry(name, proc_net_rpc);
138}
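An illustrative sketch (hypothetical, mirroring the portmap program defined earlier in this commit): a client program publishes its counters under /proc/net/rpc by pointing a struct rpc_stat at its rpc_program and registering it; the entry is removed again by name.

static struct rpc_stat example_stats;

static struct rpc_program example_program = {
	.name	= "example",
	.number	= 400000,		/* illustrative program number */
	.stats	= &example_stats,	/* versions elided in this sketch */
};

static int __init example_stats_init(void)
{
	example_stats.program = &example_program;
	/* Creates /proc/net/rpc/example served by rpc_proc_fops. */
	if (rpc_proc_register(&example_stats) == NULL)
		return -ENOMEM;
	return 0;
}

static void __exit example_stats_exit(void)
{
	rpc_proc_unregister("example");
}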
139
140struct proc_dir_entry *
141svc_proc_register(struct svc_stat *statp, struct file_operations *fops)
142{
143 return do_register(statp->program->pg_name, statp, fops);
144}
145
146void
147svc_proc_unregister(const char *name)
148{
149 remove_proc_entry(name, proc_net_rpc);
150}
151
152void
153rpc_proc_init(void)
154{
155 dprintk("RPC: registering /proc/net/rpc\n");
156 if (!proc_net_rpc) {
157 struct proc_dir_entry *ent;
158 ent = proc_mkdir("rpc", proc_net);
159 if (ent) {
160 ent->owner = THIS_MODULE;
161 proc_net_rpc = ent;
162 }
163 }
164}
165
166void
167rpc_proc_exit(void)
168{
169 dprintk("RPC: unregistering /proc/net/rpc\n");
170 if (proc_net_rpc) {
171 proc_net_rpc = NULL;
172 remove_proc_entry("net/rpc", NULL);
173 }
174}
175
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
new file mode 100644
index 000000000000..d4f26bf9e732
--- /dev/null
+++ b/net/sunrpc/sunrpc_syms.c
@@ -0,0 +1,185 @@
1/*
2 * linux/net/sunrpc/sunrpc_syms.c
3 *
4 * Symbols exported by the sunrpc module.
5 *
6 * Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/config.h>
10#include <linux/module.h>
11
12#include <linux/types.h>
13#include <linux/socket.h>
14#include <linux/sched.h>
15#include <linux/uio.h>
16#include <linux/unistd.h>
17#include <linux/init.h>
18
19#include <linux/sunrpc/sched.h>
20#include <linux/sunrpc/clnt.h>
21#include <linux/sunrpc/svc.h>
22#include <linux/sunrpc/svcsock.h>
23#include <linux/sunrpc/auth.h>
24#include <linux/workqueue.h>
25#include <linux/sunrpc/rpc_pipe_fs.h>
26
27
28/* RPC scheduler */
29EXPORT_SYMBOL(rpc_execute);
30EXPORT_SYMBOL(rpc_init_task);
31EXPORT_SYMBOL(rpc_sleep_on);
32EXPORT_SYMBOL(rpc_wake_up_next);
33EXPORT_SYMBOL(rpc_wake_up_task);
34EXPORT_SYMBOL(rpc_new_child);
35EXPORT_SYMBOL(rpc_run_child);
36EXPORT_SYMBOL(rpciod_down);
37EXPORT_SYMBOL(rpciod_up);
38EXPORT_SYMBOL(rpc_new_task);
39EXPORT_SYMBOL(rpc_wake_up_status);
40EXPORT_SYMBOL(rpc_release_task);
41
42/* RPC client functions */
43EXPORT_SYMBOL(rpc_create_client);
44EXPORT_SYMBOL(rpc_clone_client);
45EXPORT_SYMBOL(rpc_destroy_client);
46EXPORT_SYMBOL(rpc_shutdown_client);
47EXPORT_SYMBOL(rpc_release_client);
48EXPORT_SYMBOL(rpc_killall_tasks);
49EXPORT_SYMBOL(rpc_call_sync);
50EXPORT_SYMBOL(rpc_call_async);
51EXPORT_SYMBOL(rpc_call_setup);
52EXPORT_SYMBOL(rpc_clnt_sigmask);
53EXPORT_SYMBOL(rpc_clnt_sigunmask);
54EXPORT_SYMBOL(rpc_delay);
55EXPORT_SYMBOL(rpc_restart_call);
56EXPORT_SYMBOL(rpc_setbufsize);
57EXPORT_SYMBOL(rpc_unlink);
58EXPORT_SYMBOL(rpc_wake_up);
59EXPORT_SYMBOL(rpc_queue_upcall);
60EXPORT_SYMBOL(rpc_mkpipe);
61
62/* Client transport */
63EXPORT_SYMBOL(xprt_create_proto);
64EXPORT_SYMBOL(xprt_destroy);
65EXPORT_SYMBOL(xprt_set_timeout);
66EXPORT_SYMBOL(xprt_udp_slot_table_entries);
67EXPORT_SYMBOL(xprt_tcp_slot_table_entries);
68
69/* Client credential cache */
70EXPORT_SYMBOL(rpcauth_register);
71EXPORT_SYMBOL(rpcauth_unregister);
72EXPORT_SYMBOL(rpcauth_create);
73EXPORT_SYMBOL(rpcauth_lookupcred);
74EXPORT_SYMBOL(rpcauth_lookup_credcache);
75EXPORT_SYMBOL(rpcauth_free_credcache);
76EXPORT_SYMBOL(rpcauth_init_credcache);
77EXPORT_SYMBOL(put_rpccred);
78
79/* RPC server stuff */
80EXPORT_SYMBOL(svc_create);
81EXPORT_SYMBOL(svc_create_thread);
82EXPORT_SYMBOL(svc_exit_thread);
83EXPORT_SYMBOL(svc_destroy);
84EXPORT_SYMBOL(svc_drop);
85EXPORT_SYMBOL(svc_process);
86EXPORT_SYMBOL(svc_recv);
87EXPORT_SYMBOL(svc_wake_up);
88EXPORT_SYMBOL(svc_makesock);
89EXPORT_SYMBOL(svc_reserve);
90EXPORT_SYMBOL(svc_auth_register);
91EXPORT_SYMBOL(auth_domain_lookup);
92EXPORT_SYMBOL(svc_authenticate);
93EXPORT_SYMBOL(svc_set_client);
94
95/* RPC statistics */
96#ifdef CONFIG_PROC_FS
97EXPORT_SYMBOL(rpc_proc_register);
98EXPORT_SYMBOL(rpc_proc_unregister);
99EXPORT_SYMBOL(svc_proc_register);
100EXPORT_SYMBOL(svc_proc_unregister);
101EXPORT_SYMBOL(svc_seq_show);
102#endif
103
104/* caching... */
105EXPORT_SYMBOL(auth_domain_find);
106EXPORT_SYMBOL(auth_domain_put);
107EXPORT_SYMBOL(auth_unix_add_addr);
108EXPORT_SYMBOL(auth_unix_forget_old);
109EXPORT_SYMBOL(auth_unix_lookup);
110EXPORT_SYMBOL(cache_check);
111EXPORT_SYMBOL(cache_flush);
112EXPORT_SYMBOL(cache_purge);
113EXPORT_SYMBOL(cache_fresh);
114EXPORT_SYMBOL(cache_init);
115EXPORT_SYMBOL(cache_register);
116EXPORT_SYMBOL(cache_unregister);
117EXPORT_SYMBOL(qword_add);
118EXPORT_SYMBOL(qword_addhex);
119EXPORT_SYMBOL(qword_get);
120EXPORT_SYMBOL(svcauth_unix_purge);
121EXPORT_SYMBOL(unix_domain_find);
122
123/* Generic XDR */
124EXPORT_SYMBOL(xdr_encode_string);
125EXPORT_SYMBOL(xdr_decode_string);
126EXPORT_SYMBOL(xdr_decode_string_inplace);
127EXPORT_SYMBOL(xdr_decode_netobj);
128EXPORT_SYMBOL(xdr_encode_netobj);
129EXPORT_SYMBOL(xdr_encode_pages);
130EXPORT_SYMBOL(xdr_inline_pages);
131EXPORT_SYMBOL(xdr_shift_buf);
132EXPORT_SYMBOL(xdr_buf_from_iov);
133EXPORT_SYMBOL(xdr_buf_subsegment);
134EXPORT_SYMBOL(xdr_buf_read_netobj);
135EXPORT_SYMBOL(read_bytes_from_xdr_buf);
136
137/* Debugging symbols */
138#ifdef RPC_DEBUG
139EXPORT_SYMBOL(rpc_debug);
140EXPORT_SYMBOL(nfs_debug);
141EXPORT_SYMBOL(nfsd_debug);
142EXPORT_SYMBOL(nlm_debug);
143#endif
144
145extern int register_rpc_pipefs(void);
146extern void unregister_rpc_pipefs(void);
147
148static int __init
149init_sunrpc(void)
150{
151 int err = register_rpc_pipefs();
152 if (err)
153 goto out;
154 err = rpc_init_mempool() != 0;
155 if (err)
156 goto out;
157#ifdef RPC_DEBUG
158 rpc_register_sysctl();
159#endif
160#ifdef CONFIG_PROC_FS
161 rpc_proc_init();
162#endif
163 cache_register(&auth_domain_cache);
164 cache_register(&ip_map_cache);
165out:
166 return err;
167}
168
169static void __exit
170cleanup_sunrpc(void)
171{
172 unregister_rpc_pipefs();
173 rpc_destroy_mempool();
174 cache_unregister(&auth_domain_cache);
175 cache_unregister(&ip_map_cache);
176#ifdef RPC_DEBUG
177 rpc_unregister_sysctl();
178#endif
179#ifdef CONFIG_PROC_FS
180 rpc_proc_exit();
181#endif
182}
183MODULE_LICENSE("GPL");
184module_init(init_sunrpc);
185module_exit(cleanup_sunrpc);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
new file mode 100644
index 000000000000..bb2d99f33315
--- /dev/null
+++ b/net/sunrpc/svc.c
@@ -0,0 +1,490 @@
1/*
2 * linux/net/sunrpc/svc.c
3 *
4 * High-level RPC service routines
5 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/linkage.h>
10#include <linux/sched.h>
11#include <linux/errno.h>
12#include <linux/net.h>
13#include <linux/in.h>
14#include <linux/mm.h>
15
16#include <linux/sunrpc/types.h>
17#include <linux/sunrpc/xdr.h>
18#include <linux/sunrpc/stats.h>
19#include <linux/sunrpc/svcsock.h>
20#include <linux/sunrpc/clnt.h>
21
22#define RPCDBG_FACILITY RPCDBG_SVCDSP
23#define RPC_PARANOIA 1
24
25/*
26 * Create an RPC service
27 */
28struct svc_serv *
29svc_create(struct svc_program *prog, unsigned int bufsize)
30{
31 struct svc_serv *serv;
32 int vers;
33 unsigned int xdrsize;
34
35 if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
36 return NULL;
37 memset(serv, 0, sizeof(*serv));
38 serv->sv_program = prog;
39 serv->sv_nrthreads = 1;
40 serv->sv_stats = prog->pg_stats;
41 serv->sv_bufsz = bufsize? bufsize : 4096;
42 prog->pg_lovers = prog->pg_nvers-1;
43 xdrsize = 0;
44 for (vers=0; vers<prog->pg_nvers ; vers++)
45 if (prog->pg_vers[vers]) {
46 prog->pg_hivers = vers;
47 if (prog->pg_lovers > vers)
48 prog->pg_lovers = vers;
49 if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
50 xdrsize = prog->pg_vers[vers]->vs_xdrsize;
51 }
52 serv->sv_xdrsize = xdrsize;
53 INIT_LIST_HEAD(&serv->sv_threads);
54 INIT_LIST_HEAD(&serv->sv_sockets);
55 INIT_LIST_HEAD(&serv->sv_tempsocks);
56 INIT_LIST_HEAD(&serv->sv_permsocks);
57 spin_lock_init(&serv->sv_lock);
58
59 serv->sv_name = prog->pg_name;
60
61 /* Remove any stale portmap registrations */
62 svc_register(serv, 0, 0);
63
64 return serv;
65}
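/*
 * Illustrative sketch, not part of the patch: how a service such as
 * nfsd might describe its program and create a server with
 * svc_create().  Only the svc_program fields consumed above are
 * filled in; the version tables, stats pointer and program number are
 * hypothetical placeholders.
 */
static struct svc_version *example_versions[] = {
	[2] = &example_version2,	/* hypothetical version 2 table */
	[3] = &example_version3,	/* hypothetical version 3 table */
};

static struct svc_program example_program = {
	.pg_prog	 = 400042,		/* hypothetical program number */
	.pg_nvers	 = 4,			/* highest version + 1 */
	.pg_vers	 = example_versions,
	.pg_name	 = "example",
	.pg_class	 = "nfsd",		/* class used for ip_map lookups */
	.pg_stats	 = &example_stats,	/* hypothetical svc_stat */
	.pg_authenticate = svc_set_client,
};

/* serv = svc_create(&example_program, 8 * 1024); */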
66
67/*
68 * Destroy an RPC service
69 */
70void
71svc_destroy(struct svc_serv *serv)
72{
73 struct svc_sock *svsk;
74
75 dprintk("RPC: svc_destroy(%s, %d)\n",
76 serv->sv_program->pg_name,
77 serv->sv_nrthreads);
78
79 if (serv->sv_nrthreads) {
80 if (--(serv->sv_nrthreads) != 0) {
81 svc_sock_update_bufs(serv);
82 return;
83 }
84 } else
85 printk("svc_destroy: no threads for serv=%p!\n", serv);
86
87 while (!list_empty(&serv->sv_tempsocks)) {
88 svsk = list_entry(serv->sv_tempsocks.next,
89 struct svc_sock,
90 sk_list);
91 svc_delete_socket(svsk);
92 }
93 while (!list_empty(&serv->sv_permsocks)) {
94 svsk = list_entry(serv->sv_permsocks.next,
95 struct svc_sock,
96 sk_list);
97 svc_delete_socket(svsk);
98 }
99
100 cache_clean_deferred(serv);
101
102 /* Unregister service with the portmapper */
103 svc_register(serv, 0, 0);
104 kfree(serv);
105}
106
107/*
108 * Allocate an RPC server's buffer space.
109 * We allocate pages and place them in rq_argpages.
110 */
111static int
112svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
113{
114 int pages;
115 int arghi;
116
117 if (size > RPCSVC_MAXPAYLOAD)
118 size = RPCSVC_MAXPAYLOAD;
119 pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE;
120 rqstp->rq_argused = 0;
121 rqstp->rq_resused = 0;
122 arghi = 0;
123 if (pages > RPCSVC_MAXPAGES)
124 BUG();
125 while (pages) {
126 struct page *p = alloc_page(GFP_KERNEL);
127 if (!p)
128 break;
129 rqstp->rq_argpages[arghi++] = p;
130 pages--;
131 }
132 rqstp->rq_arghi = arghi;
133 return ! pages;
134}
135
136/*
137 * Release an RPC server buffer
138 */
139static void
140svc_release_buffer(struct svc_rqst *rqstp)
141{
142 while (rqstp->rq_arghi)
143 put_page(rqstp->rq_argpages[--rqstp->rq_arghi]);
144 while (rqstp->rq_resused) {
145 if (rqstp->rq_respages[--rqstp->rq_resused] == NULL)
146 continue;
147 put_page(rqstp->rq_respages[rqstp->rq_resused]);
148 }
149 rqstp->rq_argused = 0;
150}
151
152/*
153 * Create a server thread
154 */
155int
156svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
157{
158 struct svc_rqst *rqstp;
159 int error = -ENOMEM;
160
161 rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL);
162 if (!rqstp)
163 goto out;
164
165 memset(rqstp, 0, sizeof(*rqstp));
166 init_waitqueue_head(&rqstp->rq_wait);
167
168 if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
169 || !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
170 || !svc_init_buffer(rqstp, serv->sv_bufsz))
171 goto out_thread;
172
173 serv->sv_nrthreads++;
174 rqstp->rq_server = serv;
175 error = kernel_thread((int (*)(void *)) func, rqstp, 0);
176 if (error < 0)
177 goto out_thread;
178 svc_sock_update_bufs(serv);
179 error = 0;
180out:
181 return error;
182
183out_thread:
184 svc_exit_thread(rqstp);
185 goto out;
186}
187
188/*
189 * Destroy an RPC server thread
190 */
191void
192svc_exit_thread(struct svc_rqst *rqstp)
193{
194 struct svc_serv *serv = rqstp->rq_server;
195
196 svc_release_buffer(rqstp);
197 if (rqstp->rq_resp)
198 kfree(rqstp->rq_resp);
199 if (rqstp->rq_argp)
200 kfree(rqstp->rq_argp);
201 if (rqstp->rq_auth_data)
202 kfree(rqstp->rq_auth_data);
203 kfree(rqstp);
204
205 /* Release the server */
206 if (serv)
207 svc_destroy(serv);
208}
209
210/*
211 * Register an RPC service with the local portmapper.
212 * To unregister a service, call this routine with
213 * proto and port == 0.
214 */
215int
216svc_register(struct svc_serv *serv, int proto, unsigned short port)
217{
218 struct svc_program *progp;
219 unsigned long flags;
220 int i, error = 0, dummy;
221
222 progp = serv->sv_program;
223
224 dprintk("RPC: svc_register(%s, %s, %d)\n",
225 progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port);
226
227 if (!port)
228 clear_thread_flag(TIF_SIGPENDING);
229
230 for (i = 0; i < progp->pg_nvers; i++) {
231 if (progp->pg_vers[i] == NULL)
232 continue;
233 error = rpc_register(progp->pg_prog, i, proto, port, &dummy);
234 if (error < 0)
235 break;
236 if (port && !dummy) {
237 error = -EACCES;
238 break;
239 }
240 }
241
242 if (!port) {
243 spin_lock_irqsave(&current->sighand->siglock, flags);
244 recalc_sigpending();
245 spin_unlock_irqrestore(&current->sighand->siglock, flags);
246 }
247
248 return error;
249}
250
251/*
252 * Process the RPC request.
253 */
254int
255svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
256{
257 struct svc_program *progp;
258 struct svc_version *versp = NULL; /* compiler food */
259 struct svc_procedure *procp = NULL;
260 struct kvec * argv = &rqstp->rq_arg.head[0];
261 struct kvec * resv = &rqstp->rq_res.head[0];
262 kxdrproc_t xdr;
263 u32 *statp;
264 u32 dir, prog, vers, proc,
265 auth_stat, rpc_stat;
266 int auth_res;
267 u32 *accept_statp;
268
269 rpc_stat = rpc_success;
270
271 if (argv->iov_len < 6*4)
272 goto err_short_len;
273
274 /* setup response xdr_buf.
275 * Initially it has just one page
276 */
277 svc_take_page(rqstp); /* must succeed */
278 resv->iov_base = page_address(rqstp->rq_respages[0]);
279 resv->iov_len = 0;
280 rqstp->rq_res.pages = rqstp->rq_respages+1;
281 rqstp->rq_res.len = 0;
282 rqstp->rq_res.page_base = 0;
283 rqstp->rq_res.page_len = 0;
284 rqstp->rq_res.tail[0].iov_len = 0;
285 /* tcp needs a space for the record length... */
286 if (rqstp->rq_prot == IPPROTO_TCP)
287 svc_putu32(resv, 0);
288
289 rqstp->rq_xid = svc_getu32(argv);
290 svc_putu32(resv, rqstp->rq_xid);
291
292 dir = ntohl(svc_getu32(argv));
293 vers = ntohl(svc_getu32(argv));
294
295 /* First words of reply: */
296 svc_putu32(resv, xdr_one); /* REPLY */
297
298 if (dir != 0) /* direction != CALL */
299 goto err_bad_dir;
300 if (vers != 2) /* RPC version number */
301 goto err_bad_rpc;
302
303 /* Save position in case we later decide to reject: */
304 accept_statp = resv->iov_base + resv->iov_len;
305
306 svc_putu32(resv, xdr_zero); /* ACCEPT */
307
308 rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */
309 rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */
310 rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */
311
312 progp = serv->sv_program;
313 /*
314 * Decode auth data, and add verifier to reply buffer.
315 * We do this before anything else in order to get a decent
316 * auth verifier.
317 */
318 auth_res = svc_authenticate(rqstp, &auth_stat);
319 /* Also give the program a chance to reject this call: */
320 if (auth_res == SVC_OK) {
321 auth_stat = rpc_autherr_badcred;
322 auth_res = progp->pg_authenticate(rqstp);
323 }
324 switch (auth_res) {
325 case SVC_OK:
326 break;
327 case SVC_GARBAGE:
328 rpc_stat = rpc_garbage_args;
329 goto err_bad;
330 case SVC_SYSERR:
331 rpc_stat = rpc_system_err;
332 goto err_bad;
333 case SVC_DENIED:
334 goto err_bad_auth;
335 case SVC_DROP:
336 goto dropit;
337 case SVC_COMPLETE:
338 goto sendit;
339 }
340
341 if (prog != progp->pg_prog)
342 goto err_bad_prog;
343
344 if (vers >= progp->pg_nvers ||
345 !(versp = progp->pg_vers[vers]))
346 goto err_bad_vers;
347
348 procp = versp->vs_proc + proc;
349 if (proc >= versp->vs_nproc || !procp->pc_func)
350 goto err_bad_proc;
351 rqstp->rq_server = serv;
352 rqstp->rq_procinfo = procp;
353
354 /* Syntactic check complete */
355 serv->sv_stats->rpccnt++;
356
357 /* Build the reply header. */
358 statp = resv->iov_base +resv->iov_len;
359 svc_putu32(resv, rpc_success); /* RPC_SUCCESS */
360
361 /* Bump per-procedure stats counter */
362 procp->pc_count++;
363
364 /* Initialize storage for argp and resp */
365 memset(rqstp->rq_argp, 0, procp->pc_argsize);
366 memset(rqstp->rq_resp, 0, procp->pc_ressize);
367
368 /* un-reserve some of the out-queue now that we have a
369 * better idea of reply size
370 */
371 if (procp->pc_xdrressize)
372 svc_reserve(rqstp, procp->pc_xdrressize<<2);
373
374 /* Call the function that processes the request. */
375 if (!versp->vs_dispatch) {
376 /* Decode arguments */
377 xdr = procp->pc_decode;
378 if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
379 goto err_garbage;
380
381 *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
382
383 /* Encode reply */
384 if (*statp == rpc_success && (xdr = procp->pc_encode)
385 && !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
386 dprintk("svc: failed to encode reply\n");
387 /* serv->sv_stats->rpcsystemerr++; */
388 *statp = rpc_system_err;
389 }
390 } else {
391 dprintk("svc: calling dispatcher\n");
392 if (!versp->vs_dispatch(rqstp, statp)) {
393 /* Release reply info */
394 if (procp->pc_release)
395 procp->pc_release(rqstp, NULL, rqstp->rq_resp);
396 goto dropit;
397 }
398 }
399
400 /* Check RPC status result */
401 if (*statp != rpc_success)
402 resv->iov_len = ((void*)statp) - resv->iov_base + 4;
403
404 /* Release reply info */
405 if (procp->pc_release)
406 procp->pc_release(rqstp, NULL, rqstp->rq_resp);
407
408 if (procp->pc_encode == NULL)
409 goto dropit;
410
411 sendit:
412 if (svc_authorise(rqstp))
413 goto dropit;
414 return svc_send(rqstp);
415
416 dropit:
417 svc_authorise(rqstp); /* doesn't hurt to call this twice */
418 dprintk("svc: svc_process dropit\n");
419 svc_drop(rqstp);
420 return 0;
421
422err_short_len:
423#ifdef RPC_PARANOIA
424 printk("svc: short len %Zd, dropping request\n", argv->iov_len);
425#endif
426 goto dropit; /* drop request */
427
428err_bad_dir:
429#ifdef RPC_PARANOIA
430 printk("svc: bad direction %d, dropping request\n", dir);
431#endif
432 serv->sv_stats->rpcbadfmt++;
433 goto dropit; /* drop request */
434
435err_bad_rpc:
436 serv->sv_stats->rpcbadfmt++;
437 svc_putu32(resv, xdr_one); /* REJECT */
438 svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */
439 svc_putu32(resv, xdr_two); /* Only RPCv2 supported */
440 svc_putu32(resv, xdr_two);
441 goto sendit;
442
443err_bad_auth:
444 dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
445 serv->sv_stats->rpcbadauth++;
446 /* Restore write pointer to location of accept status: */
447 xdr_ressize_check(rqstp, accept_statp);
448 svc_putu32(resv, xdr_one); /* REJECT */
449 svc_putu32(resv, xdr_one); /* AUTH_ERROR */
450 svc_putu32(resv, auth_stat); /* status */
451 goto sendit;
452
453err_bad_prog:
454#ifdef RPC_PARANOIA
455 if (prog != 100227 || progp->pg_prog != 100003)
456 printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog);
457 /* else it is just a Solaris client seeing if ACLs are supported */
458#endif
459 serv->sv_stats->rpcbadfmt++;
460 svc_putu32(resv, rpc_prog_unavail);
461 goto sendit;
462
463err_bad_vers:
464#ifdef RPC_PARANOIA
465 printk("svc: unknown version (%d)\n", vers);
466#endif
467 serv->sv_stats->rpcbadfmt++;
468 svc_putu32(resv, rpc_prog_mismatch);
469 svc_putu32(resv, htonl(progp->pg_lovers));
470 svc_putu32(resv, htonl(progp->pg_hivers));
471 goto sendit;
472
473err_bad_proc:
474#ifdef RPC_PARANOIA
475 printk("svc: unknown procedure (%d)\n", proc);
476#endif
477 serv->sv_stats->rpcbadfmt++;
478 svc_putu32(resv, rpc_proc_unavail);
479 goto sendit;
480
481err_garbage:
482#ifdef RPC_PARANOIA
483 printk("svc: failed to decode args\n");
484#endif
485 rpc_stat = rpc_garbage_args;
486err_bad:
487 serv->sv_stats->rpcbadfmt++;
488 svc_putu32(resv, rpc_stat);
489 goto sendit;
490}
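/*
 * Illustrative sketch, not from the patch: the ONC RPC reply header
 * that svc_process() assembles above on the success path, as laid out
 * in RFC 1057.  Every field is one 32-bit XDR word in network byte
 * order; stream transports additionally prepend a record marker.
 */
#include <stdint.h>
#include <arpa/inet.h>

static size_t build_accepted_reply(uint32_t *buf, uint32_t xid_netorder)
{
	uint32_t *p = buf;

	*p++ = xid_netorder;	/* xid, copied verbatim from the call  */
	*p++ = htonl(1);	/* msg_type    = REPLY                 */
	*p++ = htonl(0);	/* reply_stat  = MSG_ACCEPTED          */
	*p++ = htonl(0);	/* verifier flavor = AUTH_NULL         */
	*p++ = htonl(0);	/* verifier length = 0                 */
	*p++ = htonl(0);	/* accept_stat = SUCCESS               */
	/* procedure-specific results follow */
	return (p - buf) * sizeof(*p);
}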
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
new file mode 100644
index 000000000000..bde8147ef2db
--- /dev/null
+++ b/net/sunrpc/svcauth.c
@@ -0,0 +1,216 @@
1/*
2 * linux/net/sunrpc/svcauth.c
3 *
4 * The generic interface for RPC authentication on the server side.
5 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 *
8 * CHANGES
9 * 19-Apr-2000 Chris Evans - Security fix
10 */
11
12#include <linux/types.h>
13#include <linux/sched.h>
14#include <linux/module.h>
15#include <linux/sunrpc/types.h>
16#include <linux/sunrpc/xdr.h>
17#include <linux/sunrpc/svcsock.h>
18#include <linux/sunrpc/svcauth.h>
19#include <linux/err.h>
20#include <linux/hash.h>
21
22#define RPCDBG_FACILITY RPCDBG_AUTH
23
24
25/*
26 * Table of authenticators
27 */
28extern struct auth_ops svcauth_null;
29extern struct auth_ops svcauth_unix;
30
31static DEFINE_SPINLOCK(authtab_lock);
32static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = {
33 [0] = &svcauth_null,
34 [1] = &svcauth_unix,
35};
36
37int
38svc_authenticate(struct svc_rqst *rqstp, u32 *authp)
39{
40 rpc_authflavor_t flavor;
41 struct auth_ops *aops;
42
43 *authp = rpc_auth_ok;
44
45 flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0]));
46
47 dprintk("svc: svc_authenticate (%d)\n", flavor);
48
49 spin_lock(&authtab_lock);
50 if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor])
51 || !try_module_get(aops->owner)) {
52 spin_unlock(&authtab_lock);
53 *authp = rpc_autherr_badcred;
54 return SVC_DENIED;
55 }
56 spin_unlock(&authtab_lock);
57
58 rqstp->rq_authop = aops;
59 return aops->accept(rqstp, authp);
60}
61
62int svc_set_client(struct svc_rqst *rqstp)
63{
64 return rqstp->rq_authop->set_client(rqstp);
65}
66
67/* A request, which was authenticated, has now executed.
68 * Time to finalise the credentials and verifier
69 * and release any resources.
70 */
71int svc_authorise(struct svc_rqst *rqstp)
72{
73 struct auth_ops *aops = rqstp->rq_authop;
74 int rv = 0;
75
76 rqstp->rq_authop = NULL;
77
78 if (aops) {
79 rv = aops->release(rqstp);
80 module_put(aops->owner);
81 }
82 return rv;
83}
84
85int
86svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
87{
88 int rv = -EINVAL;
89 spin_lock(&authtab_lock);
90 if (flavor < RPC_AUTH_MAXFLAVOR && authtab[flavor] == NULL) {
91 authtab[flavor] = aops;
92 rv = 0;
93 }
94 spin_unlock(&authtab_lock);
95 return rv;
96}
97
98void
99svc_auth_unregister(rpc_authflavor_t flavor)
100{
101 spin_lock(&authtab_lock);
102 if (flavor < RPC_AUTH_MAXFLAVOR)
103 authtab[flavor] = NULL;
104 spin_unlock(&authtab_lock);
105}
106EXPORT_SYMBOL(svc_auth_unregister);
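/*
 * Illustrative sketch, not from the patch: how an authentication
 * flavour built as a module would plug into the table above.  The
 * flavour number and the accept/release/set_client handlers are
 * hypothetical; only svc_auth_register()/svc_auth_unregister() and
 * the auth_ops fields used elsewhere in this file are assumed.
 */
#define EXAMPLE_FLAVOUR	6	/* must be < RPC_AUTH_MAXFLAVOR and unused */

static struct auth_ops svcauth_example = {
	.name		= "example",
	.owner		= THIS_MODULE,
	.flavour	= EXAMPLE_FLAVOUR,
	.accept		= example_accept,	/* hypothetical */
	.release	= example_release,	/* hypothetical */
	.set_client	= example_set_client,	/* hypothetical */
};

static int __init example_auth_init(void)
{
	return svc_auth_register(EXAMPLE_FLAVOUR, &svcauth_example);
}

static void __exit example_auth_exit(void)
{
	svc_auth_unregister(EXAMPLE_FLAVOUR);
}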
107
108/**************************************************
109 * cache for domain name to auth_domain
110 * Entries are only added by flavours which will normally
111 * have a structure that 'inherits' from auth_domain.
112 * e.g. when an IP -> domainname is given to auth_unix,
113 * and the domain name doesn't exist, it will create an
114 * auth_unix_domain and add it to this hash table.
115 * If it finds the name does exist, but isn't AUTH_UNIX,
116 * it will complain.
117 */
118
119/*
120 * The auth_domain cache is somewhat different from other caches,
121 * largely because the entries are possibly of different types:
122 * each auth flavour has its own type.
123 * One consequence of this is that DefineCacheLookup cannot
124 * allocate a new structure as it cannot know the size.
125 * Notice that the "INIT" code fragment is quite different
126 * from other caches. When auth_domain_lookup might be
127 * creating a new domain, the new domain is passed in
128 * complete and it is used as-is rather than being copied into
129 * another structure.
130 */
131#define DN_HASHBITS 6
132#define DN_HASHMAX (1<<DN_HASHBITS)
133#define DN_HASHMASK (DN_HASHMAX-1)
134
135static struct cache_head *auth_domain_table[DN_HASHMAX];
136
137static void auth_domain_drop(struct cache_head *item, struct cache_detail *cd)
138{
139 struct auth_domain *dom = container_of(item, struct auth_domain, h);
140 if (cache_put(item,cd))
141 authtab[dom->flavour]->domain_release(dom);
142}
143
144
145struct cache_detail auth_domain_cache = {
146 .hash_size = DN_HASHMAX,
147 .hash_table = auth_domain_table,
148 .name = "auth.domain",
149 .cache_put = auth_domain_drop,
150};
151
152void auth_domain_put(struct auth_domain *dom)
153{
154 auth_domain_drop(&dom->h, &auth_domain_cache);
155}
156
157static inline int auth_domain_hash(struct auth_domain *item)
158{
159 return hash_str(item->name, DN_HASHBITS);
160}
161static inline int auth_domain_match(struct auth_domain *tmp, struct auth_domain *item)
162{
163 return strcmp(tmp->name, item->name) == 0;
164}
165
166struct auth_domain *
167auth_domain_lookup(struct auth_domain *item, int set)
168{
169 struct auth_domain *tmp = NULL;
170 struct cache_head **hp, **head;
171 head = &auth_domain_cache.hash_table[auth_domain_hash(item)];
172
173 if (set)
174 write_lock(&auth_domain_cache.hash_lock);
175 else
176 read_lock(&auth_domain_cache.hash_lock);
177 for (hp=head; *hp != NULL; hp = &tmp->h.next) {
178 tmp = container_of(*hp, struct auth_domain, h);
179 if (!auth_domain_match(tmp, item))
180 continue;
181 if (!set) {
182 cache_get(&tmp->h);
183 goto out_noset;
184 }
185 *hp = tmp->h.next;
186 tmp->h.next = NULL;
187 auth_domain_drop(&tmp->h, &auth_domain_cache);
188 goto out_set;
189 }
190 /* Didn't find anything */
191 if (!set)
192 goto out_nada;
193 auth_domain_cache.entries++;
194out_set:
195 item->h.next = *head;
196 *head = &item->h;
197 cache_get(&item->h);
198 write_unlock(&auth_domain_cache.hash_lock);
199 cache_fresh(&auth_domain_cache, &item->h, item->h.expiry_time);
200 cache_get(&item->h);
201 return item;
202out_nada:
203 tmp = NULL;
204out_noset:
205 read_unlock(&auth_domain_cache.hash_lock);
206 return tmp;
207}
208
209struct auth_domain *auth_domain_find(char *name)
210{
211 struct auth_domain *rv, ad;
212
213 ad.name = name;
214 rv = auth_domain_lookup(&ad, 0);
215 return rv;
216}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
new file mode 100644
index 000000000000..2b99b4028d31
--- /dev/null
+++ b/net/sunrpc/svcauth_unix.c
@@ -0,0 +1,502 @@
1#include <linux/types.h>
2#include <linux/sched.h>
3#include <linux/module.h>
4#include <linux/sunrpc/types.h>
5#include <linux/sunrpc/xdr.h>
6#include <linux/sunrpc/svcsock.h>
7#include <linux/sunrpc/svcauth.h>
8#include <linux/err.h>
9#include <linux/seq_file.h>
10#include <linux/hash.h>
11
12#define RPCDBG_FACILITY RPCDBG_AUTH
13
14
15/*
16 * AUTHUNIX and AUTHNULL credentials are both handled here.
17 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid
18 * are always nobody (-2). i.e. we do the same IP address checks for
19 * AUTHNULL as for AUTHUNIX, and that is done here.
20 */
21
22
23static char *strdup(char *s)
24{
25 char *rv = kmalloc(strlen(s)+1, GFP_KERNEL);
26 if (rv)
27 strcpy(rv, s);
28 return rv;
29}
30
31struct unix_domain {
32 struct auth_domain h;
33 int addr_changes;
34 /* other stuff later */
35};
36
37struct auth_domain *unix_domain_find(char *name)
38{
39 struct auth_domain *rv, ud;
40 struct unix_domain *new;
41
42 ud.name = name;
43
44 rv = auth_domain_lookup(&ud, 0);
45
46 foundit:
47 if (rv && rv->flavour != RPC_AUTH_UNIX) {
48 auth_domain_put(rv);
49 return NULL;
50 }
51 if (rv)
52 return rv;
53
54 new = kmalloc(sizeof(*new), GFP_KERNEL);
55 if (new == NULL)
56 return NULL;
57 cache_init(&new->h.h);
58 new->h.name = strdup(name);
59 new->h.flavour = RPC_AUTH_UNIX;
60 new->addr_changes = 0;
61 new->h.h.expiry_time = NEVER;
62
63 rv = auth_domain_lookup(&new->h, 2);
64 if (rv == &new->h) {
65 if (atomic_dec_and_test(&new->h.h.refcnt)) BUG();
66 } else {
67 auth_domain_put(&new->h);
68 goto foundit;
69 }
70
71 return rv;
72}
73
74static void svcauth_unix_domain_release(struct auth_domain *dom)
75{
76 struct unix_domain *ud = container_of(dom, struct unix_domain, h);
77
78 kfree(dom->name);
79 kfree(ud);
80}
81
82
83/**************************************************
84 * cache for IP address to unix_domain
85 * as needed by AUTH_UNIX
86 */
87#define IP_HASHBITS 8
88#define IP_HASHMAX (1<<IP_HASHBITS)
89#define IP_HASHMASK (IP_HASHMAX-1)
90
91struct ip_map {
92 struct cache_head h;
93 char m_class[8]; /* e.g. "nfsd" */
94 struct in_addr m_addr;
95 struct unix_domain *m_client;
96 int m_add_change;
97};
98static struct cache_head *ip_table[IP_HASHMAX];
99
100static void ip_map_put(struct cache_head *item, struct cache_detail *cd)
101{
102 struct ip_map *im = container_of(item, struct ip_map,h);
103 if (cache_put(item, cd)) {
104 if (test_bit(CACHE_VALID, &item->flags) &&
105 !test_bit(CACHE_NEGATIVE, &item->flags))
106 auth_domain_put(&im->m_client->h);
107 kfree(im);
108 }
109}
110
111static inline int ip_map_hash(struct ip_map *item)
112{
113 return hash_str(item->m_class, IP_HASHBITS) ^
114 hash_long((unsigned long)item->m_addr.s_addr, IP_HASHBITS);
115}
116static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp)
117{
118 return strcmp(tmp->m_class, item->m_class) == 0
119 && tmp->m_addr.s_addr == item->m_addr.s_addr;
120}
121static inline void ip_map_init(struct ip_map *new, struct ip_map *item)
122{
123 strcpy(new->m_class, item->m_class);
124 new->m_addr.s_addr = item->m_addr.s_addr;
125}
126static inline void ip_map_update(struct ip_map *new, struct ip_map *item)
127{
128 cache_get(&item->m_client->h.h);
129 new->m_client = item->m_client;
130 new->m_add_change = item->m_add_change;
131}
132
133static void ip_map_request(struct cache_detail *cd,
134 struct cache_head *h,
135 char **bpp, int *blen)
136{
137 char text_addr[20];
138 struct ip_map *im = container_of(h, struct ip_map, h);
139 __u32 addr = im->m_addr.s_addr;
140
141 snprintf(text_addr, 20, "%u.%u.%u.%u",
142 ntohl(addr) >> 24 & 0xff,
143 ntohl(addr) >> 16 & 0xff,
144 ntohl(addr) >> 8 & 0xff,
145 ntohl(addr) >> 0 & 0xff);
146
147 qword_add(bpp, blen, im->m_class);
148 qword_add(bpp, blen, text_addr);
149 (*bpp)[-1] = '\n';
150}
151
152static struct ip_map *ip_map_lookup(struct ip_map *, int);
153
154static int ip_map_parse(struct cache_detail *cd,
155 char *mesg, int mlen)
156{
157 /* class ipaddress expiry [domainname] */
158 /* should be safe just to use the start of the input buffer
159 * for scratch: */
160 char *buf = mesg;
161 int len;
162 int b1,b2,b3,b4;
163 char c;
164 struct ip_map ipm, *ipmp;
165 struct auth_domain *dom;
166 time_t expiry;
167
168 if (mesg[mlen-1] != '\n')
169 return -EINVAL;
170 mesg[mlen-1] = 0;
171
172 /* class */
173 len = qword_get(&mesg, ipm.m_class, sizeof(ipm.m_class));
174 if (len <= 0) return -EINVAL;
175
176 /* ip address */
177 len = qword_get(&mesg, buf, mlen);
178 if (len <= 0) return -EINVAL;
179
180 if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
181 return -EINVAL;
182
183 expiry = get_expiry(&mesg);
184 if (expiry ==0)
185 return -EINVAL;
186
187 /* domainname, or empty for NEGATIVE */
188 len = qword_get(&mesg, buf, mlen);
189 if (len < 0) return -EINVAL;
190
191 if (len) {
192 dom = unix_domain_find(buf);
193 if (dom == NULL)
194 return -ENOENT;
195 } else
196 dom = NULL;
197
198 ipm.m_addr.s_addr =
199 htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
200 ipm.h.flags = 0;
201 if (dom) {
202 ipm.m_client = container_of(dom, struct unix_domain, h);
203 ipm.m_add_change = ipm.m_client->addr_changes;
204 } else
205 set_bit(CACHE_NEGATIVE, &ipm.h.flags);
206 ipm.h.expiry_time = expiry;
207
208 ipmp = ip_map_lookup(&ipm, 1);
209 if (ipmp)
210 ip_map_put(&ipmp->h, &ip_map_cache);
211 if (dom)
212 auth_domain_put(dom);
213 if (!ipmp)
214 return -ENOMEM;
215 cache_flush();
216 return 0;
217}
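/*
 * Illustrative sketch, not from the patch: the text line a user-space
 * helper would feed to ip_map_parse() above through the cache channel
 * file.  Fields are class, dotted-quad address, absolute expiry time
 * in seconds, and an optional domain name (omitted to store a
 * negative entry).  The path and values here are hypothetical.
 */
#include <stdio.h>
#include <time.h>

static int write_ip_map_entry(const char *client)
{
	FILE *ch = fopen("/proc/net/rpc/auth.unix.ip/channel", "w");

	if (!ch)
		return -1;
	fprintf(ch, "nfsd 192.168.1.23 %ld %s\n",
		(long)time(NULL) + 30 * 60,	/* valid for 30 minutes */
		client);			/* e.g. an export client name */
	return fclose(ch);
}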
218
219static int ip_map_show(struct seq_file *m,
220 struct cache_detail *cd,
221 struct cache_head *h)
222{
223 struct ip_map *im;
224 struct in_addr addr;
225 char *dom = "-no-domain-";
226
227 if (h == NULL) {
228 seq_puts(m, "#class IP domain\n");
229 return 0;
230 }
231 im = container_of(h, struct ip_map, h);
232 /* class addr domain */
233 addr = im->m_addr;
234
235 if (test_bit(CACHE_VALID, &h->flags) &&
236 !test_bit(CACHE_NEGATIVE, &h->flags))
237 dom = im->m_client->h.name;
238
239 seq_printf(m, "%s %d.%d.%d.%d %s\n",
240 im->m_class,
241 htonl(addr.s_addr) >> 24 & 0xff,
242 htonl(addr.s_addr) >> 16 & 0xff,
243 htonl(addr.s_addr) >> 8 & 0xff,
244 htonl(addr.s_addr) >> 0 & 0xff,
245 dom
246 );
247 return 0;
248}
249
250
251struct cache_detail ip_map_cache = {
252 .hash_size = IP_HASHMAX,
253 .hash_table = ip_table,
254 .name = "auth.unix.ip",
255 .cache_put = ip_map_put,
256 .cache_request = ip_map_request,
257 .cache_parse = ip_map_parse,
258 .cache_show = ip_map_show,
259};
260
261static DefineSimpleCacheLookup(ip_map, 0)
262
263
264int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
265{
266 struct unix_domain *udom;
267 struct ip_map ip, *ipmp;
268
269 if (dom->flavour != RPC_AUTH_UNIX)
270 return -EINVAL;
271 udom = container_of(dom, struct unix_domain, h);
272 strcpy(ip.m_class, "nfsd");
273 ip.m_addr = addr;
274 ip.m_client = udom;
275 ip.m_add_change = udom->addr_changes+1;
276 ip.h.flags = 0;
277 ip.h.expiry_time = NEVER;
278
279 ipmp = ip_map_lookup(&ip, 1);
280
281 if (ipmp) {
282 ip_map_put(&ipmp->h, &ip_map_cache);
283 return 0;
284 } else
285 return -ENOMEM;
286}
287
288int auth_unix_forget_old(struct auth_domain *dom)
289{
290 struct unix_domain *udom;
291
292 if (dom->flavour != RPC_AUTH_UNIX)
293 return -EINVAL;
294 udom = container_of(dom, struct unix_domain, h);
295 udom->addr_changes++;
296 return 0;
297}
298
299struct auth_domain *auth_unix_lookup(struct in_addr addr)
300{
301 struct ip_map key, *ipm;
302 struct auth_domain *rv;
303
304 strcpy(key.m_class, "nfsd");
305 key.m_addr = addr;
306
307 ipm = ip_map_lookup(&key, 0);
308
309 if (!ipm)
310 return NULL;
311 if (cache_check(&ip_map_cache, &ipm->h, NULL))
312 return NULL;
313
314 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
315 if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0)
316 auth_domain_put(&ipm->m_client->h);
317 rv = NULL;
318 } else {
319 rv = &ipm->m_client->h;
320 cache_get(&rv->h);
321 }
322 ip_map_put(&ipm->h, &ip_map_cache);
323 return rv;
324}
325
326void svcauth_unix_purge(void)
327{
328 cache_purge(&ip_map_cache);
329 cache_purge(&auth_domain_cache);
330}
331
332static int
333svcauth_unix_set_client(struct svc_rqst *rqstp)
334{
335 struct ip_map key, *ipm;
336
337 rqstp->rq_client = NULL;
338 if (rqstp->rq_proc == 0)
339 return SVC_OK;
340
341 strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class);
342 key.m_addr = rqstp->rq_addr.sin_addr;
343
344 ipm = ip_map_lookup(&key, 0);
345
346 if (ipm == NULL)
347 return SVC_DENIED;
348
349 switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
350 default:
351 BUG();
352 case -EAGAIN:
353 return SVC_DROP;
354 case -ENOENT:
355 return SVC_DENIED;
356 case 0:
357 rqstp->rq_client = &ipm->m_client->h;
358 cache_get(&rqstp->rq_client->h);
359 ip_map_put(&ipm->h, &ip_map_cache);
360 break;
361 }
362 return SVC_OK;
363}
364
365static int
366svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
367{
368 struct kvec *argv = &rqstp->rq_arg.head[0];
369 struct kvec *resv = &rqstp->rq_res.head[0];
370 struct svc_cred *cred = &rqstp->rq_cred;
371
372 cred->cr_group_info = NULL;
373 rqstp->rq_client = NULL;
374
375 if (argv->iov_len < 3*4)
376 return SVC_GARBAGE;
377
378 if (svc_getu32(argv) != 0) {
379 dprintk("svc: bad null cred\n");
380 *authp = rpc_autherr_badcred;
381 return SVC_DENIED;
382 }
383 if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
384 dprintk("svc: bad null verf\n");
385 *authp = rpc_autherr_badverf;
386 return SVC_DENIED;
387 }
388
389 /* Signal that mapping to nobody uid/gid is required */
390 cred->cr_uid = (uid_t) -1;
391 cred->cr_gid = (gid_t) -1;
392 cred->cr_group_info = groups_alloc(0);
393 if (cred->cr_group_info == NULL)
394 return SVC_DROP; /* kmalloc failure - client must retry */
395
396 /* Put NULL verifier */
397 svc_putu32(resv, RPC_AUTH_NULL);
398 svc_putu32(resv, 0);
399
400 return SVC_OK;
401}
402
403static int
404svcauth_null_release(struct svc_rqst *rqstp)
405{
406 if (rqstp->rq_client)
407 auth_domain_put(rqstp->rq_client);
408 rqstp->rq_client = NULL;
409 if (rqstp->rq_cred.cr_group_info)
410 put_group_info(rqstp->rq_cred.cr_group_info);
411 rqstp->rq_cred.cr_group_info = NULL;
412
413 return 0; /* don't drop */
414}
415
416
417struct auth_ops svcauth_null = {
418 .name = "null",
419 .owner = THIS_MODULE,
420 .flavour = RPC_AUTH_NULL,
421 .accept = svcauth_null_accept,
422 .release = svcauth_null_release,
423 .set_client = svcauth_unix_set_client,
424};
425
426
427static int
428svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp)
429{
430 struct kvec *argv = &rqstp->rq_arg.head[0];
431 struct kvec *resv = &rqstp->rq_res.head[0];
432 struct svc_cred *cred = &rqstp->rq_cred;
433 u32 slen, i;
434 int len = argv->iov_len;
435
436 cred->cr_group_info = NULL;
437 rqstp->rq_client = NULL;
438
439 if ((len -= 3*4) < 0)
440 return SVC_GARBAGE;
441
442 svc_getu32(argv); /* length */
443 svc_getu32(argv); /* time stamp */
444 slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */
445 if (slen > 64 || (len -= (slen + 3)*4) < 0)
446 goto badcred;
447 argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */
448 argv->iov_len -= slen*4;
449
450 cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */
451 cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */
452 slen = ntohl(svc_getu32(argv)); /* gids length */
453 if (slen > 16 || (len -= (slen + 2)*4) < 0)
454 goto badcred;
455 cred->cr_group_info = groups_alloc(slen);
456 if (cred->cr_group_info == NULL)
457 return SVC_DROP;
458 for (i = 0; i < slen; i++)
459 GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv));
460
461 if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
462 *authp = rpc_autherr_badverf;
463 return SVC_DENIED;
464 }
465
466 /* Put NULL verifier */
467 svc_putu32(resv, RPC_AUTH_NULL);
468 svc_putu32(resv, 0);
469
470 return SVC_OK;
471
472badcred:
473 *authp = rpc_autherr_badcred;
474 return SVC_DENIED;
475}
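/*
 * Illustrative sketch, not from the patch: the XDR layout of an
 * AUTH_UNIX credential body as parsed by svcauth_unix_accept() above
 * (RFC 1057, section 9.2).  On the wire it is preceded by the flavour
 * word (AUTH_UNIX = 1) and the body length, and followed by an
 * AUTH_NULL verifier (two zero words).  The machine name and ids
 * below are hypothetical.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static size_t build_unix_cred(uint32_t *p)
{
	static const char machine[] = "client.example";
	size_t mlen = strlen(machine);
	uint32_t *start = p;

	*p++ = htonl(0);			/* stamp (arbitrary)       */
	*p++ = htonl(mlen);			/* machine name length     */
	memset(p, 0, ((mlen + 3) / 4) * 4);	/* zero the XDR padding    */
	memcpy(p, machine, mlen);		/* name, 4-byte aligned    */
	p += (mlen + 3) / 4;
	*p++ = htonl(500);			/* uid                     */
	*p++ = htonl(500);			/* gid                     */
	*p++ = htonl(1);			/* gid count (at most 16)  */
	*p++ = htonl(100);			/* supplementary gid[0]    */
	return (p - start) * sizeof(*p);
}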
476
477static int
478svcauth_unix_release(struct svc_rqst *rqstp)
479{
480 /* Verifier (such as it is) is already in place.
481 */
482 if (rqstp->rq_client)
483 auth_domain_put(rqstp->rq_client);
484 rqstp->rq_client = NULL;
485 if (rqstp->rq_cred.cr_group_info)
486 put_group_info(rqstp->rq_cred.cr_group_info);
487 rqstp->rq_cred.cr_group_info = NULL;
488
489 return 0;
490}
491
492
493struct auth_ops svcauth_unix = {
494 .name = "unix",
495 .owner = THIS_MODULE,
496 .flavour = RPC_AUTH_UNIX,
497 .accept = svcauth_unix_accept,
498 .release = svcauth_unix_release,
499 .domain_release = svcauth_unix_domain_release,
500 .set_client = svcauth_unix_set_client,
501};
502
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
new file mode 100644
index 000000000000..05907035bc96
--- /dev/null
+++ b/net/sunrpc/svcsock.c
@@ -0,0 +1,1585 @@
1/*
2 * linux/net/sunrpc/svcsock.c
3 *
4 * These are the RPC server socket internals.
5 *
6 * The server scheduling algorithm does not always distribute the load
7 * evenly when servicing a single client. May need to modify the
8 * svc_sock_enqueue procedure...
9 *
10 * TCP support is largely untested and may be a little slow. The problem
11 * is that we currently do two separate recvfrom's, one for the 4-byte
12 * record length, and the second for the actual record. This could possibly
13 * be improved by always reading a minimum size of around 100 bytes and
14 * tucking any superfluous bytes away in a temporary store. Still, that
15 * leaves write requests out in the rain. An alternative may be to peek at
16 * the first skb in the queue, and if it matches the next TCP sequence
17 * number, to extract the record marker. Yuck.
18 *
19 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
20 */
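/*
 * Illustrative sketch, not from the patch: the 4-byte record marker
 * used by RPC over TCP (RFC 1057, section 10), which is what the
 * "4-byte record length" above refers to.  The top bit marks the last
 * fragment of a record and the low 31 bits give the fragment length;
 * svc_tcp_recvfrom() below reads and strips it, and svc_process()
 * reserves a word for it when building a TCP reply.
 */
#include <stdint.h>
#include <arpa/inet.h>

static uint32_t rpc_record_marker(uint32_t fragment_len, int last_fragment)
{
	return htonl((last_fragment ? 0x80000000u : 0u) |
		     (fragment_len & 0x7fffffffu));
}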
21
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/fcntl.h>
25#include <linux/net.h>
26#include <linux/in.h>
27#include <linux/inet.h>
28#include <linux/udp.h>
29#include <linux/tcp.h>
30#include <linux/unistd.h>
31#include <linux/slab.h>
32#include <linux/netdevice.h>
33#include <linux/skbuff.h>
34#include <net/sock.h>
35#include <net/checksum.h>
36#include <net/ip.h>
37#include <net/tcp.h>
38#include <asm/uaccess.h>
39#include <asm/ioctls.h>
40
41#include <linux/sunrpc/types.h>
42#include <linux/sunrpc/xdr.h>
43#include <linux/sunrpc/svcsock.h>
44#include <linux/sunrpc/stats.h>
45
46/* SMP locking strategy:
47 *
48 * svc_serv->sv_lock protects most stuff for that service.
49 *
50 * Some flags can be set to certain values at any time
51 * providing that certain rules are followed:
52 *
53 * SK_BUSY can be set to 0 at any time.
54 * svc_sock_enqueue must be called afterwards
55 * SK_CONN, SK_DATA can be set or cleared at any time.
56 * after a set, svc_sock_enqueue must be called.
57 * after a clear, the socket must be read/accepted
58 * if this succeeds, it must be set again.
59 * SK_CLOSE can be set at any time. It is never cleared.
60 *
61 */
62
63#define RPCDBG_FACILITY RPCDBG_SVCSOCK
64
65
66static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
67 int *errp, int pmap_reg);
68static void svc_udp_data_ready(struct sock *, int);
69static int svc_udp_recvfrom(struct svc_rqst *);
70static int svc_udp_sendto(struct svc_rqst *);
71
72static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
73static int svc_deferred_recv(struct svc_rqst *rqstp);
74static struct cache_deferred_req *svc_defer(struct cache_req *req);
75
76/*
77 * Queue up an idle server thread. Must have serv->sv_lock held.
78 * Note: this is really a stack rather than a queue, so that we only
79 * use as many different threads as we need, and the rest don't polute
80 * the cache.
81 */
82static inline void
83svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
84{
85 list_add(&rqstp->rq_list, &serv->sv_threads);
86}
87
88/*
89 * Dequeue an nfsd thread. Must have serv->sv_lock held.
90 */
91static inline void
92svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
93{
94 list_del(&rqstp->rq_list);
95}
96
97/*
98 * Release an skbuff after use
99 */
100static inline void
101svc_release_skb(struct svc_rqst *rqstp)
102{
103 struct sk_buff *skb = rqstp->rq_skbuff;
104 struct svc_deferred_req *dr = rqstp->rq_deferred;
105
106 if (skb) {
107 rqstp->rq_skbuff = NULL;
108
109 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
110 skb_free_datagram(rqstp->rq_sock->sk_sk, skb);
111 }
112 if (dr) {
113 rqstp->rq_deferred = NULL;
114 kfree(dr);
115 }
116}
117
118/*
119 * Any space to write?
120 */
121static inline unsigned long
122svc_sock_wspace(struct svc_sock *svsk)
123{
124 int wspace;
125
126 if (svsk->sk_sock->type == SOCK_STREAM)
127 wspace = sk_stream_wspace(svsk->sk_sk);
128 else
129 wspace = sock_wspace(svsk->sk_sk);
130
131 return wspace;
132}
133
134/*
135 * Queue up a socket with data pending. If there are idle nfsd
136 * processes, wake 'em up.
137 *
138 */
139static void
140svc_sock_enqueue(struct svc_sock *svsk)
141{
142 struct svc_serv *serv = svsk->sk_server;
143 struct svc_rqst *rqstp;
144
145 if (!(svsk->sk_flags &
146 ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
147 return;
148 if (test_bit(SK_DEAD, &svsk->sk_flags))
149 return;
150
151 spin_lock_bh(&serv->sv_lock);
152
153 if (!list_empty(&serv->sv_threads) &&
154 !list_empty(&serv->sv_sockets))
155 printk(KERN_ERR
156 "svc_sock_enqueue: threads and sockets both waiting??\n");
157
158 if (test_bit(SK_DEAD, &svsk->sk_flags)) {
159 /* Don't enqueue dead sockets */
160 dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
161 goto out_unlock;
162 }
163
164 if (test_bit(SK_BUSY, &svsk->sk_flags)) {
165 /* Don't enqueue socket while daemon is receiving */
166 dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
167 goto out_unlock;
168 }
169
170 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
171 if (((svsk->sk_reserved + serv->sv_bufsz)*2
172 > svc_sock_wspace(svsk))
173 && !test_bit(SK_CLOSE, &svsk->sk_flags)
174 && !test_bit(SK_CONN, &svsk->sk_flags)) {
175 /* Don't enqueue while not enough space for reply */
176 dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
177 svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz,
178 svc_sock_wspace(svsk));
179 goto out_unlock;
180 }
181 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
182
183 /* Mark socket as busy. It will remain in this state until the
184 * server has processed all pending data and put the socket back
185 * on the idle list.
186 */
187 set_bit(SK_BUSY, &svsk->sk_flags);
188
189 if (!list_empty(&serv->sv_threads)) {
190 rqstp = list_entry(serv->sv_threads.next,
191 struct svc_rqst,
192 rq_list);
193 dprintk("svc: socket %p served by daemon %p\n",
194 svsk->sk_sk, rqstp);
195 svc_serv_dequeue(serv, rqstp);
196 if (rqstp->rq_sock)
197 printk(KERN_ERR
198 "svc_sock_enqueue: server %p, rq_sock=%p!\n",
199 rqstp, rqstp->rq_sock);
200 rqstp->rq_sock = svsk;
201 svsk->sk_inuse++;
202 rqstp->rq_reserved = serv->sv_bufsz;
203 svsk->sk_reserved += rqstp->rq_reserved;
204 wake_up(&rqstp->rq_wait);
205 } else {
206 dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
207 list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
208 }
209
210out_unlock:
211 spin_unlock_bh(&serv->sv_lock);
212}
213
214/*
215 * Dequeue the first socket. Must be called with the serv->sv_lock held.
216 */
217static inline struct svc_sock *
218svc_sock_dequeue(struct svc_serv *serv)
219{
220 struct svc_sock *svsk;
221
222 if (list_empty(&serv->sv_sockets))
223 return NULL;
224
225 svsk = list_entry(serv->sv_sockets.next,
226 struct svc_sock, sk_ready);
227 list_del_init(&svsk->sk_ready);
228
229 dprintk("svc: socket %p dequeued, inuse=%d\n",
230 svsk->sk_sk, svsk->sk_inuse);
231
232 return svsk;
233}
234
235/*
236 * Having read something from a socket, check whether it
237 * needs to be re-enqueued.
238 * Note: SK_DATA only gets cleared when a read-attempt finds
239 * no (or insufficient) data.
240 */
241static inline void
242svc_sock_received(struct svc_sock *svsk)
243{
244 clear_bit(SK_BUSY, &svsk->sk_flags);
245 svc_sock_enqueue(svsk);
246}
247
248
249/**
250 * svc_reserve - change the space reserved for the reply to a request.
251 * @rqstp: The request in question
252 * @space: new max space to reserve
253 *
254 * Each request reserves some space on the output queue of the socket
255 * to make sure the reply fits. This function reduces that reserved
256 * space to be the amount of space used already, plus @space.
257 *
258 */
259void svc_reserve(struct svc_rqst *rqstp, int space)
260{
261 space += rqstp->rq_res.head[0].iov_len;
262
263 if (space < rqstp->rq_reserved) {
264 struct svc_sock *svsk = rqstp->rq_sock;
265 spin_lock_bh(&svsk->sk_server->sv_lock);
266 svsk->sk_reserved -= (rqstp->rq_reserved - space);
267 rqstp->rq_reserved = space;
268 spin_unlock_bh(&svsk->sk_server->sv_lock);
269
270 svc_sock_enqueue(svsk);
271 }
272}
273
274/*
275 * Release a socket after use.
276 */
277static inline void
278svc_sock_put(struct svc_sock *svsk)
279{
280 struct svc_serv *serv = svsk->sk_server;
281
282 spin_lock_bh(&serv->sv_lock);
283 if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
284 spin_unlock_bh(&serv->sv_lock);
285 dprintk("svc: releasing dead socket\n");
286 sock_release(svsk->sk_sock);
287 kfree(svsk);
288 }
289 else
290 spin_unlock_bh(&serv->sv_lock);
291}
292
293static void
294svc_sock_release(struct svc_rqst *rqstp)
295{
296 struct svc_sock *svsk = rqstp->rq_sock;
297
298 svc_release_skb(rqstp);
299
300 svc_free_allpages(rqstp);
301 rqstp->rq_res.page_len = 0;
302 rqstp->rq_res.page_base = 0;
303
304
305 /* Reset response buffer and release
306 * the reservation.
307 * But first, check that enough space was reserved
308 * for the reply, otherwise we have a bug!
309 */
310 if ((rqstp->rq_res.len) > rqstp->rq_reserved)
311 printk(KERN_ERR "RPC request reserved %d but used %d\n",
312 rqstp->rq_reserved,
313 rqstp->rq_res.len);
314
315 rqstp->rq_res.head[0].iov_len = 0;
316 svc_reserve(rqstp, 0);
317 rqstp->rq_sock = NULL;
318
319 svc_sock_put(svsk);
320}
321
322/*
323 * External function to wake up a server waiting for data
324 */
325void
326svc_wake_up(struct svc_serv *serv)
327{
328 struct svc_rqst *rqstp;
329
330 spin_lock_bh(&serv->sv_lock);
331 if (!list_empty(&serv->sv_threads)) {
332 rqstp = list_entry(serv->sv_threads.next,
333 struct svc_rqst,
334 rq_list);
335 dprintk("svc: daemon %p woken up.\n", rqstp);
336 /*
337 svc_serv_dequeue(serv, rqstp);
338 rqstp->rq_sock = NULL;
339 */
340 wake_up(&rqstp->rq_wait);
341 }
342 spin_unlock_bh(&serv->sv_lock);
343}
344
345/*
346 * Generic sendto routine
347 */
348static int
349svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
350{
351 struct svc_sock *svsk = rqstp->rq_sock;
352 struct socket *sock = svsk->sk_sock;
353 int slen;
354 char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))];
355 struct cmsghdr *cmh = (struct cmsghdr *)buffer;
356 struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh);
357 int len = 0;
358 int result;
359 int size;
360 struct page **ppage = xdr->pages;
361 size_t base = xdr->page_base;
362 unsigned int pglen = xdr->page_len;
363 unsigned int flags = MSG_MORE;
364
365 slen = xdr->len;
366
367 if (rqstp->rq_prot == IPPROTO_UDP) {
368 /* set the source and destination */
369 struct msghdr msg;
370 msg.msg_name = &rqstp->rq_addr;
371 msg.msg_namelen = sizeof(rqstp->rq_addr);
372 msg.msg_iov = NULL;
373 msg.msg_iovlen = 0;
374 msg.msg_flags = MSG_MORE;
375
376 msg.msg_control = cmh;
377 msg.msg_controllen = sizeof(buffer);
378 cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
379 cmh->cmsg_level = SOL_IP;
380 cmh->cmsg_type = IP_PKTINFO;
381 pki->ipi_ifindex = 0;
382 pki->ipi_spec_dst.s_addr = rqstp->rq_daddr;
383
384 if (sock_sendmsg(sock, &msg, 0) < 0)
385 goto out;
386 }
387
388 /* send head */
389 if (slen == xdr->head[0].iov_len)
390 flags = 0;
391 len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags);
392 if (len != xdr->head[0].iov_len)
393 goto out;
394 slen -= xdr->head[0].iov_len;
395 if (slen == 0)
396 goto out;
397
398 /* send page data */
399 size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
400 while (pglen > 0) {
401 if (slen == size)
402 flags = 0;
403 result = sock->ops->sendpage(sock, *ppage, base, size, flags);
404 if (result > 0)
405 len += result;
406 if (result != size)
407 goto out;
408 slen -= size;
409 pglen -= size;
410 size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
411 base = 0;
412 ppage++;
413 }
414 /* send tail */
415 if (xdr->tail[0].iov_len) {
416 result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage],
417 ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1),
418 xdr->tail[0].iov_len, 0);
419
420 if (result > 0)
421 len += result;
422 }
423out:
424 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %x)\n",
425 rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
426 rqstp->rq_addr.sin_addr.s_addr);
427
428 return len;
429}
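/*
 * Illustrative sketch, not from the patch: the three-part xdr_buf
 * shape that svc_sendto() walks above.  A reply starts in the head
 * kvec, spills into whole pages, and any trailing bytes land in the
 * tail kvec; xdr->len is the sum of all three parts.  The helper and
 * its arguments are hypothetical.
 */
static void example_fill_xdr_buf(struct xdr_buf *xdr, struct kvec *head,
				 struct page **pages, unsigned int page_bytes,
				 struct kvec *tail)
{
	xdr->head[0]   = *head;		/* RPC header plus leading data */
	xdr->pages     = pages;		/* full pages of payload        */
	xdr->page_base = 0;
	xdr->page_len  = page_bytes;
	xdr->tail[0]   = *tail;		/* odd bytes after the pages    */
	xdr->len       = head->iov_len + page_bytes + tail->iov_len;
}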
430
431/*
432 * Check input queue length
433 */
434static int
435svc_recv_available(struct svc_sock *svsk)
436{
437 mm_segment_t oldfs;
438 struct socket *sock = svsk->sk_sock;
439 int avail, err;
440
441 oldfs = get_fs(); set_fs(KERNEL_DS);
442 err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail);
443 set_fs(oldfs);
444
445 return (err >= 0)? avail : err;
446}
447
448/*
449 * Generic recvfrom routine.
450 */
451static int
452svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
453{
454 struct msghdr msg;
455 struct socket *sock;
456 int len, alen;
457
458 rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
459 sock = rqstp->rq_sock->sk_sock;
460
461 msg.msg_name = &rqstp->rq_addr;
462 msg.msg_namelen = sizeof(rqstp->rq_addr);
463 msg.msg_control = NULL;
464 msg.msg_controllen = 0;
465
466 msg.msg_flags = MSG_DONTWAIT;
467
468 len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
469
470 /* sock_recvmsg doesn't fill in the name/namelen, so we must..
471 * possibly we should cache this in the svc_sock structure
472 * at accept time. FIXME
473 */
474 alen = sizeof(rqstp->rq_addr);
475 sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1);
476
477 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
478 rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
479
480 return len;
481}
482
483/*
484 * Set socket snd and rcv buffer lengths
485 */
486static inline void
487svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv)
488{
489#if 0
490 mm_segment_t oldfs;
491 oldfs = get_fs(); set_fs(KERNEL_DS);
492 sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
493 (char*)&snd, sizeof(snd));
494 sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
495 (char*)&rcv, sizeof(rcv));
496#else
497 /* sock_setsockopt limits use to sysctl_?mem_max,
498 * which isn't acceptable. Until that is made conditional
499 * on not having CAP_SYS_RESOURCE or similar, we go direct...
500 * DaveM said I could!
501 */
502 lock_sock(sock->sk);
503 sock->sk->sk_sndbuf = snd * 2;
504 sock->sk->sk_rcvbuf = rcv * 2;
505 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
506 release_sock(sock->sk);
507#endif
508}
509/*
510 * INET callback when data has been received on the socket.
511 */
512static void
513svc_udp_data_ready(struct sock *sk, int count)
514{
515 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
516
517 if (!svsk)
518 goto out;
519 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n",
520 svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags));
521 set_bit(SK_DATA, &svsk->sk_flags);
522 svc_sock_enqueue(svsk);
523 out:
524 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
525 wake_up_interruptible(sk->sk_sleep);
526}
527
528/*
529 * INET callback when space is newly available on the socket.
530 */
531static void
532svc_write_space(struct sock *sk)
533{
534 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
535
536 if (svsk) {
537 dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
538 svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags));
539 svc_sock_enqueue(svsk);
540 }
541
542 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
543 printk(KERN_WARNING "RPC svc_write_space: some sleeping on %p\n",
544 svsk);
545 wake_up_interruptible(sk->sk_sleep);
546 }
547}
548
549/*
550 * Receive a datagram from a UDP socket.
551 */
552extern int
553csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);
554
555static int
556svc_udp_recvfrom(struct svc_rqst *rqstp)
557{
558 struct svc_sock *svsk = rqstp->rq_sock;
559 struct svc_serv *serv = svsk->sk_server;
560 struct sk_buff *skb;
561 int err, len;
562
563 if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
564 /* udp sockets need large rcvbuf as all pending
565 * requests are still in that buffer. sndbuf must
566 * also be large enough that there is enough space
567 * for one reply per thread.
568 */
569 svc_sock_setbufsize(svsk->sk_sock,
570 (serv->sv_nrthreads+3) * serv->sv_bufsz,
571 (serv->sv_nrthreads+3) * serv->sv_bufsz);
572
573 if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
574 svc_sock_received(svsk);
575 return svc_deferred_recv(rqstp);
576 }
577
578 clear_bit(SK_DATA, &svsk->sk_flags);
579 while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
580 if (err == -EAGAIN) {
581 svc_sock_received(svsk);
582 return err;
583 }
584 /* possibly an icmp error */
585 dprintk("svc: recvfrom returned error %d\n", -err);
586 }
587 if (skb->stamp.tv_sec == 0) {
588 skb->stamp.tv_sec = xtime.tv_sec;
589 skb->stamp.tv_usec = xtime.tv_nsec * 1000;
590 /* Don't enable netstamp, sunrpc doesn't
591 need that much accuracy */
592 }
593 svsk->sk_sk->sk_stamp = skb->stamp;
594 set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
595
596 /*
597 * Maybe more packets - kick another thread ASAP.
598 */
599 svc_sock_received(svsk);
600
601 len = skb->len - sizeof(struct udphdr);
602 rqstp->rq_arg.len = len;
603
604 rqstp->rq_prot = IPPROTO_UDP;
605
606 /* Get sender address */
607 rqstp->rq_addr.sin_family = AF_INET;
608 rqstp->rq_addr.sin_port = skb->h.uh->source;
609 rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
610 rqstp->rq_daddr = skb->nh.iph->daddr;
611
612 if (skb_is_nonlinear(skb)) {
613 /* we have to copy */
614 local_bh_disable();
615 if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
616 local_bh_enable();
617 /* checksum error */
618 skb_free_datagram(svsk->sk_sk, skb);
619 return 0;
620 }
621 local_bh_enable();
622 skb_free_datagram(svsk->sk_sk, skb);
623 } else {
624 /* we can use it in-place */
625 rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr);
626 rqstp->rq_arg.head[0].iov_len = len;
627 if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
628 if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
629 skb_free_datagram(svsk->sk_sk, skb);
630 return 0;
631 }
632 skb->ip_summed = CHECKSUM_UNNECESSARY;
633 }
634 rqstp->rq_skbuff = skb;
635 }
636
637 rqstp->rq_arg.page_base = 0;
638 if (len <= rqstp->rq_arg.head[0].iov_len) {
639 rqstp->rq_arg.head[0].iov_len = len;
640 rqstp->rq_arg.page_len = 0;
641 } else {
642 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
643 rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
644 }
645
646 if (serv->sv_stats)
647 serv->sv_stats->netudpcnt++;
648
649 return len;
650}
651
652static int
653svc_udp_sendto(struct svc_rqst *rqstp)
654{
655 int error;
656
657 error = svc_sendto(rqstp, &rqstp->rq_res);
658 if (error == -ECONNREFUSED)
659 /* ICMP error on earlier request. */
660 error = svc_sendto(rqstp, &rqstp->rq_res);
661
662 return error;
663}
664
665static void
666svc_udp_init(struct svc_sock *svsk)
667{
668 svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
669 svsk->sk_sk->sk_write_space = svc_write_space;
670 svsk->sk_recvfrom = svc_udp_recvfrom;
671 svsk->sk_sendto = svc_udp_sendto;
672
673 /* initial setting must have enough space to
674 * receive and respond to one request.
675 * svc_udp_recvfrom will re-adjust if necessary
676 */
677 svc_sock_setbufsize(svsk->sk_sock,
678 3 * svsk->sk_server->sv_bufsz,
679 3 * svsk->sk_server->sv_bufsz);
680
681 set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
682 set_bit(SK_CHNGBUF, &svsk->sk_flags);
683}
684
685/*
686 * A data_ready event on a listening socket means there's a connection
687 * pending. Do not use state_change as a substitute for it.
688 */
689static void
690svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
691{
692 struct svc_sock *svsk;
693
694 dprintk("svc: socket %p TCP (listen) state change %d\n",
695 sk, sk->sk_state);
696
697 if (sk->sk_state != TCP_LISTEN) {
698 /*
699 * This callback may be called twice when a new connection
700 * is established as a child socket inherits everything
701 * from a parent LISTEN socket.
702 * 1) data_ready method of the parent socket will be called
703 * when one of the child sockets becomes ESTABLISHED.
704 * 2) data_ready method of the child socket may be called
705 * when it receives data before the socket is accepted.
706 * In case of 2, we should ignore it silently.
707 */
708 goto out;
709 }
710 if (!(svsk = (struct svc_sock *) sk->sk_user_data)) {
711 printk("svc: socket %p: no user data\n", sk);
712 goto out;
713 }
714 set_bit(SK_CONN, &svsk->sk_flags);
715 svc_sock_enqueue(svsk);
716 out:
717 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
718 wake_up_interruptible_all(sk->sk_sleep);
719}
720
721/*
722 * A state change on a connected socket means it's dying or dead.
723 */
724static void
725svc_tcp_state_change(struct sock *sk)
726{
727 struct svc_sock *svsk;
728
729 dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
730 sk, sk->sk_state, sk->sk_user_data);
731
732 if (!(svsk = (struct svc_sock *) sk->sk_user_data)) {
733 printk("svc: socket %p: no user data\n", sk);
734 goto out;
735 }
736 set_bit(SK_CLOSE, &svsk->sk_flags);
737 svc_sock_enqueue(svsk);
738 out:
739 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
740 wake_up_interruptible_all(sk->sk_sleep);
741}
742
743static void
744svc_tcp_data_ready(struct sock *sk, int count)
745{
746 struct svc_sock * svsk;
747
748 dprintk("svc: socket %p TCP data ready (svsk %p)\n",
749 sk, sk->sk_user_data);
750 if (!(svsk = (struct svc_sock *)(sk->sk_user_data)))
751 goto out;
752 set_bit(SK_DATA, &svsk->sk_flags);
753 svc_sock_enqueue(svsk);
754 out:
755 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
756 wake_up_interruptible(sk->sk_sleep);
757}
758
759/*
760 * Accept a TCP connection
761 */
762static void
763svc_tcp_accept(struct svc_sock *svsk)
764{
765 struct sockaddr_in sin;
766 struct svc_serv *serv = svsk->sk_server;
767 struct socket *sock = svsk->sk_sock;
768 struct socket *newsock;
769 struct proto_ops *ops;
770 struct svc_sock *newsvsk;
771 int err, slen;
772
773 dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
774 if (!sock)
775 return;
776
777 err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock);
778 if (err) {
779 if (err == -ENOMEM)
780 printk(KERN_WARNING "%s: no more sockets!\n",
781 serv->sv_name);
782 return;
783 }
784
785 dprintk("svc: tcp_accept %p allocated\n", newsock);
786 newsock->ops = ops = sock->ops;
787
788 clear_bit(SK_CONN, &svsk->sk_flags);
789 if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) {
790 if (err != -EAGAIN && net_ratelimit())
791 printk(KERN_WARNING "%s: accept failed (err %d)!\n",
792 serv->sv_name, -err);
793 goto failed; /* aborted connection or whatever */
794 }
795 set_bit(SK_CONN, &svsk->sk_flags);
796 svc_sock_enqueue(svsk);
797
798 slen = sizeof(sin);
799 err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1);
800 if (err < 0) {
801 if (net_ratelimit())
802 printk(KERN_WARNING "%s: peername failed (err %d)!\n",
803 serv->sv_name, -err);
804 goto failed; /* aborted connection or whatever */
805 }
806
807 /* Ideally, we would want to reject connections from unauthorized
808 * hosts here, but when we get encryption, the IP of the host won't
809 * tell us anything. For now just warn about unprivileged connections.
810 */
811 if (ntohs(sin.sin_port) >= 1024) {
812 dprintk(KERN_WARNING
813 "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
814 serv->sv_name,
815 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
816 }
817
818 dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,
819 NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
820
821 /* make sure that a write doesn't block forever when
822 * low on memory
823 */
824 newsock->sk->sk_sndtimeo = HZ*30;
825
826 if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0)))
827 goto failed;
828
829
830 /* make sure that we don't have too many active connections.
831 * If we have, something must be dropped.
832 *
833 * There's no point in trying to do random drop here for
834 * DoS prevention. The NFS client does one reconnect in 15
835 * seconds. An attacker can easily beat that.
836 *
837 * The only somewhat efficient mechanism would be to drop
838 * old connections from the same IP first. But right now
839 * we don't even record the client IP in svc_sock.
840 */
841 if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
842 struct svc_sock *svsk = NULL;
843 spin_lock_bh(&serv->sv_lock);
844 if (!list_empty(&serv->sv_tempsocks)) {
845 if (net_ratelimit()) {
846 /* Try to help the admin */
847 printk(KERN_NOTICE "%s: too many open TCP "
848 "sockets, consider increasing the "
849 "number of nfsd threads\n",
850 serv->sv_name);
851 printk(KERN_NOTICE "%s: last TCP connect from "
852 "%u.%u.%u.%u:%d\n",
853 serv->sv_name,
854 NIPQUAD(sin.sin_addr.s_addr),
855 ntohs(sin.sin_port));
856 }
857 /*
858 * Always select the oldest socket. It's not fair,
859 * but so is life
860 */
861 svsk = list_entry(serv->sv_tempsocks.prev,
862 struct svc_sock,
863 sk_list);
864 set_bit(SK_CLOSE, &svsk->sk_flags);
865 svsk->sk_inuse ++;
866 }
867 spin_unlock_bh(&serv->sv_lock);
868
869 if (svsk) {
870 svc_sock_enqueue(svsk);
871 svc_sock_put(svsk);
872 }
873
874 }
875
876 if (serv->sv_stats)
877 serv->sv_stats->nettcpconn++;
878
879 return;
880
881failed:
882 sock_release(newsock);
883 return;
884}
885
886/*
887 * Receive data from a TCP socket.
888 */
889static int
890svc_tcp_recvfrom(struct svc_rqst *rqstp)
891{
892 struct svc_sock *svsk = rqstp->rq_sock;
893 struct svc_serv *serv = svsk->sk_server;
894 int len;
895 struct kvec vec[RPCSVC_MAXPAGES];
896 int pnum, vlen;
897
898 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
899 svsk, test_bit(SK_DATA, &svsk->sk_flags),
900 test_bit(SK_CONN, &svsk->sk_flags),
901 test_bit(SK_CLOSE, &svsk->sk_flags));
902
903 if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) {
904 svc_sock_received(svsk);
905 return svc_deferred_recv(rqstp);
906 }
907
908 if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
909 svc_delete_socket(svsk);
910 return 0;
911 }
912
913 if (test_bit(SK_CONN, &svsk->sk_flags)) {
914 svc_tcp_accept(svsk);
915 svc_sock_received(svsk);
916 return 0;
917 }
918
919 if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
920 /* sndbuf needs to have room for one request
921 * per thread, otherwise we can stall even when the
922 * network isn't a bottleneck.
923 * rcvbuf just needs to be able to hold a few requests.
924 * Normally they will be removed from the queue
925	 * as soon as a complete request arrives.
926 */
927 svc_sock_setbufsize(svsk->sk_sock,
928 (serv->sv_nrthreads+3) * serv->sv_bufsz,
929 3 * serv->sv_bufsz);
930
931 clear_bit(SK_DATA, &svsk->sk_flags);
932
933 /* Receive data. If we haven't got the record length yet, get
934 * the next four bytes. Otherwise try to gobble up as much as
935 * possible up to the complete record length.
936 */
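	/* Background sketch (assumes the standard RFC 1831 record marking
	 * for RPC over TCP): every record is preceded by a 4-byte header in
	 * network byte order, whose top bit flags the last fragment and
	 * whose low 31 bits give the fragment length.  A header of
	 * 0x8000001c, for example, announces a final fragment of 28 bytes.
	 * The code below accumulates those 4 bytes in sk_reclen before
	 * gobbling the payload.
	 */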
937 if (svsk->sk_tcplen < 4) {
938 unsigned long want = 4 - svsk->sk_tcplen;
939 struct kvec iov;
940
941 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
942 iov.iov_len = want;
943 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
944 goto error;
945 svsk->sk_tcplen += len;
946
947 if (len < want) {
948 dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
949 len, want);
950 svc_sock_received(svsk);
951 return -EAGAIN; /* record header not complete */
952 }
953
954 svsk->sk_reclen = ntohl(svsk->sk_reclen);
955 if (!(svsk->sk_reclen & 0x80000000)) {
956 /* FIXME: technically, a record can be fragmented,
957 * and non-terminal fragments will not have the top
958 * bit set in the fragment length header.
959 * But apparently no known nfs clients send fragmented
960 * records. */
961 printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n",
962 (unsigned long) svsk->sk_reclen);
963 goto err_delete;
964 }
965 svsk->sk_reclen &= 0x7fffffff;
966 dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
967 if (svsk->sk_reclen > serv->sv_bufsz) {
968 printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
969 (unsigned long) svsk->sk_reclen);
970 goto err_delete;
971 }
972 }
973
974 /* Check whether enough data is available */
975 len = svc_recv_available(svsk);
976 if (len < 0)
977 goto error;
978
979 if (len < svsk->sk_reclen) {
980 dprintk("svc: incomplete TCP record (%d of %d)\n",
981 len, svsk->sk_reclen);
982 svc_sock_received(svsk);
983 return -EAGAIN; /* record not complete */
984 }
985 len = svsk->sk_reclen;
986 set_bit(SK_DATA, &svsk->sk_flags);
987
988 vec[0] = rqstp->rq_arg.head[0];
989 vlen = PAGE_SIZE;
990 pnum = 1;
991 while (vlen < len) {
992 vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]);
993 vec[pnum].iov_len = PAGE_SIZE;
994 pnum++;
995 vlen += PAGE_SIZE;
996 }
997
998 /* Now receive data */
999 len = svc_recvfrom(rqstp, vec, pnum, len);
1000 if (len < 0)
1001 goto error;
1002
1003 dprintk("svc: TCP complete record (%d bytes)\n", len);
1004 rqstp->rq_arg.len = len;
1005 rqstp->rq_arg.page_base = 0;
1006 if (len <= rqstp->rq_arg.head[0].iov_len) {
1007 rqstp->rq_arg.head[0].iov_len = len;
1008 rqstp->rq_arg.page_len = 0;
1009 } else {
1010 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
1011 }
1012
1013 rqstp->rq_skbuff = NULL;
1014 rqstp->rq_prot = IPPROTO_TCP;
1015
1016 /* Reset TCP read info */
1017 svsk->sk_reclen = 0;
1018 svsk->sk_tcplen = 0;
1019
1020 svc_sock_received(svsk);
1021 if (serv->sv_stats)
1022 serv->sv_stats->nettcpcnt++;
1023
1024 return len;
1025
1026 err_delete:
1027 svc_delete_socket(svsk);
1028 return -EAGAIN;
1029
1030 error:
1031 if (len == -EAGAIN) {
1032 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1033 svc_sock_received(svsk);
1034 } else {
1035 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1036 svsk->sk_server->sv_name, -len);
1037 svc_sock_received(svsk);
1038 }
1039
1040 return len;
1041}
1042
1043/*
1044 * Send out data on TCP socket.
1045 */
1046static int
1047svc_tcp_sendto(struct svc_rqst *rqstp)
1048{
1049 struct xdr_buf *xbufp = &rqstp->rq_res;
1050 int sent;
1051 u32 reclen;
1052
1053 /* Set up the first element of the reply kvec.
1054 * Any other kvecs that may be in use have been taken
1055 * care of by the server implementation itself.
1056 */
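	/* Note on the arithmetic below: the server reserves the first 4
	 * bytes of head[0] for the TCP record marker, so the marker value
	 * is the total reply length minus those 4 bytes, with the top bit
	 * set to mark this as the final (and only) fragment of the record.
	 */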
1057	reclen = htonl(0x80000000 | (xbufp->len - 4));
1058 memcpy(xbufp->head[0].iov_base, &reclen, 4);
1059
1060 if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags))
1061 return -ENOTCONN;
1062
1063 sent = svc_sendto(rqstp, &rqstp->rq_res);
1064 if (sent != xbufp->len) {
1065 printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
1066 rqstp->rq_sock->sk_server->sv_name,
1067 (sent<0)?"got error":"sent only",
1068 sent, xbufp->len);
1069 svc_delete_socket(rqstp->rq_sock);
1070 sent = -EAGAIN;
1071 }
1072 return sent;
1073}
1074
1075static void
1076svc_tcp_init(struct svc_sock *svsk)
1077{
1078 struct sock *sk = svsk->sk_sk;
1079 struct tcp_sock *tp = tcp_sk(sk);
1080
1081 svsk->sk_recvfrom = svc_tcp_recvfrom;
1082 svsk->sk_sendto = svc_tcp_sendto;
1083
1084 if (sk->sk_state == TCP_LISTEN) {
1085 dprintk("setting up TCP socket for listening\n");
1086 sk->sk_data_ready = svc_tcp_listen_data_ready;
1087 set_bit(SK_CONN, &svsk->sk_flags);
1088 } else {
1089 dprintk("setting up TCP socket for reading\n");
1090 sk->sk_state_change = svc_tcp_state_change;
1091 sk->sk_data_ready = svc_tcp_data_ready;
1092 sk->sk_write_space = svc_write_space;
1093
1094 svsk->sk_reclen = 0;
1095 svsk->sk_tcplen = 0;
1096
1097 tp->nonagle = 1; /* disable Nagle's algorithm */
1098
1099		/* initial setting must have enough space to
1100 * receive and respond to one request.
1101 * svc_tcp_recvfrom will re-adjust if necessary
1102 */
1103 svc_sock_setbufsize(svsk->sk_sock,
1104 3 * svsk->sk_server->sv_bufsz,
1105 3 * svsk->sk_server->sv_bufsz);
1106
1107 set_bit(SK_CHNGBUF, &svsk->sk_flags);
1108 set_bit(SK_DATA, &svsk->sk_flags);
1109 if (sk->sk_state != TCP_ESTABLISHED)
1110 set_bit(SK_CLOSE, &svsk->sk_flags);
1111 }
1112}
1113
1114void
1115svc_sock_update_bufs(struct svc_serv *serv)
1116{
1117 /*
1118 * The number of server threads has changed. Update
1119 * rcvbuf and sndbuf accordingly on all sockets
1120 */
1121 struct list_head *le;
1122
1123 spin_lock_bh(&serv->sv_lock);
1124 list_for_each(le, &serv->sv_permsocks) {
1125 struct svc_sock *svsk =
1126 list_entry(le, struct svc_sock, sk_list);
1127 set_bit(SK_CHNGBUF, &svsk->sk_flags);
1128 }
1129 list_for_each(le, &serv->sv_tempsocks) {
1130 struct svc_sock *svsk =
1131 list_entry(le, struct svc_sock, sk_list);
1132 set_bit(SK_CHNGBUF, &svsk->sk_flags);
1133 }
1134 spin_unlock_bh(&serv->sv_lock);
1135}
1136
1137/*
1138 * Receive the next request on any socket.
1139 */
1140int
1141svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
1142{
1143	struct svc_sock *svsk = NULL;
1144 int len;
1145 int pages;
1146 struct xdr_buf *arg;
1147 DECLARE_WAITQUEUE(wait, current);
1148
1149 dprintk("svc: server %p waiting for data (to = %ld)\n",
1150 rqstp, timeout);
1151
1152 if (rqstp->rq_sock)
1153 printk(KERN_ERR
1154 "svc_recv: service %p, socket not NULL!\n",
1155 rqstp);
1156 if (waitqueue_active(&rqstp->rq_wait))
1157 printk(KERN_ERR
1158 "svc_recv: service %p, wait queue active!\n",
1159 rqstp);
1160
1161 /* Initialize the buffers */
1162 /* first reclaim pages that were moved to response list */
1163 svc_pushback_allpages(rqstp);
1164
1165 /* now allocate needed pages. If we get a failure, sleep briefly */
1166 pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE;
1167 while (rqstp->rq_arghi < pages) {
1168 struct page *p = alloc_page(GFP_KERNEL);
1169 if (!p) {
1170 set_current_state(TASK_UNINTERRUPTIBLE);
1171 schedule_timeout(HZ/2);
1172 continue;
1173 }
1174 rqstp->rq_argpages[rqstp->rq_arghi++] = p;
1175 }
1176
1177 /* Make arg->head point to first page and arg->pages point to rest */
1178 arg = &rqstp->rq_arg;
1179 arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]);
1180 arg->head[0].iov_len = PAGE_SIZE;
1181 rqstp->rq_argused = 1;
1182 arg->pages = rqstp->rq_argpages + 1;
1183 arg->page_base = 0;
1184 /* save at least one page for response */
1185 arg->page_len = (pages-2)*PAGE_SIZE;
1186 arg->len = (pages-1)*PAGE_SIZE;
1187 arg->tail[0].iov_len = 0;
1188
1189 try_to_freeze(PF_FREEZE);
1190 if (signalled())
1191 return -EINTR;
1192
1193 spin_lock_bh(&serv->sv_lock);
1194 if (!list_empty(&serv->sv_tempsocks)) {
1195 svsk = list_entry(serv->sv_tempsocks.next,
1196 struct svc_sock, sk_list);
1197 /* apparently the "standard" is that clients close
1198 * idle connections after 5 minutes, servers after
1199 * 6 minutes
1200 * http://www.connectathon.org/talks96/nfstcp.pdf
1201 */
1202 if (get_seconds() - svsk->sk_lastrecv < 6*60
1203 || test_bit(SK_BUSY, &svsk->sk_flags))
1204 svsk = NULL;
1205 }
1206 if (svsk) {
1207 set_bit(SK_BUSY, &svsk->sk_flags);
1208 set_bit(SK_CLOSE, &svsk->sk_flags);
1209 rqstp->rq_sock = svsk;
1210 svsk->sk_inuse++;
1211 } else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
1212 rqstp->rq_sock = svsk;
1213 svsk->sk_inuse++;
1214 rqstp->rq_reserved = serv->sv_bufsz;
1215 svsk->sk_reserved += rqstp->rq_reserved;
1216 } else {
1217 /* No data pending. Go to sleep */
1218 svc_serv_enqueue(serv, rqstp);
1219
1220 /*
1221 * We have to be able to interrupt this wait
1222 * to bring down the daemons ...
1223 */
1224 set_current_state(TASK_INTERRUPTIBLE);
1225 add_wait_queue(&rqstp->rq_wait, &wait);
1226 spin_unlock_bh(&serv->sv_lock);
1227
1228 schedule_timeout(timeout);
1229
1230 try_to_freeze(PF_FREEZE);
1231
1232 spin_lock_bh(&serv->sv_lock);
1233 remove_wait_queue(&rqstp->rq_wait, &wait);
1234
1235 if (!(svsk = rqstp->rq_sock)) {
1236 svc_serv_dequeue(serv, rqstp);
1237 spin_unlock_bh(&serv->sv_lock);
1238 dprintk("svc: server %p, no data yet\n", rqstp);
1239 return signalled()? -EINTR : -EAGAIN;
1240 }
1241 }
1242 spin_unlock_bh(&serv->sv_lock);
1243
1244 dprintk("svc: server %p, socket %p, inuse=%d\n",
1245 rqstp, svsk, svsk->sk_inuse);
1246 len = svsk->sk_recvfrom(rqstp);
1247 dprintk("svc: got len=%d\n", len);
1248
1249 /* No data, incomplete (TCP) read, or accept() */
1250 if (len == 0 || len == -EAGAIN) {
1251 rqstp->rq_res.len = 0;
1252 svc_sock_release(rqstp);
1253 return -EAGAIN;
1254 }
1255 svsk->sk_lastrecv = get_seconds();
1256 if (test_bit(SK_TEMP, &svsk->sk_flags)) {
1257 /* push active sockets to end of list */
1258 spin_lock_bh(&serv->sv_lock);
1259 if (!list_empty(&svsk->sk_list))
1260 list_move_tail(&svsk->sk_list, &serv->sv_tempsocks);
1261 spin_unlock_bh(&serv->sv_lock);
1262 }
1263
1264 rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024;
1265 rqstp->rq_chandle.defer = svc_defer;
1266
1267 if (serv->sv_stats)
1268 serv->sv_stats->netcnt++;
1269 return len;
1270}
1271
1272/*
1273 * Drop request
1274 */
1275void
1276svc_drop(struct svc_rqst *rqstp)
1277{
1278 dprintk("svc: socket %p dropped request\n", rqstp->rq_sock);
1279 svc_sock_release(rqstp);
1280}
1281
1282/*
1283 * Return reply to client.
1284 */
1285int
1286svc_send(struct svc_rqst *rqstp)
1287{
1288 struct svc_sock *svsk;
1289 int len;
1290 struct xdr_buf *xb;
1291
1292 if ((svsk = rqstp->rq_sock) == NULL) {
1293 printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
1294 __FILE__, __LINE__);
1295 return -EFAULT;
1296 }
1297
1298 /* release the receive skb before sending the reply */
1299 svc_release_skb(rqstp);
1300
1301 /* calculate over-all length */
1302 xb = & rqstp->rq_res;
1303 xb->len = xb->head[0].iov_len +
1304 xb->page_len +
1305 xb->tail[0].iov_len;
1306
1307 /* Grab svsk->sk_sem to serialize outgoing data. */
1308 down(&svsk->sk_sem);
1309 if (test_bit(SK_DEAD, &svsk->sk_flags))
1310 len = -ENOTCONN;
1311 else
1312 len = svsk->sk_sendto(rqstp);
1313 up(&svsk->sk_sem);
1314 svc_sock_release(rqstp);
1315
1316 if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
1317 return 0;
1318 return len;
1319}
1320
1321/*
1322 * Initialize socket for RPC use and create svc_sock struct
1323 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
1324 */
1325static struct svc_sock *
1326svc_setup_socket(struct svc_serv *serv, struct socket *sock,
1327 int *errp, int pmap_register)
1328{
1329 struct svc_sock *svsk;
1330 struct sock *inet;
1331
1332 dprintk("svc: svc_setup_socket %p\n", sock);
1333 if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) {
1334 *errp = -ENOMEM;
1335 return NULL;
1336 }
1337 memset(svsk, 0, sizeof(*svsk));
1338
1339 inet = sock->sk;
1340
1341 /* Register socket with portmapper */
1342 if (*errp >= 0 && pmap_register)
1343 *errp = svc_register(serv, inet->sk_protocol,
1344 ntohs(inet_sk(inet)->sport));
1345
1346 if (*errp < 0) {
1347 kfree(svsk);
1348 return NULL;
1349 }
1350
1351 set_bit(SK_BUSY, &svsk->sk_flags);
1352 inet->sk_user_data = svsk;
1353 svsk->sk_sock = sock;
1354 svsk->sk_sk = inet;
1355 svsk->sk_ostate = inet->sk_state_change;
1356 svsk->sk_odata = inet->sk_data_ready;
1357 svsk->sk_owspace = inet->sk_write_space;
1358 svsk->sk_server = serv;
1359 svsk->sk_lastrecv = get_seconds();
1360 INIT_LIST_HEAD(&svsk->sk_deferred);
1361 INIT_LIST_HEAD(&svsk->sk_ready);
1362 sema_init(&svsk->sk_sem, 1);
1363
1364 /* Initialize the socket */
1365 if (sock->type == SOCK_DGRAM)
1366 svc_udp_init(svsk);
1367 else
1368 svc_tcp_init(svsk);
1369
1370 spin_lock_bh(&serv->sv_lock);
1371 if (!pmap_register) {
1372 set_bit(SK_TEMP, &svsk->sk_flags);
1373 list_add(&svsk->sk_list, &serv->sv_tempsocks);
1374 serv->sv_tmpcnt++;
1375 } else {
1376 clear_bit(SK_TEMP, &svsk->sk_flags);
1377 list_add(&svsk->sk_list, &serv->sv_permsocks);
1378 }
1379 spin_unlock_bh(&serv->sv_lock);
1380
1381 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1382 svsk, svsk->sk_sk);
1383
1384 clear_bit(SK_BUSY, &svsk->sk_flags);
1385 svc_sock_enqueue(svsk);
1386 return svsk;
1387}
1388
1389/*
1390 * Create socket for RPC service.
1391 */
1392static int
1393svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
1394{
1395 struct svc_sock *svsk;
1396 struct socket *sock;
1397 int error;
1398 int type;
1399
1400 dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
1401 serv->sv_program->pg_name, protocol,
1402 NIPQUAD(sin->sin_addr.s_addr),
1403 ntohs(sin->sin_port));
1404
1405 if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
1406 printk(KERN_WARNING "svc: only UDP and TCP "
1407 "sockets supported\n");
1408 return -EINVAL;
1409 }
1410 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1411
1412 if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0)
1413 return error;
1414
1415 if (sin != NULL) {
1416 if (type == SOCK_STREAM)
1417 sock->sk->sk_reuse = 1; /* allow address reuse */
1418 error = sock->ops->bind(sock, (struct sockaddr *) sin,
1419 sizeof(*sin));
1420 if (error < 0)
1421 goto bummer;
1422 }
1423
1424 if (protocol == IPPROTO_TCP) {
1425 if ((error = sock->ops->listen(sock, 64)) < 0)
1426 goto bummer;
1427 }
1428
1429 if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
1430 return 0;
1431
1432bummer:
1433 dprintk("svc: svc_create_socket error = %d\n", -error);
1434 sock_release(sock);
1435 return error;
1436}
1437
1438/*
1439 * Remove a dead socket
1440 */
1441void
1442svc_delete_socket(struct svc_sock *svsk)
1443{
1444 struct svc_serv *serv;
1445 struct sock *sk;
1446
1447 dprintk("svc: svc_delete_socket(%p)\n", svsk);
1448
1449 serv = svsk->sk_server;
1450 sk = svsk->sk_sk;
1451
1452 sk->sk_state_change = svsk->sk_ostate;
1453 sk->sk_data_ready = svsk->sk_odata;
1454 sk->sk_write_space = svsk->sk_owspace;
1455
1456 spin_lock_bh(&serv->sv_lock);
1457
1458 list_del_init(&svsk->sk_list);
1459 list_del_init(&svsk->sk_ready);
1460 if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
1461 if (test_bit(SK_TEMP, &svsk->sk_flags))
1462 serv->sv_tmpcnt--;
1463
1464 if (!svsk->sk_inuse) {
1465 spin_unlock_bh(&serv->sv_lock);
1466 sock_release(svsk->sk_sock);
1467 kfree(svsk);
1468 } else {
1469 spin_unlock_bh(&serv->sv_lock);
1470 dprintk(KERN_NOTICE "svc: server socket destroy delayed\n");
1471 /* svsk->sk_server = NULL; */
1472 }
1473}
1474
1475/*
1476 * Make a socket for nfsd and lockd
1477 */
1478int
1479svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
1480{
1481 struct sockaddr_in sin;
1482
1483 dprintk("svc: creating socket proto = %d\n", protocol);
1484 sin.sin_family = AF_INET;
1485 sin.sin_addr.s_addr = INADDR_ANY;
1486 sin.sin_port = htons(port);
1487 return svc_create_socket(serv, protocol, &sin);
1488}
1489
1490/*
1491 * Handle defer and revisit of requests
1492 */
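/*
 * In outline: when handling a request would block on a cache lookup,
 * svc_defer() snapshots it (protocol, peer address and argument bytes)
 * into a svc_deferred_req and pins the originating svc_sock.  When the
 * cache entry is ready, svc_revisit() moves that snapshot onto the
 * socket's sk_deferred list and re-enqueues the socket, so a later
 * svc_recv() replays it via svc_deferred_dequeue()/svc_deferred_recv().
 */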
1493
1494static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
1495{
1496 struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle);
1497 struct svc_serv *serv = dreq->owner;
1498 struct svc_sock *svsk;
1499
1500 if (too_many) {
1501 svc_sock_put(dr->svsk);
1502 kfree(dr);
1503 return;
1504 }
1505 dprintk("revisit queued\n");
1506 svsk = dr->svsk;
1507 dr->svsk = NULL;
1508 spin_lock_bh(&serv->sv_lock);
1509 list_add(&dr->handle.recent, &svsk->sk_deferred);
1510 spin_unlock_bh(&serv->sv_lock);
1511 set_bit(SK_DEFERRED, &svsk->sk_flags);
1512 svc_sock_enqueue(svsk);
1513 svc_sock_put(svsk);
1514}
1515
1516static struct cache_deferred_req *
1517svc_defer(struct cache_req *req)
1518{
1519 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
1520 int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len);
1521 struct svc_deferred_req *dr;
1522
1523 if (rqstp->rq_arg.page_len)
1524 return NULL; /* if more than a page, give up FIXME */
1525 if (rqstp->rq_deferred) {
1526 dr = rqstp->rq_deferred;
1527 rqstp->rq_deferred = NULL;
1528 } else {
1529 int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
1530 /* FIXME maybe discard if size too large */
1531 dr = kmalloc(size, GFP_KERNEL);
1532 if (dr == NULL)
1533 return NULL;
1534
1535 dr->handle.owner = rqstp->rq_server;
1536 dr->prot = rqstp->rq_prot;
1537 dr->addr = rqstp->rq_addr;
1538 dr->argslen = rqstp->rq_arg.len >> 2;
1539 memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
1540 }
1541 spin_lock_bh(&rqstp->rq_server->sv_lock);
1542 rqstp->rq_sock->sk_inuse++;
1543 dr->svsk = rqstp->rq_sock;
1544 spin_unlock_bh(&rqstp->rq_server->sv_lock);
1545
1546 dr->handle.revisit = svc_revisit;
1547 return &dr->handle;
1548}
1549
1550/*
1551 * recv data from a deferred request into an active one
1552 */
1553static int svc_deferred_recv(struct svc_rqst *rqstp)
1554{
1555 struct svc_deferred_req *dr = rqstp->rq_deferred;
1556
1557 rqstp->rq_arg.head[0].iov_base = dr->args;
1558 rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
1559 rqstp->rq_arg.page_len = 0;
1560 rqstp->rq_arg.len = dr->argslen<<2;
1561 rqstp->rq_prot = dr->prot;
1562 rqstp->rq_addr = dr->addr;
1563 return dr->argslen<<2;
1564}
1565
1566
1567static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
1568{
1569 struct svc_deferred_req *dr = NULL;
1570 struct svc_serv *serv = svsk->sk_server;
1571
1572 if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
1573 return NULL;
1574 spin_lock_bh(&serv->sv_lock);
1575 clear_bit(SK_DEFERRED, &svsk->sk_flags);
1576 if (!list_empty(&svsk->sk_deferred)) {
1577 dr = list_entry(svsk->sk_deferred.next,
1578 struct svc_deferred_req,
1579 handle.recent);
1580 list_del_init(&dr->handle.recent);
1581 set_bit(SK_DEFERRED, &svsk->sk_flags);
1582 }
1583 spin_unlock_bh(&serv->sv_lock);
1584 return dr;
1585}
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
new file mode 100644
index 000000000000..1b9616a12e24
--- /dev/null
+++ b/net/sunrpc/sysctl.c
@@ -0,0 +1,193 @@
1/*
2 * linux/net/sunrpc/sysctl.c
3 *
4 * Sysctl interface to sunrpc module.
5 *
6 * I would prefer to register the sunrpc table below sys/net, but that's
7 * impossible at the moment.
8 */
9
10#include <linux/config.h>
11#include <linux/types.h>
12#include <linux/linkage.h>
13#include <linux/ctype.h>
14#include <linux/fs.h>
15#include <linux/sysctl.h>
16#include <linux/module.h>
17
18#include <asm/uaccess.h>
19#include <linux/sunrpc/types.h>
20#include <linux/sunrpc/sched.h>
21#include <linux/sunrpc/stats.h>
22#include <linux/sunrpc/xprt.h>
23
24/*
25 * Declare the debug flags here
26 */
27unsigned int rpc_debug;
28unsigned int nfs_debug;
29unsigned int nfsd_debug;
30unsigned int nlm_debug;
31
32#ifdef RPC_DEBUG
33
34static struct ctl_table_header *sunrpc_table_header;
35static ctl_table sunrpc_table[];
36
37void
38rpc_register_sysctl(void)
39{
40 if (!sunrpc_table_header) {
41 sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
42#ifdef CONFIG_PROC_FS
43 if (sunrpc_table[0].de)
44 sunrpc_table[0].de->owner = THIS_MODULE;
45#endif
46 }
47
48}
49
50void
51rpc_unregister_sysctl(void)
52{
53 if (sunrpc_table_header) {
54 unregister_sysctl_table(sunrpc_table_header);
55 sunrpc_table_header = NULL;
56 }
57}
58
59static int
60proc_dodebug(ctl_table *table, int write, struct file *file,
61 void __user *buffer, size_t *lenp, loff_t *ppos)
62{
63 char tmpbuf[20], c, *s;
64 char __user *p;
65 unsigned int value;
66 size_t left, len;
67
68 if ((*ppos && !write) || !*lenp) {
69 *lenp = 0;
70 return 0;
71 }
72
73 left = *lenp;
74
75 if (write) {
76 if (!access_ok(VERIFY_READ, buffer, left))
77 return -EFAULT;
78 p = buffer;
79 while (left && __get_user(c, p) >= 0 && isspace(c))
80 left--, p++;
81 if (!left)
82 goto done;
83
84 if (left > sizeof(tmpbuf) - 1)
85 return -EINVAL;
86 if (copy_from_user(tmpbuf, p, left))
87 return -EFAULT;
88 tmpbuf[left] = '\0';
89
90 for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--)
91 value = 10 * value + (*s - '0');
92 if (*s && !isspace(*s))
93 return -EINVAL;
94 while (left && isspace(*s))
95 left--, s++;
96 *(unsigned int *) table->data = value;
97 /* Display the RPC tasks on writing to rpc_debug */
98 if (table->ctl_name == CTL_RPCDEBUG) {
99 rpc_show_tasks();
100 }
101 } else {
102 if (!access_ok(VERIFY_WRITE, buffer, left))
103 return -EFAULT;
104 len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
105 if (len > left)
106 len = left;
107 if (__copy_to_user(buffer, tmpbuf, len))
108 return -EFAULT;
109 if ((left -= len) > 0) {
110 if (put_user('\n', (char __user *)buffer + len))
111 return -EFAULT;
112 left--;
113 }
114 }
115
116done:
117 *lenp -= left;
118 *ppos += *lenp;
119 return 0;
120}
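/*
 * Usage sketch (assuming the sysctl tree below is registered as usual,
 * so these flags appear under /proc/sys/sunrpc/): writing a decimal
 * RPCDBG_* bitmask enables the corresponding debug output, for example
 * "echo 0 > /proc/sys/sunrpc/rpc_debug" turns RPC client debugging off.
 * As a side effect, any write to rpc_debug also dumps the currently
 * queued RPC tasks via rpc_show_tasks(), as proc_dodebug() does above.
 */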
121
122static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
123static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
124
125static ctl_table debug_table[] = {
126 {
127 .ctl_name = CTL_RPCDEBUG,
128 .procname = "rpc_debug",
129 .data = &rpc_debug,
130 .maxlen = sizeof(int),
131 .mode = 0644,
132 .proc_handler = &proc_dodebug
133 },
134 {
135 .ctl_name = CTL_NFSDEBUG,
136 .procname = "nfs_debug",
137 .data = &nfs_debug,
138 .maxlen = sizeof(int),
139 .mode = 0644,
140 .proc_handler = &proc_dodebug
141 },
142 {
143 .ctl_name = CTL_NFSDDEBUG,
144 .procname = "nfsd_debug",
145 .data = &nfsd_debug,
146 .maxlen = sizeof(int),
147 .mode = 0644,
148 .proc_handler = &proc_dodebug
149 },
150 {
151 .ctl_name = CTL_NLMDEBUG,
152 .procname = "nlm_debug",
153 .data = &nlm_debug,
154 .maxlen = sizeof(int),
155 .mode = 0644,
156 .proc_handler = &proc_dodebug
157 },
158 {
159 .ctl_name = CTL_SLOTTABLE_UDP,
160 .procname = "udp_slot_table_entries",
161 .data = &xprt_udp_slot_table_entries,
162 .maxlen = sizeof(unsigned int),
163 .mode = 0644,
164 .proc_handler = &proc_dointvec_minmax,
165 .strategy = &sysctl_intvec,
166 .extra1 = &min_slot_table_size,
167 .extra2 = &max_slot_table_size
168 },
169 {
170 .ctl_name = CTL_SLOTTABLE_TCP,
171 .procname = "tcp_slot_table_entries",
172 .data = &xprt_tcp_slot_table_entries,
173 .maxlen = sizeof(unsigned int),
174 .mode = 0644,
175 .proc_handler = &proc_dointvec_minmax,
176 .strategy = &sysctl_intvec,
177 .extra1 = &min_slot_table_size,
178 .extra2 = &max_slot_table_size
179 },
180 { .ctl_name = 0 }
181};
182
183static ctl_table sunrpc_table[] = {
184 {
185 .ctl_name = CTL_SUNRPC,
186 .procname = "sunrpc",
187 .mode = 0555,
188 .child = debug_table
189 },
190 { .ctl_name = 0 }
191};
192
193#endif
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
new file mode 100644
index 000000000000..bcbdf6430d5c
--- /dev/null
+++ b/net/sunrpc/timer.c
@@ -0,0 +1,107 @@
1/*
2 * linux/net/sunrpc/timer.c
3 *
4 * Estimate RPC request round trip time.
5 *
6 * Based on packet round-trip and variance estimator algorithms described
7 * in appendix A of "Congestion Avoidance and Control" by Van Jacobson
8 * and Michael J. Karels (ACM Computer Communication Review; Proceedings
9 * of the Sigcomm '88 Symposium in Stanford, CA, August, 1988).
10 *
11 * This RTT estimator is used only for RPC over datagram protocols.
12 *
13 * Copyright (C) 2002 Trond Myklebust <trond.myklebust@fys.uio.no>
14 */
15
16#include <asm/param.h>
17
18#include <linux/types.h>
19#include <linux/unistd.h>
20
21#include <linux/sunrpc/clnt.h>
22#include <linux/sunrpc/xprt.h>
23#include <linux/sunrpc/timer.h>
24
25#define RPC_RTO_MAX (60*HZ)
26#define RPC_RTO_INIT (HZ/5)
27#define RPC_RTO_MIN (HZ/10)
28
29void
30rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo)
31{
32 unsigned long init = 0;
33 unsigned i;
34
35 rt->timeo = timeo;
36
37 if (timeo > RPC_RTO_INIT)
38 init = (timeo - RPC_RTO_INIT) << 3;
39 for (i = 0; i < 5; i++) {
40 rt->srtt[i] = init;
41 rt->sdrtt[i] = RPC_RTO_INIT;
42 rt->ntimeouts[i] = 0;
43 }
44}
45
46/*
47 * NB: When computing the smoothed RTT and standard deviation,
48 * be careful not to produce negative intermediate results.
49 */
50void
51rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m)
52{
53 long *srtt, *sdrtt;
54
55 if (timer-- == 0)
56 return;
57
58 /* jiffies wrapped; ignore this one */
59 if (m < 0)
60 return;
61
62 if (m == 0)
63 m = 1L;
64
65 srtt = (long *)&rt->srtt[timer];
66 m -= *srtt >> 3;
67 *srtt += m;
68
69 if (m < 0)
70 m = -m;
71
72 sdrtt = (long *)&rt->sdrtt[timer];
73 m -= *sdrtt >> 2;
74 *sdrtt += m;
75
76 /* Set lower bound on the variance */
77 if (*sdrtt < RPC_RTO_MIN)
78 *sdrtt = RPC_RTO_MIN;
79}
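/*
 * A rough note on the fixed-point scaling above (Van Jacobson's scheme):
 * srtt[] holds the smoothed RTT scaled by 8 and sdrtt[] holds the mean
 * deviation scaled by 4, updated with gains of 1/8 and 1/4 respectively.
 * rpc_calc_rto() below thus returns roughly RTT + 4 * mean deviation
 * (the "A+4D" estimate mentioned in the comment below), capped at
 * RPC_RTO_MAX; the deviation term itself is floored at RPC_RTO_MIN above.
 */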
80
81/*
82 * Estimate rto for an nfs rpc sent via an unreliable datagram.
83 * Use the mean and mean deviation of rtt for the appropriate type of rpc
84 * for the frequent rpcs and a default for the others.
85 * The justification for doing "other" this way is that these rpcs
86 * happen so infrequently that the timer estimate would probably be stale.
87 * Also, since many of these rpcs are
88 * non-idempotent, a conservative timeout is desired.
89 * getattr, lookup,
90 * read, write, commit - A+4D
91 * other - timeo
92 */
93
94unsigned long
95rpc_calc_rto(struct rpc_rtt *rt, unsigned timer)
96{
97 unsigned long res;
98
99 if (timer-- == 0)
100 return rt->timeo;
101
102 res = ((rt->srtt[timer] + 7) >> 3) + rt->sdrtt[timer];
103 if (res > RPC_RTO_MAX)
104 res = RPC_RTO_MAX;
105
106 return res;
107}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
new file mode 100644
index 000000000000..4484931018eb
--- /dev/null
+++ b/net/sunrpc/xdr.c
@@ -0,0 +1,917 @@
1/*
2 * linux/net/sunrpc/xdr.c
3 *
4 * Generic XDR support.
5 *
6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
7 */
8
9#include <linux/types.h>
10#include <linux/socket.h>
11#include <linux/string.h>
12#include <linux/kernel.h>
13#include <linux/pagemap.h>
14#include <linux/errno.h>
15#include <linux/in.h>
16#include <linux/net.h>
17#include <net/sock.h>
18#include <linux/sunrpc/xdr.h>
19#include <linux/sunrpc/msg_prot.h>
20
21/*
22 * XDR functions for basic NFS types
23 */
24u32 *
25xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
26{
27 unsigned int quadlen = XDR_QUADLEN(obj->len);
28
29 p[quadlen] = 0; /* zero trailing bytes */
30 *p++ = htonl(obj->len);
31 memcpy(p, obj->data, obj->len);
32 return p + XDR_QUADLEN(obj->len);
33}
34
35u32 *
36xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
37{
38 unsigned int len;
39
40 if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ)
41 return NULL;
42 obj->len = len;
43 obj->data = (u8 *) p;
44 return p + XDR_QUADLEN(len);
45}
46
47/**
48 * xdr_encode_opaque_fixed - Encode fixed length opaque data
49 * @p - pointer to current position in XDR buffer.
50 * @ptr - pointer to data to encode (or NULL)
51 * @nbytes - size of data.
52 *
53 * Copy the array of data of length nbytes at ptr to the XDR buffer
54 * at position p, then align to the next 32-bit boundary by padding
55 * with zero bytes (see RFC1832).
56 * Note: if ptr is NULL, only the padding is performed.
57 *
58 * Returns the updated current XDR buffer position
59 *
60 */
61u32 *xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int nbytes)
62{
63 if (likely(nbytes != 0)) {
64 unsigned int quadlen = XDR_QUADLEN(nbytes);
65 unsigned int padding = (quadlen << 2) - nbytes;
66
67 if (ptr != NULL)
68 memcpy(p, ptr, nbytes);
69 if (padding != 0)
70 memset((char *)p + nbytes, 0, padding);
71 p += quadlen;
72 }
73 return p;
74}
75EXPORT_SYMBOL(xdr_encode_opaque_fixed);
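/*
 * Worked example for the padding above: encoding a 5-byte opaque gives
 * quadlen = XDR_QUADLEN(5) = 2, so three trailing zero bytes are written
 * and the position advances by two 32-bit words (8 bytes), keeping the
 * stream aligned on a 4-byte boundary as RFC 1832 requires.
 */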
76
77/**
78 * xdr_encode_opaque - Encode variable length opaque data
79 * @p - pointer to current position in XDR buffer.
80 * @ptr - pointer to data to encode (or NULL)
81 * @nbytes - size of data.
82 *
83 * Returns the updated current XDR buffer position
84 */
85u32 *xdr_encode_opaque(u32 *p, const void *ptr, unsigned int nbytes)
86{
87 *p++ = htonl(nbytes);
88 return xdr_encode_opaque_fixed(p, ptr, nbytes);
89}
90EXPORT_SYMBOL(xdr_encode_opaque);
91
92u32 *
93xdr_encode_string(u32 *p, const char *string)
94{
95 return xdr_encode_array(p, string, strlen(string));
96}
97
98u32 *
99xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
100{
101 unsigned int len;
102 char *string;
103
104 if ((len = ntohl(*p++)) > maxlen)
105 return NULL;
106 if (lenp)
107 *lenp = len;
108 if ((len % 4) != 0) {
109 string = (char *) p;
110 } else {
111 string = (char *) (p - 1);
112 memmove(string, p, len);
113 }
114 string[len] = '\0';
115 *sp = string;
116 return p + XDR_QUADLEN(len);
117}
118
119u32 *
120xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
121{
122 unsigned int len;
123
124 if ((len = ntohl(*p++)) > maxlen)
125 return NULL;
126 *lenp = len;
127 *sp = (char *) p;
128 return p + XDR_QUADLEN(len);
129}
130
131void
132xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
133 unsigned int len)
134{
135 struct kvec *tail = xdr->tail;
136 u32 *p;
137
138 xdr->pages = pages;
139 xdr->page_base = base;
140 xdr->page_len = len;
141
142 p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len);
143 tail->iov_base = p;
144 tail->iov_len = 0;
145
146 if (len & 3) {
147 unsigned int pad = 4 - (len & 3);
148
149 *p = 0;
150 tail->iov_base = (char *)p + (len & 3);
151 tail->iov_len = pad;
152 len += pad;
153 }
154 xdr->buflen += len;
155 xdr->len += len;
156}
157
158void
159xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
160 struct page **pages, unsigned int base, unsigned int len)
161{
162 struct kvec *head = xdr->head;
163 struct kvec *tail = xdr->tail;
164 char *buf = (char *)head->iov_base;
165 unsigned int buflen = head->iov_len;
166
167 head->iov_len = offset;
168
169 xdr->pages = pages;
170 xdr->page_base = base;
171 xdr->page_len = len;
172
173 tail->iov_base = buf + offset;
174 tail->iov_len = buflen - offset;
175
176 xdr->buflen += len;
177}
178
179void
180xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
181 skb_reader_t *desc,
182 skb_read_actor_t copy_actor)
183{
184 struct page **ppage = xdr->pages;
185 unsigned int len, pglen = xdr->page_len;
186 int ret;
187
188 len = xdr->head[0].iov_len;
189 if (base < len) {
190 len -= base;
191 ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
192 if (ret != len || !desc->count)
193 return;
194 base = 0;
195 } else
196 base -= len;
197
198 if (pglen == 0)
199 goto copy_tail;
200 if (base >= pglen) {
201 base -= pglen;
202 goto copy_tail;
203 }
204 if (base || xdr->page_base) {
205 pglen -= base;
206 base += xdr->page_base;
207 ppage += base >> PAGE_CACHE_SHIFT;
208 base &= ~PAGE_CACHE_MASK;
209 }
210 do {
211 char *kaddr;
212
213 len = PAGE_CACHE_SIZE;
214 kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
215 if (base) {
216 len -= base;
217 if (pglen < len)
218 len = pglen;
219 ret = copy_actor(desc, kaddr + base, len);
220 base = 0;
221 } else {
222 if (pglen < len)
223 len = pglen;
224 ret = copy_actor(desc, kaddr, len);
225 }
226 flush_dcache_page(*ppage);
227 kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
228 if (ret != len || !desc->count)
229 return;
230 ppage++;
231 } while ((pglen -= len) != 0);
232copy_tail:
233 len = xdr->tail[0].iov_len;
234 if (base < len)
235 copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
236}
237
238
239int
240xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
241 struct xdr_buf *xdr, unsigned int base, int msgflags)
242{
243 struct page **ppage = xdr->pages;
244 unsigned int len, pglen = xdr->page_len;
245 int err, ret = 0;
246 ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
247
248 len = xdr->head[0].iov_len;
249 if (base < len || (addr != NULL && base == 0)) {
250 struct kvec iov = {
251 .iov_base = xdr->head[0].iov_base + base,
252 .iov_len = len - base,
253 };
254 struct msghdr msg = {
255 .msg_name = addr,
256 .msg_namelen = addrlen,
257 .msg_flags = msgflags,
258 };
259 if (xdr->len > len)
260 msg.msg_flags |= MSG_MORE;
261
262 if (iov.iov_len != 0)
263 err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
264 else
265 err = kernel_sendmsg(sock, &msg, NULL, 0, 0);
266 if (ret == 0)
267 ret = err;
268 else if (err > 0)
269 ret += err;
270 if (err != iov.iov_len)
271 goto out;
272 base = 0;
273 } else
274 base -= len;
275
276 if (pglen == 0)
277 goto copy_tail;
278 if (base >= pglen) {
279 base -= pglen;
280 goto copy_tail;
281 }
282 if (base || xdr->page_base) {
283 pglen -= base;
284 base += xdr->page_base;
285 ppage += base >> PAGE_CACHE_SHIFT;
286 base &= ~PAGE_CACHE_MASK;
287 }
288
289 sendpage = sock->ops->sendpage ? : sock_no_sendpage;
290 do {
291 int flags = msgflags;
292
293 len = PAGE_CACHE_SIZE;
294 if (base)
295 len -= base;
296 if (pglen < len)
297 len = pglen;
298
299 if (pglen != len || xdr->tail[0].iov_len != 0)
300 flags |= MSG_MORE;
301
302 /* Hmm... We might be dealing with highmem pages */
303 if (PageHighMem(*ppage))
304 sendpage = sock_no_sendpage;
305 err = sendpage(sock, *ppage, base, len, flags);
306 if (ret == 0)
307 ret = err;
308 else if (err > 0)
309 ret += err;
310 if (err != len)
311 goto out;
312 base = 0;
313 ppage++;
314 } while ((pglen -= len) != 0);
315copy_tail:
316 len = xdr->tail[0].iov_len;
317 if (base < len) {
318 struct kvec iov = {
319 .iov_base = xdr->tail[0].iov_base + base,
320 .iov_len = len - base,
321 };
322 struct msghdr msg = {
323 .msg_flags = msgflags,
324 };
325 err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
326 if (ret == 0)
327 ret = err;
328 else if (err > 0)
329 ret += err;
330 }
331out:
332 return ret;
333}
334
335
336/*
337 * Helper routines for doing 'memmove' like operations on a struct xdr_buf
338 *
339 * _shift_data_right_pages
340 * @pages: vector of pages containing both the source and dest memory area.
341 * @pgto_base: page vector address of destination
342 * @pgfrom_base: page vector address of source
343 * @len: number of bytes to copy
344 *
345 * Note: the addresses pgto_base and pgfrom_base are both calculated in
346 * the same way:
347 * if a memory area starts at byte 'base' in page 'pages[i]',
348 * then its address is given as (i << PAGE_CACHE_SHIFT) + base
349 * Also note: pgfrom_base must be < pgto_base, but the memory areas
350 * they point to may overlap.
351 */
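/*
 * Illustration of the addressing above (assuming 4 KB pages): byte
 * address 5000 in the page vector means pages[5000 >> PAGE_CACHE_SHIFT],
 * i.e. pages[1], at offset 5000 & ~PAGE_CACHE_MASK = 904.
 */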
352static void
353_shift_data_right_pages(struct page **pages, size_t pgto_base,
354 size_t pgfrom_base, size_t len)
355{
356 struct page **pgfrom, **pgto;
357 char *vfrom, *vto;
358 size_t copy;
359
360 BUG_ON(pgto_base <= pgfrom_base);
361
362 pgto_base += len;
363 pgfrom_base += len;
364
365 pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT);
366 pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT);
367
368 pgto_base &= ~PAGE_CACHE_MASK;
369 pgfrom_base &= ~PAGE_CACHE_MASK;
370
371 do {
372 /* Are any pointers crossing a page boundary? */
373 if (pgto_base == 0) {
374 flush_dcache_page(*pgto);
375 pgto_base = PAGE_CACHE_SIZE;
376 pgto--;
377 }
378 if (pgfrom_base == 0) {
379 pgfrom_base = PAGE_CACHE_SIZE;
380 pgfrom--;
381 }
382
383 copy = len;
384 if (copy > pgto_base)
385 copy = pgto_base;
386 if (copy > pgfrom_base)
387 copy = pgfrom_base;
388 pgto_base -= copy;
389 pgfrom_base -= copy;
390
391 vto = kmap_atomic(*pgto, KM_USER0);
392 vfrom = kmap_atomic(*pgfrom, KM_USER1);
393 memmove(vto + pgto_base, vfrom + pgfrom_base, copy);
394 kunmap_atomic(vfrom, KM_USER1);
395 kunmap_atomic(vto, KM_USER0);
396
397 } while ((len -= copy) != 0);
398 flush_dcache_page(*pgto);
399}
400
401/*
402 * _copy_to_pages
403 * @pages: array of pages
404 * @pgbase: page vector address of destination
405 * @p: pointer to source data
406 * @len: length
407 *
408 * Copies data from an arbitrary memory location into an array of pages
409 * The copy is assumed to be non-overlapping.
410 */
411static void
412_copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
413{
414 struct page **pgto;
415 char *vto;
416 size_t copy;
417
418 pgto = pages + (pgbase >> PAGE_CACHE_SHIFT);
419 pgbase &= ~PAGE_CACHE_MASK;
420
421 do {
422 copy = PAGE_CACHE_SIZE - pgbase;
423 if (copy > len)
424 copy = len;
425
426 vto = kmap_atomic(*pgto, KM_USER0);
427 memcpy(vto + pgbase, p, copy);
428 kunmap_atomic(vto, KM_USER0);
429
430 pgbase += copy;
431 if (pgbase == PAGE_CACHE_SIZE) {
432 flush_dcache_page(*pgto);
433 pgbase = 0;
434 pgto++;
435 }
436 p += copy;
437
438 } while ((len -= copy) != 0);
439 flush_dcache_page(*pgto);
440}
441
442/*
443 * _copy_from_pages
444 * @p: pointer to destination
445 * @pages: array of pages
446 * @pgbase: offset of source data
447 * @len: length
448 *
449 * Copies data into an arbitrary memory location from an array of pages
450 * The copy is assumed to be non-overlapping.
451 */
452static void
453_copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
454{
455 struct page **pgfrom;
456 char *vfrom;
457 size_t copy;
458
459 pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT);
460 pgbase &= ~PAGE_CACHE_MASK;
461
462 do {
463 copy = PAGE_CACHE_SIZE - pgbase;
464 if (copy > len)
465 copy = len;
466
467 vfrom = kmap_atomic(*pgfrom, KM_USER0);
468 memcpy(p, vfrom + pgbase, copy);
469 kunmap_atomic(vfrom, KM_USER0);
470
471 pgbase += copy;
472 if (pgbase == PAGE_CACHE_SIZE) {
473 pgbase = 0;
474 pgfrom++;
475 }
476 p += copy;
477
478 } while ((len -= copy) != 0);
479}
480
481/*
482 * xdr_shrink_bufhead
483 * @buf: xdr_buf
484 * @len: bytes to remove from buf->head[0]
485 *
486 * Shrinks XDR buffer's header kvec buf->head[0] by
487 * 'len' bytes. The extra data is not lost, but is instead
488 * moved into the inlined pages and/or the tail.
489 */
490static void
491xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
492{
493 struct kvec *head, *tail;
494 size_t copy, offs;
495 unsigned int pglen = buf->page_len;
496
497 tail = buf->tail;
498 head = buf->head;
499 BUG_ON (len > head->iov_len);
500
501 /* Shift the tail first */
502 if (tail->iov_len != 0) {
503 if (tail->iov_len > len) {
504 copy = tail->iov_len - len;
505 memmove((char *)tail->iov_base + len,
506 tail->iov_base, copy);
507 }
508 /* Copy from the inlined pages into the tail */
509 copy = len;
510 if (copy > pglen)
511 copy = pglen;
512 offs = len - copy;
513 if (offs >= tail->iov_len)
514 copy = 0;
515 else if (copy > tail->iov_len - offs)
516 copy = tail->iov_len - offs;
517 if (copy != 0)
518 _copy_from_pages((char *)tail->iov_base + offs,
519 buf->pages,
520 buf->page_base + pglen + offs - len,
521 copy);
522 /* Do we also need to copy data from the head into the tail ? */
523 if (len > pglen) {
524 offs = copy = len - pglen;
525 if (copy > tail->iov_len)
526 copy = tail->iov_len;
527 memcpy(tail->iov_base,
528 (char *)head->iov_base +
529 head->iov_len - offs,
530 copy);
531 }
532 }
533 /* Now handle pages */
534 if (pglen != 0) {
535 if (pglen > len)
536 _shift_data_right_pages(buf->pages,
537 buf->page_base + len,
538 buf->page_base,
539 pglen - len);
540 copy = len;
541 if (len > pglen)
542 copy = pglen;
543 _copy_to_pages(buf->pages, buf->page_base,
544 (char *)head->iov_base + head->iov_len - len,
545 copy);
546 }
547 head->iov_len -= len;
548 buf->buflen -= len;
549 /* Have we truncated the message? */
550 if (buf->len > buf->buflen)
551 buf->len = buf->buflen;
552}
553
554/*
555 * xdr_shrink_pagelen
556 * @buf: xdr_buf
557 * @len: bytes to remove from buf->pages
558 *
559 * Shrinks XDR buffer's page array buf->pages by
560 * 'len' bytes. The extra data is not lost, but is instead
561 * moved into the tail.
562 */
563static void
564xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
565{
566 struct kvec *tail;
567 size_t copy;
568 char *p;
569 unsigned int pglen = buf->page_len;
570
571 tail = buf->tail;
572 BUG_ON (len > pglen);
573
574 /* Shift the tail first */
575 if (tail->iov_len != 0) {
576 p = (char *)tail->iov_base + len;
577 if (tail->iov_len > len) {
578 copy = tail->iov_len - len;
579 memmove(p, tail->iov_base, copy);
580 } else
581 buf->buflen -= len;
582 /* Copy from the inlined pages into the tail */
583 copy = len;
584 if (copy > tail->iov_len)
585 copy = tail->iov_len;
586 _copy_from_pages((char *)tail->iov_base,
587 buf->pages, buf->page_base + pglen - len,
588 copy);
589 }
590 buf->page_len -= len;
591 buf->buflen -= len;
592 /* Have we truncated the message? */
593 if (buf->len > buf->buflen)
594 buf->len = buf->buflen;
595}
596
597void
598xdr_shift_buf(struct xdr_buf *buf, size_t len)
599{
600 xdr_shrink_bufhead(buf, len);
601}
602
603/**
604 * xdr_init_encode - Initialize a struct xdr_stream for sending data.
605 * @xdr: pointer to xdr_stream struct
606 * @buf: pointer to XDR buffer in which to encode data
607 * @p: current pointer inside XDR buffer
608 *
609 * Note: at the moment the RPC client only passes the length of our
610 * scratch buffer in the xdr_buf's header kvec. Previously this
611 * meant we needed to call xdr_adjust_iovec() after encoding the
612 * data. With the new scheme, the xdr_stream manages the details
613 * of the buffer length, and takes care of adjusting the kvec
614 * length for us.
615 */
616void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
617{
618 struct kvec *iov = buf->head;
619
620 xdr->buf = buf;
621 xdr->iov = iov;
622 xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len);
623 buf->len = iov->iov_len = (char *)p - (char *)iov->iov_base;
624 xdr->p = p;
625}
626EXPORT_SYMBOL(xdr_init_encode);
627
628/**
629 * xdr_reserve_space - Reserve buffer space for sending
630 * @xdr: pointer to xdr_stream
631 * @nbytes: number of bytes to reserve
632 *
633 * Checks that we have enough buffer space to encode 'nbytes' more
634 * bytes of data. If so, update the total xdr_buf length, and
635 * adjust the length of the current kvec.
636 */
637uint32_t * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
638{
639 uint32_t *p = xdr->p;
640 uint32_t *q;
641
642 /* align nbytes on the next 32-bit boundary */
643 nbytes += 3;
644 nbytes &= ~3;
645 q = p + (nbytes >> 2);
646 if (unlikely(q > xdr->end || q < p))
647 return NULL;
648 xdr->p = q;
649 xdr->iov->iov_len += nbytes;
650 xdr->buf->len += nbytes;
651 return p;
652}
653EXPORT_SYMBOL(xdr_reserve_space);
654
655/**
656 * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
657 * @xdr: pointer to xdr_stream
658 * @pages: list of pages
659 * @base: offset of first byte
660 * @len: length of data in bytes
661 *
662 */
663void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base,
664 unsigned int len)
665{
666 struct xdr_buf *buf = xdr->buf;
667 struct kvec *iov = buf->tail;
668 buf->pages = pages;
669 buf->page_base = base;
670 buf->page_len = len;
671
672 iov->iov_base = (char *)xdr->p;
673 iov->iov_len = 0;
674 xdr->iov = iov;
675
676 if (len & 3) {
677 unsigned int pad = 4 - (len & 3);
678
679 BUG_ON(xdr->p >= xdr->end);
680 iov->iov_base = (char *)xdr->p + (len & 3);
681 iov->iov_len += pad;
682 len += pad;
683 *xdr->p++ = 0;
684 }
685 buf->buflen += len;
686 buf->len += len;
687}
688EXPORT_SYMBOL(xdr_write_pages);
689
690/**
691 * xdr_init_decode - Initialize an xdr_stream for decoding data.
692 * @xdr: pointer to xdr_stream struct
693 * @buf: pointer to XDR buffer from which to decode data
694 * @p: current pointer inside XDR buffer
695 */
696void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
697{
698 struct kvec *iov = buf->head;
699 unsigned int len = iov->iov_len;
700
701 if (len > buf->len)
702 len = buf->len;
703 xdr->buf = buf;
704 xdr->iov = iov;
705 xdr->p = p;
706 xdr->end = (uint32_t *)((char *)iov->iov_base + len);
707}
708EXPORT_SYMBOL(xdr_init_decode);
709
710/**
711 * xdr_inline_decode - Retrieve non-page XDR data to decode
712 * @xdr: pointer to xdr_stream struct
713 * @nbytes: number of bytes of data to decode
714 *
715 * Check if the input buffer is long enough to enable us to decode
716 * 'nbytes' more bytes of data starting at the current position.
717 * If so return the current pointer, then update the current
718 * pointer position.
719 */
720uint32_t * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
721{
722 uint32_t *p = xdr->p;
723 uint32_t *q = p + XDR_QUADLEN(nbytes);
724
725 if (unlikely(q > xdr->end || q < p))
726 return NULL;
727 xdr->p = q;
728 return p;
729}
730EXPORT_SYMBOL(xdr_inline_decode);
731
732/**
733 * xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position
734 * @xdr: pointer to xdr_stream struct
735 * @len: number of bytes of page data
736 *
737 * Moves data beyond the current pointer position from the XDR head[] buffer
738 * into the page list. Any data that lies beyond current position + "len"
739 * bytes is moved into the XDR tail[]. The current pointer is then
740 * repositioned at the beginning of the XDR tail.
741 */
742void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
743{
744 struct xdr_buf *buf = xdr->buf;
745 struct kvec *iov;
746 ssize_t shift;
747 unsigned int end;
748 int padding;
749
750 /* Realign pages to current pointer position */
751 iov = buf->head;
752 shift = iov->iov_len + (char *)iov->iov_base - (char *)xdr->p;
753 if (shift > 0)
754 xdr_shrink_bufhead(buf, shift);
755
756 /* Truncate page data and move it into the tail */
757 if (buf->page_len > len)
758 xdr_shrink_pagelen(buf, buf->page_len - len);
759 padding = (XDR_QUADLEN(len) << 2) - len;
760 xdr->iov = iov = buf->tail;
761 /* Compute remaining message length. */
762 end = iov->iov_len;
763 shift = buf->buflen - buf->len;
764 if (shift < end)
765 end -= shift;
766 else if (shift > 0)
767 end = 0;
768 /*
769 * Position current pointer at beginning of tail, and
770 * set remaining message length.
771 */
772 xdr->p = (uint32_t *)((char *)iov->iov_base + padding);
773 xdr->end = (uint32_t *)((char *)iov->iov_base + end);
774}
775EXPORT_SYMBOL(xdr_read_pages);
776
777static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0};
778
779void
780xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
781{
782 buf->head[0] = *iov;
783 buf->tail[0] = empty_iov;
784 buf->page_len = 0;
785 buf->buflen = buf->len = iov->iov_len;
786}
787
788/* Sets subiov to the intersection of iov with the buffer of length len
789 * starting base bytes after iov. Indicates empty intersection by setting
790 * length of subiov to zero. Decrements len by length of subiov, sets base
791 * to zero (or decrements it by length of iov if subiov is empty). */
792static void
793iov_subsegment(struct kvec *iov, struct kvec *subiov, int *base, int *len)
794{
795 if (*base > iov->iov_len) {
796 subiov->iov_base = NULL;
797 subiov->iov_len = 0;
798 *base -= iov->iov_len;
799 } else {
800 subiov->iov_base = iov->iov_base + *base;
801 subiov->iov_len = min(*len, (int)iov->iov_len - *base);
802 *base = 0;
803 }
804 *len -= subiov->iov_len;
805}
806
807/* Sets subbuf to the portion of buf of length len beginning base bytes
808 * from the start of buf. Returns -1 if base or length are out of bounds. */
809int
810xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
811 int base, int len)
812{
813 int i;
814
815 subbuf->buflen = subbuf->len = len;
816 iov_subsegment(buf->head, subbuf->head, &base, &len);
817
818 if (base < buf->page_len) {
819 i = (base + buf->page_base) >> PAGE_CACHE_SHIFT;
820 subbuf->pages = &buf->pages[i];
821 subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK;
822 subbuf->page_len = min((int)buf->page_len - base, len);
823 len -= subbuf->page_len;
824 base = 0;
825 } else {
826 base -= buf->page_len;
827 subbuf->page_len = 0;
828 }
829
830 iov_subsegment(buf->tail, subbuf->tail, &base, &len);
831 if (base || len)
832 return -1;
833 return 0;
834}
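/*
 * For instance (hypothetical sizes): with a 100-byte head and 4096 bytes
 * of page data, xdr_buf_subsegment(buf, &sub, 80, 40) produces a subbuf
 * whose head covers the last 20 bytes of buf->head[0] and whose page
 * area covers the first 20 bytes of buf->pages.
 */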
835
836/* obj is assumed to point to allocated memory of size at least len: */
837int
838read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
839{
840 struct xdr_buf subbuf;
841 int this_len;
842 int status;
843
844 status = xdr_buf_subsegment(buf, &subbuf, base, len);
845 if (status)
846 goto out;
847 this_len = min(len, (int)subbuf.head[0].iov_len);
848 memcpy(obj, subbuf.head[0].iov_base, this_len);
849 len -= this_len;
850 obj += this_len;
851 this_len = min(len, (int)subbuf.page_len);
852 if (this_len)
853 _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len);
854 len -= this_len;
855 obj += this_len;
856 this_len = min(len, (int)subbuf.tail[0].iov_len);
857 memcpy(obj, subbuf.tail[0].iov_base, this_len);
858out:
859 return status;
860}
861
862static int
863read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
864{
865 u32 raw;
866 int status;
867
868 status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
869 if (status)
870 return status;
871 *obj = ntohl(raw);
872 return 0;
873}
874
875/* If the netobj starting offset bytes from the start of xdr_buf is contained
876 * entirely in the head or the tail, set object to point to it; otherwise
877 * try to find space for it at the end of the tail, copy it there, and
878 * set obj to point to it. */
879int
880xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
881{
882 u32 tail_offset = buf->head[0].iov_len + buf->page_len;
883 u32 obj_end_offset;
884
885 if (read_u32_from_xdr_buf(buf, offset, &obj->len))
886 goto out;
887 obj_end_offset = offset + 4 + obj->len;
888
889 if (obj_end_offset <= buf->head[0].iov_len) {
890 /* The obj is contained entirely in the head: */
891 obj->data = buf->head[0].iov_base + offset + 4;
892 } else if (offset + 4 >= tail_offset) {
893 if (obj_end_offset - tail_offset
894 > buf->tail[0].iov_len)
895 goto out;
896 /* The obj is contained entirely in the tail: */
897 obj->data = buf->tail[0].iov_base
898 + offset - tail_offset + 4;
899 } else {
900 /* use end of tail as storage for obj:
901 * (We don't copy to the beginning because then we'd have
902 * to worry about doing a potentially overlapping copy.
903 * This assumes the object is at most half the length of the
904 * tail.) */
905 if (obj->len > buf->tail[0].iov_len)
906 goto out;
907 obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len -
908 obj->len;
909 if (read_bytes_from_xdr_buf(buf, offset + 4,
910 obj->data, obj->len))
911 goto out;
912
913 }
914 return 0;
915out:
916 return -1;
917}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
new file mode 100644
index 000000000000..c74a6bb94074
--- /dev/null
+++ b/net/sunrpc/xprt.c
@@ -0,0 +1,1678 @@
1/*
2 * linux/net/sunrpc/xprt.c
3 *
4 * This is a generic RPC call interface supporting congestion avoidance
5 * and asynchronous calls.
6 *
7 * The interface works like this:
8 *
9 * - When a process places a call, it allocates a request slot if
10 * one is available. Otherwise, it sleeps on the backlog queue
11 * (xprt_reserve).
12 * - Next, the caller puts together the RPC message, stuffs it into
13 * the request struct, and calls xprt_call().
14 * - xprt_call transmits the message and installs the caller on the
15 * socket's wait list. At the same time, it installs a timer that
16 * is run after the packet's timeout has expired.
17 * - When a packet arrives, the data_ready handler walks the list of
18 * pending requests for that socket. If a matching XID is found, the
19 * caller is woken up, and the timer removed.
20 * - When no reply arrives within the timeout interval, the timer is
21 * fired by the kernel and runs xprt_timer(). It either adjusts the
22 * timeout values (minor timeout) or wakes up the caller with a status
23 * of -ETIMEDOUT.
24 * - When the caller receives a notification from RPC that a reply arrived,
25 * it should release the RPC slot, and process the reply.
26 * If the call timed out, it may choose to retry the operation by
27 * adjusting the initial timeout value, and simply calling rpc_call
28 * again.
29 *
30 * Support for async RPC is done through a set of RPC-specific scheduling
31 * primitives that `transparently' work for processes as well as async
32 * tasks that rely on callbacks.
33 *
34 * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de>
35 *
36 * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
37 * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
38 * TCP NFS related read + write fixes
39 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
40 *
41 * Rewrite of large parts of the code in order to stabilize TCP stuff.
42 * Fix behaviour when socket buffer is full.
43 * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
44 */
45
46#include <linux/types.h>
47#include <linux/slab.h>
48#include <linux/capability.h>
49#include <linux/sched.h>
50#include <linux/errno.h>
51#include <linux/socket.h>
52#include <linux/in.h>
53#include <linux/net.h>
54#include <linux/mm.h>
55#include <linux/udp.h>
56#include <linux/tcp.h>
57#include <linux/sunrpc/clnt.h>
58#include <linux/file.h>
59#include <linux/workqueue.h>
60#include <linux/random.h>
61
62#include <net/sock.h>
63#include <net/checksum.h>
64#include <net/udp.h>
65#include <net/tcp.h>
66
67/*
68 * Local variables
69 */
70
71#ifdef RPC_DEBUG
72# undef RPC_DEBUG_DATA
73# define RPCDBG_FACILITY RPCDBG_XPRT
74#endif
75
76#define XPRT_MAX_BACKOFF (8)
77#define XPRT_IDLE_TIMEOUT (5*60*HZ)
78#define XPRT_MAX_RESVPORT (800)
79
80/*
81 * Local functions
82 */
83static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
84static inline void do_xprt_reserve(struct rpc_task *);
85static void xprt_disconnect(struct rpc_xprt *);
86static void xprt_connect_status(struct rpc_task *task);
87static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
88 struct rpc_timeout *to);
89static struct socket *xprt_create_socket(struct rpc_xprt *, int, int);
90static void xprt_bind_socket(struct rpc_xprt *, struct socket *);
91static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
92
93static int xprt_clear_backlog(struct rpc_xprt *xprt);
94
95#ifdef RPC_DEBUG_DATA
96/*
97 * Print the buffer contents (first 128 bytes only--just enough for
98 * diropres return).
99 */
100static void
101xprt_pktdump(char *msg, u32 *packet, unsigned int count)
102{
103 u8 *buf = (u8 *) packet;
104 int j;
105
106 dprintk("RPC: %s\n", msg);
107 for (j = 0; j < count && j < 128; j += 4) {
108 if (!(j & 31)) {
109 if (j)
110 dprintk("\n");
111 dprintk("0x%04x ", j);
112 }
113 dprintk("%02x%02x%02x%02x ",
114 buf[j], buf[j+1], buf[j+2], buf[j+3]);
115 }
116 dprintk("\n");
117}
118#else
119static inline void
120xprt_pktdump(char *msg, u32 *packet, unsigned int count)
121{
122 /* NOP */
123}
124#endif
125
126/*
127 * Look up RPC transport given an INET socket
128 */
129static inline struct rpc_xprt *
130xprt_from_sock(struct sock *sk)
131{
132 return (struct rpc_xprt *) sk->sk_user_data;
133}
134
135/*
136 * Serialize write access to sockets, in order to prevent different
137 * requests from interfering with each other.
138 * Also prevents TCP socket connects from colliding with writes.
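 * The write lock is the XPRT_LOCKED bit in xprt->sockstate; the current
 * holder is remembered in xprt->snd_task, so a task that already owns the
 * lock is simply told it succeeded again.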
139 */
140static int
141__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
142{
143 struct rpc_rqst *req = task->tk_rqstp;
144
145 if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) {
146 if (task == xprt->snd_task)
147 return 1;
148 if (task == NULL)
149 return 0;
150 goto out_sleep;
151 }
152 if (xprt->nocong || __xprt_get_cong(xprt, task)) {
153 xprt->snd_task = task;
154 if (req) {
155 req->rq_bytes_sent = 0;
156 req->rq_ntrans++;
157 }
158 return 1;
159 }
160 smp_mb__before_clear_bit();
161 clear_bit(XPRT_LOCKED, &xprt->sockstate);
162 smp_mb__after_clear_bit();
163out_sleep:
164 dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt);
165 task->tk_timeout = 0;
166 task->tk_status = -EAGAIN;
167 if (req && req->rq_ntrans)
168 rpc_sleep_on(&xprt->resend, task, NULL, NULL);
169 else
170 rpc_sleep_on(&xprt->sending, task, NULL, NULL);
171 return 0;
172}
173
174static inline int
175xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
176{
177 int retval;
178
179 spin_lock_bh(&xprt->sock_lock);
180 retval = __xprt_lock_write(xprt, task);
181 spin_unlock_bh(&xprt->sock_lock);
182 return retval;
183}
184
185
186static void
187__xprt_lock_write_next(struct rpc_xprt *xprt)
188{
189 struct rpc_task *task;
190
191 if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate))
192 return;
193 if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
194 goto out_unlock;
195 task = rpc_wake_up_next(&xprt->resend);
196 if (!task) {
197 task = rpc_wake_up_next(&xprt->sending);
198 if (!task)
199 goto out_unlock;
200 }
201 if (xprt->nocong || __xprt_get_cong(xprt, task)) {
202 struct rpc_rqst *req = task->tk_rqstp;
203 xprt->snd_task = task;
204 if (req) {
205 req->rq_bytes_sent = 0;
206 req->rq_ntrans++;
207 }
208 return;
209 }
210out_unlock:
211 smp_mb__before_clear_bit();
212 clear_bit(XPRT_LOCKED, &xprt->sockstate);
213 smp_mb__after_clear_bit();
214}
215
216/*
217 * Releases the socket for use by other requests.
218 */
219static void
220__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
221{
222 if (xprt->snd_task == task) {
223 xprt->snd_task = NULL;
224 smp_mb__before_clear_bit();
225 clear_bit(XPRT_LOCKED, &xprt->sockstate);
226 smp_mb__after_clear_bit();
227 __xprt_lock_write_next(xprt);
228 }
229}
230
231static inline void
232xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
233{
234 spin_lock_bh(&xprt->sock_lock);
235 __xprt_release_write(xprt, task);
236 spin_unlock_bh(&xprt->sock_lock);
237}
238
239/*
240 * Write data to socket.
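 * Returns the number of bytes sent or a negative errno.  For UDP the peer
 * address must be passed explicitly; a stream socket is already connected,
 * so no address is needed.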
241 */
242static inline int
243xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
244{
245 struct socket *sock = xprt->sock;
246 struct xdr_buf *xdr = &req->rq_snd_buf;
247 struct sockaddr *addr = NULL;
248 int addrlen = 0;
249 unsigned int skip;
250 int result;
251
252 if (!sock)
253 return -ENOTCONN;
254
255 xprt_pktdump("packet data:",
256 req->rq_svec->iov_base,
257 req->rq_svec->iov_len);
258
259 /* For UDP, we need to provide an address */
260 if (!xprt->stream) {
261 addr = (struct sockaddr *) &xprt->addr;
262 addrlen = sizeof(xprt->addr);
263 }
264	/* Don't repeat bytes we have already sent */
265 skip = req->rq_bytes_sent;
266
267 clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
268 result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);
269
270 dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);
271
272 if (result >= 0)
273 return result;
274
275 switch (result) {
276 case -ECONNREFUSED:
277 /* When the server has died, an ICMP port unreachable message
278 * prompts ECONNREFUSED.
279 */
280 case -EAGAIN:
281 break;
282 case -ECONNRESET:
283 case -ENOTCONN:
284 case -EPIPE:
285 /* connection broken */
286 if (xprt->stream)
287 result = -ENOTCONN;
288 break;
289 default:
290 printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
291 }
292 return result;
293}
294
295/*
296 * Van Jacobson congestion avoidance. Check if the congestion window
297 * overflowed. Put the task to sleep if this is the case.
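 * Each request in flight accounts for RPC_CWNDSCALE units of xprt->cong
 * (flagged by req->rq_cong); the transport counts as congested once cong
 * has caught up with the current window cwnd.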
298 */
299static int
300__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
301{
302 struct rpc_rqst *req = task->tk_rqstp;
303
304 if (req->rq_cong)
305 return 1;
306 dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",
307 task->tk_pid, xprt->cong, xprt->cwnd);
308 if (RPCXPRT_CONGESTED(xprt))
309 return 0;
310 req->rq_cong = 1;
311 xprt->cong += RPC_CWNDSCALE;
312 return 1;
313}
314
315/*
316 * Adjust the congestion window, and wake up the next task
317 * that has been sleeping due to congestion
318 */
319static void
320__xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
321{
322 if (!req->rq_cong)
323 return;
324 req->rq_cong = 0;
325 xprt->cong -= RPC_CWNDSCALE;
326 __xprt_lock_write_next(xprt);
327}
328
329/*
330 * Adjust RPC congestion window
331 * We use a time-smoothed congestion estimator to avoid heavy oscillation.
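 * On a successful reply the window grows by about RPC_CWNDSCALE per
 * window's worth of replies; on a timeout it is halved (but never drops
 * below RPC_CWNDSCALE), i.e. additive increase, multiplicative decrease.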
332 */
333static void
334xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
335{
336 unsigned long cwnd;
337
338 cwnd = xprt->cwnd;
339 if (result >= 0 && cwnd <= xprt->cong) {
340 /* The (cwnd >> 1) term makes sure
341 * the result gets rounded properly. */
342 cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
343 if (cwnd > RPC_MAXCWND(xprt))
344 cwnd = RPC_MAXCWND(xprt);
345 __xprt_lock_write_next(xprt);
346 } else if (result == -ETIMEDOUT) {
347 cwnd >>= 1;
348 if (cwnd < RPC_CWNDSCALE)
349 cwnd = RPC_CWNDSCALE;
350 }
351 dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n",
352 xprt->cong, xprt->cwnd, cwnd);
353 xprt->cwnd = cwnd;
354}
355
356/*
357 * Reset the major timeout value
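 * The major timeout is the deadline after which xprt_adjust_timeout()
 * reports -ETIMEDOUT instead of merely bumping the per-retry value: the
 * initial timeout extended over to_retries retransmissions, either
 * exponentially or by to_increment per retry, and capped at to_maxval.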
358 */
359static void xprt_reset_majortimeo(struct rpc_rqst *req)
360{
361 struct rpc_timeout *to = &req->rq_xprt->timeout;
362
363 req->rq_majortimeo = req->rq_timeout;
364 if (to->to_exponential)
365 req->rq_majortimeo <<= to->to_retries;
366 else
367 req->rq_majortimeo += to->to_increment * to->to_retries;
368 if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0)
369 req->rq_majortimeo = to->to_maxval;
370 req->rq_majortimeo += jiffies;
371}
372
373/*
374 * Adjust timeout values etc for next retransmit
375 */
376int xprt_adjust_timeout(struct rpc_rqst *req)
377{
378 struct rpc_xprt *xprt = req->rq_xprt;
379 struct rpc_timeout *to = &xprt->timeout;
380 int status = 0;
381
382 if (time_before(jiffies, req->rq_majortimeo)) {
383 if (to->to_exponential)
384 req->rq_timeout <<= 1;
385 else
386 req->rq_timeout += to->to_increment;
387 if (to->to_maxval && req->rq_timeout >= to->to_maxval)
388 req->rq_timeout = to->to_maxval;
389 req->rq_retries++;
390 pprintk("RPC: %lu retrans\n", jiffies);
391 } else {
392 req->rq_timeout = to->to_initval;
393 req->rq_retries = 0;
394 xprt_reset_majortimeo(req);
395 /* Reset the RTT counters == "slow start" */
396 spin_lock_bh(&xprt->sock_lock);
397 rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
398 spin_unlock_bh(&xprt->sock_lock);
399 pprintk("RPC: %lu timeout\n", jiffies);
400 status = -ETIMEDOUT;
401 }
402
403 if (req->rq_timeout == 0) {
404 printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n");
405 req->rq_timeout = 5 * HZ;
406 }
407 return status;
408}
409
410/*
411 * Close down a transport socket
412 */
413static void
414xprt_close(struct rpc_xprt *xprt)
415{
416 struct socket *sock = xprt->sock;
417 struct sock *sk = xprt->inet;
418
419 if (!sk)
420 return;
421
422 write_lock_bh(&sk->sk_callback_lock);
423 xprt->inet = NULL;
424 xprt->sock = NULL;
425
426 sk->sk_user_data = NULL;
427 sk->sk_data_ready = xprt->old_data_ready;
428 sk->sk_state_change = xprt->old_state_change;
429 sk->sk_write_space = xprt->old_write_space;
430 write_unlock_bh(&sk->sk_callback_lock);
431
432 sk->sk_no_check = 0;
433
434 sock_release(sock);
435}
436
437static void
438xprt_socket_autoclose(void *args)
439{
440 struct rpc_xprt *xprt = (struct rpc_xprt *)args;
441
442 xprt_disconnect(xprt);
443 xprt_close(xprt);
444 xprt_release_write(xprt, NULL);
445}
446
447/*
448 * Mark a transport as disconnected
449 */
450static void
451xprt_disconnect(struct rpc_xprt *xprt)
452{
453 dprintk("RPC: disconnected transport %p\n", xprt);
454 spin_lock_bh(&xprt->sock_lock);
455 xprt_clear_connected(xprt);
456 rpc_wake_up_status(&xprt->pending, -ENOTCONN);
457 spin_unlock_bh(&xprt->sock_lock);
458}
459
460/*
461 * Used to allow disconnection when we've been idle
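 * This runs in timer context, so the actual close is handed off to keventd
 * via the task_cleanup work; if a connect is already in progress we just
 * drop the write lock again instead.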
462 */
463static void
464xprt_init_autodisconnect(unsigned long data)
465{
466 struct rpc_xprt *xprt = (struct rpc_xprt *)data;
467
468 spin_lock(&xprt->sock_lock);
469 if (!list_empty(&xprt->recv) || xprt->shutdown)
470 goto out_abort;
471 if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate))
472 goto out_abort;
473 spin_unlock(&xprt->sock_lock);
474 /* Let keventd close the socket */
475 if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0)
476 xprt_release_write(xprt, NULL);
477 else
478 schedule_work(&xprt->task_cleanup);
479 return;
480out_abort:
481 spin_unlock(&xprt->sock_lock);
482}
483
484static void xprt_socket_connect(void *args)
485{
486 struct rpc_xprt *xprt = (struct rpc_xprt *)args;
487 struct socket *sock = xprt->sock;
488 int status = -EIO;
489
490 if (xprt->shutdown || xprt->addr.sin_port == 0)
491 goto out;
492
493 /*
494 * Start by resetting any existing state
495 */
496 xprt_close(xprt);
497 sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport);
498 if (sock == NULL) {
499 /* couldn't create socket or bind to reserved port;
500 * this is likely a permanent error, so cause an abort */
501 goto out;
502 }
503 xprt_bind_socket(xprt, sock);
504 xprt_sock_setbufsize(xprt);
505
506 status = 0;
507 if (!xprt->stream)
508 goto out;
509
510 /*
511 * Tell the socket layer to start connecting...
512 */
513 status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
514 sizeof(xprt->addr), O_NONBLOCK);
515 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
516 xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
517 if (status < 0) {
518 switch (status) {
519 case -EINPROGRESS:
520 case -EALREADY:
521 goto out_clear;
522 }
523 }
524out:
525 if (status < 0)
526 rpc_wake_up_status(&xprt->pending, status);
527 else
528 rpc_wake_up(&xprt->pending);
529out_clear:
530 smp_mb__before_clear_bit();
531 clear_bit(XPRT_CONNECTING, &xprt->sockstate);
532 smp_mb__after_clear_bit();
533}
534
535/*
536 * Attempt to connect a transport socket. Only stream (TCP) sockets
537 * actually perform a connect; UDP sockets are simply created and bound.
538 */
539void xprt_connect(struct rpc_task *task)
540{
541 struct rpc_xprt *xprt = task->tk_xprt;
542
543 dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
544 xprt, (xprt_connected(xprt) ? "is" : "is not"));
545
546 if (xprt->shutdown) {
547 task->tk_status = -EIO;
548 return;
549 }
550 if (!xprt->addr.sin_port) {
551 task->tk_status = -EIO;
552 return;
553 }
554 if (!xprt_lock_write(xprt, task))
555 return;
556 if (xprt_connected(xprt))
557 goto out_write;
558
559 if (task->tk_rqstp)
560 task->tk_rqstp->rq_bytes_sent = 0;
561
562 task->tk_timeout = RPC_CONNECT_TIMEOUT;
563 rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
564 if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) {
565 /* Note: if we are here due to a dropped connection
566 * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ
567 * seconds
568 */
569 if (xprt->sock != NULL)
570 schedule_delayed_work(&xprt->sock_connect,
571 RPC_REESTABLISH_TIMEOUT);
572 else
573 schedule_work(&xprt->sock_connect);
574 }
575 return;
576 out_write:
577 xprt_release_write(xprt, task);
578}
579
580/*
581 * We arrive here when awoken from waiting on connection establishment.
582 */
583static void
584xprt_connect_status(struct rpc_task *task)
585{
586 struct rpc_xprt *xprt = task->tk_xprt;
587
588 if (task->tk_status >= 0) {
589 dprintk("RPC: %4d xprt_connect_status: connection established\n",
590 task->tk_pid);
591 return;
592 }
593
594 /* if soft mounted, just cause this RPC to fail */
595 if (RPC_IS_SOFT(task))
596 task->tk_status = -EIO;
597
598 switch (task->tk_status) {
599 case -ECONNREFUSED:
600 case -ECONNRESET:
601 case -ENOTCONN:
602 return;
603 case -ETIMEDOUT:
604 dprintk("RPC: %4d xprt_connect_status: timed out\n",
605 task->tk_pid);
606 break;
607 default:
608 printk(KERN_ERR "RPC: error %d connecting to server %s\n",
609 -task->tk_status, task->tk_client->cl_server);
610 }
611 xprt_release_write(xprt, task);
612}
613
614/*
615 * Look up the RPC request corresponding to a reply, and then lock it.
616 */
617static inline struct rpc_rqst *
618xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
619{
620 struct list_head *pos;
621 struct rpc_rqst *req = NULL;
622
623 list_for_each(pos, &xprt->recv) {
624 struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
625 if (entry->rq_xid == xid) {
626 req = entry;
627 break;
628 }
629 }
630 return req;
631}
632
633/*
634 * Complete reply received.
635 * The TCP code relies on us to remove the request from the xprt->recv list.
636 */
637static void
638xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
639{
640 struct rpc_task *task = req->rq_task;
641 struct rpc_clnt *clnt = task->tk_client;
642
643 /* Adjust congestion window */
644 if (!xprt->nocong) {
645 unsigned timer = task->tk_msg.rpc_proc->p_timer;
646 xprt_adjust_cwnd(xprt, copied);
647 __xprt_put_cong(xprt, req);
648 if (timer) {
649 if (req->rq_ntrans == 1)
650 rpc_update_rtt(clnt->cl_rtt, timer,
651 (long)jiffies - req->rq_xtime);
652 rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1);
653 }
654 }
655
656#ifdef RPC_PROFILE
657 /* Profile only reads for now */
658 if (copied > 1024) {
659 static unsigned long nextstat;
660 static unsigned long pkt_rtt, pkt_len, pkt_cnt;
661
662 pkt_cnt++;
663 pkt_len += req->rq_slen + copied;
664 pkt_rtt += jiffies - req->rq_xtime;
665 if (time_before(nextstat, jiffies)) {
666 printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd);
667 printk("RPC: %ld %ld %ld %ld stat\n",
668 jiffies, pkt_cnt, pkt_len, pkt_rtt);
669 pkt_rtt = pkt_len = pkt_cnt = 0;
670 nextstat = jiffies + 5 * HZ;
671 }
672 }
673#endif
674
675 dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied);
676 list_del_init(&req->rq_list);
677 req->rq_received = req->rq_private_buf.len = copied;
678
679 /* ... and wake up the process. */
680 rpc_wake_up_task(task);
681 return;
682}
683
684static size_t
685skb_read_bits(skb_reader_t *desc, void *to, size_t len)
686{
687 if (len > desc->count)
688 len = desc->count;
689 if (skb_copy_bits(desc->skb, desc->offset, to, len))
690 return 0;
691 desc->count -= len;
692 desc->offset += len;
693 return len;
694}
695
696static size_t
697skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
698{
699 unsigned int csum2, pos;
700
701 if (len > desc->count)
702 len = desc->count;
703 pos = desc->offset;
704 csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0);
705 desc->csum = csum_block_add(desc->csum, csum2, pos);
706 desc->count -= len;
707 desc->offset += len;
708 return len;
709}
710
711/*
712 * We have set things up such that we perform the checksum of the UDP
713 * packet in parallel with the copies into the RPC client iovec. -DaveM
714 */
715int
716csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
717{
718 skb_reader_t desc;
719
720 desc.skb = skb;
721 desc.offset = sizeof(struct udphdr);
722 desc.count = skb->len - desc.offset;
723
724 if (skb->ip_summed == CHECKSUM_UNNECESSARY)
725 goto no_checksum;
726
727 desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
728 xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits);
729 if (desc.offset != skb->len) {
730 unsigned int csum2;
731 csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
732 desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
733 }
734 if (desc.count)
735 return -1;
736 if ((unsigned short)csum_fold(desc.csum))
737 return -1;
738 return 0;
739no_checksum:
740 xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits);
741 if (desc.count)
742 return -1;
743 return 0;
744}
745
746/*
747 * Input handler for RPC replies. Called from a bottom half and hence
748 * atomic.
749 */
750static void
751udp_data_ready(struct sock *sk, int len)
752{
753 struct rpc_task *task;
754 struct rpc_xprt *xprt;
755 struct rpc_rqst *rovr;
756 struct sk_buff *skb;
757 int err, repsize, copied;
758 u32 _xid, *xp;
759
760 read_lock(&sk->sk_callback_lock);
761 dprintk("RPC: udp_data_ready...\n");
762 if (!(xprt = xprt_from_sock(sk))) {
763		printk("RPC: udp_data_ready transport not found!\n");
764 goto out;
765 }
766
767 dprintk("RPC: udp_data_ready client %p\n", xprt);
768
769 if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
770 goto out;
771
772 if (xprt->shutdown)
773 goto dropit;
774
775 repsize = skb->len - sizeof(struct udphdr);
776 if (repsize < 4) {
777 printk("RPC: impossible RPC reply size %d!\n", repsize);
778 goto dropit;
779 }
780
781 /* Copy the XID from the skb... */
782 xp = skb_header_pointer(skb, sizeof(struct udphdr),
783 sizeof(_xid), &_xid);
784 if (xp == NULL)
785 goto dropit;
786
787 /* Look up and lock the request corresponding to the given XID */
788 spin_lock(&xprt->sock_lock);
789 rovr = xprt_lookup_rqst(xprt, *xp);
790 if (!rovr)
791 goto out_unlock;
792 task = rovr->rq_task;
793
794 dprintk("RPC: %4d received reply\n", task->tk_pid);
795
796 if ((copied = rovr->rq_private_buf.buflen) > repsize)
797 copied = repsize;
798
799 /* Suck it into the iovec, verify checksum if not done by hw. */
800 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
801 goto out_unlock;
802
803 /* Something worked... */
804 dst_confirm(skb->dst);
805
806 xprt_complete_rqst(xprt, rovr, copied);
807
808 out_unlock:
809 spin_unlock(&xprt->sock_lock);
810 dropit:
811 skb_free_datagram(sk, skb);
812 out:
813 read_unlock(&sk->sk_callback_lock);
814}
815
816/*
817 * Copy from an skb into memory and shrink the skb.
818 */
819static inline size_t
820tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
821{
822 if (len > desc->count)
823 len = desc->count;
824 if (skb_copy_bits(desc->skb, desc->offset, p, len))
825 return 0;
826 desc->offset += len;
827 desc->count -= len;
828 return len;
829}
830
831/*
832 * TCP read fragment marker
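 * Every RPC-over-TCP record begins with a 4-byte marker (RFC 1831 record
 * marking): the top bit flags the last fragment of the record, the low 31
 * bits hold the fragment length.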
833 */
834static inline void
835tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
836{
837 size_t len, used;
838 char *p;
839
840 p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset;
841 len = sizeof(xprt->tcp_recm) - xprt->tcp_offset;
842 used = tcp_copy_data(desc, p, len);
843 xprt->tcp_offset += used;
844 if (used != len)
845 return;
846 xprt->tcp_reclen = ntohl(xprt->tcp_recm);
847 if (xprt->tcp_reclen & 0x80000000)
848 xprt->tcp_flags |= XPRT_LAST_FRAG;
849 else
850 xprt->tcp_flags &= ~XPRT_LAST_FRAG;
851 xprt->tcp_reclen &= 0x7fffffff;
852 xprt->tcp_flags &= ~XPRT_COPY_RECM;
853 xprt->tcp_offset = 0;
854 /* Sanity check of the record length */
855 if (xprt->tcp_reclen < 4) {
856 printk(KERN_ERR "RPC: Invalid TCP record fragment length\n");
857 xprt_disconnect(xprt);
858 }
859 dprintk("RPC: reading TCP record fragment of length %d\n",
860 xprt->tcp_reclen);
861}
862
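/*
 * Once the current fragment has been consumed, expect a new record marker;
 * if it was the last fragment of the record, expect a fresh XID as well.
 */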
863static void
864tcp_check_recm(struct rpc_xprt *xprt)
865{
866 if (xprt->tcp_offset == xprt->tcp_reclen) {
867 xprt->tcp_flags |= XPRT_COPY_RECM;
868 xprt->tcp_offset = 0;
869 if (xprt->tcp_flags & XPRT_LAST_FRAG) {
870 xprt->tcp_flags &= ~XPRT_COPY_DATA;
871 xprt->tcp_flags |= XPRT_COPY_XID;
872 xprt->tcp_copied = 0;
873 }
874 }
875}
876
877/*
878 * TCP read xid
879 */
880static inline void
881tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
882{
883 size_t len, used;
884 char *p;
885
886 len = sizeof(xprt->tcp_xid) - xprt->tcp_offset;
887 dprintk("RPC: reading XID (%Zu bytes)\n", len);
888 p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset;
889 used = tcp_copy_data(desc, p, len);
890 xprt->tcp_offset += used;
891 if (used != len)
892 return;
893 xprt->tcp_flags &= ~XPRT_COPY_XID;
894 xprt->tcp_flags |= XPRT_COPY_DATA;
895 xprt->tcp_copied = 4;
896 dprintk("RPC: reading reply for XID %08x\n",
897 ntohl(xprt->tcp_xid));
898 tcp_check_recm(xprt);
899}
900
901/*
902 * TCP read and complete request
903 */
904static inline void
905tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
906{
907 struct rpc_rqst *req;
908 struct xdr_buf *rcvbuf;
909 size_t len;
910
911 /* Find and lock the request corresponding to this xid */
912 spin_lock(&xprt->sock_lock);
913 req = xprt_lookup_rqst(xprt, xprt->tcp_xid);
914 if (!req) {
915 xprt->tcp_flags &= ~XPRT_COPY_DATA;
916 dprintk("RPC: XID %08x request not found!\n",
917 ntohl(xprt->tcp_xid));
918 spin_unlock(&xprt->sock_lock);
919 return;
920 }
921
922 rcvbuf = &req->rq_private_buf;
923 len = desc->count;
924 if (len > xprt->tcp_reclen - xprt->tcp_offset) {
925 skb_reader_t my_desc;
926
927 len = xprt->tcp_reclen - xprt->tcp_offset;
928 memcpy(&my_desc, desc, sizeof(my_desc));
929 my_desc.count = len;
930 xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
931 &my_desc, tcp_copy_data);
932 desc->count -= len;
933 desc->offset += len;
934 } else
935 xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
936 desc, tcp_copy_data);
937 xprt->tcp_copied += len;
938 xprt->tcp_offset += len;
939
940 if (xprt->tcp_copied == req->rq_private_buf.buflen)
941 xprt->tcp_flags &= ~XPRT_COPY_DATA;
942 else if (xprt->tcp_offset == xprt->tcp_reclen) {
943 if (xprt->tcp_flags & XPRT_LAST_FRAG)
944 xprt->tcp_flags &= ~XPRT_COPY_DATA;
945 }
946
947 if (!(xprt->tcp_flags & XPRT_COPY_DATA)) {
948 dprintk("RPC: %4d received reply complete\n",
949 req->rq_task->tk_pid);
950 xprt_complete_rqst(xprt, req, xprt->tcp_copied);
951 }
952 spin_unlock(&xprt->sock_lock);
953 tcp_check_recm(xprt);
954}
955
956/*
957 * TCP discard extra bytes from a short read
958 */
959static inline void
960tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
961{
962 size_t len;
963
964 len = xprt->tcp_reclen - xprt->tcp_offset;
965 if (len > desc->count)
966 len = desc->count;
967 desc->count -= len;
968 desc->offset += len;
969 xprt->tcp_offset += len;
970 tcp_check_recm(xprt);
971}
972
973/*
974 * TCP record receive routine
975 * We first have to grab the record marker, then the XID, then the data.
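 * The XPRT_COPY_RECM, XPRT_COPY_XID and XPRT_COPY_DATA bits in
 * xprt->tcp_flags track which stage we are in, so a record can span any
 * number of tcp_read_sock() callbacks.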
976 */
977static int
978tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
979 unsigned int offset, size_t len)
980{
981 struct rpc_xprt *xprt = rd_desc->arg.data;
982 skb_reader_t desc = {
983 .skb = skb,
984 .offset = offset,
985 .count = len,
986 .csum = 0
987 };
988
989 dprintk("RPC: tcp_data_recv\n");
990 do {
991 /* Read in a new fragment marker if necessary */
992 /* Can we ever really expect to get completely empty fragments? */
993 if (xprt->tcp_flags & XPRT_COPY_RECM) {
994 tcp_read_fraghdr(xprt, &desc);
995 continue;
996 }
997 /* Read in the xid if necessary */
998 if (xprt->tcp_flags & XPRT_COPY_XID) {
999 tcp_read_xid(xprt, &desc);
1000 continue;
1001 }
1002 /* Read in the request data */
1003 if (xprt->tcp_flags & XPRT_COPY_DATA) {
1004 tcp_read_request(xprt, &desc);
1005 continue;
1006 }
1007 /* Skip over any trailing bytes on short reads */
1008 tcp_read_discard(xprt, &desc);
1009 } while (desc.count);
1010 dprintk("RPC: tcp_data_recv done\n");
1011 return len - desc.count;
1012}
1013
1014static void tcp_data_ready(struct sock *sk, int bytes)
1015{
1016 struct rpc_xprt *xprt;
1017 read_descriptor_t rd_desc;
1018
1019 read_lock(&sk->sk_callback_lock);
1020 dprintk("RPC: tcp_data_ready...\n");
1021 if (!(xprt = xprt_from_sock(sk))) {
1022 printk("RPC: tcp_data_ready socket info not found!\n");
1023 goto out;
1024 }
1025 if (xprt->shutdown)
1026 goto out;
1027
1028 /* We use rd_desc to pass struct xprt to tcp_data_recv */
1029 rd_desc.arg.data = xprt;
1030 rd_desc.count = 65536;
1031 tcp_read_sock(sk, &rd_desc, tcp_data_recv);
1032out:
1033 read_unlock(&sk->sk_callback_lock);
1034}
1035
1036static void
1037tcp_state_change(struct sock *sk)
1038{
1039 struct rpc_xprt *xprt;
1040
1041 read_lock(&sk->sk_callback_lock);
1042 if (!(xprt = xprt_from_sock(sk)))
1043 goto out;
1044 dprintk("RPC: tcp_state_change client %p...\n", xprt);
1045 dprintk("RPC: state %x conn %d dead %d zapped %d\n",
1046 sk->sk_state, xprt_connected(xprt),
1047 sock_flag(sk, SOCK_DEAD),
1048 sock_flag(sk, SOCK_ZAPPED));
1049
1050 switch (sk->sk_state) {
1051 case TCP_ESTABLISHED:
1052 spin_lock_bh(&xprt->sock_lock);
1053 if (!xprt_test_and_set_connected(xprt)) {
1054 /* Reset TCP record info */
1055 xprt->tcp_offset = 0;
1056 xprt->tcp_reclen = 0;
1057 xprt->tcp_copied = 0;
1058 xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
1059 rpc_wake_up(&xprt->pending);
1060 }
1061 spin_unlock_bh(&xprt->sock_lock);
1062 break;
1063 case TCP_SYN_SENT:
1064 case TCP_SYN_RECV:
1065 break;
1066 default:
1067 if (xprt_test_and_clear_connected(xprt))
1068 rpc_wake_up_status(&xprt->pending, -ENOTCONN);
1069 break;
1070 }
1071 out:
1072 read_unlock(&sk->sk_callback_lock);
1073}
1074
1075/*
1076 * Called when more output buffer space is available for this socket.
1077 * We try not to wake our writers until they can make "significant"
1078 * progress, otherwise we'll waste resources thrashing sock_sendmsg
1079 * with a bunch of small requests.
1080 */
1081static void
1082xprt_write_space(struct sock *sk)
1083{
1084 struct rpc_xprt *xprt;
1085 struct socket *sock;
1086
1087 read_lock(&sk->sk_callback_lock);
1088 if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket))
1089 goto out;
1090 if (xprt->shutdown)
1091 goto out;
1092
1093 /* Wait until we have enough socket memory */
1094 if (xprt->stream) {
1095 /* from net/core/stream.c:sk_stream_write_space */
1096 if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))
1097 goto out;
1098 } else {
1099 /* from net/core/sock.c:sock_def_write_space */
1100 if (!sock_writeable(sk))
1101 goto out;
1102 }
1103
1104 if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))
1105 goto out;
1106
1107 spin_lock_bh(&xprt->sock_lock);
1108 if (xprt->snd_task)
1109 rpc_wake_up_task(xprt->snd_task);
1110 spin_unlock_bh(&xprt->sock_lock);
1111out:
1112 read_unlock(&sk->sk_callback_lock);
1113}
1114
1115/*
1116 * RPC receive timeout handler.
1117 */
1118static void
1119xprt_timer(struct rpc_task *task)
1120{
1121 struct rpc_rqst *req = task->tk_rqstp;
1122 struct rpc_xprt *xprt = req->rq_xprt;
1123
1124 spin_lock(&xprt->sock_lock);
1125 if (req->rq_received)
1126 goto out;
1127
1128 xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT);
1129 __xprt_put_cong(xprt, req);
1130
1131 dprintk("RPC: %4d xprt_timer (%s request)\n",
1132 task->tk_pid, req ? "pending" : "backlogged");
1133
1134 task->tk_status = -ETIMEDOUT;
1135out:
1136 task->tk_timeout = 0;
1137 rpc_wake_up_task(task);
1138 spin_unlock(&xprt->sock_lock);
1139}
1140
1141/*
1142 * Place the actual RPC call.
1143 * We have to copy the iovec because sendmsg fiddles with its contents.
1144 */
1145int
1146xprt_prepare_transmit(struct rpc_task *task)
1147{
1148 struct rpc_rqst *req = task->tk_rqstp;
1149 struct rpc_xprt *xprt = req->rq_xprt;
1150 int err = 0;
1151
1152 dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid);
1153
1154 if (xprt->shutdown)
1155 return -EIO;
1156
1157 spin_lock_bh(&xprt->sock_lock);
1158 if (req->rq_received && !req->rq_bytes_sent) {
1159 err = req->rq_received;
1160 goto out_unlock;
1161 }
1162 if (!__xprt_lock_write(xprt, task)) {
1163 err = -EAGAIN;
1164 goto out_unlock;
1165 }
1166
1167 if (!xprt_connected(xprt)) {
1168 err = -ENOTCONN;
1169 goto out_unlock;
1170 }
1171out_unlock:
1172 spin_unlock_bh(&xprt->sock_lock);
1173 return err;
1174}
1175
1176void
1177xprt_transmit(struct rpc_task *task)
1178{
1179 struct rpc_clnt *clnt = task->tk_client;
1180 struct rpc_rqst *req = task->tk_rqstp;
1181 struct rpc_xprt *xprt = req->rq_xprt;
1182 int status, retry = 0;
1183
1184
1185 dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
1186
1187 /* set up everything as needed. */
1188 /* Write the record marker */
1189 if (xprt->stream) {
1190 u32 *marker = req->rq_svec[0].iov_base;
1191
1192 *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker)));
1193 }
1194
1195 smp_rmb();
1196 if (!req->rq_received) {
1197 if (list_empty(&req->rq_list)) {
1198 spin_lock_bh(&xprt->sock_lock);
1199 /* Update the softirq receive buffer */
1200 memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
1201 sizeof(req->rq_private_buf));
1202 /* Add request to the receive list */
1203 list_add_tail(&req->rq_list, &xprt->recv);
1204 spin_unlock_bh(&xprt->sock_lock);
1205 xprt_reset_majortimeo(req);
1206 }
1207 } else if (!req->rq_bytes_sent)
1208 return;
1209
1210 /* Continue transmitting the packet/record. We must be careful
1211 * to cope with writespace callbacks arriving _after_ we have
1212 * called xprt_sendmsg().
1213 */
1214 while (1) {
1215 req->rq_xtime = jiffies;
1216 status = xprt_sendmsg(xprt, req);
1217
1218 if (status < 0)
1219 break;
1220
1221 if (xprt->stream) {
1222 req->rq_bytes_sent += status;
1223
1224 /* If we've sent the entire packet, immediately
1225 * reset the count of bytes sent. */
1226 if (req->rq_bytes_sent >= req->rq_slen) {
1227 req->rq_bytes_sent = 0;
1228 goto out_receive;
1229 }
1230 } else {
1231 if (status >= req->rq_slen)
1232 goto out_receive;
1233 status = -EAGAIN;
1234 break;
1235 }
1236
1237 dprintk("RPC: %4d xmit incomplete (%d left of %d)\n",
1238 task->tk_pid, req->rq_slen - req->rq_bytes_sent,
1239 req->rq_slen);
1240
1241 status = -EAGAIN;
1242 if (retry++ > 50)
1243 break;
1244 }
1245
1246 /* Note: at this point, task->tk_sleeping has not yet been set,
1247 * hence there is no danger of the waking up task being put on
1248 * schedq, and being picked up by a parallel run of rpciod().
1249 */
1250 task->tk_status = status;
1251
1252 switch (status) {
1253 case -EAGAIN:
1254 if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
1255 /* Protect against races with xprt_write_space */
1256 spin_lock_bh(&xprt->sock_lock);
1257 /* Don't race with disconnect */
1258 if (!xprt_connected(xprt))
1259 task->tk_status = -ENOTCONN;
1260 else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) {
1261 task->tk_timeout = req->rq_timeout;
1262 rpc_sleep_on(&xprt->pending, task, NULL, NULL);
1263 }
1264 spin_unlock_bh(&xprt->sock_lock);
1265 return;
1266 }
1267 /* Keep holding the socket if it is blocked */
1268 rpc_delay(task, HZ>>4);
1269 return;
1270 case -ECONNREFUSED:
1271 task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
1272 rpc_sleep_on(&xprt->sending, task, NULL, NULL);
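		/* fall through */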
1273 case -ENOTCONN:
1274 return;
1275 default:
1276 if (xprt->stream)
1277 xprt_disconnect(xprt);
1278 }
1279 xprt_release_write(xprt, task);
1280 return;
1281 out_receive:
1282 dprintk("RPC: %4d xmit complete\n", task->tk_pid);
1283 /* Set the task's receive timeout value */
1284 spin_lock_bh(&xprt->sock_lock);
1285 if (!xprt->nocong) {
1286 int timer = task->tk_msg.rpc_proc->p_timer;
1287 task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer);
1288 task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries;
1289 if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0)
1290 task->tk_timeout = xprt->timeout.to_maxval;
1291 } else
1292 task->tk_timeout = req->rq_timeout;
1293 /* Don't race with disconnect */
1294 if (!xprt_connected(xprt))
1295 task->tk_status = -ENOTCONN;
1296 else if (!req->rq_received)
1297 rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
1298 __xprt_release_write(xprt, task);
1299 spin_unlock_bh(&xprt->sock_lock);
1300}
1301
1302/*
1303 * Reserve an RPC call slot.
1304 */
1305static inline void
1306do_xprt_reserve(struct rpc_task *task)
1307{
1308 struct rpc_xprt *xprt = task->tk_xprt;
1309
1310 task->tk_status = 0;
1311 if (task->tk_rqstp)
1312 return;
1313 if (!list_empty(&xprt->free)) {
1314 struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
1315 list_del_init(&req->rq_list);
1316 task->tk_rqstp = req;
1317 xprt_request_init(task, xprt);
1318 return;
1319 }
1320 dprintk("RPC: waiting for request slot\n");
1321 task->tk_status = -EAGAIN;
1322 task->tk_timeout = 0;
1323 rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
1324}
1325
1326void
1327xprt_reserve(struct rpc_task *task)
1328{
1329 struct rpc_xprt *xprt = task->tk_xprt;
1330
1331 task->tk_status = -EIO;
1332 if (!xprt->shutdown) {
1333 spin_lock(&xprt->xprt_lock);
1334 do_xprt_reserve(task);
1335 spin_unlock(&xprt->xprt_lock);
1336 if (task->tk_rqstp)
1337 del_timer_sync(&xprt->timer);
1338 }
1339}
1340
1341/*
1342 * Allocate a 'unique' XID
1343 */
1344static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt)
1345{
1346 return xprt->xid++;
1347}
1348
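/*
 * Seed the XID counter with random bytes so that a fresh transport does not
 * start at a predictable value; xprt_alloc_xid() then simply increments it.
 */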
1349static inline void xprt_init_xid(struct rpc_xprt *xprt)
1350{
1351 get_random_bytes(&xprt->xid, sizeof(xprt->xid));
1352}
1353
1354/*
1355 * Initialize RPC request
1356 */
1357static void
1358xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
1359{
1360 struct rpc_rqst *req = task->tk_rqstp;
1361
1362 req->rq_timeout = xprt->timeout.to_initval;
1363 req->rq_task = task;
1364 req->rq_xprt = xprt;
1365 req->rq_xid = xprt_alloc_xid(xprt);
1366 dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
1367 req, ntohl(req->rq_xid));
1368}
1369
1370/*
1371 * Release an RPC call slot
1372 */
1373void
1374xprt_release(struct rpc_task *task)
1375{
1376 struct rpc_xprt *xprt = task->tk_xprt;
1377 struct rpc_rqst *req;
1378
1379 if (!(req = task->tk_rqstp))
1380 return;
1381 spin_lock_bh(&xprt->sock_lock);
1382 __xprt_release_write(xprt, task);
1383 __xprt_put_cong(xprt, req);
1384 if (!list_empty(&req->rq_list))
1385 list_del(&req->rq_list);
1386 xprt->last_used = jiffies;
1387 if (list_empty(&xprt->recv) && !xprt->shutdown)
1388 mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT);
1389 spin_unlock_bh(&xprt->sock_lock);
1390 task->tk_rqstp = NULL;
1391 memset(req, 0, sizeof(*req)); /* mark unused */
1392
1393 dprintk("RPC: %4d release request %p\n", task->tk_pid, req);
1394
1395 spin_lock(&xprt->xprt_lock);
1396 list_add(&req->rq_list, &xprt->free);
1397 xprt_clear_backlog(xprt);
1398 spin_unlock(&xprt->xprt_lock);
1399}
1400
1401/*
1402 * Set default timeout parameters
1403 */
1404static void
1405xprt_default_timeout(struct rpc_timeout *to, int proto)
1406{
1407 if (proto == IPPROTO_UDP)
1408 xprt_set_timeout(to, 5, 5 * HZ);
1409 else
1410 xprt_set_timeout(to, 5, 60 * HZ);
1411}
1412
1413/*
1414 * Set timeout parameters with a constant per-retry increment
1415 */
1416void
1417xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
1418{
1419 to->to_initval =
1420 to->to_increment = incr;
1421 to->to_maxval = incr * retr;
1422 to->to_retries = retr;
1423 to->to_exponential = 0;
1424}
1425
1426unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
1427unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
1428
1429/*
1430 * Set up an RPC transport: slot table, timeouts, locks and wait queues
1431 */
1432static struct rpc_xprt *
1433xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to)
1434{
1435 struct rpc_xprt *xprt;
1436 unsigned int entries;
1437 size_t slot_table_size;
1438 struct rpc_rqst *req;
1439
1440 dprintk("RPC: setting up %s transport...\n",
1441 proto == IPPROTO_UDP? "UDP" : "TCP");
1442
1443 entries = (proto == IPPROTO_TCP)?
1444 xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries;
1445
1446 if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL)
1447 return ERR_PTR(-ENOMEM);
1448 memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */
1449 xprt->max_reqs = entries;
1450 slot_table_size = entries * sizeof(xprt->slot[0]);
1451 xprt->slot = kmalloc(slot_table_size, GFP_KERNEL);
1452 if (xprt->slot == NULL) {
1453 kfree(xprt);
1454 return ERR_PTR(-ENOMEM);
1455 }
1456 memset(xprt->slot, 0, slot_table_size);
1457
1458 xprt->addr = *ap;
1459 xprt->prot = proto;
1460 xprt->stream = (proto == IPPROTO_TCP)? 1 : 0;
1461 if (xprt->stream) {
1462 xprt->cwnd = RPC_MAXCWND(xprt);
1463 xprt->nocong = 1;
1464 xprt->max_payload = (1U << 31) - 1;
1465 } else {
1466 xprt->cwnd = RPC_INITCWND;
1467 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
1468 }
1469 spin_lock_init(&xprt->sock_lock);
1470 spin_lock_init(&xprt->xprt_lock);
1471 init_waitqueue_head(&xprt->cong_wait);
1472
1473 INIT_LIST_HEAD(&xprt->free);
1474 INIT_LIST_HEAD(&xprt->recv);
1475 INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt);
1476 INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt);
1477 init_timer(&xprt->timer);
1478 xprt->timer.function = xprt_init_autodisconnect;
1479 xprt->timer.data = (unsigned long) xprt;
1480 xprt->last_used = jiffies;
1481 xprt->port = XPRT_MAX_RESVPORT;
1482
1483 /* Set timeout parameters */
1484 if (to) {
1485 xprt->timeout = *to;
1486 } else
1487 xprt_default_timeout(&xprt->timeout, xprt->prot);
1488
1489 rpc_init_wait_queue(&xprt->pending, "xprt_pending");
1490 rpc_init_wait_queue(&xprt->sending, "xprt_sending");
1491 rpc_init_wait_queue(&xprt->resend, "xprt_resend");
1492 rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
1493
1494 /* initialize free list */
1495 for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--)
1496 list_add(&req->rq_list, &xprt->free);
1497
1498 xprt_init_xid(xprt);
1499
1500 /* Check whether we want to use a reserved port */
1501 xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
1502
1503 dprintk("RPC: created transport %p with %u slots\n", xprt,
1504 xprt->max_reqs);
1505
1506 return xprt;
1507}
1508
1509/*
1510 * Bind to a reserved port
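 * Starting from the port last used by this transport (XPRT_MAX_RESVPORT for
 * a new one), walk downwards through the privileged range, wrapping back to
 * XPRT_MAX_RESVPORT at zero, until bind() succeeds; only -EADDRINUSE causes
 * another attempt.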
1511 */
1512static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
1513{
1514 struct sockaddr_in myaddr = {
1515 .sin_family = AF_INET,
1516 };
1517 int err, port;
1518
1519 /* Were we already bound to a given port? Try to reuse it */
1520 port = xprt->port;
1521 do {
1522 myaddr.sin_port = htons(port);
1523 err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
1524 sizeof(myaddr));
1525 if (err == 0) {
1526 xprt->port = port;
1527 return 0;
1528 }
1529 if (--port == 0)
1530 port = XPRT_MAX_RESVPORT;
1531 } while (err == -EADDRINUSE && port != xprt->port);
1532
1533 printk("RPC: Can't bind to reserved port (%d).\n", -err);
1534 return err;
1535}
1536
1537static void
1538xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
1539{
1540 struct sock *sk = sock->sk;
1541
1542 if (xprt->inet)
1543 return;
1544
1545 write_lock_bh(&sk->sk_callback_lock);
1546 sk->sk_user_data = xprt;
1547 xprt->old_data_ready = sk->sk_data_ready;
1548 xprt->old_state_change = sk->sk_state_change;
1549 xprt->old_write_space = sk->sk_write_space;
1550 if (xprt->prot == IPPROTO_UDP) {
1551 sk->sk_data_ready = udp_data_ready;
1552 sk->sk_no_check = UDP_CSUM_NORCV;
1553 xprt_set_connected(xprt);
1554 } else {
1555 tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */
1556 sk->sk_data_ready = tcp_data_ready;
1557 sk->sk_state_change = tcp_state_change;
1558 xprt_clear_connected(xprt);
1559 }
1560 sk->sk_write_space = xprt_write_space;
1561
1562 /* Reset to new socket */
1563 xprt->sock = sock;
1564 xprt->inet = sk;
1565 write_unlock_bh(&sk->sk_callback_lock);
1566
1567 return;
1568}
1569
1570/*
1571 * Set socket buffer length
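 * Only datagram transports are touched here: the UDP socket buffers are
 * sized to the configured per-request buffer size times the number of
 * request slots, times two; stream sockets keep normal TCP autotuning.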
1572 */
1573void
1574xprt_sock_setbufsize(struct rpc_xprt *xprt)
1575{
1576 struct sock *sk = xprt->inet;
1577
1578 if (xprt->stream)
1579 return;
1580 if (xprt->rcvsize) {
1581 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1582 sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2;
1583 }
1584 if (xprt->sndsize) {
1585 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1586 sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2;
1587 sk->sk_write_space(sk);
1588 }
1589}
1590
1591/*
1592 * Both datagram and stream sockets are created here; stream sockets are
1593 * then connected from the xprt_connect()/xprt_socket_connect path.
1594 */
1595static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport)
1596{
1597 struct socket *sock;
1598 int type, err;
1599
1600 dprintk("RPC: xprt_create_socket(%s %d)\n",
1601 (proto == IPPROTO_UDP)? "udp" : "tcp", proto);
1602
1603 type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1604
1605 if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) {
1606 printk("RPC: can't create socket (%d).\n", -err);
1607 return NULL;
1608 }
1609
1610 /* If the caller has the capability, bind to a reserved port */
1611 if (resvport && xprt_bindresvport(xprt, sock) < 0) {
1612 printk("RPC: can't bind to reserved port.\n");
1613 goto failed;
1614 }
1615
1616 return sock;
1617
1618failed:
1619 sock_release(sock);
1620 return NULL;
1621}
1622
1623/*
1624 * Create an RPC client transport given the protocol and peer address.
1625 */
1626struct rpc_xprt *
1627xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
1628{
1629 struct rpc_xprt *xprt;
1630
1631 xprt = xprt_setup(proto, sap, to);
1632 if (IS_ERR(xprt))
1633 dprintk("RPC: xprt_create_proto failed\n");
1634 else
1635 dprintk("RPC: xprt_create_proto created xprt %p\n", xprt);
1636 return xprt;
1637}
1638
1639/*
1640 * Prepare for transport shutdown.
1641 */
1642static void
1643xprt_shutdown(struct rpc_xprt *xprt)
1644{
1645 xprt->shutdown = 1;
1646 rpc_wake_up(&xprt->sending);
1647 rpc_wake_up(&xprt->resend);
1648 rpc_wake_up(&xprt->pending);
1649 rpc_wake_up(&xprt->backlog);
1650 wake_up(&xprt->cong_wait);
1651 del_timer_sync(&xprt->timer);
1652}
1653
1654/*
1655 * Clear the xprt backlog queue
1656 */
1657static int
1658xprt_clear_backlog(struct rpc_xprt *xprt) {
1659 rpc_wake_up_next(&xprt->backlog);
1660 wake_up(&xprt->cong_wait);
1661 return 1;
1662}
1663
1664/*
1665 * Destroy an RPC transport, killing off all requests.
1666 */
1667int
1668xprt_destroy(struct rpc_xprt *xprt)
1669{
1670 dprintk("RPC: destroying transport %p\n", xprt);
1671 xprt_shutdown(xprt);
1672 xprt_disconnect(xprt);
1673 xprt_close(xprt);
1674 kfree(xprt->slot);
1675 kfree(xprt);
1676
1677 return 0;
1678}