diff options
Diffstat (limited to 'net/sunrpc')
33 files changed, 15934 insertions, 0 deletions
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile new file mode 100644 index 000000000000..46a2ce00a29b --- /dev/null +++ b/net/sunrpc/Makefile | |||
@@ -0,0 +1,15 @@ | |||
1 | # | ||
2 | # Makefile for Linux kernel SUN RPC | ||
3 | # | ||
4 | |||
5 | |||
6 | obj-$(CONFIG_SUNRPC) += sunrpc.o | ||
7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ | ||
8 | |||
9 | sunrpc-y := clnt.o xprt.o sched.o \ | ||
10 | auth.o auth_null.o auth_unix.o \ | ||
11 | svc.o svcsock.o svcauth.o svcauth_unix.o \ | ||
12 | pmap_clnt.o timer.o xdr.o \ | ||
13 | sunrpc_syms.o cache.o rpc_pipe.o | ||
14 | sunrpc-$(CONFIG_PROC_FS) += stats.o | ||
15 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o | ||
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c new file mode 100644 index 000000000000..9bcec9b927b9 --- /dev/null +++ b/net/sunrpc/auth.c | |||
@@ -0,0 +1,395 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth.c | ||
3 | * | ||
4 | * Generic RPC client authentication API. | ||
5 | * | ||
6 | * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/socket.h> | ||
15 | #include <linux/sunrpc/clnt.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | |||
18 | #ifdef RPC_DEBUG | ||
19 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
20 | #endif | ||
21 | |||
/*
 * Table of authentication flavor operations, indexed by flavor number.
 * The first two slots are filled in statically; rpcauth_register() and
 * rpcauth_unregister() fill and clear the remaining slots, and
 * rpcauth_create() looks the ops up here.
 */
22 | static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { | ||
23 | &authnull_ops, /* AUTH_NULL */ | ||
24 | &authunix_ops, /* AUTH_UNIX */ | ||
25 | NULL, /* others can be loadable modules */ | ||
26 | }; | ||
27 | |||
28 | static u32 | ||
29 | pseudoflavor_to_flavor(u32 flavor) { | ||
30 | if (flavor >= RPC_AUTH_MAXFLAVOR) | ||
31 | return RPC_AUTH_GSS; | ||
32 | return flavor; | ||
33 | } | ||
34 | |||
35 | int | ||
36 | rpcauth_register(struct rpc_authops *ops) | ||
37 | { | ||
38 | rpc_authflavor_t flavor; | ||
39 | |||
40 | if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) | ||
41 | return -EINVAL; | ||
42 | if (auth_flavors[flavor] != NULL) | ||
43 | return -EPERM; /* what else? */ | ||
44 | auth_flavors[flavor] = ops; | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | int | ||
49 | rpcauth_unregister(struct rpc_authops *ops) | ||
50 | { | ||
51 | rpc_authflavor_t flavor; | ||
52 | |||
53 | if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) | ||
54 | return -EINVAL; | ||
55 | if (auth_flavors[flavor] != ops) | ||
56 | return -EPERM; /* what else? */ | ||
57 | auth_flavors[flavor] = NULL; | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | struct rpc_auth * | ||
62 | rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) | ||
63 | { | ||
64 | struct rpc_auth *auth; | ||
65 | struct rpc_authops *ops; | ||
66 | u32 flavor = pseudoflavor_to_flavor(pseudoflavor); | ||
67 | |||
68 | if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor])) | ||
69 | return NULL; | ||
70 | auth = ops->create(clnt, pseudoflavor); | ||
71 | if (!auth) | ||
72 | return NULL; | ||
73 | if (clnt->cl_auth) | ||
74 | rpcauth_destroy(clnt->cl_auth); | ||
75 | clnt->cl_auth = auth; | ||
76 | return auth; | ||
77 | } | ||
78 | |||
79 | void | ||
80 | rpcauth_destroy(struct rpc_auth *auth) | ||
81 | { | ||
82 | if (!atomic_dec_and_test(&auth->au_count)) | ||
83 | return; | ||
84 | auth->au_ops->destroy(auth); | ||
85 | } | ||
86 | |||
87 | static DEFINE_SPINLOCK(rpc_credcache_lock); | ||
88 | |||
89 | /* | ||
90 | * Initialize RPC credential cache | ||
91 | */ | ||
92 | int | ||
93 | rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) | ||
94 | { | ||
95 | struct rpc_cred_cache *new; | ||
96 | int i; | ||
97 | |||
98 | new = (struct rpc_cred_cache *)kmalloc(sizeof(*new), GFP_KERNEL); | ||
99 | if (!new) | ||
100 | return -ENOMEM; | ||
101 | for (i = 0; i < RPC_CREDCACHE_NR; i++) | ||
102 | INIT_HLIST_HEAD(&new->hashtable[i]); | ||
103 | new->expire = expire; | ||
104 | new->nextgc = jiffies + (expire >> 1); | ||
105 | auth->au_credcache = new; | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * Destroy a list of credentials | ||
111 | */ | ||
112 | static inline | ||
113 | void rpcauth_destroy_credlist(struct hlist_head *head) | ||
114 | { | ||
115 | struct rpc_cred *cred; | ||
116 | |||
117 | while (!hlist_empty(head)) { | ||
118 | cred = hlist_entry(head->first, struct rpc_cred, cr_hash); | ||
119 | hlist_del_init(&cred->cr_hash); | ||
120 | put_rpccred(cred); | ||
121 | } | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Clear the RPC credential cache, and delete those credentials | ||
126 | * that are not referenced. | ||
127 | */ | ||
128 | void | ||
129 | rpcauth_free_credcache(struct rpc_auth *auth) | ||
130 | { | ||
131 | struct rpc_cred_cache *cache = auth->au_credcache; | ||
132 | HLIST_HEAD(free); | ||
133 | struct hlist_node *pos, *next; | ||
134 | struct rpc_cred *cred; | ||
135 | int i; | ||
136 | |||
137 | spin_lock(&rpc_credcache_lock); | ||
138 | for (i = 0; i < RPC_CREDCACHE_NR; i++) { | ||
139 | hlist_for_each_safe(pos, next, &cache->hashtable[i]) { | ||
140 | cred = hlist_entry(pos, struct rpc_cred, cr_hash); | ||
141 | __hlist_del(&cred->cr_hash); | ||
142 | hlist_add_head(&cred->cr_hash, &free); | ||
143 | } | ||
144 | } | ||
145 | spin_unlock(&rpc_credcache_lock); | ||
146 | rpcauth_destroy_credlist(&free); | ||
147 | } | ||
148 | |||
149 | static void | ||
150 | rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free) | ||
151 | { | ||
152 | if (atomic_read(&cred->cr_count) != 1) | ||
153 | return; | ||
154 | if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire)) | ||
155 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
156 | if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) { | ||
157 | __hlist_del(&cred->cr_hash); | ||
158 | hlist_add_head(&cred->cr_hash, free); | ||
159 | } | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * Remove stale credentials. Avoid sleeping inside the loop. | ||
164 | */ | ||
165 | static void | ||
166 | rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free) | ||
167 | { | ||
168 | struct rpc_cred_cache *cache = auth->au_credcache; | ||
169 | struct hlist_node *pos, *next; | ||
170 | struct rpc_cred *cred; | ||
171 | int i; | ||
172 | |||
173 | dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth); | ||
174 | for (i = 0; i < RPC_CREDCACHE_NR; i++) { | ||
175 | hlist_for_each_safe(pos, next, &cache->hashtable[i]) { | ||
176 | cred = hlist_entry(pos, struct rpc_cred, cr_hash); | ||
177 | rpcauth_prune_expired(auth, cred, free); | ||
178 | } | ||
179 | } | ||
180 | cache->nextgc = jiffies + cache->expire; | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * Look up a process' credentials in the authentication cache | ||
185 | */ | ||
186 | struct rpc_cred * | ||
187 | rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, | ||
188 | int taskflags) | ||
189 | { | ||
190 | struct rpc_cred_cache *cache = auth->au_credcache; | ||
191 | HLIST_HEAD(free); | ||
192 | struct hlist_node *pos, *next; | ||
193 | struct rpc_cred *new = NULL, | ||
194 | *cred = NULL; | ||
195 | int nr = 0; | ||
196 | |||
197 | if (!(taskflags & RPC_TASK_ROOTCREDS)) | ||
198 | nr = acred->uid & RPC_CREDCACHE_MASK; | ||
199 | retry: | ||
200 | spin_lock(&rpc_credcache_lock); | ||
201 | if (time_before(cache->nextgc, jiffies)) | ||
202 | rpcauth_gc_credcache(auth, &free); | ||
203 | hlist_for_each_safe(pos, next, &cache->hashtable[nr]) { | ||
204 | struct rpc_cred *entry; | ||
205 | entry = hlist_entry(pos, struct rpc_cred, cr_hash); | ||
206 | if (entry->cr_ops->crmatch(acred, entry, taskflags)) { | ||
207 | hlist_del(&entry->cr_hash); | ||
208 | cred = entry; | ||
209 | break; | ||
210 | } | ||
211 | rpcauth_prune_expired(auth, entry, &free); | ||
212 | } | ||
213 | if (new) { | ||
214 | if (cred) | ||
215 | hlist_add_head(&new->cr_hash, &free); | ||
216 | else | ||
217 | cred = new; | ||
218 | } | ||
219 | if (cred) { | ||
220 | hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]); | ||
221 | get_rpccred(cred); | ||
222 | } | ||
223 | spin_unlock(&rpc_credcache_lock); | ||
224 | |||
225 | rpcauth_destroy_credlist(&free); | ||
226 | |||
227 | if (!cred) { | ||
228 | new = auth->au_ops->crcreate(auth, acred, taskflags); | ||
229 | if (!IS_ERR(new)) { | ||
230 | #ifdef RPC_DEBUG | ||
231 | new->cr_magic = RPCAUTH_CRED_MAGIC; | ||
232 | #endif | ||
233 | goto retry; | ||
234 | } else | ||
235 | cred = new; | ||
236 | } | ||
237 | |||
238 | return (struct rpc_cred *) cred; | ||
239 | } | ||
240 | |||
241 | struct rpc_cred * | ||
242 | rpcauth_lookupcred(struct rpc_auth *auth, int taskflags) | ||
243 | { | ||
244 | struct auth_cred acred = { | ||
245 | .uid = current->fsuid, | ||
246 | .gid = current->fsgid, | ||
247 | .group_info = current->group_info, | ||
248 | }; | ||
249 | struct rpc_cred *ret; | ||
250 | |||
251 | dprintk("RPC: looking up %s cred\n", | ||
252 | auth->au_ops->au_name); | ||
253 | get_group_info(acred.group_info); | ||
254 | ret = auth->au_ops->lookup_cred(auth, &acred, taskflags); | ||
255 | put_group_info(acred.group_info); | ||
256 | return ret; | ||
257 | } | ||
258 | |||
259 | struct rpc_cred * | ||
260 | rpcauth_bindcred(struct rpc_task *task) | ||
261 | { | ||
262 | struct rpc_auth *auth = task->tk_auth; | ||
263 | struct auth_cred acred = { | ||
264 | .uid = current->fsuid, | ||
265 | .gid = current->fsgid, | ||
266 | .group_info = current->group_info, | ||
267 | }; | ||
268 | struct rpc_cred *ret; | ||
269 | |||
270 | dprintk("RPC: %4d looking up %s cred\n", | ||
271 | task->tk_pid, task->tk_auth->au_ops->au_name); | ||
272 | get_group_info(acred.group_info); | ||
273 | ret = auth->au_ops->lookup_cred(auth, &acred, task->tk_flags); | ||
274 | if (!IS_ERR(ret)) | ||
275 | task->tk_msg.rpc_cred = ret; | ||
276 | else | ||
277 | task->tk_status = PTR_ERR(ret); | ||
278 | put_group_info(acred.group_info); | ||
279 | return ret; | ||
280 | } | ||
281 | |||
282 | void | ||
283 | rpcauth_holdcred(struct rpc_task *task) | ||
284 | { | ||
285 | dprintk("RPC: %4d holding %s cred %p\n", | ||
286 | task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred); | ||
287 | if (task->tk_msg.rpc_cred) | ||
288 | get_rpccred(task->tk_msg.rpc_cred); | ||
289 | } | ||
290 | |||
291 | void | ||
292 | put_rpccred(struct rpc_cred *cred) | ||
293 | { | ||
294 | cred->cr_expire = jiffies; | ||
295 | if (!atomic_dec_and_test(&cred->cr_count)) | ||
296 | return; | ||
297 | cred->cr_ops->crdestroy(cred); | ||
298 | } | ||
299 | |||
300 | void | ||
301 | rpcauth_unbindcred(struct rpc_task *task) | ||
302 | { | ||
303 | struct rpc_auth *auth = task->tk_auth; | ||
304 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
305 | |||
306 | dprintk("RPC: %4d releasing %s cred %p\n", | ||
307 | task->tk_pid, auth->au_ops->au_name, cred); | ||
308 | |||
309 | put_rpccred(cred); | ||
310 | task->tk_msg.rpc_cred = NULL; | ||
311 | } | ||
312 | |||
313 | u32 * | ||
314 | rpcauth_marshcred(struct rpc_task *task, u32 *p) | ||
315 | { | ||
316 | struct rpc_auth *auth = task->tk_auth; | ||
317 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
318 | |||
319 | dprintk("RPC: %4d marshaling %s cred %p\n", | ||
320 | task->tk_pid, auth->au_ops->au_name, cred); | ||
321 | return cred->cr_ops->crmarshal(task, p); | ||
322 | } | ||
323 | |||
324 | u32 * | ||
325 | rpcauth_checkverf(struct rpc_task *task, u32 *p) | ||
326 | { | ||
327 | struct rpc_auth *auth = task->tk_auth; | ||
328 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
329 | |||
330 | dprintk("RPC: %4d validating %s cred %p\n", | ||
331 | task->tk_pid, auth->au_ops->au_name, cred); | ||
332 | return cred->cr_ops->crvalidate(task, p); | ||
333 | } | ||
334 | |||
335 | int | ||
336 | rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, | ||
337 | u32 *data, void *obj) | ||
338 | { | ||
339 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
340 | |||
341 | dprintk("RPC: %4d using %s cred %p to wrap rpc data\n", | ||
342 | task->tk_pid, cred->cr_ops->cr_name, cred); | ||
343 | if (cred->cr_ops->crwrap_req) | ||
344 | return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); | ||
345 | /* By default, we encode the arguments normally. */ | ||
346 | return encode(rqstp, data, obj); | ||
347 | } | ||
348 | |||
349 | int | ||
350 | rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, | ||
351 | u32 *data, void *obj) | ||
352 | { | ||
353 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
354 | |||
355 | dprintk("RPC: %4d using %s cred %p to unwrap rpc data\n", | ||
356 | task->tk_pid, cred->cr_ops->cr_name, cred); | ||
357 | if (cred->cr_ops->crunwrap_resp) | ||
358 | return cred->cr_ops->crunwrap_resp(task, decode, rqstp, | ||
359 | data, obj); | ||
360 | /* By default, we decode the arguments normally. */ | ||
361 | return decode(rqstp, data, obj); | ||
362 | } | ||
363 | |||
364 | int | ||
365 | rpcauth_refreshcred(struct rpc_task *task) | ||
366 | { | ||
367 | struct rpc_auth *auth = task->tk_auth; | ||
368 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
369 | int err; | ||
370 | |||
371 | dprintk("RPC: %4d refreshing %s cred %p\n", | ||
372 | task->tk_pid, auth->au_ops->au_name, cred); | ||
373 | err = cred->cr_ops->crrefresh(task); | ||
374 | if (err < 0) | ||
375 | task->tk_status = err; | ||
376 | return err; | ||
377 | } | ||
378 | |||
379 | void | ||
380 | rpcauth_invalcred(struct rpc_task *task) | ||
381 | { | ||
382 | dprintk("RPC: %4d invalidating %s cred %p\n", | ||
383 | task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred); | ||
384 | spin_lock(&rpc_credcache_lock); | ||
385 | if (task->tk_msg.rpc_cred) | ||
386 | task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
387 | spin_unlock(&rpc_credcache_lock); | ||
388 | } | ||
389 | |||
390 | int | ||
391 | rpcauth_uptodatecred(struct rpc_task *task) | ||
392 | { | ||
393 | return !(task->tk_msg.rpc_cred) || | ||
394 | (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); | ||
395 | } | ||
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile new file mode 100644 index 000000000000..fe1b874084bc --- /dev/null +++ b/net/sunrpc/auth_gss/Makefile | |||
@@ -0,0 +1,18 @@ | |||
1 | # | ||
2 | # Makefile for Linux kernel rpcsec_gss implementation | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o | ||
6 | |||
7 | auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ | ||
8 | gss_mech_switch.o svcauth_gss.o gss_krb5_crypto.o | ||
9 | |||
10 | obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o | ||
11 | |||
12 | rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ | ||
13 | gss_krb5_seqnum.o | ||
14 | |||
15 | obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o | ||
16 | |||
17 | rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \ | ||
18 | gss_spkm3_token.o | ||
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c new file mode 100644 index 000000000000..a33b627cbef4 --- /dev/null +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
@@ -0,0 +1,1152 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss.c | ||
3 | * | ||
4 | * RPCSEC_GSS client authentication. | ||
5 | * | ||
6 | * Copyright (c) 2000 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Dug Song <dugsong@monkey.org> | ||
10 | * Andy Adamson <andros@umich.edu> | ||
11 | * | ||
12 | * Redistribution and use in source and binary forms, with or without | ||
13 | * modification, are permitted provided that the following conditions | ||
14 | * are met: | ||
15 | * | ||
16 | * 1. Redistributions of source code must retain the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer. | ||
18 | * 2. Redistributions in binary form must reproduce the above copyright | ||
19 | * notice, this list of conditions and the following disclaimer in the | ||
20 | * documentation and/or other materials provided with the distribution. | ||
21 | * 3. Neither the name of the University nor the names of its | ||
22 | * contributors may be used to endorse or promote products derived | ||
23 | * from this software without specific prior written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
27 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
29 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
30 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
31 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
32 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
33 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
34 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
35 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | * | ||
37 | * $Id$ | ||
38 | */ | ||
39 | |||
40 | |||
41 | #include <linux/module.h> | ||
42 | #include <linux/init.h> | ||
43 | #include <linux/types.h> | ||
44 | #include <linux/slab.h> | ||
45 | #include <linux/socket.h> | ||
46 | #include <linux/in.h> | ||
47 | #include <linux/sched.h> | ||
48 | #include <linux/sunrpc/clnt.h> | ||
49 | #include <linux/sunrpc/auth.h> | ||
50 | #include <linux/sunrpc/auth_gss.h> | ||
51 | #include <linux/sunrpc/svcauth_gss.h> | ||
52 | #include <linux/sunrpc/gss_err.h> | ||
53 | #include <linux/workqueue.h> | ||
54 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
55 | #include <linux/sunrpc/gss_api.h> | ||
56 | #include <asm/uaccess.h> | ||
57 | |||
58 | static struct rpc_authops authgss_ops; | ||
59 | |||
60 | static struct rpc_credops gss_credops; | ||
61 | |||
62 | #ifdef RPC_DEBUG | ||
63 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
64 | #endif | ||
65 | |||
66 | #define NFS_NGROUPS 16 | ||
67 | |||
68 | #define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? */ | ||
69 | #define GSS_CRED_SLACK 1024 /* XXX: unused */ | ||
70 | /* length of a krb5 verifier (48), plus data added before arguments when | ||
71 | * using integrity (two 4-byte integers): */ | ||
72 | #define GSS_VERF_SLACK 56 | ||
73 | |||
74 | /* XXX: this define must match the one used by gssd, because it is | ||
75 | * passed to gssd to signal the use of machine creds. It should be | ||
76 | * part of the shared rpc interface. */ | ||
77 | |||
78 | #define CA_RUN_AS_MACHINE 0x00000200 | ||
79 | |||
80 | /* dump the buffer in `emacs-hexl' style */ | ||
/* True for 0x20..0x7e.  The argument is parenthesized so that an
 * expression argument (e.g. a conditional) is compared as a whole. */
#define isprint(c) (((c) > 0x1f) && ((c) < 0x7f))
82 | |||
83 | static DEFINE_RWLOCK(gss_ctx_lock); | ||
84 | |||
/*
 * RPCSEC_GSS authentication state.  The generic rpc_auth is embedded as
 * the first member and gss code recovers this structure from it with
 * container_of().  "upcalls" is the list of pending gss_upcall_msg
 * entries and is walked and modified under "lock".  "client" is passed
 * to gss_setup_upcall() and the inode of "dentry" is the one upcalls
 * are queued on with rpc_queue_upcall().
 */
85 | struct gss_auth { | ||
86 | struct rpc_auth rpc_auth; | ||
87 | struct gss_api_mech *mech; | ||
88 | enum rpc_gss_svc service; | ||
89 | struct list_head upcalls; | ||
90 | struct rpc_clnt *client; | ||
91 | struct dentry *dentry; | ||
92 | char path[48]; | ||
93 | spinlock_t lock; | ||
94 | }; | ||
95 | |||
96 | static void gss_destroy_ctx(struct gss_cl_ctx *); | ||
97 | static struct rpc_pipe_ops gss_upcall_ops; | ||
98 | |||
99 | void | ||
100 | print_hexl(u32 *p, u_int length, u_int offset) | ||
101 | { | ||
102 | u_int i, j, jm; | ||
103 | u8 c, *cp; | ||
104 | |||
105 | dprintk("RPC: print_hexl: length %d\n",length); | ||
106 | dprintk("\n"); | ||
107 | cp = (u8 *) p; | ||
108 | |||
109 | for (i = 0; i < length; i += 0x10) { | ||
110 | dprintk(" %04x: ", (u_int)(i + offset)); | ||
111 | jm = length - i; | ||
112 | jm = jm > 16 ? 16 : jm; | ||
113 | |||
114 | for (j = 0; j < jm; j++) { | ||
115 | if ((j % 2) == 1) | ||
116 | dprintk("%02x ", (u_int)cp[i+j]); | ||
117 | else | ||
118 | dprintk("%02x", (u_int)cp[i+j]); | ||
119 | } | ||
120 | for (; j < 16; j++) { | ||
121 | if ((j % 2) == 1) | ||
122 | dprintk(" "); | ||
123 | else | ||
124 | dprintk(" "); | ||
125 | } | ||
126 | dprintk(" "); | ||
127 | |||
128 | for (j = 0; j < jm; j++) { | ||
129 | c = cp[i+j]; | ||
130 | c = isprint(c) ? c : '.'; | ||
131 | dprintk("%c", c); | ||
132 | } | ||
133 | dprintk("\n"); | ||
134 | } | ||
135 | } | ||
136 | |||
137 | EXPORT_SYMBOL(print_hexl); | ||
138 | |||
139 | static inline struct gss_cl_ctx * | ||
140 | gss_get_ctx(struct gss_cl_ctx *ctx) | ||
141 | { | ||
142 | atomic_inc(&ctx->count); | ||
143 | return ctx; | ||
144 | } | ||
145 | |||
146 | static inline void | ||
147 | gss_put_ctx(struct gss_cl_ctx *ctx) | ||
148 | { | ||
149 | if (atomic_dec_and_test(&ctx->count)) | ||
150 | gss_destroy_ctx(ctx); | ||
151 | } | ||
152 | |||
153 | static void | ||
154 | gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) | ||
155 | { | ||
156 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); | ||
157 | struct gss_cl_ctx *old; | ||
158 | write_lock(&gss_ctx_lock); | ||
159 | old = gss_cred->gc_ctx; | ||
160 | gss_cred->gc_ctx = ctx; | ||
161 | cred->cr_flags |= RPCAUTH_CRED_UPTODATE; | ||
162 | write_unlock(&gss_ctx_lock); | ||
163 | if (old) | ||
164 | gss_put_ctx(old); | ||
165 | } | ||
166 | |||
167 | static int | ||
168 | gss_cred_is_uptodate_ctx(struct rpc_cred *cred) | ||
169 | { | ||
170 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); | ||
171 | int res = 0; | ||
172 | |||
173 | read_lock(&gss_ctx_lock); | ||
174 | if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) | ||
175 | res = 1; | ||
176 | read_unlock(&gss_ctx_lock); | ||
177 | return res; | ||
178 | } | ||
179 | |||
/*
 * Copy @len bytes from @p into @res, provided they lie within [p, end).
 * Returns a pointer just past the bytes consumed, or ERR_PTR(-EFAULT)
 * if the range would run beyond @end or wrap around.
 */
static const void *
simple_get_bytes(const void *p, const void *end, void *res, size_t len)
{
	const char *start = (const char *)p;
	const void *next = (const void *)(start + len);

	if (unlikely(next > end || next < p))
		return ERR_PTR(-EFAULT);

	memcpy(res, p, len);
	return next;
}
189 | |||
190 | static inline const void * | ||
191 | simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) | ||
192 | { | ||
193 | const void *q; | ||
194 | unsigned int len; | ||
195 | |||
196 | p = simple_get_bytes(p, end, &len, sizeof(len)); | ||
197 | if (IS_ERR(p)) | ||
198 | return p; | ||
199 | q = (const void *)((const char *)p + len); | ||
200 | if (unlikely(q > end || q < p)) | ||
201 | return ERR_PTR(-EFAULT); | ||
202 | dest->data = kmalloc(len, GFP_KERNEL); | ||
203 | if (unlikely(dest->data == NULL)) | ||
204 | return ERR_PTR(-ENOMEM); | ||
205 | dest->len = len; | ||
206 | memcpy(dest->data, p, len); | ||
207 | return q; | ||
208 | } | ||
209 | |||
210 | static struct gss_cl_ctx * | ||
211 | gss_cred_get_ctx(struct rpc_cred *cred) | ||
212 | { | ||
213 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); | ||
214 | struct gss_cl_ctx *ctx = NULL; | ||
215 | |||
216 | read_lock(&gss_ctx_lock); | ||
217 | if (gss_cred->gc_ctx) | ||
218 | ctx = gss_get_ctx(gss_cred->gc_ctx); | ||
219 | read_unlock(&gss_ctx_lock); | ||
220 | return ctx; | ||
221 | } | ||
222 | |||
223 | static struct gss_cl_ctx * | ||
224 | gss_alloc_context(void) | ||
225 | { | ||
226 | struct gss_cl_ctx *ctx; | ||
227 | |||
228 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
229 | if (ctx != NULL) { | ||
230 | memset(ctx, 0, sizeof(*ctx)); | ||
231 | ctx->gc_proc = RPC_GSS_PROC_DATA; | ||
232 | ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ | ||
233 | spin_lock_init(&ctx->gc_seq_lock); | ||
234 | atomic_set(&ctx->count,1); | ||
235 | } | ||
236 | return ctx; | ||
237 | } | ||
238 | |||
239 | #define GSSD_MIN_TIMEOUT (60 * 60) | ||
240 | static const void * | ||
241 | gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct gss_api_mech *gm) | ||
242 | { | ||
243 | const void *q; | ||
244 | unsigned int seclen; | ||
245 | unsigned int timeout; | ||
246 | u32 window_size; | ||
247 | int ret; | ||
248 | |||
249 | /* First unsigned int gives the lifetime (in seconds) of the cred */ | ||
250 | p = simple_get_bytes(p, end, &timeout, sizeof(timeout)); | ||
251 | if (IS_ERR(p)) | ||
252 | goto err; | ||
253 | if (timeout == 0) | ||
254 | timeout = GSSD_MIN_TIMEOUT; | ||
255 | ctx->gc_expiry = jiffies + (unsigned long)timeout * HZ * 3 / 4; | ||
256 | /* Sequence number window. Determines the maximum number of simultaneous requests */ | ||
257 | p = simple_get_bytes(p, end, &window_size, sizeof(window_size)); | ||
258 | if (IS_ERR(p)) | ||
259 | goto err; | ||
260 | ctx->gc_win = window_size; | ||
261 | /* gssd signals an error by passing ctx->gc_win = 0: */ | ||
262 | if (ctx->gc_win == 0) { | ||
263 | /* in which case, p points to an error code which we ignore */ | ||
264 | p = ERR_PTR(-EACCES); | ||
265 | goto err; | ||
266 | } | ||
267 | /* copy the opaque wire context */ | ||
268 | p = simple_get_netobj(p, end, &ctx->gc_wire_ctx); | ||
269 | if (IS_ERR(p)) | ||
270 | goto err; | ||
271 | /* import the opaque security context */ | ||
272 | p = simple_get_bytes(p, end, &seclen, sizeof(seclen)); | ||
273 | if (IS_ERR(p)) | ||
274 | goto err; | ||
275 | q = (const void *)((const char *)p + seclen); | ||
276 | if (unlikely(q > end || q < p)) { | ||
277 | p = ERR_PTR(-EFAULT); | ||
278 | goto err; | ||
279 | } | ||
280 | ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx); | ||
281 | if (ret < 0) { | ||
282 | p = ERR_PTR(ret); | ||
283 | goto err; | ||
284 | } | ||
285 | return q; | ||
286 | err: | ||
287 | dprintk("RPC: gss_fill_context returning %ld\n", -PTR_ERR(p)); | ||
288 | return p; | ||
289 | } | ||
290 | |||
291 | |||
/*
 * One pending upcall, looked up by uid (see __gss_find_upcall()).
 *   count         - reference count; gss_release_msg() frees the message
 *                   when it reaches zero
 *   msg           - pipe message queued with rpc_queue_upcall(); its data
 *                   pointer is set to the uid field by gss_alloc_msg()
 *   list          - linkage on the owning gss_auth's upcalls list
 *   rpc_waitqueue - RPC tasks put to sleep by gss_refresh_upcall()
 *   waitqueue     - processes waiting in gss_create_upcall()
 *   ctx           - when non-NULL, the context handed to waiters; the
 *                   reference on it is dropped when the message is freed
 */
292 | struct gss_upcall_msg { | ||
293 | atomic_t count; | ||
294 | uid_t uid; | ||
295 | struct rpc_pipe_msg msg; | ||
296 | struct list_head list; | ||
297 | struct gss_auth *auth; | ||
298 | struct rpc_wait_queue rpc_waitqueue; | ||
299 | wait_queue_head_t waitqueue; | ||
300 | struct gss_cl_ctx *ctx; | ||
301 | }; | ||
302 | |||
303 | static void | ||
304 | gss_release_msg(struct gss_upcall_msg *gss_msg) | ||
305 | { | ||
306 | if (!atomic_dec_and_test(&gss_msg->count)) | ||
307 | return; | ||
308 | BUG_ON(!list_empty(&gss_msg->list)); | ||
309 | if (gss_msg->ctx != NULL) | ||
310 | gss_put_ctx(gss_msg->ctx); | ||
311 | kfree(gss_msg); | ||
312 | } | ||
313 | |||
314 | static struct gss_upcall_msg * | ||
315 | __gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) | ||
316 | { | ||
317 | struct gss_upcall_msg *pos; | ||
318 | list_for_each_entry(pos, &gss_auth->upcalls, list) { | ||
319 | if (pos->uid != uid) | ||
320 | continue; | ||
321 | atomic_inc(&pos->count); | ||
322 | dprintk("RPC: gss_find_upcall found msg %p\n", pos); | ||
323 | return pos; | ||
324 | } | ||
325 | dprintk("RPC: gss_find_upcall found nothing\n"); | ||
326 | return NULL; | ||
327 | } | ||
328 | |||
329 | /* Try to add a upcall to the pipefs queue. | ||
330 | * If an upcall owned by our uid already exists, then we return a reference | ||
331 | * to that upcall instead of adding the new upcall. | ||
332 | */ | ||
333 | static inline struct gss_upcall_msg * | ||
334 | gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) | ||
335 | { | ||
336 | struct gss_upcall_msg *old; | ||
337 | |||
338 | spin_lock(&gss_auth->lock); | ||
339 | old = __gss_find_upcall(gss_auth, gss_msg->uid); | ||
340 | if (old == NULL) { | ||
341 | atomic_inc(&gss_msg->count); | ||
342 | list_add(&gss_msg->list, &gss_auth->upcalls); | ||
343 | } else | ||
344 | gss_msg = old; | ||
345 | spin_unlock(&gss_auth->lock); | ||
346 | return gss_msg; | ||
347 | } | ||
348 | |||
349 | static void | ||
350 | __gss_unhash_msg(struct gss_upcall_msg *gss_msg) | ||
351 | { | ||
352 | if (list_empty(&gss_msg->list)) | ||
353 | return; | ||
354 | list_del_init(&gss_msg->list); | ||
355 | rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); | ||
356 | wake_up_all(&gss_msg->waitqueue); | ||
357 | atomic_dec(&gss_msg->count); | ||
358 | } | ||
359 | |||
360 | static void | ||
361 | gss_unhash_msg(struct gss_upcall_msg *gss_msg) | ||
362 | { | ||
363 | struct gss_auth *gss_auth = gss_msg->auth; | ||
364 | |||
365 | spin_lock(&gss_auth->lock); | ||
366 | __gss_unhash_msg(gss_msg); | ||
367 | spin_unlock(&gss_auth->lock); | ||
368 | } | ||
369 | |||
370 | static void | ||
371 | gss_upcall_callback(struct rpc_task *task) | ||
372 | { | ||
373 | struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, | ||
374 | struct gss_cred, gc_base); | ||
375 | struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; | ||
376 | |||
377 | BUG_ON(gss_msg == NULL); | ||
378 | if (gss_msg->ctx) | ||
379 | gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); | ||
380 | else | ||
381 | task->tk_status = gss_msg->msg.errno; | ||
382 | spin_lock(&gss_msg->auth->lock); | ||
383 | gss_cred->gc_upcall = NULL; | ||
384 | rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); | ||
385 | spin_unlock(&gss_msg->auth->lock); | ||
386 | gss_release_msg(gss_msg); | ||
387 | } | ||
388 | |||
389 | static inline struct gss_upcall_msg * | ||
390 | gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) | ||
391 | { | ||
392 | struct gss_upcall_msg *gss_msg; | ||
393 | |||
394 | gss_msg = kmalloc(sizeof(*gss_msg), GFP_KERNEL); | ||
395 | if (gss_msg != NULL) { | ||
396 | memset(gss_msg, 0, sizeof(*gss_msg)); | ||
397 | INIT_LIST_HEAD(&gss_msg->list); | ||
398 | rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); | ||
399 | init_waitqueue_head(&gss_msg->waitqueue); | ||
400 | atomic_set(&gss_msg->count, 1); | ||
401 | gss_msg->msg.data = &gss_msg->uid; | ||
402 | gss_msg->msg.len = sizeof(gss_msg->uid); | ||
403 | gss_msg->uid = uid; | ||
404 | gss_msg->auth = gss_auth; | ||
405 | } | ||
406 | return gss_msg; | ||
407 | } | ||
408 | |||
409 | static struct gss_upcall_msg * | ||
410 | gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred) | ||
411 | { | ||
412 | struct gss_upcall_msg *gss_new, *gss_msg; | ||
413 | |||
414 | gss_new = gss_alloc_msg(gss_auth, cred->cr_uid); | ||
415 | if (gss_new == NULL) | ||
416 | return ERR_PTR(-ENOMEM); | ||
417 | gss_msg = gss_add_msg(gss_auth, gss_new); | ||
418 | if (gss_msg == gss_new) { | ||
419 | int res = rpc_queue_upcall(gss_auth->dentry->d_inode, &gss_new->msg); | ||
420 | if (res) { | ||
421 | gss_unhash_msg(gss_new); | ||
422 | gss_msg = ERR_PTR(res); | ||
423 | } | ||
424 | } else | ||
425 | gss_release_msg(gss_new); | ||
426 | return gss_msg; | ||
427 | } | ||
428 | |||
429 | static inline int | ||
430 | gss_refresh_upcall(struct rpc_task *task) | ||
431 | { | ||
432 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
433 | struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth, | ||
434 | struct gss_auth, rpc_auth); | ||
435 | struct gss_cred *gss_cred = container_of(cred, | ||
436 | struct gss_cred, gc_base); | ||
437 | struct gss_upcall_msg *gss_msg; | ||
438 | int err = 0; | ||
439 | |||
440 | dprintk("RPC: %4u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid); | ||
441 | gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); | ||
442 | if (IS_ERR(gss_msg)) { | ||
443 | err = PTR_ERR(gss_msg); | ||
444 | goto out; | ||
445 | } | ||
446 | spin_lock(&gss_auth->lock); | ||
447 | if (gss_cred->gc_upcall != NULL) | ||
448 | rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); | ||
449 | else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { | ||
450 | task->tk_timeout = 0; | ||
451 | gss_cred->gc_upcall = gss_msg; | ||
452 | /* gss_upcall_callback will release the reference to gss_upcall_msg */ | ||
453 | atomic_inc(&gss_msg->count); | ||
454 | rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); | ||
455 | } else | ||
456 | err = gss_msg->msg.errno; | ||
457 | spin_unlock(&gss_auth->lock); | ||
458 | gss_release_msg(gss_msg); | ||
459 | out: | ||
460 | dprintk("RPC: %4u gss_refresh_upcall for uid %u result %d\n", task->tk_pid, | ||
461 | cred->cr_uid, err); | ||
462 | return err; | ||
463 | } | ||
464 | |||
465 | static inline int | ||
466 | gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) | ||
467 | { | ||
468 | struct rpc_cred *cred = &gss_cred->gc_base; | ||
469 | struct gss_upcall_msg *gss_msg; | ||
470 | DEFINE_WAIT(wait); | ||
471 | int err = 0; | ||
472 | |||
473 | dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid); | ||
474 | gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); | ||
475 | if (IS_ERR(gss_msg)) { | ||
476 | err = PTR_ERR(gss_msg); | ||
477 | goto out; | ||
478 | } | ||
479 | for (;;) { | ||
480 | prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); | ||
481 | spin_lock(&gss_auth->lock); | ||
482 | if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { | ||
483 | spin_unlock(&gss_auth->lock); | ||
484 | break; | ||
485 | } | ||
486 | spin_unlock(&gss_auth->lock); | ||
487 | if (signalled()) { | ||
488 | err = -ERESTARTSYS; | ||
489 | goto out_intr; | ||
490 | } | ||
491 | schedule(); | ||
492 | } | ||
493 | if (gss_msg->ctx) | ||
494 | gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx)); | ||
495 | else | ||
496 | err = gss_msg->msg.errno; | ||
497 | out_intr: | ||
498 | finish_wait(&gss_msg->waitqueue, &wait); | ||
499 | gss_release_msg(gss_msg); | ||
500 | out: | ||
501 | dprintk("RPC: gss_create_upcall for uid %u result %d\n", cred->cr_uid, err); | ||
502 | return err; | ||
503 | } | ||
504 | |||
505 | static ssize_t | ||
506 | gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, | ||
507 | char __user *dst, size_t buflen) | ||
508 | { | ||
509 | char *data = (char *)msg->data + msg->copied; | ||
510 | ssize_t mlen = msg->len; | ||
511 | ssize_t left; | ||
512 | |||
513 | if (mlen > buflen) | ||
514 | mlen = buflen; | ||
515 | left = copy_to_user(dst, data, mlen); | ||
516 | if (left < 0) { | ||
517 | msg->errno = left; | ||
518 | return left; | ||
519 | } | ||
520 | mlen -= left; | ||
521 | msg->copied += mlen; | ||
522 | msg->errno = 0; | ||
523 | return mlen; | ||
524 | } | ||
525 | |||
526 | #define MSG_BUF_MAXSIZE 1024 | ||
527 | |||
528 | static ssize_t | ||
529 | gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | ||
530 | { | ||
531 | const void *p, *end; | ||
532 | void *buf; | ||
533 | struct rpc_clnt *clnt; | ||
534 | struct gss_auth *gss_auth; | ||
535 | struct rpc_cred *cred; | ||
536 | struct gss_upcall_msg *gss_msg; | ||
537 | struct gss_cl_ctx *ctx; | ||
538 | uid_t uid; | ||
539 | int err = -EFBIG; | ||
540 | |||
541 | if (mlen > MSG_BUF_MAXSIZE) | ||
542 | goto out; | ||
543 | err = -ENOMEM; | ||
544 | buf = kmalloc(mlen, GFP_KERNEL); | ||
545 | if (!buf) | ||
546 | goto out; | ||
547 | |||
548 | clnt = RPC_I(filp->f_dentry->d_inode)->private; | ||
549 | err = -EFAULT; | ||
550 | if (copy_from_user(buf, src, mlen)) | ||
551 | goto err; | ||
552 | |||
553 | end = (const void *)((char *)buf + mlen); | ||
554 | p = simple_get_bytes(buf, end, &uid, sizeof(uid)); | ||
555 | if (IS_ERR(p)) { | ||
556 | err = PTR_ERR(p); | ||
557 | goto err; | ||
558 | } | ||
559 | |||
560 | err = -ENOMEM; | ||
561 | ctx = gss_alloc_context(); | ||
562 | if (ctx == NULL) | ||
563 | goto err; | ||
564 | err = 0; | ||
565 | gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); | ||
566 | p = gss_fill_context(p, end, ctx, gss_auth->mech); | ||
567 | if (IS_ERR(p)) { | ||
568 | err = PTR_ERR(p); | ||
569 | if (err != -EACCES) | ||
570 | goto err_put_ctx; | ||
571 | } | ||
572 | spin_lock(&gss_auth->lock); | ||
573 | gss_msg = __gss_find_upcall(gss_auth, uid); | ||
574 | if (gss_msg) { | ||
575 | if (err == 0 && gss_msg->ctx == NULL) | ||
576 | gss_msg->ctx = gss_get_ctx(ctx); | ||
577 | gss_msg->msg.errno = err; | ||
578 | __gss_unhash_msg(gss_msg); | ||
579 | spin_unlock(&gss_auth->lock); | ||
580 | gss_release_msg(gss_msg); | ||
581 | } else { | ||
582 | struct auth_cred acred = { .uid = uid }; | ||
583 | spin_unlock(&gss_auth->lock); | ||
584 | cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, 0); | ||
585 | if (IS_ERR(cred)) { | ||
586 | err = PTR_ERR(cred); | ||
587 | goto err_put_ctx; | ||
588 | } | ||
589 | gss_cred_set_ctx(cred, gss_get_ctx(ctx)); | ||
590 | } | ||
591 | gss_put_ctx(ctx); | ||
592 | kfree(buf); | ||
593 | dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen); | ||
594 | return mlen; | ||
595 | err_put_ctx: | ||
596 | gss_put_ctx(ctx); | ||
597 | err: | ||
598 | kfree(buf); | ||
599 | out: | ||
600 | dprintk("RPC: gss_pipe_downcall returning %d\n", err); | ||
601 | return err; | ||
602 | } | ||
603 | |||
604 | static void | ||
605 | gss_pipe_release(struct inode *inode) | ||
606 | { | ||
607 | struct rpc_inode *rpci = RPC_I(inode); | ||
608 | struct rpc_clnt *clnt; | ||
609 | struct rpc_auth *auth; | ||
610 | struct gss_auth *gss_auth; | ||
611 | |||
612 | clnt = rpci->private; | ||
613 | auth = clnt->cl_auth; | ||
614 | gss_auth = container_of(auth, struct gss_auth, rpc_auth); | ||
615 | spin_lock(&gss_auth->lock); | ||
616 | while (!list_empty(&gss_auth->upcalls)) { | ||
617 | struct gss_upcall_msg *gss_msg; | ||
618 | |||
619 | gss_msg = list_entry(gss_auth->upcalls.next, | ||
620 | struct gss_upcall_msg, list); | ||
621 | gss_msg->msg.errno = -EPIPE; | ||
622 | atomic_inc(&gss_msg->count); | ||
623 | __gss_unhash_msg(gss_msg); | ||
624 | spin_unlock(&gss_auth->lock); | ||
625 | gss_release_msg(gss_msg); | ||
626 | spin_lock(&gss_auth->lock); | ||
627 | } | ||
628 | spin_unlock(&gss_auth->lock); | ||
629 | } | ||
630 | |||
631 | static void | ||
632 | gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
633 | { | ||
634 | struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); | ||
635 | static unsigned long ratelimit; | ||
636 | |||
637 | if (msg->errno < 0) { | ||
638 | dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n", | ||
639 | gss_msg); | ||
640 | atomic_inc(&gss_msg->count); | ||
641 | gss_unhash_msg(gss_msg); | ||
642 | if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) { | ||
643 | unsigned long now = jiffies; | ||
644 | if (time_after(now, ratelimit)) { | ||
645 | printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n" | ||
646 | "Please check user daemon is running!\n"); | ||
647 | ratelimit = now + 15*HZ; | ||
648 | } | ||
649 | } | ||
650 | gss_release_msg(gss_msg); | ||
651 | } | ||
652 | } | ||
653 | |||
654 | /* | ||
655 | * NOTE: we have the opportunity to use different | ||
656 | * parameters based on the input flavor (which must be a pseudoflavor) | ||
657 | */ | ||
658 | static struct rpc_auth * | ||
659 | gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) | ||
660 | { | ||
661 | struct gss_auth *gss_auth; | ||
662 | struct rpc_auth * auth; | ||
663 | |||
664 | dprintk("RPC: creating GSS authenticator for client %p\n",clnt); | ||
665 | |||
666 | if (!try_module_get(THIS_MODULE)) | ||
667 | return NULL; | ||
668 | if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) | ||
669 | goto out_dec; | ||
670 | gss_auth->client = clnt; | ||
671 | gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); | ||
672 | if (!gss_auth->mech) { | ||
673 | printk(KERN_WARNING "%s: Pseudoflavor %d not found!", | ||
674 | __FUNCTION__, flavor); | ||
675 | goto err_free; | ||
676 | } | ||
677 | gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); | ||
678 | /* FIXME: Will go away once privacy support is merged in */ | ||
679 | if (gss_auth->service == RPC_GSS_SVC_PRIVACY) | ||
680 | gss_auth->service = RPC_GSS_SVC_INTEGRITY; | ||
681 | INIT_LIST_HEAD(&gss_auth->upcalls); | ||
682 | spin_lock_init(&gss_auth->lock); | ||
683 | auth = &gss_auth->rpc_auth; | ||
684 | auth->au_cslack = GSS_CRED_SLACK >> 2; | ||
685 | auth->au_rslack = GSS_VERF_SLACK >> 2; | ||
686 | auth->au_ops = &authgss_ops; | ||
687 | auth->au_flavor = flavor; | ||
688 | atomic_set(&auth->au_count, 1); | ||
689 | |||
690 | if (rpcauth_init_credcache(auth, GSS_CRED_EXPIRE) < 0) | ||
691 | goto err_put_mech; | ||
692 | |||
693 | snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", | ||
694 | clnt->cl_pathname, | ||
695 | gss_auth->mech->gm_name); | ||
696 | gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); | ||
697 | if (IS_ERR(gss_auth->dentry)) | ||
698 | goto err_put_mech; | ||
699 | |||
700 | return auth; | ||
701 | err_put_mech: | ||
702 | gss_mech_put(gss_auth->mech); | ||
703 | err_free: | ||
704 | kfree(gss_auth); | ||
705 | out_dec: | ||
706 | module_put(THIS_MODULE); | ||
707 | return NULL; | ||
708 | } | ||
709 | |||
710 | static void | ||
711 | gss_destroy(struct rpc_auth *auth) | ||
712 | { | ||
713 | struct gss_auth *gss_auth; | ||
714 | |||
715 | dprintk("RPC: destroying GSS authenticator %p flavor %d\n", | ||
716 | auth, auth->au_flavor); | ||
717 | |||
718 | gss_auth = container_of(auth, struct gss_auth, rpc_auth); | ||
719 | rpc_unlink(gss_auth->path); | ||
720 | gss_mech_put(gss_auth->mech); | ||
721 | |||
722 | rpcauth_free_credcache(auth); | ||
723 | kfree(gss_auth); | ||
724 | module_put(THIS_MODULE); | ||
725 | } | ||
726 | |||
727 | /* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure | ||
728 | * to create a new cred or context, so they check that things have been | ||
729 | * allocated before freeing them. */ | ||
730 | static void | ||
731 | gss_destroy_ctx(struct gss_cl_ctx *ctx) | ||
732 | { | ||
733 | dprintk("RPC: gss_destroy_ctx\n"); | ||
734 | |||
735 | if (ctx->gc_gss_ctx) | ||
736 | gss_delete_sec_context(&ctx->gc_gss_ctx); | ||
737 | |||
738 | kfree(ctx->gc_wire_ctx.data); | ||
739 | kfree(ctx); | ||
740 | } | ||
741 | |||
742 | static void | ||
743 | gss_destroy_cred(struct rpc_cred *rc) | ||
744 | { | ||
745 | struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base); | ||
746 | |||
747 | dprintk("RPC: gss_destroy_cred \n"); | ||
748 | |||
749 | if (cred->gc_ctx) | ||
750 | gss_put_ctx(cred->gc_ctx); | ||
751 | kfree(cred); | ||
752 | } | ||
753 | |||
/*
 * Look up the RPCSEC_GSS cred for the current process.
 * This simply delegates to the generic credential cache lookup.
 */
static struct rpc_cred *
gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
{
	struct rpc_cred *found;

	found = rpcauth_lookup_credcache(auth, acred, taskflags);
	return found;
}
762 | |||
763 | static struct rpc_cred * | ||
764 | gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags) | ||
765 | { | ||
766 | struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); | ||
767 | struct gss_cred *cred = NULL; | ||
768 | int err = -ENOMEM; | ||
769 | |||
770 | dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", | ||
771 | acred->uid, auth->au_flavor); | ||
772 | |||
773 | if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) | ||
774 | goto out_err; | ||
775 | |||
776 | memset(cred, 0, sizeof(*cred)); | ||
777 | atomic_set(&cred->gc_count, 1); | ||
778 | cred->gc_uid = acred->uid; | ||
779 | /* | ||
780 | * Note: in order to force a call to call_refresh(), we deliberately | ||
781 | * fail to flag the credential as RPCAUTH_CRED_UPTODATE. | ||
782 | */ | ||
783 | cred->gc_flags = 0; | ||
784 | cred->gc_base.cr_ops = &gss_credops; | ||
785 | cred->gc_service = gss_auth->service; | ||
786 | err = gss_create_upcall(gss_auth, cred); | ||
787 | if (err < 0) | ||
788 | goto out_err; | ||
789 | |||
790 | return &cred->gc_base; | ||
791 | |||
792 | out_err: | ||
793 | dprintk("RPC: gss_create_cred failed with error %d\n", err); | ||
794 | if (cred) gss_destroy_cred(&cred->gc_base); | ||
795 | return ERR_PTR(err); | ||
796 | } | ||
797 | |||
798 | static int | ||
799 | gss_match(struct auth_cred *acred, struct rpc_cred *rc, int taskflags) | ||
800 | { | ||
801 | struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); | ||
802 | |||
803 | /* Don't match with creds that have expired. */ | ||
804 | if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) | ||
805 | return 0; | ||
806 | return (rc->cr_uid == acred->uid); | ||
807 | } | ||
808 | |||
809 | /* | ||
810 | * Marshal credentials. | ||
811 | * Maybe we should keep a cached credential for performance reasons. | ||
812 | */ | ||
813 | static u32 * | ||
814 | gss_marshal(struct rpc_task *task, u32 *p) | ||
815 | { | ||
816 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
817 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, | ||
818 | gc_base); | ||
819 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); | ||
820 | u32 *cred_len; | ||
821 | struct rpc_rqst *req = task->tk_rqstp; | ||
822 | u32 maj_stat = 0; | ||
823 | struct xdr_netobj mic; | ||
824 | struct kvec iov; | ||
825 | struct xdr_buf verf_buf; | ||
826 | |||
827 | dprintk("RPC: %4u gss_marshal\n", task->tk_pid); | ||
828 | |||
829 | *p++ = htonl(RPC_AUTH_GSS); | ||
830 | cred_len = p++; | ||
831 | |||
832 | spin_lock(&ctx->gc_seq_lock); | ||
833 | req->rq_seqno = ctx->gc_seq++; | ||
834 | spin_unlock(&ctx->gc_seq_lock); | ||
835 | |||
836 | *p++ = htonl((u32) RPC_GSS_VERSION); | ||
837 | *p++ = htonl((u32) ctx->gc_proc); | ||
838 | *p++ = htonl((u32) req->rq_seqno); | ||
839 | *p++ = htonl((u32) gss_cred->gc_service); | ||
840 | p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); | ||
841 | *cred_len = htonl((p - (cred_len + 1)) << 2); | ||
842 | |||
843 | /* We compute the checksum for the verifier over the xdr-encoded bytes | ||
844 | * starting with the xid and ending at the end of the credential: */ | ||
845 | iov.iov_base = req->rq_snd_buf.head[0].iov_base; | ||
846 | if (task->tk_client->cl_xprt->stream) | ||
847 | /* See clnt.c:call_header() */ | ||
848 | iov.iov_base += 4; | ||
849 | iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; | ||
850 | xdr_buf_from_iov(&iov, &verf_buf); | ||
851 | |||
852 | /* set verifier flavor*/ | ||
853 | *p++ = htonl(RPC_AUTH_GSS); | ||
854 | |||
855 | mic.data = (u8 *)(p + 1); | ||
856 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, | ||
857 | GSS_C_QOP_DEFAULT, | ||
858 | &verf_buf, &mic); | ||
859 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) { | ||
860 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
861 | } else if (maj_stat != 0) { | ||
862 | printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); | ||
863 | goto out_put_ctx; | ||
864 | } | ||
865 | p = xdr_encode_opaque(p, NULL, mic.len); | ||
866 | gss_put_ctx(ctx); | ||
867 | return p; | ||
868 | out_put_ctx: | ||
869 | gss_put_ctx(ctx); | ||
870 | return NULL; | ||
871 | } | ||
872 | |||
873 | /* | ||
874 | * Refresh credentials. XXX - finish | ||
875 | */ | ||
876 | static int | ||
877 | gss_refresh(struct rpc_task *task) | ||
878 | { | ||
879 | |||
880 | if (!gss_cred_is_uptodate_ctx(task->tk_msg.rpc_cred)) | ||
881 | return gss_refresh_upcall(task); | ||
882 | return 0; | ||
883 | } | ||
884 | |||
885 | static u32 * | ||
886 | gss_validate(struct rpc_task *task, u32 *p) | ||
887 | { | ||
888 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
889 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, | ||
890 | gc_base); | ||
891 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); | ||
892 | u32 seq, qop_state; | ||
893 | struct kvec iov; | ||
894 | struct xdr_buf verf_buf; | ||
895 | struct xdr_netobj mic; | ||
896 | u32 flav,len; | ||
897 | u32 maj_stat; | ||
898 | |||
899 | dprintk("RPC: %4u gss_validate\n", task->tk_pid); | ||
900 | |||
901 | flav = ntohl(*p++); | ||
902 | if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) | ||
903 | goto out_bad; | ||
904 | if (flav != RPC_AUTH_GSS) | ||
905 | goto out_bad; | ||
906 | seq = htonl(task->tk_rqstp->rq_seqno); | ||
907 | iov.iov_base = &seq; | ||
908 | iov.iov_len = sizeof(seq); | ||
909 | xdr_buf_from_iov(&iov, &verf_buf); | ||
910 | mic.data = (u8 *)p; | ||
911 | mic.len = len; | ||
912 | |||
913 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state); | ||
914 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | ||
915 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
916 | if (maj_stat) | ||
917 | goto out_bad; | ||
918 | switch (gss_cred->gc_service) { | ||
919 | case RPC_GSS_SVC_NONE: | ||
920 | /* verifier data, flavor, length: */ | ||
921 | task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; | ||
922 | break; | ||
923 | case RPC_GSS_SVC_INTEGRITY: | ||
924 | /* verifier data, flavor, length, length, sequence number: */ | ||
925 | task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; | ||
926 | break; | ||
927 | case RPC_GSS_SVC_PRIVACY: | ||
928 | goto out_bad; | ||
929 | } | ||
930 | gss_put_ctx(ctx); | ||
931 | dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", | ||
932 | task->tk_pid); | ||
933 | return p + XDR_QUADLEN(len); | ||
934 | out_bad: | ||
935 | gss_put_ctx(ctx); | ||
936 | dprintk("RPC: %4u gss_validate failed.\n", task->tk_pid); | ||
937 | return NULL; | ||
938 | } | ||
939 | |||
940 | static inline int | ||
941 | gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | ||
942 | kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) | ||
943 | { | ||
944 | struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; | ||
945 | struct xdr_buf integ_buf; | ||
946 | u32 *integ_len = NULL; | ||
947 | struct xdr_netobj mic; | ||
948 | u32 offset, *q; | ||
949 | struct kvec *iov; | ||
950 | u32 maj_stat = 0; | ||
951 | int status = -EIO; | ||
952 | |||
953 | integ_len = p++; | ||
954 | offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; | ||
955 | *p++ = htonl(rqstp->rq_seqno); | ||
956 | |||
957 | status = encode(rqstp, p, obj); | ||
958 | if (status) | ||
959 | return status; | ||
960 | |||
961 | if (xdr_buf_subsegment(snd_buf, &integ_buf, | ||
962 | offset, snd_buf->len - offset)) | ||
963 | return status; | ||
964 | *integ_len = htonl(integ_buf.len); | ||
965 | |||
966 | /* guess whether we're in the head or the tail: */ | ||
967 | if (snd_buf->page_len || snd_buf->tail[0].iov_len) | ||
968 | iov = snd_buf->tail; | ||
969 | else | ||
970 | iov = snd_buf->head; | ||
971 | p = iov->iov_base + iov->iov_len; | ||
972 | mic.data = (u8 *)(p + 1); | ||
973 | |||
974 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, | ||
975 | GSS_C_QOP_DEFAULT, &integ_buf, &mic); | ||
976 | status = -EIO; /* XXX? */ | ||
977 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | ||
978 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
979 | else if (maj_stat) | ||
980 | return status; | ||
981 | q = xdr_encode_opaque(p, NULL, mic.len); | ||
982 | |||
983 | offset = (u8 *)q - (u8 *)p; | ||
984 | iov->iov_len += offset; | ||
985 | snd_buf->len += offset; | ||
986 | return 0; | ||
987 | } | ||
988 | |||
989 | static int | ||
990 | gss_wrap_req(struct rpc_task *task, | ||
991 | kxdrproc_t encode, void *rqstp, u32 *p, void *obj) | ||
992 | { | ||
993 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
994 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, | ||
995 | gc_base); | ||
996 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); | ||
997 | int status = -EIO; | ||
998 | |||
999 | dprintk("RPC: %4u gss_wrap_req\n", task->tk_pid); | ||
1000 | if (ctx->gc_proc != RPC_GSS_PROC_DATA) { | ||
1001 | /* The spec seems a little ambiguous here, but I think that not | ||
1002 | * wrapping context destruction requests makes the most sense. | ||
1003 | */ | ||
1004 | status = encode(rqstp, p, obj); | ||
1005 | goto out; | ||
1006 | } | ||
1007 | switch (gss_cred->gc_service) { | ||
1008 | case RPC_GSS_SVC_NONE: | ||
1009 | status = encode(rqstp, p, obj); | ||
1010 | break; | ||
1011 | case RPC_GSS_SVC_INTEGRITY: | ||
1012 | status = gss_wrap_req_integ(cred, ctx, encode, | ||
1013 | rqstp, p, obj); | ||
1014 | break; | ||
1015 | case RPC_GSS_SVC_PRIVACY: | ||
1016 | break; | ||
1017 | } | ||
1018 | out: | ||
1019 | gss_put_ctx(ctx); | ||
1020 | dprintk("RPC: %4u gss_wrap_req returning %d\n", task->tk_pid, status); | ||
1021 | return status; | ||
1022 | } | ||
1023 | |||
1024 | static inline int | ||
1025 | gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | ||
1026 | struct rpc_rqst *rqstp, u32 **p) | ||
1027 | { | ||
1028 | struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; | ||
1029 | struct xdr_buf integ_buf; | ||
1030 | struct xdr_netobj mic; | ||
1031 | u32 data_offset, mic_offset; | ||
1032 | u32 integ_len; | ||
1033 | u32 maj_stat; | ||
1034 | int status = -EIO; | ||
1035 | |||
1036 | integ_len = ntohl(*(*p)++); | ||
1037 | if (integ_len & 3) | ||
1038 | return status; | ||
1039 | data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; | ||
1040 | mic_offset = integ_len + data_offset; | ||
1041 | if (mic_offset > rcv_buf->len) | ||
1042 | return status; | ||
1043 | if (ntohl(*(*p)++) != rqstp->rq_seqno) | ||
1044 | return status; | ||
1045 | |||
1046 | if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, | ||
1047 | mic_offset - data_offset)) | ||
1048 | return status; | ||
1049 | |||
1050 | if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) | ||
1051 | return status; | ||
1052 | |||
1053 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, | ||
1054 | &mic, NULL); | ||
1055 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | ||
1056 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
1057 | if (maj_stat != GSS_S_COMPLETE) | ||
1058 | return status; | ||
1059 | return 0; | ||
1060 | } | ||
1061 | |||
1062 | static int | ||
1063 | gss_unwrap_resp(struct rpc_task *task, | ||
1064 | kxdrproc_t decode, void *rqstp, u32 *p, void *obj) | ||
1065 | { | ||
1066 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | ||
1067 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, | ||
1068 | gc_base); | ||
1069 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); | ||
1070 | int status = -EIO; | ||
1071 | |||
1072 | if (ctx->gc_proc != RPC_GSS_PROC_DATA) | ||
1073 | goto out_decode; | ||
1074 | switch (gss_cred->gc_service) { | ||
1075 | case RPC_GSS_SVC_NONE: | ||
1076 | break; | ||
1077 | case RPC_GSS_SVC_INTEGRITY: | ||
1078 | status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p); | ||
1079 | if (status) | ||
1080 | goto out; | ||
1081 | break; | ||
1082 | case RPC_GSS_SVC_PRIVACY: | ||
1083 | break; | ||
1084 | } | ||
1085 | out_decode: | ||
1086 | status = decode(rqstp, p, obj); | ||
1087 | out: | ||
1088 | gss_put_ctx(ctx); | ||
1089 | dprintk("RPC: %4u gss_unwrap_resp returning %d\n", task->tk_pid, | ||
1090 | status); | ||
1091 | return status; | ||
1092 | } | ||
1093 | |||
1094 | static struct rpc_authops authgss_ops = { | ||
1095 | .owner = THIS_MODULE, | ||
1096 | .au_flavor = RPC_AUTH_GSS, | ||
1097 | #ifdef RPC_DEBUG | ||
1098 | .au_name = "RPCSEC_GSS", | ||
1099 | #endif | ||
1100 | .create = gss_create, | ||
1101 | .destroy = gss_destroy, | ||
1102 | .lookup_cred = gss_lookup_cred, | ||
1103 | .crcreate = gss_create_cred | ||
1104 | }; | ||
1105 | |||
1106 | static struct rpc_credops gss_credops = { | ||
1107 | .cr_name = "AUTH_GSS", | ||
1108 | .crdestroy = gss_destroy_cred, | ||
1109 | .crmatch = gss_match, | ||
1110 | .crmarshal = gss_marshal, | ||
1111 | .crrefresh = gss_refresh, | ||
1112 | .crvalidate = gss_validate, | ||
1113 | .crwrap_req = gss_wrap_req, | ||
1114 | .crunwrap_resp = gss_unwrap_resp, | ||
1115 | }; | ||
1116 | |||
1117 | static struct rpc_pipe_ops gss_upcall_ops = { | ||
1118 | .upcall = gss_pipe_upcall, | ||
1119 | .downcall = gss_pipe_downcall, | ||
1120 | .destroy_msg = gss_pipe_destroy_msg, | ||
1121 | .release_pipe = gss_pipe_release, | ||
1122 | }; | ||
1123 | |||
1124 | /* | ||
1125 | * Initialize RPCSEC_GSS module | ||
1126 | */ | ||
1127 | static int __init init_rpcsec_gss(void) | ||
1128 | { | ||
1129 | int err = 0; | ||
1130 | |||
1131 | err = rpcauth_register(&authgss_ops); | ||
1132 | if (err) | ||
1133 | goto out; | ||
1134 | err = gss_svc_init(); | ||
1135 | if (err) | ||
1136 | goto out_unregister; | ||
1137 | return 0; | ||
1138 | out_unregister: | ||
1139 | rpcauth_unregister(&authgss_ops); | ||
1140 | out: | ||
1141 | return err; | ||
1142 | } | ||
1143 | |||
1144 | static void __exit exit_rpcsec_gss(void) | ||
1145 | { | ||
1146 | gss_svc_shutdown(); | ||
1147 | rpcauth_unregister(&authgss_ops); | ||
1148 | } | ||
1149 | |||
1150 | MODULE_LICENSE("GPL"); | ||
1151 | module_init(init_rpcsec_gss) | ||
1152 | module_exit(exit_rpcsec_gss) | ||
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c new file mode 100644 index 000000000000..826df44e7fca --- /dev/null +++ b/net/sunrpc/auth_gss/gss_generic_token.c | |||
@@ -0,0 +1,235 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss/gss_generic_token.c | ||
3 | * | ||
4 | * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c | ||
5 | * | ||
6 | * Copyright (c) 2000 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@umich.edu> | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * Copyright 1993 by OpenVision Technologies, Inc. | ||
14 | * | ||
15 | * Permission to use, copy, modify, distribute, and sell this software | ||
16 | * and its documentation for any purpose is hereby granted without fee, | ||
17 | * provided that the above copyright notice appears in all copies and | ||
18 | * that both that copyright notice and this permission notice appear in | ||
19 | * supporting documentation, and that the name of OpenVision not be used | ||
20 | * in advertising or publicity pertaining to distribution of the software | ||
21 | * without specific, written prior permission. OpenVision makes no | ||
22 | * representations about the suitability of this software for any | ||
23 | * purpose. It is provided "as is" without express or implied warranty. | ||
24 | * | ||
25 | * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | ||
26 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | ||
27 | * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR | ||
28 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | ||
29 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | ||
30 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
31 | * PERFORMANCE OF THIS SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/types.h> | ||
35 | #include <linux/module.h> | ||
36 | #include <linux/slab.h> | ||
37 | #include <linux/string.h> | ||
38 | #include <linux/sunrpc/sched.h> | ||
39 | #include <linux/sunrpc/gss_asn1.h> | ||
40 | |||
41 | |||
42 | #ifdef RPC_DEBUG | ||
43 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
44 | #endif | ||
45 | |||
46 | |||
/*
 * TWRITE_STR from gssapiP_generic.h
 *
 * Copy len bytes from str to ptr and advance ptr past them. Wrapped in
 * do { } while (0) so that the two statements stay together when the macro
 * is used as the body of an unbraced if/else or loop. Note that ptr and
 * len are each evaluated twice.
 */
#define TWRITE_STR(ptr, str, len) \
	do { \
		memcpy((ptr), (char *) (str), (len)); \
		(ptr) += (len); \
	} while (0)
51 | |||
52 | /* XXXX this code currently makes the assumption that a mech oid will | ||
53 | never be longer than 127 bytes. This assumption is not inherent in | ||
54 | the interfaces, so the code can be fixed if the OSI namespace | ||
55 | balloons unexpectedly. */ | ||
56 | |||
57 | /* Each token looks like this: | ||
58 | |||
59 | 0x60 tag for APPLICATION 0, SEQUENCE | ||
60 | (constructed, definite-length) | ||
61 | <length> possible multiple bytes, need to parse/generate | ||
62 | 0x06 tag for OBJECT IDENTIFIER | ||
63 | <moid_length> compile-time constant string (assume 1 byte) | ||
64 | <moid_bytes> compile-time constant string | ||
65 | <inner_bytes> the ANY containing the application token | ||
66 | bytes 0,1 are the token type | ||
67 | bytes 2,n are the token data | ||
68 | |||
69 | For the purposes of this abstraction, the token "header" consists of | ||
70 | the sequence tag and length octets, the mech OID DER encoding, and the | ||
71 | first two inner bytes, which indicate the token type. The token | ||
72 | "body" consists of everything else. | ||
73 | |||
74 | */ | ||
75 | |||
/*
 * Number of octets needed to DER-encode @length: one for the short form
 * (below 128), otherwise one prefix octet plus the value octets.
 */
static int
der_length_size(int length)
{
	if (length < (1<<7))
		return 1;
	if (length < (1<<8))
		return 2;
#if (SIZEOF_INT == 2)
	return 3;
#else
	if (length < (1<<16))
		return 3;
	if (length < (1<<24))
		return 4;
	return 5;
#endif
}
95 | |||
/*
 * Write the DER encoding of @length at *@buf and advance *@buf past it.
 * Lengths below 128 are written as a single octet; larger ones as a prefix
 * octet of (der_length_size(length) + 127) followed by the value octets,
 * most significant first.
 */
static void
der_write_length(unsigned char **buf, int length)
{
	unsigned char *out = *buf;

	if (length < (1<<7)) {
		/* short form */
		*out++ = (unsigned char) length;
		*buf = out;
		return;
	}

	/* long form: prefix octet, then the value octets */
	*out++ = (unsigned char) (der_length_size(length)+127);
#if (SIZEOF_INT > 2)
	if (length >= (1<<24))
		*out++ = (unsigned char) (length>>24);
	if (length >= (1<<16))
		*out++ = (unsigned char) ((length>>16)&0xff);
#endif
	if (length >= (1<<8))
		*out++ = (unsigned char) ((length>>8)&0xff);
	*out++ = (unsigned char) (length&0xff);
	*buf = out;
}
114 | |||
115 | /* returns decoded length, or < 0 on failure. Advances buf and | ||
116 | decrements bufsize */ | ||
117 | |||
118 | static int | ||
119 | der_read_length(unsigned char **buf, int *bufsize) | ||
120 | { | ||
121 | unsigned char sf; | ||
122 | int ret; | ||
123 | |||
124 | if (*bufsize < 1) | ||
125 | return(-1); | ||
126 | sf = *(*buf)++; | ||
127 | (*bufsize)--; | ||
128 | if (sf & 0x80) { | ||
129 | if ((sf &= 0x7f) > ((*bufsize)-1)) | ||
130 | return(-1); | ||
131 | if (sf > SIZEOF_INT) | ||
132 | return (-1); | ||
133 | ret = 0; | ||
134 | for (; sf; sf--) { | ||
135 | ret = (ret<<8) + (*(*buf)++); | ||
136 | (*bufsize)--; | ||
137 | } | ||
138 | } else { | ||
139 | ret = sf; | ||
140 | } | ||
141 | |||
142 | return(ret); | ||
143 | } | ||
144 | |||
145 | /* returns the length of a token, given the mech oid and the body size */ | ||
146 | |||
147 | int | ||
148 | g_token_size(struct xdr_netobj *mech, unsigned int body_size) | ||
149 | { | ||
150 | /* set body_size to sequence contents size */ | ||
151 | body_size += 4 + (int) mech->len; /* NEED overflow check */ | ||
152 | return(1 + der_length_size(body_size) + body_size); | ||
153 | } | ||
154 | |||
155 | EXPORT_SYMBOL(g_token_size); | ||
156 | |||
157 | /* fills in a buffer with the token header. The buffer is assumed to | ||
158 | be the right size. buf is advanced past the token header */ | ||
159 | |||
160 | void | ||
161 | g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf) | ||
162 | { | ||
163 | *(*buf)++ = 0x60; | ||
164 | der_write_length(buf, 4 + mech->len + body_size); | ||
165 | *(*buf)++ = 0x06; | ||
166 | *(*buf)++ = (unsigned char) mech->len; | ||
167 | TWRITE_STR(*buf, mech->data, ((int) mech->len)); | ||
168 | } | ||
169 | |||
170 | EXPORT_SYMBOL(g_make_token_header); | ||
171 | |||
172 | /* | ||
173 | * Given a buffer containing a token, reads and verifies the token, | ||
174 | * leaving buf advanced past the token header, and setting body_size | ||
175 | * to the number of remaining bytes. Returns 0 on success, | ||
176 | * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the | ||
177 | * mechanism in the token does not match the mech argument. buf and | ||
178 | * *body_size are left unmodified on error. | ||
179 | */ | ||
180 | u32 | ||
181 | g_verify_token_header(struct xdr_netobj *mech, int *body_size, | ||
182 | unsigned char **buf_in, int toksize) | ||
183 | { | ||
184 | unsigned char *buf = *buf_in; | ||
185 | int seqsize; | ||
186 | struct xdr_netobj toid; | ||
187 | int ret = 0; | ||
188 | |||
189 | if ((toksize-=1) < 0) | ||
190 | return(G_BAD_TOK_HEADER); | ||
191 | if (*buf++ != 0x60) | ||
192 | return(G_BAD_TOK_HEADER); | ||
193 | |||
194 | if ((seqsize = der_read_length(&buf, &toksize)) < 0) | ||
195 | return(G_BAD_TOK_HEADER); | ||
196 | |||
197 | if (seqsize != toksize) | ||
198 | return(G_BAD_TOK_HEADER); | ||
199 | |||
200 | if ((toksize-=1) < 0) | ||
201 | return(G_BAD_TOK_HEADER); | ||
202 | if (*buf++ != 0x06) | ||
203 | return(G_BAD_TOK_HEADER); | ||
204 | |||
205 | if ((toksize-=1) < 0) | ||
206 | return(G_BAD_TOK_HEADER); | ||
207 | toid.len = *buf++; | ||
208 | |||
209 | if ((toksize-=toid.len) < 0) | ||
210 | return(G_BAD_TOK_HEADER); | ||
211 | toid.data = buf; | ||
212 | buf+=toid.len; | ||
213 | |||
214 | if (! g_OID_equal(&toid, mech)) | ||
215 | ret = G_WRONG_MECH; | ||
216 | |||
217 | /* G_WRONG_MECH is not returned immediately because it's more important | ||
218 | to return G_BAD_TOK_HEADER if the token header is in fact bad */ | ||
219 | |||
220 | if ((toksize-=2) < 0) | ||
221 | return(G_BAD_TOK_HEADER); | ||
222 | |||
223 | if (ret) | ||
224 | return(ret); | ||
225 | |||
226 | if (!ret) { | ||
227 | *buf_in = buf; | ||
228 | *body_size = toksize; | ||
229 | } | ||
230 | |||
231 | return(ret); | ||
232 | } | ||
233 | |||
234 | EXPORT_SYMBOL(g_verify_token_header); | ||
235 | |||
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c new file mode 100644 index 000000000000..24c21f2a33a7 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c | |||
@@ -0,0 +1,209 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss/gss_krb5_crypto.c | ||
3 | * | ||
4 | * Copyright (c) 2000 The Regents of the University of Michigan. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Andy Adamson <andros@umich.edu> | ||
8 | * Bruce Fields <bfields@umich.edu> | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * Copyright (C) 1998 by the FundsXpress, INC. | ||
13 | * | ||
14 | * All rights reserved. | ||
15 | * | ||
16 | * Export of this software from the United States of America may require | ||
17 | * a specific license from the United States Government. It is the | ||
18 | * responsibility of any person or organization contemplating export to | ||
19 | * obtain such a license before exporting. | ||
20 | * | ||
21 | * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and | ||
22 | * distribute this software and its documentation for any purpose and | ||
23 | * without fee is hereby granted, provided that the above copyright | ||
24 | * notice appear in all copies and that both that copyright notice and | ||
25 | * this permission notice appear in supporting documentation, and that | ||
26 | * the name of FundsXpress. not be used in advertising or publicity pertaining | ||
27 | * to distribution of the software without specific, written prior | ||
28 | * permission. FundsXpress makes no representations about the suitability of | ||
29 | * this software for any purpose. It is provided "as is" without express | ||
30 | * or implied warranty. | ||
31 | * | ||
32 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | ||
33 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | ||
34 | * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | ||
35 | */ | ||
36 | |||
37 | #include <linux/types.h> | ||
38 | #include <linux/mm.h> | ||
39 | #include <linux/slab.h> | ||
40 | #include <asm/scatterlist.h> | ||
41 | #include <linux/crypto.h> | ||
42 | #include <linux/highmem.h> | ||
43 | #include <linux/pagemap.h> | ||
44 | #include <linux/sunrpc/gss_krb5.h> | ||
45 | |||
46 | #ifdef RPC_DEBUG | ||
47 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
48 | #endif | ||
49 | |||
50 | u32 | ||
51 | krb5_encrypt( | ||
52 | struct crypto_tfm *tfm, | ||
53 | void * iv, | ||
54 | void * in, | ||
55 | void * out, | ||
56 | int length) | ||
57 | { | ||
58 | u32 ret = -EINVAL; | ||
59 | struct scatterlist sg[1]; | ||
60 | u8 local_iv[16] = {0}; | ||
61 | |||
62 | dprintk("RPC: krb5_encrypt: input data:\n"); | ||
63 | print_hexl((u32 *)in, length, 0); | ||
64 | |||
65 | if (length % crypto_tfm_alg_blocksize(tfm) != 0) | ||
66 | goto out; | ||
67 | |||
68 | if (crypto_tfm_alg_ivsize(tfm) > 16) { | ||
69 | dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", | ||
70 | crypto_tfm_alg_ivsize(tfm)); | ||
71 | goto out; | ||
72 | } | ||
73 | |||
74 | if (iv) | ||
75 | memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); | ||
76 | |||
77 | memcpy(out, in, length); | ||
78 | sg[0].page = virt_to_page(out); | ||
79 | sg[0].offset = offset_in_page(out); | ||
80 | sg[0].length = length; | ||
81 | |||
82 | ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); | ||
83 | |||
84 | dprintk("RPC: krb5_encrypt: output data:\n"); | ||
85 | print_hexl((u32 *)out, length, 0); | ||
86 | out: | ||
87 | dprintk("RPC: krb5_encrypt returns %d\n",ret); | ||
88 | return(ret); | ||
89 | } | ||
90 | |||
91 | EXPORT_SYMBOL(krb5_encrypt); | ||
92 | |||
93 | u32 | ||
94 | krb5_decrypt( | ||
95 | struct crypto_tfm *tfm, | ||
96 | void * iv, | ||
97 | void * in, | ||
98 | void * out, | ||
99 | int length) | ||
100 | { | ||
101 | u32 ret = -EINVAL; | ||
102 | struct scatterlist sg[1]; | ||
103 | u8 local_iv[16] = {0}; | ||
104 | |||
105 | dprintk("RPC: krb5_decrypt: input data:\n"); | ||
106 | print_hexl((u32 *)in, length, 0); | ||
107 | |||
108 | if (length % crypto_tfm_alg_blocksize(tfm) != 0) | ||
109 | goto out; | ||
110 | |||
111 | if (crypto_tfm_alg_ivsize(tfm) > 16) { | ||
112 | dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", | ||
113 | crypto_tfm_alg_ivsize(tfm)); | ||
114 | goto out; | ||
115 | } | ||
116 | if (iv) | ||
117 | memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm)); | ||
118 | |||
119 | memcpy(out, in, length); | ||
120 | sg[0].page = virt_to_page(out); | ||
121 | sg[0].offset = offset_in_page(out); | ||
122 | sg[0].length = length; | ||
123 | |||
124 | ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); | ||
125 | |||
126 | dprintk("RPC: krb5_decrypt: output_data:\n"); | ||
127 | print_hexl((u32 *)out, length, 0); | ||
128 | out: | ||
129 | dprintk("RPC: gss_k5decrypt returns %d\n",ret); | ||
130 | return(ret); | ||
131 | } | ||
132 | |||
133 | EXPORT_SYMBOL(krb5_decrypt); | ||
134 | |||
135 | static void | ||
136 | buf_to_sg(struct scatterlist *sg, char *ptr, int len) { | ||
137 | sg->page = virt_to_page(ptr); | ||
138 | sg->offset = offset_in_page(ptr); | ||
139 | sg->length = len; | ||
140 | } | ||
141 | |||
142 | /* checksum the plaintext data and hdrlen bytes of the token header */ | ||
143 | s32 | ||
144 | make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, | ||
145 | struct xdr_netobj *cksum) | ||
146 | { | ||
147 | char *cksumname; | ||
148 | struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ | ||
149 | struct scatterlist sg[1]; | ||
150 | u32 code = GSS_S_FAILURE; | ||
151 | int len, thislen, offset; | ||
152 | int i; | ||
153 | |||
154 | switch (cksumtype) { | ||
155 | case CKSUMTYPE_RSA_MD5: | ||
156 | cksumname = "md5"; | ||
157 | break; | ||
158 | default: | ||
159 | dprintk("RPC: krb5_make_checksum:" | ||
160 | " unsupported checksum %d", cksumtype); | ||
161 | goto out; | ||
162 | } | ||
163 | if (!(tfm = crypto_alloc_tfm(cksumname, 0))) | ||
164 | goto out; | ||
165 | cksum->len = crypto_tfm_alg_digestsize(tfm); | ||
166 | if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) | ||
167 | goto out; | ||
168 | |||
169 | crypto_digest_init(tfm); | ||
170 | buf_to_sg(sg, header, hdrlen); | ||
171 | crypto_digest_update(tfm, sg, 1); | ||
172 | if (body->head[0].iov_len) { | ||
173 | buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); | ||
174 | crypto_digest_update(tfm, sg, 1); | ||
175 | } | ||
176 | |||
177 | len = body->page_len; | ||
178 | if (len != 0) { | ||
179 | offset = body->page_base & (PAGE_CACHE_SIZE - 1); | ||
180 | i = body->page_base >> PAGE_CACHE_SHIFT; | ||
181 | thislen = PAGE_CACHE_SIZE - offset; | ||
182 | do { | ||
183 | if (thislen > len) | ||
184 | thislen = len; | ||
185 | sg->page = body->pages[i]; | ||
186 | sg->offset = offset; | ||
187 | sg->length = thislen; | ||
188 | kmap(sg->page); /* XXX kmap_atomic? */ | ||
189 | crypto_digest_update(tfm, sg, 1); | ||
190 | kunmap(sg->page); | ||
191 | len -= thislen; | ||
192 | i++; | ||
193 | offset = 0; | ||
194 | thislen = PAGE_CACHE_SIZE; | ||
195 | } while(len != 0); | ||
196 | } | ||
197 | if (body->tail[0].iov_len) { | ||
198 | buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len); | ||
199 | crypto_digest_update(tfm, sg, 1); | ||
200 | } | ||
201 | crypto_digest_final(tfm, cksum->data); | ||
202 | code = 0; | ||
203 | out: | ||
204 | if (tfm) | ||
205 | crypto_free_tfm(tfm); | ||
206 | return code; | ||
207 | } | ||
208 | |||
209 | EXPORT_SYMBOL(make_checksum); | ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c new file mode 100644 index 000000000000..cf726510df8e --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c | |||
@@ -0,0 +1,275 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss/gss_krb5_mech.c | ||
3 | * | ||
4 | * Copyright (c) 2001 The Regents of the University of Michigan. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Andy Adamson <andros@umich.edu> | ||
8 | * J. Bruce Fields <bfields@umich.edu> | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * 1. Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * 2. Redistributions in binary form must reproduce the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer in the | ||
18 | * documentation and/or other materials provided with the distribution. | ||
19 | * 3. Neither the name of the University nor the names of its | ||
20 | * contributors may be used to endorse or promote products derived | ||
21 | * from this software without specific prior written permission. | ||
22 | * | ||
23 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
24 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
25 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
26 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
28 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
29 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
30 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
31 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
32 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
33 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | */ | ||
36 | |||
37 | #include <linux/module.h> | ||
38 | #include <linux/init.h> | ||
39 | #include <linux/types.h> | ||
40 | #include <linux/slab.h> | ||
41 | #include <linux/sunrpc/auth.h> | ||
42 | #include <linux/in.h> | ||
43 | #include <linux/sunrpc/gss_krb5.h> | ||
44 | #include <linux/sunrpc/xdr.h> | ||
45 | #include <linux/crypto.h> | ||
46 | |||
47 | #ifdef RPC_DEBUG | ||
48 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
49 | #endif | ||
50 | |||
51 | static const void * | ||
52 | simple_get_bytes(const void *p, const void *end, void *res, int len) | ||
53 | { | ||
54 | const void *q = (const void *)((const char *)p + len); | ||
55 | if (unlikely(q > end || q < p)) | ||
56 | return ERR_PTR(-EFAULT); | ||
57 | memcpy(res, p, len); | ||
58 | return q; | ||
59 | } | ||
60 | |||
61 | static const void * | ||
62 | simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) | ||
63 | { | ||
64 | const void *q; | ||
65 | unsigned int len; | ||
66 | |||
67 | p = simple_get_bytes(p, end, &len, sizeof(len)); | ||
68 | if (IS_ERR(p)) | ||
69 | return p; | ||
70 | q = (const void *)((const char *)p + len); | ||
71 | if (unlikely(q > end || q < p)) | ||
72 | return ERR_PTR(-EFAULT); | ||
73 | res->data = kmalloc(len, GFP_KERNEL); | ||
74 | if (unlikely(res->data == NULL)) | ||
75 | return ERR_PTR(-ENOMEM); | ||
76 | memcpy(res->data, p, len); | ||
77 | res->len = len; | ||
78 | return q; | ||
79 | } | ||
80 | |||
81 | static inline const void * | ||
82 | get_key(const void *p, const void *end, struct crypto_tfm **res) | ||
83 | { | ||
84 | struct xdr_netobj key; | ||
85 | int alg, alg_mode; | ||
86 | char *alg_name; | ||
87 | |||
88 | p = simple_get_bytes(p, end, &alg, sizeof(alg)); | ||
89 | if (IS_ERR(p)) | ||
90 | goto out_err; | ||
91 | p = simple_get_netobj(p, end, &key); | ||
92 | if (IS_ERR(p)) | ||
93 | goto out_err; | ||
94 | |||
95 | switch (alg) { | ||
96 | case ENCTYPE_DES_CBC_RAW: | ||
97 | alg_name = "des"; | ||
98 | alg_mode = CRYPTO_TFM_MODE_CBC; | ||
99 | break; | ||
100 | default: | ||
101 | dprintk("RPC: get_key: unsupported algorithm %d\n", alg); | ||
102 | goto out_err_free_key; | ||
103 | } | ||
104 | if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) | ||
105 | goto out_err_free_key; | ||
106 | if (crypto_cipher_setkey(*res, key.data, key.len)) | ||
107 | goto out_err_free_tfm; | ||
108 | |||
109 | kfree(key.data); | ||
110 | return p; | ||
111 | |||
112 | out_err_free_tfm: | ||
113 | crypto_free_tfm(*res); | ||
114 | out_err_free_key: | ||
115 | kfree(key.data); | ||
116 | p = ERR_PTR(-EINVAL); | ||
117 | out_err: | ||
118 | return p; | ||
119 | } | ||
120 | |||
121 | static int | ||
122 | gss_import_sec_context_kerberos(const void *p, | ||
123 | size_t len, | ||
124 | struct gss_ctx *ctx_id) | ||
125 | { | ||
126 | const void *end = (const void *)((const char *)p + len); | ||
127 | struct krb5_ctx *ctx; | ||
128 | |||
129 | if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) | ||
130 | goto out_err; | ||
131 | memset(ctx, 0, sizeof(*ctx)); | ||
132 | |||
133 | p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); | ||
134 | if (IS_ERR(p)) | ||
135 | goto out_err_free_ctx; | ||
136 | p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init)); | ||
137 | if (IS_ERR(p)) | ||
138 | goto out_err_free_ctx; | ||
139 | p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed)); | ||
140 | if (IS_ERR(p)) | ||
141 | goto out_err_free_ctx; | ||
142 | p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg)); | ||
143 | if (IS_ERR(p)) | ||
144 | goto out_err_free_ctx; | ||
145 | p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg)); | ||
146 | if (IS_ERR(p)) | ||
147 | goto out_err_free_ctx; | ||
148 | p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); | ||
149 | if (IS_ERR(p)) | ||
150 | goto out_err_free_ctx; | ||
151 | p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send)); | ||
152 | if (IS_ERR(p)) | ||
153 | goto out_err_free_ctx; | ||
154 | p = simple_get_netobj(p, end, &ctx->mech_used); | ||
155 | if (IS_ERR(p)) | ||
156 | goto out_err_free_ctx; | ||
157 | p = get_key(p, end, &ctx->enc); | ||
158 | if (IS_ERR(p)) | ||
159 | goto out_err_free_mech; | ||
160 | p = get_key(p, end, &ctx->seq); | ||
161 | if (IS_ERR(p)) | ||
162 | goto out_err_free_key1; | ||
163 | if (p != end) { | ||
164 | p = ERR_PTR(-EFAULT); | ||
165 | goto out_err_free_key2; | ||
166 | } | ||
167 | |||
168 | ctx_id->internal_ctx_id = ctx; | ||
169 | dprintk("RPC: Succesfully imported new context.\n"); | ||
170 | return 0; | ||
171 | |||
172 | out_err_free_key2: | ||
173 | crypto_free_tfm(ctx->seq); | ||
174 | out_err_free_key1: | ||
175 | crypto_free_tfm(ctx->enc); | ||
176 | out_err_free_mech: | ||
177 | kfree(ctx->mech_used.data); | ||
178 | out_err_free_ctx: | ||
179 | kfree(ctx); | ||
180 | out_err: | ||
181 | return PTR_ERR(p); | ||
182 | } | ||
183 | |||
184 | static void | ||
185 | gss_delete_sec_context_kerberos(void *internal_ctx) { | ||
186 | struct krb5_ctx *kctx = internal_ctx; | ||
187 | |||
188 | if (kctx->seq) | ||
189 | crypto_free_tfm(kctx->seq); | ||
190 | if (kctx->enc) | ||
191 | crypto_free_tfm(kctx->enc); | ||
192 | if (kctx->mech_used.data) | ||
193 | kfree(kctx->mech_used.data); | ||
194 | kfree(kctx); | ||
195 | } | ||
196 | |||
197 | static u32 | ||
198 | gss_verify_mic_kerberos(struct gss_ctx *ctx, | ||
199 | struct xdr_buf *message, | ||
200 | struct xdr_netobj *mic_token, | ||
201 | u32 *qstate) { | ||
202 | u32 maj_stat = 0; | ||
203 | int qop_state; | ||
204 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
205 | |||
206 | maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, | ||
207 | KG_TOK_MIC_MSG); | ||
208 | if (!maj_stat && qop_state) | ||
209 | *qstate = qop_state; | ||
210 | |||
211 | dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); | ||
212 | return maj_stat; | ||
213 | } | ||
214 | |||
215 | static u32 | ||
216 | gss_get_mic_kerberos(struct gss_ctx *ctx, | ||
217 | u32 qop, | ||
218 | struct xdr_buf *message, | ||
219 | struct xdr_netobj *mic_token) { | ||
220 | u32 err = 0; | ||
221 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
222 | |||
223 | err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); | ||
224 | |||
225 | dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); | ||
226 | |||
227 | return err; | ||
228 | } | ||
229 | |||
230 | static struct gss_api_ops gss_kerberos_ops = { | ||
231 | .gss_import_sec_context = gss_import_sec_context_kerberos, | ||
232 | .gss_get_mic = gss_get_mic_kerberos, | ||
233 | .gss_verify_mic = gss_verify_mic_kerberos, | ||
234 | .gss_delete_sec_context = gss_delete_sec_context_kerberos, | ||
235 | }; | ||
236 | |||
237 | static struct pf_desc gss_kerberos_pfs[] = { | ||
238 | [0] = { | ||
239 | .pseudoflavor = RPC_AUTH_GSS_KRB5, | ||
240 | .service = RPC_GSS_SVC_NONE, | ||
241 | .name = "krb5", | ||
242 | }, | ||
243 | [1] = { | ||
244 | .pseudoflavor = RPC_AUTH_GSS_KRB5I, | ||
245 | .service = RPC_GSS_SVC_INTEGRITY, | ||
246 | .name = "krb5i", | ||
247 | }, | ||
248 | }; | ||
249 | |||
250 | static struct gss_api_mech gss_kerberos_mech = { | ||
251 | .gm_name = "krb5", | ||
252 | .gm_owner = THIS_MODULE, | ||
253 | .gm_ops = &gss_kerberos_ops, | ||
254 | .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), | ||
255 | .gm_pfs = gss_kerberos_pfs, | ||
256 | }; | ||
257 | |||
258 | static int __init init_kerberos_module(void) | ||
259 | { | ||
260 | int status; | ||
261 | |||
262 | status = gss_mech_register(&gss_kerberos_mech); | ||
263 | if (status) | ||
264 | printk("Failed to register kerberos gss mechanism!\n"); | ||
265 | return status; | ||
266 | } | ||
267 | |||
268 | static void __exit cleanup_kerberos_module(void) | ||
269 | { | ||
270 | gss_mech_unregister(&gss_kerberos_mech); | ||
271 | } | ||
272 | |||
273 | MODULE_LICENSE("GPL"); | ||
274 | module_init(init_kerberos_module); | ||
275 | module_exit(cleanup_kerberos_module); | ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c new file mode 100644 index 000000000000..afeeb8715a77 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c | |||
@@ -0,0 +1,176 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss/gss_krb5_seal.c | ||
3 | * | ||
4 | * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c | ||
5 | * | ||
6 | * Copyright (c) 2000 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@umich.edu> | ||
10 | * J. Bruce Fields <bfields@umich.edu> | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * Copyright 1993 by OpenVision Technologies, Inc. | ||
15 | * | ||
16 | * Permission to use, copy, modify, distribute, and sell this software | ||
17 | * and its documentation for any purpose is hereby granted without fee, | ||
18 | * provided that the above copyright notice appears in all copies and | ||
19 | * that both that copyright notice and this permission notice appear in | ||
20 | * supporting documentation, and that the name of OpenVision not be used | ||
21 | * in advertising or publicity pertaining to distribution of the software | ||
22 | * without specific, written prior permission. OpenVision makes no | ||
23 | * representations about the suitability of this software for any | ||
24 | * purpose. It is provided "as is" without express or implied warranty. | ||
25 | * | ||
26 | * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | ||
27 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | ||
28 | * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR | ||
29 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | ||
30 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | ||
31 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
32 | * PERFORMANCE OF THIS SOFTWARE. | ||
33 | */ | ||
34 | |||
35 | /* | ||
36 | * Copyright (C) 1998 by the FundsXpress, INC. | ||
37 | * | ||
38 | * All rights reserved. | ||
39 | * | ||
40 | * Export of this software from the United States of America may require | ||
41 | * a specific license from the United States Government. It is the | ||
42 | * responsibility of any person or organization contemplating export to | ||
43 | * obtain such a license before exporting. | ||
44 | * | ||
45 | * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and | ||
46 | * distribute this software and its documentation for any purpose and | ||
47 | * without fee is hereby granted, provided that the above copyright | ||
48 | * notice appear in all copies and that both that copyright notice and | ||
49 | * this permission notice appear in supporting documentation, and that | ||
50 | * the name of FundsXpress. not be used in advertising or publicity pertaining | ||
51 | * to distribution of the software without specific, written prior | ||
52 | * permission. FundsXpress makes no representations about the suitability of | ||
53 | * this software for any purpose. It is provided "as is" without express | ||
54 | * or implied warranty. | ||
55 | * | ||
56 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | ||
57 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | ||
58 | * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | ||
59 | */ | ||
60 | |||
61 | #include <linux/types.h> | ||
62 | #include <linux/slab.h> | ||
63 | #include <linux/jiffies.h> | ||
64 | #include <linux/sunrpc/gss_krb5.h> | ||
65 | #include <linux/random.h> | ||
66 | #include <asm/scatterlist.h> | ||
67 | #include <linux/crypto.h> | ||
68 | |||
69 | #ifdef RPC_DEBUG | ||
70 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
71 | #endif | ||
72 | |||
/*
 * Number of pad bytes that bring `length` up to the next multiple of 8.
 * The result is in 1..8: an already aligned length gets a full block.
 */
static inline int
gss_krb5_padding(int blocksize, int length)
{
	int partial;

	/* Most of the code is block-size independent but in practice we
	 * use only 8: */
	BUG_ON(blocksize != 8);

	partial = length & 7;
	return 8 - partial;
}
80 | |||
81 | u32 | ||
82 | krb5_make_token(struct krb5_ctx *ctx, int qop_req, | ||
83 | struct xdr_buf *text, struct xdr_netobj *token, | ||
84 | int toktype) | ||
85 | { | ||
86 | s32 checksum_type; | ||
87 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | ||
88 | int blocksize = 0, tmsglen; | ||
89 | unsigned char *ptr, *krb5_hdr, *msg_start; | ||
90 | s32 now; | ||
91 | |||
92 | dprintk("RPC: gss_krb5_seal\n"); | ||
93 | |||
94 | now = get_seconds(); | ||
95 | |||
96 | if (qop_req != 0) | ||
97 | goto out_err; | ||
98 | |||
99 | switch (ctx->signalg) { | ||
100 | case SGN_ALG_DES_MAC_MD5: | ||
101 | checksum_type = CKSUMTYPE_RSA_MD5; | ||
102 | break; | ||
103 | default: | ||
104 | dprintk("RPC: gss_krb5_seal: ctx->signalg %d not" | ||
105 | " supported\n", ctx->signalg); | ||
106 | goto out_err; | ||
107 | } | ||
108 | if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) { | ||
109 | dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n", | ||
110 | ctx->sealalg); | ||
111 | goto out_err; | ||
112 | } | ||
113 | |||
114 | if (toktype == KG_TOK_WRAP_MSG) { | ||
115 | blocksize = crypto_tfm_alg_blocksize(ctx->enc); | ||
116 | tmsglen = blocksize + text->len | ||
117 | + gss_krb5_padding(blocksize, blocksize + text->len); | ||
118 | } else { | ||
119 | tmsglen = 0; | ||
120 | } | ||
121 | |||
122 | token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); | ||
123 | |||
124 | ptr = token->data; | ||
125 | g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr); | ||
126 | |||
127 | *ptr++ = (unsigned char) ((toktype>>8)&0xff); | ||
128 | *ptr++ = (unsigned char) (toktype&0xff); | ||
129 | |||
130 | /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ | ||
131 | krb5_hdr = ptr - 2; | ||
132 | msg_start = krb5_hdr + 24; | ||
133 | |||
134 | *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); | ||
135 | memset(krb5_hdr + 4, 0xff, 4); | ||
136 | if (toktype == KG_TOK_WRAP_MSG) | ||
137 | *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); | ||
138 | |||
139 | if (toktype == KG_TOK_WRAP_MSG) { | ||
140 | /* XXX removing support for now */ | ||
141 | goto out_err; | ||
142 | } else { /* Sign only. */ | ||
143 | if (make_checksum(checksum_type, krb5_hdr, 8, text, | ||
144 | &md5cksum)) | ||
145 | goto out_err; | ||
146 | } | ||
147 | |||
148 | switch (ctx->signalg) { | ||
149 | case SGN_ALG_DES_MAC_MD5: | ||
150 | if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, | ||
151 | md5cksum.data, md5cksum.len)) | ||
152 | goto out_err; | ||
153 | memcpy(krb5_hdr + 16, | ||
154 | md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, | ||
155 | KRB5_CKSUM_LENGTH); | ||
156 | |||
157 | dprintk("RPC: make_seal_token: cksum data: \n"); | ||
158 | print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); | ||
159 | break; | ||
160 | default: | ||
161 | BUG(); | ||
162 | } | ||
163 | |||
164 | kfree(md5cksum.data); | ||
165 | |||
166 | if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff, | ||
167 | ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) | ||
168 | goto out_err; | ||
169 | |||
170 | ctx->seq_send++; | ||
171 | |||
172 | return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); | ||
173 | out_err: | ||
174 | if (md5cksum.data) kfree(md5cksum.data); | ||
175 | return GSS_S_FAILURE; | ||
176 | } | ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c new file mode 100644 index 000000000000..c53ead39118d --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c | |||
@@ -0,0 +1,88 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss/gss_krb5_seqnum.c | ||
3 | * | ||
4 | * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c | ||
5 | * | ||
6 | * Copyright (c) 2000 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@umich.edu> | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * Copyright 1993 by OpenVision Technologies, Inc. | ||
14 | * | ||
15 | * Permission to use, copy, modify, distribute, and sell this software | ||
16 | * and its documentation for any purpose is hereby granted without fee, | ||
17 | * provided that the above copyright notice appears in all copies and | ||
18 | * that both that copyright notice and this permission notice appear in | ||
19 | * supporting documentation, and that the name of OpenVision not be used | ||
20 | * in advertising or publicity pertaining to distribution of the software | ||
21 | * without specific, written prior permission. OpenVision makes no | ||
22 | * representations about the suitability of this software for any | ||
23 | * purpose. It is provided "as is" without express or implied warranty. | ||
24 | * | ||
25 | * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | ||
26 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | ||
27 | * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR | ||
28 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | ||
29 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | ||
30 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
31 | * PERFORMANCE OF THIS SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/types.h> | ||
35 | #include <linux/slab.h> | ||
36 | #include <linux/sunrpc/gss_krb5.h> | ||
37 | #include <linux/crypto.h> | ||
38 | |||
39 | #ifdef RPC_DEBUG | ||
40 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
41 | #endif | ||
42 | |||
43 | s32 | ||
44 | krb5_make_seq_num(struct crypto_tfm *key, | ||
45 | int direction, | ||
46 | s32 seqnum, | ||
47 | unsigned char *cksum, unsigned char *buf) | ||
48 | { | ||
49 | unsigned char plain[8]; | ||
50 | |||
51 | plain[0] = (unsigned char) (seqnum & 0xff); | ||
52 | plain[1] = (unsigned char) ((seqnum >> 8) & 0xff); | ||
53 | plain[2] = (unsigned char) ((seqnum >> 16) & 0xff); | ||
54 | plain[3] = (unsigned char) ((seqnum >> 24) & 0xff); | ||
55 | |||
56 | plain[4] = direction; | ||
57 | plain[5] = direction; | ||
58 | plain[6] = direction; | ||
59 | plain[7] = direction; | ||
60 | |||
61 | return krb5_encrypt(key, cksum, plain, buf, 8); | ||
62 | } | ||
63 | |||
64 | s32 | ||
65 | krb5_get_seq_num(struct crypto_tfm *key, | ||
66 | unsigned char *cksum, | ||
67 | unsigned char *buf, | ||
68 | int *direction, s32 * seqnum) | ||
69 | { | ||
70 | s32 code; | ||
71 | unsigned char plain[8]; | ||
72 | |||
73 | dprintk("RPC: krb5_get_seq_num:\n"); | ||
74 | |||
75 | if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) | ||
76 | return code; | ||
77 | |||
78 | if ((plain[4] != plain[5]) || (plain[4] != plain[6]) | ||
79 | || (plain[4] != plain[7])) | ||
80 | return (s32)KG_BAD_SEQ; | ||
81 | |||
82 | *direction = plain[4]; | ||
83 | |||
84 | *seqnum = ((plain[0]) | | ||
85 | (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); | ||
86 | |||
87 | return (0); | ||
88 | } | ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c new file mode 100644 index 000000000000..8767fc53183d --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c | |||
@@ -0,0 +1,202 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss/gss_krb5_unseal.c | ||
3 | * | ||
4 | * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c | ||
5 | * | ||
6 | * Copyright (c) 2000 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@umich.edu> | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * Copyright 1993 by OpenVision Technologies, Inc. | ||
14 | * | ||
15 | * Permission to use, copy, modify, distribute, and sell this software | ||
16 | * and its documentation for any purpose is hereby granted without fee, | ||
17 | * provided that the above copyright notice appears in all copies and | ||
18 | * that both that copyright notice and this permission notice appear in | ||
19 | * supporting documentation, and that the name of OpenVision not be used | ||
20 | * in advertising or publicity pertaining to distribution of the software | ||
21 | * without specific, written prior permission. OpenVision makes no | ||
22 | * representations about the suitability of this software for any | ||
23 | * purpose. It is provided "as is" without express or implied warranty. | ||
24 | * | ||
25 | * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | ||
26 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO | ||
27 | * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR | ||
28 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | ||
29 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | ||
30 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
31 | * PERFORMANCE OF THIS SOFTWARE. | ||
32 | */ | ||
33 | |||
34 | /* | ||
35 | * Copyright (C) 1998 by the FundsXpress, INC. | ||
36 | * | ||
37 | * All rights reserved. | ||
38 | * | ||
39 | * Export of this software from the United States of America may require | ||
40 | * a specific license from the United States Government. It is the | ||
41 | * responsibility of any person or organization contemplating export to | ||
42 | * obtain such a license before exporting. | ||
43 | * | ||
44 | * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and | ||
45 | * distribute this software and its documentation for any purpose and | ||
46 | * without fee is hereby granted, provided that the above copyright | ||
47 | * notice appear in all copies and that both that copyright notice and | ||
48 | * this permission notice appear in supporting documentation, and that | ||
49 | * the name of FundsXpress. not be used in advertising or publicity pertaining | ||
50 | * to distribution of the software without specific, written prior | ||
51 | * permission. FundsXpress makes no representations about the suitability of | ||
52 | * this software for any purpose. It is provided "as is" without express | ||
53 | * or implied warranty. | ||
54 | * | ||
55 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | ||
56 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | ||
57 | * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | ||
58 | */ | ||
59 | |||
60 | #include <linux/types.h> | ||
61 | #include <linux/slab.h> | ||
62 | #include <linux/jiffies.h> | ||
63 | #include <linux/sunrpc/gss_krb5.h> | ||
64 | #include <linux/crypto.h> | ||
65 | |||
66 | #ifdef RPC_DEBUG | ||
67 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
68 | #endif | ||
69 | |||
70 | |||
71 | /* message_buffer is an input if toktype is MIC and an output if it is WRAP: | ||
72 | * If toktype is MIC: read_token is a mic token, and message_buffer is the | ||
73 | * data that the mic was supposedly taken over. | ||
74 | * If toktype is WRAP: read_token is a wrap token, and message_buffer is used | ||
75 | * to return the decrypted data. | ||
76 | */ | ||
77 | |||
78 | /* XXX will need to change prototype and/or just split into a separate function | ||
79 | * when we add privacy (because read_token will be in pages too). */ | ||
80 | u32 | ||
81 | krb5_read_token(struct krb5_ctx *ctx, | ||
82 | struct xdr_netobj *read_token, | ||
83 | struct xdr_buf *message_buffer, | ||
84 | int *qop_state, int toktype) | ||
85 | { | ||
86 | int signalg; | ||
87 | int sealalg; | ||
88 | s32 checksum_type; | ||
89 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | ||
90 | s32 now; | ||
91 | int direction; | ||
92 | s32 seqnum; | ||
93 | unsigned char *ptr = (unsigned char *)read_token->data; | ||
94 | int bodysize; | ||
95 | u32 ret = GSS_S_DEFECTIVE_TOKEN; | ||
96 | |||
97 | dprintk("RPC: krb5_read_token\n"); | ||
98 | |||
99 | if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, | ||
100 | read_token->len)) | ||
101 | goto out; | ||
102 | |||
103 | if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff))) | ||
104 | goto out; | ||
105 | |||
106 | /* XXX sanity-check bodysize?? */ | ||
107 | |||
108 | if (toktype == KG_TOK_WRAP_MSG) { | ||
109 | /* XXX gone */ | ||
110 | goto out; | ||
111 | } | ||
112 | |||
113 | /* get the sign and seal algorithms */ | ||
114 | |||
115 | signalg = ptr[0] + (ptr[1] << 8); | ||
116 | sealalg = ptr[2] + (ptr[3] << 8); | ||
117 | |||
118 | /* Sanity checks */ | ||
119 | |||
120 | if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) | ||
121 | goto out; | ||
122 | |||
123 | if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || | ||
124 | ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff))) | ||
125 | goto out; | ||
126 | |||
127 | /* in the current spec, there is only one valid seal algorithm per | ||
128 | key type, so a simple comparison is ok */ | ||
129 | |||
130 | if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg)) | ||
131 | goto out; | ||
132 | |||
133 | /* there are several mappings of seal algorithms to sign algorithms, | ||
134 | but few enough that we can try them all. */ | ||
135 | |||
136 | if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) || | ||
137 | (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || | ||
138 | (ctx->sealalg == SEAL_ALG_DES3KD && | ||
139 | signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) | ||
140 | goto out; | ||
141 | |||
142 | /* compute the checksum of the message */ | ||
143 | |||
144 | /* initialize the the cksum */ | ||
145 | switch (signalg) { | ||
146 | case SGN_ALG_DES_MAC_MD5: | ||
147 | checksum_type = CKSUMTYPE_RSA_MD5; | ||
148 | break; | ||
149 | default: | ||
150 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
151 | goto out; | ||
152 | } | ||
153 | |||
154 | switch (signalg) { | ||
155 | case SGN_ALG_DES_MAC_MD5: | ||
156 | ret = make_checksum(checksum_type, ptr - 2, 8, | ||
157 | message_buffer, &md5cksum); | ||
158 | if (ret) | ||
159 | goto out; | ||
160 | |||
161 | ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data, | ||
162 | md5cksum.data, 16); | ||
163 | if (ret) | ||
164 | goto out; | ||
165 | |||
166 | if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { | ||
167 | ret = GSS_S_BAD_SIG; | ||
168 | goto out; | ||
169 | } | ||
170 | break; | ||
171 | default: | ||
172 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
173 | goto out; | ||
174 | } | ||
175 | |||
176 | /* it got through unscathed. Make sure the context is unexpired */ | ||
177 | |||
178 | if (qop_state) | ||
179 | *qop_state = GSS_C_QOP_DEFAULT; | ||
180 | |||
181 | now = get_seconds(); | ||
182 | |||
183 | ret = GSS_S_CONTEXT_EXPIRED; | ||
184 | if (now > ctx->endtime) | ||
185 | goto out; | ||
186 | |||
187 | /* do sequencing checks */ | ||
188 | |||
189 | ret = GSS_S_BAD_SIG; | ||
190 | if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, | ||
191 | &seqnum))) | ||
192 | goto out; | ||
193 | |||
194 | if ((ctx->initiate && direction != 0xff) || | ||
195 | (!ctx->initiate && direction != 0)) | ||
196 | goto out; | ||
197 | |||
198 | ret = GSS_S_COMPLETE; | ||
199 | out: | ||
200 | if (md5cksum.data) kfree(md5cksum.data); | ||
201 | return ret; | ||
202 | } | ||
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c new file mode 100644 index 000000000000..9dfb68377d69 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_mech_switch.c | |||
@@ -0,0 +1,301 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss/gss_mech_switch.c | ||
3 | * | ||
4 | * Copyright (c) 2001 The Regents of the University of Michigan. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * J. Bruce Fields <bfields@umich.edu> | ||
8 | * | ||
9 | * Redistribution and use in source and binary forms, with or without | ||
10 | * modification, are permitted provided that the following conditions | ||
11 | * are met: | ||
12 | * | ||
13 | * 1. Redistributions of source code must retain the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer. | ||
15 | * 2. Redistributions in binary form must reproduce the above copyright | ||
16 | * notice, this list of conditions and the following disclaimer in the | ||
17 | * documentation and/or other materials provided with the distribution. | ||
18 | * 3. Neither the name of the University nor the names of its | ||
19 | * contributors may be used to endorse or promote products derived | ||
20 | * from this software without specific prior written permission. | ||
21 | * | ||
22 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
23 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
24 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
25 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
29 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | * | ||
34 | */ | ||
35 | |||
36 | #include <linux/types.h> | ||
37 | #include <linux/slab.h> | ||
38 | #include <linux/socket.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/sunrpc/msg_prot.h> | ||
41 | #include <linux/sunrpc/gss_asn1.h> | ||
42 | #include <linux/sunrpc/auth_gss.h> | ||
43 | #include <linux/sunrpc/svcauth_gss.h> | ||
44 | #include <linux/sunrpc/gss_err.h> | ||
45 | #include <linux/sunrpc/sched.h> | ||
46 | #include <linux/sunrpc/gss_api.h> | ||
47 | #include <linux/sunrpc/clnt.h> | ||
48 | |||
49 | #ifdef RPC_DEBUG | ||
50 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
51 | #endif | ||
52 | |||
53 | static LIST_HEAD(registered_mechs); | ||
54 | static DEFINE_SPINLOCK(registered_mechs_lock); | ||
55 | |||
56 | static void | ||
57 | gss_mech_free(struct gss_api_mech *gm) | ||
58 | { | ||
59 | struct pf_desc *pf; | ||
60 | int i; | ||
61 | |||
62 | for (i = 0; i < gm->gm_pf_num; i++) { | ||
63 | pf = &gm->gm_pfs[i]; | ||
64 | if (pf->auth_domain_name) | ||
65 | kfree(pf->auth_domain_name); | ||
66 | pf->auth_domain_name = NULL; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | static inline char * | ||
71 | make_auth_domain_name(char *name) | ||
72 | { | ||
73 | static char *prefix = "gss/"; | ||
74 | char *new; | ||
75 | |||
76 | new = kmalloc(strlen(name) + strlen(prefix) + 1, GFP_KERNEL); | ||
77 | if (new) { | ||
78 | strcpy(new, prefix); | ||
79 | strcat(new, name); | ||
80 | } | ||
81 | return new; | ||
82 | } | ||
83 | |||
84 | static int | ||
85 | gss_mech_svc_setup(struct gss_api_mech *gm) | ||
86 | { | ||
87 | struct pf_desc *pf; | ||
88 | int i, status; | ||
89 | |||
90 | for (i = 0; i < gm->gm_pf_num; i++) { | ||
91 | pf = &gm->gm_pfs[i]; | ||
92 | pf->auth_domain_name = make_auth_domain_name(pf->name); | ||
93 | status = -ENOMEM; | ||
94 | if (pf->auth_domain_name == NULL) | ||
95 | goto out; | ||
96 | status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor, | ||
97 | pf->auth_domain_name); | ||
98 | if (status) | ||
99 | goto out; | ||
100 | } | ||
101 | return 0; | ||
102 | out: | ||
103 | gss_mech_free(gm); | ||
104 | return status; | ||
105 | } | ||
106 | |||
107 | int | ||
108 | gss_mech_register(struct gss_api_mech *gm) | ||
109 | { | ||
110 | int status; | ||
111 | |||
112 | status = gss_mech_svc_setup(gm); | ||
113 | if (status) | ||
114 | return status; | ||
115 | spin_lock(®istered_mechs_lock); | ||
116 | list_add(&gm->gm_list, ®istered_mechs); | ||
117 | spin_unlock(®istered_mechs_lock); | ||
118 | dprintk("RPC: registered gss mechanism %s\n", gm->gm_name); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | EXPORT_SYMBOL(gss_mech_register); | ||
123 | |||
124 | void | ||
125 | gss_mech_unregister(struct gss_api_mech *gm) | ||
126 | { | ||
127 | spin_lock(®istered_mechs_lock); | ||
128 | list_del(&gm->gm_list); | ||
129 | spin_unlock(®istered_mechs_lock); | ||
130 | dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name); | ||
131 | gss_mech_free(gm); | ||
132 | } | ||
133 | |||
134 | EXPORT_SYMBOL(gss_mech_unregister); | ||
135 | |||
136 | struct gss_api_mech * | ||
137 | gss_mech_get(struct gss_api_mech *gm) | ||
138 | { | ||
139 | __module_get(gm->gm_owner); | ||
140 | return gm; | ||
141 | } | ||
142 | |||
143 | EXPORT_SYMBOL(gss_mech_get); | ||
144 | |||
145 | struct gss_api_mech * | ||
146 | gss_mech_get_by_name(const char *name) | ||
147 | { | ||
148 | struct gss_api_mech *pos, *gm = NULL; | ||
149 | |||
150 | spin_lock(®istered_mechs_lock); | ||
151 | list_for_each_entry(pos, ®istered_mechs, gm_list) { | ||
152 | if (0 == strcmp(name, pos->gm_name)) { | ||
153 | if (try_module_get(pos->gm_owner)) | ||
154 | gm = pos; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | spin_unlock(®istered_mechs_lock); | ||
159 | return gm; | ||
160 | |||
161 | } | ||
162 | |||
163 | EXPORT_SYMBOL(gss_mech_get_by_name); | ||
164 | |||
165 | static inline int | ||
166 | mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) | ||
167 | { | ||
168 | int i; | ||
169 | |||
170 | for (i = 0; i < gm->gm_pf_num; i++) { | ||
171 | if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) | ||
172 | return 1; | ||
173 | } | ||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | struct gss_api_mech * | ||
178 | gss_mech_get_by_pseudoflavor(u32 pseudoflavor) | ||
179 | { | ||
180 | struct gss_api_mech *pos, *gm = NULL; | ||
181 | |||
182 | spin_lock(®istered_mechs_lock); | ||
183 | list_for_each_entry(pos, ®istered_mechs, gm_list) { | ||
184 | if (!mech_supports_pseudoflavor(pos, pseudoflavor)) { | ||
185 | module_put(pos->gm_owner); | ||
186 | continue; | ||
187 | } | ||
188 | if (try_module_get(pos->gm_owner)) | ||
189 | gm = pos; | ||
190 | break; | ||
191 | } | ||
192 | spin_unlock(®istered_mechs_lock); | ||
193 | return gm; | ||
194 | } | ||
195 | |||
196 | EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor); | ||
197 | |||
198 | u32 | ||
199 | gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) | ||
200 | { | ||
201 | int i; | ||
202 | |||
203 | for (i = 0; i < gm->gm_pf_num; i++) { | ||
204 | if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) | ||
205 | return gm->gm_pfs[i].service; | ||
206 | } | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | EXPORT_SYMBOL(gss_pseudoflavor_to_service); | ||
211 | |||
212 | char * | ||
213 | gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) | ||
214 | { | ||
215 | int i; | ||
216 | |||
217 | for (i = 0; i < gm->gm_pf_num; i++) { | ||
218 | if (gm->gm_pfs[i].service == service) | ||
219 | return gm->gm_pfs[i].auth_domain_name; | ||
220 | } | ||
221 | return NULL; | ||
222 | } | ||
223 | |||
224 | EXPORT_SYMBOL(gss_service_to_auth_domain_name); | ||
225 | |||
226 | void | ||
227 | gss_mech_put(struct gss_api_mech * gm) | ||
228 | { | ||
229 | module_put(gm->gm_owner); | ||
230 | } | ||
231 | |||
232 | EXPORT_SYMBOL(gss_mech_put); | ||
233 | |||
234 | /* The mech could probably be determined from the token instead, but it's just | ||
235 | * as easy for now to pass it in. */ | ||
236 | int | ||
237 | gss_import_sec_context(const void *input_token, size_t bufsize, | ||
238 | struct gss_api_mech *mech, | ||
239 | struct gss_ctx **ctx_id) | ||
240 | { | ||
241 | if (!(*ctx_id = kmalloc(sizeof(**ctx_id), GFP_KERNEL))) | ||
242 | return GSS_S_FAILURE; | ||
243 | memset(*ctx_id, 0, sizeof(**ctx_id)); | ||
244 | (*ctx_id)->mech_type = gss_mech_get(mech); | ||
245 | |||
246 | return mech->gm_ops | ||
247 | ->gss_import_sec_context(input_token, bufsize, *ctx_id); | ||
248 | } | ||
249 | |||
250 | /* gss_get_mic: compute a mic over message and return mic_token. */ | ||
251 | |||
252 | u32 | ||
253 | gss_get_mic(struct gss_ctx *context_handle, | ||
254 | u32 qop, | ||
255 | struct xdr_buf *message, | ||
256 | struct xdr_netobj *mic_token) | ||
257 | { | ||
258 | return context_handle->mech_type->gm_ops | ||
259 | ->gss_get_mic(context_handle, | ||
260 | qop, | ||
261 | message, | ||
262 | mic_token); | ||
263 | } | ||
264 | |||
265 | /* gss_verify_mic: check whether the provided mic_token verifies message. */ | ||
266 | |||
267 | u32 | ||
268 | gss_verify_mic(struct gss_ctx *context_handle, | ||
269 | struct xdr_buf *message, | ||
270 | struct xdr_netobj *mic_token, | ||
271 | u32 *qstate) | ||
272 | { | ||
273 | return context_handle->mech_type->gm_ops | ||
274 | ->gss_verify_mic(context_handle, | ||
275 | message, | ||
276 | mic_token, | ||
277 | qstate); | ||
278 | } | ||
279 | |||
280 | /* gss_delete_sec_context: free all resources associated with context_handle. | ||
281 | * Note this differs from the RFC 2744-specified prototype in that we don't | ||
282 | * bother returning an output token, since it would never be used anyway. */ | ||
283 | |||
284 | u32 | ||
285 | gss_delete_sec_context(struct gss_ctx **context_handle) | ||
286 | { | ||
287 | dprintk("RPC: gss_delete_sec_context deleting %p\n", | ||
288 | *context_handle); | ||
289 | |||
290 | if (!*context_handle) | ||
291 | return(GSS_S_NO_CONTEXT); | ||
292 | if ((*context_handle)->internal_ctx_id != 0) | ||
293 | (*context_handle)->mech_type->gm_ops | ||
294 | ->gss_delete_sec_context((*context_handle) | ||
295 | ->internal_ctx_id); | ||
296 | if ((*context_handle)->mech_type) | ||
297 | gss_mech_put((*context_handle)->mech_type); | ||
298 | kfree(*context_handle); | ||
299 | *context_handle=NULL; | ||
300 | return GSS_S_COMPLETE; | ||
301 | } | ||
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c new file mode 100644 index 000000000000..dad05994c3eb --- /dev/null +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c | |||
@@ -0,0 +1,300 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_gss/gss_spkm3_mech.c | ||
3 | * | ||
4 | * Copyright (c) 2003 The Regents of the University of Michigan. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Andy Adamson <andros@umich.edu> | ||
8 | * J. Bruce Fields <bfields@umich.edu> | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * 1. Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * 2. Redistributions in binary form must reproduce the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer in the | ||
18 | * documentation and/or other materials provided with the distribution. | ||
19 | * 3. Neither the name of the University nor the names of its | ||
20 | * contributors may be used to endorse or promote products derived | ||
21 | * from this software without specific prior written permission. | ||
22 | * | ||
23 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
24 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
25 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
26 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
28 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
29 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
30 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
31 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
32 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
33 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | */ | ||
36 | |||
37 | #include <linux/module.h> | ||
38 | #include <linux/init.h> | ||
39 | #include <linux/types.h> | ||
40 | #include <linux/slab.h> | ||
41 | #include <linux/sunrpc/auth.h> | ||
42 | #include <linux/in.h> | ||
43 | #include <linux/sunrpc/svcauth_gss.h> | ||
44 | #include <linux/sunrpc/gss_spkm3.h> | ||
45 | #include <linux/sunrpc/xdr.h> | ||
46 | #include <linux/crypto.h> | ||
47 | |||
48 | #ifdef RPC_DEBUG | ||
49 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
50 | #endif | ||
51 | |||
52 | static const void * | ||
53 | simple_get_bytes(const void *p, const void *end, void *res, int len) | ||
54 | { | ||
55 | const void *q = (const void *)((const char *)p + len); | ||
56 | if (unlikely(q > end || q < p)) | ||
57 | return ERR_PTR(-EFAULT); | ||
58 | memcpy(res, p, len); | ||
59 | return q; | ||
60 | } | ||
61 | |||
62 | static const void * | ||
63 | simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) | ||
64 | { | ||
65 | const void *q; | ||
66 | unsigned int len; | ||
67 | p = simple_get_bytes(p, end, &len, sizeof(len)); | ||
68 | if (IS_ERR(p)) | ||
69 | return p; | ||
70 | res->len = len; | ||
71 | if (len == 0) { | ||
72 | res->data = NULL; | ||
73 | return p; | ||
74 | } | ||
75 | q = (const void *)((const char *)p + len); | ||
76 | if (unlikely(q > end || q < p)) | ||
77 | return ERR_PTR(-EFAULT); | ||
78 | res->data = kmalloc(len, GFP_KERNEL); | ||
79 | if (unlikely(res->data == NULL)) | ||
80 | return ERR_PTR(-ENOMEM); | ||
81 | memcpy(res->data, p, len); | ||
82 | return q; | ||
83 | } | ||
84 | |||
85 | static inline const void * | ||
86 | get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) | ||
87 | { | ||
88 | struct xdr_netobj key = { 0 }; | ||
89 | int alg_mode,setkey = 0; | ||
90 | char *alg_name; | ||
91 | |||
92 | p = simple_get_bytes(p, end, resalg, sizeof(*resalg)); | ||
93 | if (IS_ERR(p)) | ||
94 | goto out_err; | ||
95 | p = simple_get_netobj(p, end, &key); | ||
96 | if (IS_ERR(p)) | ||
97 | goto out_err; | ||
98 | |||
99 | switch (*resalg) { | ||
100 | case NID_des_cbc: | ||
101 | alg_name = "des"; | ||
102 | alg_mode = CRYPTO_TFM_MODE_CBC; | ||
103 | setkey = 1; | ||
104 | break; | ||
105 | case NID_md5: | ||
106 | if (key.len == 0) { | ||
107 | dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n"); | ||
108 | } | ||
109 | alg_name = "md5"; | ||
110 | alg_mode = 0; | ||
111 | setkey = 0; | ||
112 | break; | ||
113 | default: | ||
114 | dprintk("RPC: SPKM3 get_key: unsupported algorithm %d", *resalg); | ||
115 | goto out_err_free_key; | ||
116 | } | ||
117 | if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) | ||
118 | goto out_err_free_key; | ||
119 | if (setkey) { | ||
120 | if (crypto_cipher_setkey(*res, key.data, key.len)) | ||
121 | goto out_err_free_tfm; | ||
122 | } | ||
123 | |||
124 | if(key.len > 0) | ||
125 | kfree(key.data); | ||
126 | return p; | ||
127 | |||
128 | out_err_free_tfm: | ||
129 | crypto_free_tfm(*res); | ||
130 | out_err_free_key: | ||
131 | if(key.len > 0) | ||
132 | kfree(key.data); | ||
133 | p = ERR_PTR(-EINVAL); | ||
134 | out_err: | ||
135 | return p; | ||
136 | } | ||
137 | |||
138 | static int | ||
139 | gss_import_sec_context_spkm3(const void *p, size_t len, | ||
140 | struct gss_ctx *ctx_id) | ||
141 | { | ||
142 | const void *end = (const void *)((const char *)p + len); | ||
143 | struct spkm3_ctx *ctx; | ||
144 | |||
145 | if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) | ||
146 | goto out_err; | ||
147 | memset(ctx, 0, sizeof(*ctx)); | ||
148 | |||
149 | p = simple_get_netobj(p, end, &ctx->ctx_id); | ||
150 | if (IS_ERR(p)) | ||
151 | goto out_err_free_ctx; | ||
152 | |||
153 | p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop)); | ||
154 | if (IS_ERR(p)) | ||
155 | goto out_err_free_ctx_id; | ||
156 | |||
157 | p = simple_get_netobj(p, end, &ctx->mech_used); | ||
158 | if (IS_ERR(p)) | ||
159 | goto out_err_free_mech; | ||
160 | |||
161 | p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags)); | ||
162 | if (IS_ERR(p)) | ||
163 | goto out_err_free_mech; | ||
164 | |||
165 | p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags)); | ||
166 | if (IS_ERR(p)) | ||
167 | goto out_err_free_mech; | ||
168 | |||
169 | p = simple_get_netobj(p, end, &ctx->share_key); | ||
170 | if (IS_ERR(p)) | ||
171 | goto out_err_free_s_key; | ||
172 | |||
173 | p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg); | ||
174 | if (IS_ERR(p)) | ||
175 | goto out_err_free_s_key; | ||
176 | |||
177 | p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg); | ||
178 | if (IS_ERR(p)) | ||
179 | goto out_err_free_key1; | ||
180 | |||
181 | p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg)); | ||
182 | if (IS_ERR(p)) | ||
183 | goto out_err_free_key2; | ||
184 | |||
185 | p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg)); | ||
186 | if (IS_ERR(p)) | ||
187 | goto out_err_free_key2; | ||
188 | |||
189 | if (p != end) | ||
190 | goto out_err_free_key2; | ||
191 | |||
192 | ctx_id->internal_ctx_id = ctx; | ||
193 | |||
194 | dprintk("Succesfully imported new spkm context.\n"); | ||
195 | return 0; | ||
196 | |||
197 | out_err_free_key2: | ||
198 | crypto_free_tfm(ctx->derived_integ_key); | ||
199 | out_err_free_key1: | ||
200 | crypto_free_tfm(ctx->derived_conf_key); | ||
201 | out_err_free_s_key: | ||
202 | kfree(ctx->share_key.data); | ||
203 | out_err_free_mech: | ||
204 | kfree(ctx->mech_used.data); | ||
205 | out_err_free_ctx_id: | ||
206 | kfree(ctx->ctx_id.data); | ||
207 | out_err_free_ctx: | ||
208 | kfree(ctx); | ||
209 | out_err: | ||
210 | return PTR_ERR(p); | ||
211 | } | ||
212 | |||
213 | static void | ||
214 | gss_delete_sec_context_spkm3(void *internal_ctx) { | ||
215 | struct spkm3_ctx *sctx = internal_ctx; | ||
216 | |||
217 | if(sctx->derived_integ_key) | ||
218 | crypto_free_tfm(sctx->derived_integ_key); | ||
219 | if(sctx->derived_conf_key) | ||
220 | crypto_free_tfm(sctx->derived_conf_key); | ||
221 | if(sctx->share_key.data) | ||
222 | kfree(sctx->share_key.data); | ||
223 | if(sctx->mech_used.data) | ||
224 | kfree(sctx->mech_used.data); | ||
225 | kfree(sctx); | ||
226 | } | ||
227 | |||
228 | static u32 | ||
229 | gss_verify_mic_spkm3(struct gss_ctx *ctx, | ||
230 | struct xdr_buf *signbuf, | ||
231 | struct xdr_netobj *checksum, | ||
232 | u32 *qstate) { | ||
233 | u32 maj_stat = 0; | ||
234 | int qop_state = 0; | ||
235 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; | ||
236 | |||
237 | dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); | ||
238 | maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state, | ||
239 | SPKM_MIC_TOK); | ||
240 | |||
241 | if (!maj_stat && qop_state) | ||
242 | *qstate = qop_state; | ||
243 | |||
244 | dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); | ||
245 | return maj_stat; | ||
246 | } | ||
247 | |||
248 | static u32 | ||
249 | gss_get_mic_spkm3(struct gss_ctx *ctx, | ||
250 | u32 qop, | ||
251 | struct xdr_buf *message_buffer, | ||
252 | struct xdr_netobj *message_token) { | ||
253 | u32 err = 0; | ||
254 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; | ||
255 | |||
256 | dprintk("RPC: gss_get_mic_spkm3\n"); | ||
257 | |||
258 | err = spkm3_make_token(sctx, qop, message_buffer, | ||
259 | message_token, SPKM_MIC_TOK); | ||
260 | return err; | ||
261 | } | ||
262 | |||
263 | static struct gss_api_ops gss_spkm3_ops = { | ||
264 | .gss_import_sec_context = gss_import_sec_context_spkm3, | ||
265 | .gss_get_mic = gss_get_mic_spkm3, | ||
266 | .gss_verify_mic = gss_verify_mic_spkm3, | ||
267 | .gss_delete_sec_context = gss_delete_sec_context_spkm3, | ||
268 | }; | ||
269 | |||
270 | static struct pf_desc gss_spkm3_pfs[] = { | ||
271 | {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"}, | ||
272 | {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, | ||
273 | }; | ||
274 | |||
275 | static struct gss_api_mech gss_spkm3_mech = { | ||
276 | .gm_name = "spkm3", | ||
277 | .gm_owner = THIS_MODULE, | ||
278 | .gm_ops = &gss_spkm3_ops, | ||
279 | .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs), | ||
280 | .gm_pfs = gss_spkm3_pfs, | ||
281 | }; | ||
282 | |||
283 | static int __init init_spkm3_module(void) | ||
284 | { | ||
285 | int status; | ||
286 | |||
287 | status = gss_mech_register(&gss_spkm3_mech); | ||
288 | if (status) | ||
289 | printk("Failed to register spkm3 gss mechanism!\n"); | ||
290 | return 0; | ||
291 | } | ||
292 | |||
293 | static void __exit cleanup_spkm3_module(void) | ||
294 | { | ||
295 | gss_mech_unregister(&gss_spkm3_mech); | ||
296 | } | ||
297 | |||
298 | MODULE_LICENSE("GPL"); | ||
299 | module_init(init_spkm3_module); | ||
300 | module_exit(cleanup_spkm3_module); | ||
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c new file mode 100644 index 000000000000..25339868d462 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/gss_spkm3_seal.c | ||
3 | * | ||
4 | * Copyright (c) 2003 The Regents of the University of Michigan. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Andy Adamson <andros@umich.edu> | ||
8 | * | ||
9 | * Redistribution and use in source and binary forms, with or without | ||
10 | * modification, are permitted provided that the following conditions | ||
11 | * are met: | ||
12 | * | ||
13 | * 1. Redistributions of source code must retain the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer. | ||
15 | * 2. Redistributions in binary form must reproduce the above copyright | ||
16 | * notice, this list of conditions and the following disclaimer in the | ||
17 | * documentation and/or other materials provided with the distribution. | ||
18 | * 3. Neither the name of the University nor the names of its | ||
19 | * contributors may be used to endorse or promote products derived | ||
20 | * from this software without specific prior written permission. | ||
21 | * | ||
22 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
23 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
24 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
25 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
29 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | * | ||
34 | */ | ||
35 | |||
36 | #include <linux/types.h> | ||
37 | #include <linux/slab.h> | ||
38 | #include <linux/jiffies.h> | ||
39 | #include <linux/sunrpc/gss_spkm3.h> | ||
40 | #include <linux/random.h> | ||
41 | #include <linux/crypto.h> | ||
42 | |||
43 | #ifdef RPC_DEBUG | ||
44 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
45 | #endif | ||
46 | |||
47 | /* | ||
48 | * spkm3_make_token() | ||
49 | * | ||
50 | * Only SPKM_MIC_TOK with md5 intg-alg is supported | ||
51 | */ | ||
52 | |||
53 | u32 | ||
54 | spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, | ||
55 | struct xdr_buf * text, struct xdr_netobj * token, | ||
56 | int toktype) | ||
57 | { | ||
58 | s32 checksum_type; | ||
59 | char tokhdrbuf[25]; | ||
60 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | ||
61 | struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; | ||
62 | int tmsglen, tokenlen = 0; | ||
63 | unsigned char *ptr; | ||
64 | s32 now; | ||
65 | int ctxelen = 0, ctxzbit = 0; | ||
66 | int md5elen = 0, md5zbit = 0; | ||
67 | |||
68 | dprintk("RPC: spkm3_make_token\n"); | ||
69 | |||
70 | now = jiffies; | ||
71 | if (qop_req != 0) | ||
72 | goto out_err; | ||
73 | |||
74 | if (ctx->ctx_id.len != 16) { | ||
75 | dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", | ||
76 | ctx->ctx_id.len); | ||
77 | goto out_err; | ||
78 | } | ||
79 | |||
80 | switch (ctx->intg_alg) { | ||
81 | case NID_md5: | ||
82 | checksum_type = CKSUMTYPE_RSA_MD5; | ||
83 | break; | ||
84 | default: | ||
85 | dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not" | ||
86 | " supported\n", ctx->intg_alg); | ||
87 | goto out_err; | ||
88 | } | ||
89 | /* XXX since we don't support WRAP, perhaps we don't care... */ | ||
90 | if (ctx->conf_alg != NID_cast5_cbc) { | ||
91 | dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n", | ||
92 | ctx->conf_alg); | ||
93 | goto out_err; | ||
94 | } | ||
95 | |||
96 | if (toktype == SPKM_MIC_TOK) { | ||
97 | tmsglen = 0; | ||
98 | /* Calculate checksum over the mic-header */ | ||
99 | asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); | ||
100 | spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, | ||
101 | ctxelen, ctxzbit); | ||
102 | |||
103 | if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, | ||
104 | text, &md5cksum)) | ||
105 | goto out_err; | ||
106 | |||
107 | asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); | ||
108 | tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1; | ||
109 | |||
110 | /* Create token header using generic routines */ | ||
111 | token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen); | ||
112 | |||
113 | ptr = token->data; | ||
114 | g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr); | ||
115 | |||
116 | spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); | ||
117 | } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ | ||
118 | dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n"); | ||
119 | goto out_err; | ||
120 | } | ||
121 | kfree(md5cksum.data); | ||
122 | |||
123 | /* XXX need to implement sequence numbers, and ctx->expired */ | ||
124 | |||
125 | return GSS_S_COMPLETE; | ||
126 | out_err: | ||
127 | if (md5cksum.data) | ||
128 | kfree(md5cksum.data); | ||
129 | token->data = NULL; | ||
130 | token->len = 0; | ||
131 | return GSS_S_FAILURE; | ||
132 | } | ||
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c new file mode 100644 index 000000000000..46c08a0710f6 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c | |||
@@ -0,0 +1,266 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/gss_spkm3_token.c | ||
3 | * | ||
4 | * Copyright (c) 2003 The Regents of the University of Michigan. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Andy Adamson <andros@umich.edu> | ||
8 | * | ||
9 | * Redistribution and use in source and binary forms, with or without | ||
10 | * modification, are permitted provided that the following conditions | ||
11 | * are met: | ||
12 | * | ||
13 | * 1. Redistributions of source code must retain the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer. | ||
15 | * 2. Redistributions in binary form must reproduce the above copyright | ||
16 | * notice, this list of conditions and the following disclaimer in the | ||
17 | * documentation and/or other materials provided with the distribution. | ||
18 | * 3. Neither the name of the University nor the names of its | ||
19 | * contributors may be used to endorse or promote products derived | ||
20 | * from this software without specific prior written permission. | ||
21 | * | ||
22 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
23 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
24 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
25 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
29 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | * | ||
34 | */ | ||
35 | |||
36 | #include <linux/types.h> | ||
37 | #include <linux/slab.h> | ||
38 | #include <linux/jiffies.h> | ||
39 | #include <linux/sunrpc/gss_spkm3.h> | ||
40 | #include <linux/random.h> | ||
41 | #include <linux/crypto.h> | ||
42 | |||
43 | #ifdef RPC_DEBUG | ||
44 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
45 | #endif | ||
46 | |||
47 | /* | ||
48 | * asn1_bitstring_len() | ||
49 | * | ||
50 | * calculate the asn1 bitstring length of the xdr_netobject | ||
51 | */ | ||
52 | void | ||
53 | asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits) | ||
54 | { | ||
55 | int i, zbit = 0,elen = in->len; | ||
56 | char *ptr; | ||
57 | |||
58 | ptr = &in->data[in->len -1]; | ||
59 | |||
60 | /* count trailing 0's */ | ||
61 | for(i = in->len; i > 0; i--) { | ||
62 | if (*ptr == 0) { | ||
63 | ptr--; | ||
64 | elen--; | ||
65 | } else | ||
66 | break; | ||
67 | } | ||
68 | |||
69 | /* count number of 0 bits in final octet */ | ||
70 | ptr = &in->data[elen - 1]; | ||
71 | for(i = 0; i < 8; i++) { | ||
72 | short mask = 0x01; | ||
73 | |||
74 | if (!((mask << i) & *ptr)) | ||
75 | zbit++; | ||
76 | else | ||
77 | break; | ||
78 | } | ||
79 | *enclen = elen; | ||
80 | *zerobits = zbit; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * decode_asn1_bitstring() | ||
85 | * | ||
86 | * decode a bitstring into a buffer of the expected length. | ||
87 | * enclen = bit string length | ||
88 | * explen = expected length (define in rfc) | ||
89 | */ | ||
90 | int | ||
91 | decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen) | ||
92 | { | ||
93 | if (!(out->data = kmalloc(explen,GFP_KERNEL))) | ||
94 | return 0; | ||
95 | out->len = explen; | ||
96 | memset(out->data, 0, explen); | ||
97 | memcpy(out->data, in, enclen); | ||
98 | return 1; | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * SPKMInnerContextToken choice SPKM_MIC asn1 token layout | ||
103 | * | ||
104 | * contextid is always 16 bytes plain data. max asn1 bitstring len = 17. | ||
105 | * | ||
106 | * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum) | ||
107 | * | ||
108 | * pos value | ||
109 | * ---------- | ||
110 | * [0] a4 SPKM-MIC tag | ||
111 | * [1] ?? innertoken length (max 44) | ||
112 | * | ||
113 | * | ||
114 | * tok_hdr piece of checksum data starts here | ||
115 | * | ||
116 | * the maximum mic-header len = 9 + 17 = 26 | ||
117 | * mic-header | ||
118 | * ---------- | ||
119 | * [2] 30 SEQUENCE tag | ||
120 | * [3] ?? mic-header length: (max 23) = TokenID + ContextID | ||
121 | * | ||
122 | * TokenID - all fields constant and can be hardcoded | ||
123 | * ------- | ||
124 | * [4] 02 Type 2 | ||
125 | * [5] 02 Length 2 | ||
126 | * [6][7] 01 01 TokenID (SPKM_MIC_TOK) | ||
127 | * | ||
128 | * ContextID - encoded length not constant, calculated | ||
129 | * --------- | ||
130 | * [8] 03 Type 3 | ||
131 | * [9] ?? encoded length | ||
132 | * [10] ?? ctxzbit | ||
133 | * [11] contextid | ||
134 | * | ||
135 | * mic_header piece of checksum data ends here. | ||
136 | * | ||
137 | * int-cksum - encoded length not constant, calculated | ||
138 | * --------- | ||
139 | * [??] 03 Type 3 | ||
140 | * [??] ?? encoded length | ||
141 | * [??] ?? md5zbit | ||
142 | * [??] int-cksum (NID_md5 = 16) | ||
143 | * | ||
144 | * maximum SPKM-MIC innercontext token length = | ||
145 | * 10 + encoded contextid_size(17 max) + 2 + encoded | ||
146 | * cksum_size (17 max for NID_md5) = 46 | ||
147 | */ | ||
148 | |||
/*
 * spkm3_mic_header()
 *
 * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
 *
 * @hdrbuf:  *hdrbuf is the caller's buffer; it must have room for
 *           9 + elen octets.  The pointer itself is not advanced.
 * @hdrlen:  out: number of octets written (9 + elen)
 * @ctxdata: context id octets
 * @elen:    asn1 bitstring encoded length of the context id
 * @zbit:    number of unused (zero) bits in the final context id octet
 */
void
spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
{
	unsigned char *top = *hdrbuf;
	unsigned char *hptr = top;

	*hptr++ = 0x30;
	*hptr++ = elen + 7; /* on the wire header length */

	/* tokenid */
	*hptr++ = 0x02;
	*hptr++ = 0x02;
	*hptr++ = 0x01;
	*hptr++ = 0x01;

	/* contextid */
	*hptr++ = 0x03;
	*hptr++ = elen + 1; /* add 1 to include zbit */
	*hptr++ = zbit;
	memcpy(hptr, ctxdata, elen);
	hptr += elen;
	*hdrlen = hptr - top;
}
178 | |||
179 | /* | ||
180 | * spkm3_mic_innercontext_token() | ||
181 | * | ||
182 | * *tokp points to the beginning of the SPKM_MIC token described | ||
183 | * in rfc 2025, section 3.2.1: | ||
184 | * | ||
185 | */ | ||
186 | void | ||
187 | spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit) | ||
188 | { | ||
189 | unsigned char *ict = *tokp; | ||
190 | |||
191 | *(u8 *)ict++ = 0xa4; | ||
192 | *(u8 *)ict++ = toklen - 2; | ||
193 | memcpy(ict, mic_hdr->data, mic_hdr->len); | ||
194 | ict += mic_hdr->len; | ||
195 | |||
196 | *(u8 *)ict++ = 0x03; | ||
197 | *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */ | ||
198 | *(u8 *)ict++ = md5zbit; | ||
199 | memcpy(ict, md5cksum->data, md5elen); | ||
200 | } | ||
201 | |||
202 | u32 | ||
203 | spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum) | ||
204 | { | ||
205 | struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL}; | ||
206 | unsigned char *ptr = *tokp; | ||
207 | int ctxelen; | ||
208 | u32 ret = GSS_S_DEFECTIVE_TOKEN; | ||
209 | |||
210 | /* spkm3 innercontext token preamble */ | ||
211 | if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) { | ||
212 | dprintk("RPC: BAD SPKM ictoken preamble\n"); | ||
213 | goto out; | ||
214 | } | ||
215 | |||
216 | *mic_hdrlen = ptr[3]; | ||
217 | |||
218 | /* token type */ | ||
219 | if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) { | ||
220 | dprintk("RPC: BAD asn1 SPKM3 token type\n"); | ||
221 | goto out; | ||
222 | } | ||
223 | |||
224 | /* only support SPKM_MIC_TOK */ | ||
225 | if((ptr[6] != 0x01) || (ptr[7] != 0x01)) { | ||
226 | dprintk("RPC: ERROR unsupported SPKM3 token \n"); | ||
227 | goto out; | ||
228 | } | ||
229 | |||
230 | /* contextid */ | ||
231 | if (ptr[8] != 0x03) { | ||
232 | dprintk("RPC: BAD SPKM3 asn1 context-id type\n"); | ||
233 | goto out; | ||
234 | } | ||
235 | |||
236 | ctxelen = ptr[9]; | ||
237 | if (ctxelen > 17) { /* length includes asn1 zbit octet */ | ||
238 | dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen); | ||
239 | goto out; | ||
240 | } | ||
241 | |||
242 | /* ignore ptr[10] */ | ||
243 | |||
244 | if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16)) | ||
245 | goto out; | ||
246 | |||
247 | /* | ||
248 | * in the current implementation: the optional int-alg is not present | ||
249 | * so the default int-alg (md5) is used the optional snd-seq field is | ||
250 | * also not present | ||
251 | */ | ||
252 | |||
253 | if (*mic_hdrlen != 6 + ctxelen) { | ||
254 | dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only support default int-alg (should be absent) and do not support snd-seq\n", *mic_hdrlen); | ||
255 | goto out; | ||
256 | } | ||
257 | /* checksum */ | ||
258 | *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */ | ||
259 | |||
260 | ret = GSS_S_COMPLETE; | ||
261 | out: | ||
262 | if (spkm3_ctx_id.data) | ||
263 | kfree(spkm3_ctx_id.data); | ||
264 | return ret; | ||
265 | } | ||
266 | |||
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c new file mode 100644 index 000000000000..65ce81bf0bc4 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/gss_spkm3_unseal.c | ||
3 | * | ||
4 | * Copyright (c) 2003 The Regents of the University of Michigan. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Andy Adamson <andros@umich.edu> | ||
8 | * | ||
9 | * Redistribution and use in source and binary forms, with or without | ||
10 | * modification, are permitted provided that the following conditions | ||
11 | * are met: | ||
12 | * | ||
13 | * 1. Redistributions of source code must retain the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer. | ||
15 | * 2. Redistributions in binary form must reproduce the above copyright | ||
16 | * notice, this list of conditions and the following disclaimer in the | ||
17 | * documentation and/or other materials provided with the distribution. | ||
18 | * 3. Neither the name of the University nor the names of its | ||
19 | * contributors may be used to endorse or promote products derived | ||
20 | * from this software without specific prior written permission. | ||
21 | * | ||
22 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
23 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
24 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
25 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
29 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | * | ||
34 | */ | ||
35 | |||
36 | #include <linux/types.h> | ||
37 | #include <linux/slab.h> | ||
38 | #include <linux/jiffies.h> | ||
39 | #include <linux/sunrpc/gss_spkm3.h> | ||
40 | #include <linux/crypto.h> | ||
41 | |||
42 | #ifdef RPC_DEBUG | ||
43 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
44 | #endif | ||
45 | |||
46 | /* | ||
47 | * spkm3_read_token() | ||
48 | * | ||
49 | * only SPKM_MIC_TOK with md5 intg-alg is supported | ||
50 | */ | ||
51 | u32 | ||
52 | spkm3_read_token(struct spkm3_ctx *ctx, | ||
53 | struct xdr_netobj *read_token, /* checksum */ | ||
54 | struct xdr_buf *message_buffer, /* signbuf */ | ||
55 | int *qop_state, int toktype) | ||
56 | { | ||
57 | s32 code; | ||
58 | struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; | ||
59 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | ||
60 | unsigned char *ptr = (unsigned char *)read_token->data; | ||
61 | unsigned char *cksum; | ||
62 | int bodysize, md5elen; | ||
63 | int mic_hdrlen; | ||
64 | u32 ret = GSS_S_DEFECTIVE_TOKEN; | ||
65 | |||
66 | dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len); | ||
67 | |||
68 | if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, | ||
69 | &bodysize, &ptr, read_token->len)) | ||
70 | goto out; | ||
71 | |||
72 | /* decode the token */ | ||
73 | |||
74 | if (toktype == SPKM_MIC_TOK) { | ||
75 | |||
76 | if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) | ||
77 | goto out; | ||
78 | |||
79 | if (*cksum++ != 0x03) { | ||
80 | dprintk("RPC: spkm3_read_token BAD checksum type\n"); | ||
81 | goto out; | ||
82 | } | ||
83 | md5elen = *cksum++; | ||
84 | cksum++; /* move past the zbit */ | ||
85 | |||
86 | if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16)) | ||
87 | goto out; | ||
88 | |||
89 | /* HARD CODED FOR MD5 */ | ||
90 | |||
91 | /* compute the checksum of the message. | ||
92 | * ptr + 2 = start of header piece of checksum | ||
93 | * mic_hdrlen + 2 = length of header piece of checksum | ||
94 | */ | ||
95 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
96 | code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, | ||
97 | mic_hdrlen + 2, | ||
98 | message_buffer, &md5cksum); | ||
99 | |||
100 | if (code) | ||
101 | goto out; | ||
102 | |||
103 | dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n", | ||
104 | wire_cksum.len); | ||
105 | dprintk(" md5cksum.data\n"); | ||
106 | print_hexl((u32 *) md5cksum.data, 16, 0); | ||
107 | dprintk(" cksum.data:\n"); | ||
108 | print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0); | ||
109 | |||
110 | ret = GSS_S_BAD_SIG; | ||
111 | code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len); | ||
112 | if (code) | ||
113 | goto out; | ||
114 | |||
115 | } else { | ||
116 | dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype); | ||
117 | goto out; | ||
118 | } | ||
119 | |||
120 | /* XXX: need to add expiration and sequencing */ | ||
121 | ret = GSS_S_COMPLETE; | ||
122 | out: | ||
123 | if (md5cksum.data) | ||
124 | kfree(md5cksum.data); | ||
125 | if (wire_cksum.data) | ||
126 | kfree(wire_cksum.data); | ||
127 | return ret; | ||
128 | } | ||
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c new file mode 100644 index 000000000000..5c8fe3bfc494 --- /dev/null +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
@@ -0,0 +1,1080 @@ | |||
1 | /* | ||
2 | * Neil Brown <neilb@cse.unsw.edu.au> | ||
3 | * J. Bruce Fields <bfields@umich.edu> | ||
4 | * Andy Adamson <andros@umich.edu> | ||
5 | * Dug Song <dugsong@monkey.org> | ||
6 | * | ||
7 | * RPCSEC_GSS server authentication. | ||
8 | * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078 | ||
9 | * (gssapi) | ||
10 | * | ||
11 | * The RPCSEC_GSS involves three stages: | ||
12 | * 1/ context creation | ||
13 | * 2/ data exchange | ||
14 | * 3/ context destruction | ||
15 | * | ||
16 | * Context creation is handled largely by upcalls to user-space. | ||
17 | * In particular, GSS_Accept_sec_context is handled by an upcall | ||
18 | * Data exchange is handled entirely within the kernel | ||
19 | * In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel. | ||
20 | * Context destruction is handled in-kernel | ||
21 | * GSS_Delete_sec_context is in-kernel | ||
22 | * | ||
23 | * Context creation is initiated by a RPCSEC_GSS_INIT request arriving. | ||
24 | * The context handle and gss_token are used as a key into the rpcsec_init cache. | ||
25 | * The content of this cache includes some of the outputs of GSS_Accept_sec_context, | ||
26 | * being major_status, minor_status, context_handle, reply_token. | ||
27 | * These are sent back to the client. | ||
28 | * Sequence window management is handled by the kernel. The window size is currently | ||
29 | * a compile time constant. | ||
30 | * | ||
31 | * When user-space is happy that a context is established, it places an entry | ||
32 | * in the rpcsec_context cache. The key for this cache is the context_handle. | ||
33 | * The content includes: | ||
34 | * uid/gidlist - for determining access rights | ||
35 | * mechanism type | ||
36 | * mechanism specific information, such as a key | ||
37 | * | ||
38 | */ | ||
39 | |||
40 | #include <linux/types.h> | ||
41 | #include <linux/module.h> | ||
42 | #include <linux/pagemap.h> | ||
43 | |||
44 | #include <linux/sunrpc/auth_gss.h> | ||
45 | #include <linux/sunrpc/svcauth.h> | ||
46 | #include <linux/sunrpc/gss_err.h> | ||
47 | #include <linux/sunrpc/svcauth.h> | ||
48 | #include <linux/sunrpc/svcauth_gss.h> | ||
49 | #include <linux/sunrpc/cache.h> | ||
50 | |||
51 | #ifdef RPC_DEBUG | ||
52 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
53 | #endif | ||
54 | |||
55 | /* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests | ||
56 | * into replies. | ||
57 | * | ||
58 | * Key is context handle (\x if empty) and gss_token. | ||
59 | * Content is major_status minor_status (integers) context_handle, reply_token. | ||
60 | * | ||
61 | */ | ||
62 | |||
63 | static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b) | ||
64 | { | ||
65 | return a->len == b->len && 0 == memcmp(a->data, b->data, a->len); | ||
66 | } | ||
67 | |||
/* Hash table geometry for the rpcsec_init (rsi) cache: 2^RSI_HASHBITS buckets. */
#define	RSI_HASHBITS	6
#define	RSI_HASHMAX	(1<<RSI_HASHBITS)
#define	RSI_HASHMASK	(RSI_HASHMAX-1)
71 | |||
/*
 * One rpcsec_init cache entry.  The key is (in_handle, in_token); the
 * content is the reply: out_handle, out_token and the two status words.
 */
struct rsi {
	struct cache_head	h;	/* embedded cache bookkeeping */
	struct xdr_netobj	in_handle, in_token;	/* lookup key */
	struct xdr_netobj	out_handle, out_token;	/* reply content */
	int			major_status, minor_status;
};
78 | |||
/* Bucket array for rsi_cache, plus forward declarations used before their definitions. */
static struct cache_head *rsi_table[RSI_HASHMAX];
static struct cache_detail rsi_cache;
static struct rsi *rsi_lookup(struct rsi *item, int set);
82 | |||
83 | static void rsi_free(struct rsi *rsii) | ||
84 | { | ||
85 | kfree(rsii->in_handle.data); | ||
86 | kfree(rsii->in_token.data); | ||
87 | kfree(rsii->out_handle.data); | ||
88 | kfree(rsii->out_token.data); | ||
89 | } | ||
90 | |||
91 | static void rsi_put(struct cache_head *item, struct cache_detail *cd) | ||
92 | { | ||
93 | struct rsi *rsii = container_of(item, struct rsi, h); | ||
94 | if (cache_put(item, cd)) { | ||
95 | rsi_free(rsii); | ||
96 | kfree(rsii); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | static inline int rsi_hash(struct rsi *item) | ||
101 | { | ||
102 | return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS) | ||
103 | ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS); | ||
104 | } | ||
105 | |||
106 | static inline int rsi_match(struct rsi *item, struct rsi *tmp) | ||
107 | { | ||
108 | return netobj_equal(&item->in_handle, &tmp->in_handle) | ||
109 | && netobj_equal(&item->in_token, &tmp->in_token); | ||
110 | } | ||
111 | |||
112 | static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len) | ||
113 | { | ||
114 | dst->len = len; | ||
115 | dst->data = (len ? kmalloc(len, GFP_KERNEL) : NULL); | ||
116 | if (dst->data) | ||
117 | memcpy(dst->data, src, len); | ||
118 | if (len && !dst->data) | ||
119 | return -ENOMEM; | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src) | ||
124 | { | ||
125 | return dup_to_netobj(dst, src->data, src->len); | ||
126 | } | ||
127 | |||
128 | static inline void rsi_init(struct rsi *new, struct rsi *item) | ||
129 | { | ||
130 | new->out_handle.data = NULL; | ||
131 | new->out_handle.len = 0; | ||
132 | new->out_token.data = NULL; | ||
133 | new->out_token.len = 0; | ||
134 | new->in_handle.len = item->in_handle.len; | ||
135 | item->in_handle.len = 0; | ||
136 | new->in_token.len = item->in_token.len; | ||
137 | item->in_token.len = 0; | ||
138 | new->in_handle.data = item->in_handle.data; | ||
139 | item->in_handle.data = NULL; | ||
140 | new->in_token.data = item->in_token.data; | ||
141 | item->in_token.data = NULL; | ||
142 | } | ||
143 | |||
144 | static inline void rsi_update(struct rsi *new, struct rsi *item) | ||
145 | { | ||
146 | BUG_ON(new->out_handle.data || new->out_token.data); | ||
147 | new->out_handle.len = item->out_handle.len; | ||
148 | item->out_handle.len = 0; | ||
149 | new->out_token.len = item->out_token.len; | ||
150 | item->out_token.len = 0; | ||
151 | new->out_handle.data = item->out_handle.data; | ||
152 | item->out_handle.data = NULL; | ||
153 | new->out_token.data = item->out_token.data; | ||
154 | item->out_token.data = NULL; | ||
155 | |||
156 | new->major_status = item->major_status; | ||
157 | new->minor_status = item->minor_status; | ||
158 | } | ||
159 | |||
160 | static void rsi_request(struct cache_detail *cd, | ||
161 | struct cache_head *h, | ||
162 | char **bpp, int *blen) | ||
163 | { | ||
164 | struct rsi *rsii = container_of(h, struct rsi, h); | ||
165 | |||
166 | qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len); | ||
167 | qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len); | ||
168 | (*bpp)[-1] = '\n'; | ||
169 | } | ||
170 | |||
171 | |||
172 | static int rsi_parse(struct cache_detail *cd, | ||
173 | char *mesg, int mlen) | ||
174 | { | ||
175 | /* context token expiry major minor context token */ | ||
176 | char *buf = mesg; | ||
177 | char *ep; | ||
178 | int len; | ||
179 | struct rsi rsii, *rsip = NULL; | ||
180 | time_t expiry; | ||
181 | int status = -EINVAL; | ||
182 | |||
183 | memset(&rsii, 0, sizeof(rsii)); | ||
184 | /* handle */ | ||
185 | len = qword_get(&mesg, buf, mlen); | ||
186 | if (len < 0) | ||
187 | goto out; | ||
188 | status = -ENOMEM; | ||
189 | if (dup_to_netobj(&rsii.in_handle, buf, len)) | ||
190 | goto out; | ||
191 | |||
192 | /* token */ | ||
193 | len = qword_get(&mesg, buf, mlen); | ||
194 | status = -EINVAL; | ||
195 | if (len < 0) | ||
196 | goto out; | ||
197 | status = -ENOMEM; | ||
198 | if (dup_to_netobj(&rsii.in_token, buf, len)) | ||
199 | goto out; | ||
200 | |||
201 | rsii.h.flags = 0; | ||
202 | /* expiry */ | ||
203 | expiry = get_expiry(&mesg); | ||
204 | status = -EINVAL; | ||
205 | if (expiry == 0) | ||
206 | goto out; | ||
207 | |||
208 | /* major/minor */ | ||
209 | len = qword_get(&mesg, buf, mlen); | ||
210 | if (len < 0) | ||
211 | goto out; | ||
212 | if (len == 0) { | ||
213 | goto out; | ||
214 | } else { | ||
215 | rsii.major_status = simple_strtoul(buf, &ep, 10); | ||
216 | if (*ep) | ||
217 | goto out; | ||
218 | len = qword_get(&mesg, buf, mlen); | ||
219 | if (len <= 0) | ||
220 | goto out; | ||
221 | rsii.minor_status = simple_strtoul(buf, &ep, 10); | ||
222 | if (*ep) | ||
223 | goto out; | ||
224 | |||
225 | /* out_handle */ | ||
226 | len = qword_get(&mesg, buf, mlen); | ||
227 | if (len < 0) | ||
228 | goto out; | ||
229 | status = -ENOMEM; | ||
230 | if (dup_to_netobj(&rsii.out_handle, buf, len)) | ||
231 | goto out; | ||
232 | |||
233 | /* out_token */ | ||
234 | len = qword_get(&mesg, buf, mlen); | ||
235 | status = -EINVAL; | ||
236 | if (len < 0) | ||
237 | goto out; | ||
238 | status = -ENOMEM; | ||
239 | if (dup_to_netobj(&rsii.out_token, buf, len)) | ||
240 | goto out; | ||
241 | } | ||
242 | rsii.h.expiry_time = expiry; | ||
243 | rsip = rsi_lookup(&rsii, 1); | ||
244 | status = 0; | ||
245 | out: | ||
246 | rsi_free(&rsii); | ||
247 | if (rsip) | ||
248 | rsi_put(&rsip->h, &rsi_cache); | ||
249 | return status; | ||
250 | } | ||
251 | |||
/*
 * Cache descriptor for the rpcsec_init cache ("auth.rpcsec.init"): hooks up
 * the bucket array and the put/request/parse callbacks defined in this file.
 */
static struct cache_detail rsi_cache = {
	.hash_size	= RSI_HASHMAX,
	.hash_table     = rsi_table,
	.name           = "auth.rpcsec.init",
	.cache_put      = rsi_put,
	.cache_request  = rsi_request,
	.cache_parse    = rsi_parse,
};
260 | |||
/*
 * NOTE(review): presumably expands to the definition of rsi_lookup(), which
 * is forward-declared earlier in this file; the macro and the meaning of its
 * second argument are defined elsewhere -- confirm.
 */
static DefineSimpleCacheLookup(rsi, 0)
262 | |||
263 | /* | ||
264 | * The rpcsec_context cache is used to store a context that is | ||
265 | * used in data exchange. | ||
266 | * The key is a context handle. The content is: | ||
267 | * uid, gidlist, mechanism, service-set, mech-specific-data | ||
268 | */ | ||
269 | |||
/* Hash table geometry for the rpcsec_context (rsc) cache: 2^RSC_HASHBITS buckets. */
#define	RSC_HASHBITS	10
#define	RSC_HASHMAX	(1<<RSC_HASHBITS)
#define	RSC_HASHMASK	(RSC_HASHMAX-1)

/* Number of sequence numbers tracked per context (size of sd_win in bits). */
#define GSS_SEQ_WIN	128
275 | |||
/* Per-context sequence window state; see the field comments below. */
struct gss_svc_seq_data {
	/* highest seq number seen so far: */
	int			sd_max;
	/* for i such that sd_max-GSS_SEQ_WIN < i <= sd_max, the i-th bit of
	 * sd_win is nonzero iff sequence number i has been seen already: */
	unsigned long		sd_win[GSS_SEQ_WIN/BITS_PER_LONG];
	/* NOTE(review): presumably guards sd_max and sd_win -- confirm at the users */
	spinlock_t		sd_lock;
};
284 | |||
/*
 * One rpcsec_context cache entry.  The key is the context handle; the
 * content is the credential, the sequence window state and the
 * mechanism-specific context.
 */
struct rsc {
	struct cache_head	h;	/* embedded cache bookkeeping */
	struct xdr_netobj	handle;	/* lookup key */
	struct svc_cred		cred;
	struct gss_svc_seq_data	seqdata;
	struct gss_ctx		*mechctx;	/* released via gss_delete_sec_context() in rsc_free() */
};
292 | |||
/* Bucket array for rsc_cache, plus forward declarations used before their definitions. */
static struct cache_head *rsc_table[RSC_HASHMAX];
static struct cache_detail rsc_cache;
static struct rsc *rsc_lookup(struct rsc *item, int set);
296 | |||
297 | static void rsc_free(struct rsc *rsci) | ||
298 | { | ||
299 | kfree(rsci->handle.data); | ||
300 | if (rsci->mechctx) | ||
301 | gss_delete_sec_context(&rsci->mechctx); | ||
302 | if (rsci->cred.cr_group_info) | ||
303 | put_group_info(rsci->cred.cr_group_info); | ||
304 | } | ||
305 | |||
306 | static void rsc_put(struct cache_head *item, struct cache_detail *cd) | ||
307 | { | ||
308 | struct rsc *rsci = container_of(item, struct rsc, h); | ||
309 | |||
310 | if (cache_put(item, cd)) { | ||
311 | rsc_free(rsci); | ||
312 | kfree(rsci); | ||
313 | } | ||
314 | } | ||
315 | |||
316 | static inline int | ||
317 | rsc_hash(struct rsc *rsci) | ||
318 | { | ||
319 | return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS); | ||
320 | } | ||
321 | |||
322 | static inline int | ||
323 | rsc_match(struct rsc *new, struct rsc *tmp) | ||
324 | { | ||
325 | return netobj_equal(&new->handle, &tmp->handle); | ||
326 | } | ||
327 | |||
328 | static inline void | ||
329 | rsc_init(struct rsc *new, struct rsc *tmp) | ||
330 | { | ||
331 | new->handle.len = tmp->handle.len; | ||
332 | tmp->handle.len = 0; | ||
333 | new->handle.data = tmp->handle.data; | ||
334 | tmp->handle.data = NULL; | ||
335 | new->mechctx = NULL; | ||
336 | new->cred.cr_group_info = NULL; | ||
337 | } | ||
338 | |||
339 | static inline void | ||
340 | rsc_update(struct rsc *new, struct rsc *tmp) | ||
341 | { | ||
342 | new->mechctx = tmp->mechctx; | ||
343 | tmp->mechctx = NULL; | ||
344 | memset(&new->seqdata, 0, sizeof(new->seqdata)); | ||
345 | spin_lock_init(&new->seqdata.sd_lock); | ||
346 | new->cred = tmp->cred; | ||
347 | tmp->cred.cr_group_info = NULL; | ||
348 | } | ||
349 | |||
350 | static int rsc_parse(struct cache_detail *cd, | ||
351 | char *mesg, int mlen) | ||
352 | { | ||
353 | /* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */ | ||
354 | char *buf = mesg; | ||
355 | int len, rv; | ||
356 | struct rsc rsci, *rscp = NULL; | ||
357 | time_t expiry; | ||
358 | int status = -EINVAL; | ||
359 | |||
360 | memset(&rsci, 0, sizeof(rsci)); | ||
361 | /* context handle */ | ||
362 | len = qword_get(&mesg, buf, mlen); | ||
363 | if (len < 0) goto out; | ||
364 | status = -ENOMEM; | ||
365 | if (dup_to_netobj(&rsci.handle, buf, len)) | ||
366 | goto out; | ||
367 | |||
368 | rsci.h.flags = 0; | ||
369 | /* expiry */ | ||
370 | expiry = get_expiry(&mesg); | ||
371 | status = -EINVAL; | ||
372 | if (expiry == 0) | ||
373 | goto out; | ||
374 | |||
375 | /* uid, or NEGATIVE */ | ||
376 | rv = get_int(&mesg, &rsci.cred.cr_uid); | ||
377 | if (rv == -EINVAL) | ||
378 | goto out; | ||
379 | if (rv == -ENOENT) | ||
380 | set_bit(CACHE_NEGATIVE, &rsci.h.flags); | ||
381 | else { | ||
382 | int N, i; | ||
383 | struct gss_api_mech *gm; | ||
384 | |||
385 | /* gid */ | ||
386 | if (get_int(&mesg, &rsci.cred.cr_gid)) | ||
387 | goto out; | ||
388 | |||
389 | /* number of additional gid's */ | ||
390 | if (get_int(&mesg, &N)) | ||
391 | goto out; | ||
392 | status = -ENOMEM; | ||
393 | rsci.cred.cr_group_info = groups_alloc(N); | ||
394 | if (rsci.cred.cr_group_info == NULL) | ||
395 | goto out; | ||
396 | |||
397 | /* gid's */ | ||
398 | status = -EINVAL; | ||
399 | for (i=0; i<N; i++) { | ||
400 | gid_t gid; | ||
401 | if (get_int(&mesg, &gid)) | ||
402 | goto out; | ||
403 | GROUP_AT(rsci.cred.cr_group_info, i) = gid; | ||
404 | } | ||
405 | |||
406 | /* mech name */ | ||
407 | len = qword_get(&mesg, buf, mlen); | ||
408 | if (len < 0) | ||
409 | goto out; | ||
410 | gm = gss_mech_get_by_name(buf); | ||
411 | status = -EOPNOTSUPP; | ||
412 | if (!gm) | ||
413 | goto out; | ||
414 | |||
415 | status = -EINVAL; | ||
416 | /* mech-specific data: */ | ||
417 | len = qword_get(&mesg, buf, mlen); | ||
418 | if (len < 0) { | ||
419 | gss_mech_put(gm); | ||
420 | goto out; | ||
421 | } | ||
422 | if (gss_import_sec_context(buf, len, gm, &rsci.mechctx)) { | ||
423 | gss_mech_put(gm); | ||
424 | goto out; | ||
425 | } | ||
426 | gss_mech_put(gm); | ||
427 | } | ||
428 | rsci.h.expiry_time = expiry; | ||
429 | rscp = rsc_lookup(&rsci, 1); | ||
430 | status = 0; | ||
431 | out: | ||
432 | rsc_free(&rsci); | ||
433 | if (rscp) | ||
434 | rsc_put(&rscp->h, &rsc_cache); | ||
435 | return status; | ||
436 | } | ||
437 | |||
438 | static struct cache_detail rsc_cache = { | ||
439 | .hash_size = RSC_HASHMAX, | ||
440 | .hash_table = rsc_table, | ||
441 | .name = "auth.rpcsec.context", | ||
442 | .cache_put = rsc_put, | ||
443 | .cache_parse = rsc_parse, | ||
444 | }; | ||
445 | |||
446 | static DefineSimpleCacheLookup(rsc, 0); | ||
447 | |||
448 | static struct rsc * | ||
449 | gss_svc_searchbyctx(struct xdr_netobj *handle) | ||
450 | { | ||
451 | struct rsc rsci; | ||
452 | struct rsc *found; | ||
453 | |||
454 | memset(&rsci, 0, sizeof(rsci)); | ||
455 | if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) | ||
456 | return NULL; | ||
457 | found = rsc_lookup(&rsci, 0); | ||
458 | rsc_free(&rsci); | ||
459 | if (!found) | ||
460 | return NULL; | ||
461 | if (cache_check(&rsc_cache, &found->h, NULL)) | ||
462 | return NULL; | ||
463 | return found; | ||
464 | } | ||
465 | |||
466 | /* Implements sequence number algorithm as specified in RFC 2203. */ | ||
467 | static int | ||
468 | gss_check_seq_num(struct rsc *rsci, int seq_num) | ||
469 | { | ||
470 | struct gss_svc_seq_data *sd = &rsci->seqdata; | ||
471 | |||
472 | spin_lock(&sd->sd_lock); | ||
473 | if (seq_num > sd->sd_max) { | ||
474 | if (seq_num >= sd->sd_max + GSS_SEQ_WIN) { | ||
475 | memset(sd->sd_win,0,sizeof(sd->sd_win)); | ||
476 | sd->sd_max = seq_num; | ||
477 | } else while (sd->sd_max < seq_num) { | ||
478 | sd->sd_max++; | ||
479 | __clear_bit(sd->sd_max % GSS_SEQ_WIN, sd->sd_win); | ||
480 | } | ||
481 | __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win); | ||
482 | goto ok; | ||
483 | } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) { | ||
484 | goto drop; | ||
485 | } | ||
486 | /* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */ | ||
487 | if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win)) | ||
488 | goto drop; | ||
489 | ok: | ||
490 | spin_unlock(&sd->sd_lock); | ||
491 | return 1; | ||
492 | drop: | ||
493 | spin_unlock(&sd->sd_lock); | ||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | static inline u32 round_up_to_quad(u32 i) | ||
498 | { | ||
499 | return (i + 3 ) & ~3; | ||
500 | } | ||
501 | |||
502 | static inline int | ||
503 | svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o) | ||
504 | { | ||
505 | int l; | ||
506 | |||
507 | if (argv->iov_len < 4) | ||
508 | return -1; | ||
509 | o->len = ntohl(svc_getu32(argv)); | ||
510 | l = round_up_to_quad(o->len); | ||
511 | if (argv->iov_len < l) | ||
512 | return -1; | ||
513 | o->data = argv->iov_base; | ||
514 | argv->iov_base += l; | ||
515 | argv->iov_len -= l; | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | static inline int | ||
520 | svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) | ||
521 | { | ||
522 | u32 *p; | ||
523 | |||
524 | if (resv->iov_len + 4 > PAGE_SIZE) | ||
525 | return -1; | ||
526 | svc_putu32(resv, htonl(o->len)); | ||
527 | p = resv->iov_base + resv->iov_len; | ||
528 | resv->iov_len += round_up_to_quad(o->len); | ||
529 | if (resv->iov_len > PAGE_SIZE) | ||
530 | return -1; | ||
531 | memcpy(p, o->data, o->len); | ||
532 | memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len); | ||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | /* Verify the checksum on the header and return SVC_OK on success. | ||
537 | * Otherwise, return SVC_DROP (in the case of a bad sequence number) | ||
538 | * or return SVC_DENIED and indicate error in authp. | ||
539 | */ | ||
540 | static int | ||
541 | gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, | ||
542 | u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp) | ||
543 | { | ||
544 | struct gss_ctx *ctx_id = rsci->mechctx; | ||
545 | struct xdr_buf rpchdr; | ||
546 | struct xdr_netobj checksum; | ||
547 | u32 flavor = 0; | ||
548 | struct kvec *argv = &rqstp->rq_arg.head[0]; | ||
549 | struct kvec iov; | ||
550 | |||
551 | /* data to compute the checksum over: */ | ||
552 | iov.iov_base = rpcstart; | ||
553 | iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart; | ||
554 | xdr_buf_from_iov(&iov, &rpchdr); | ||
555 | |||
556 | *authp = rpc_autherr_badverf; | ||
557 | if (argv->iov_len < 4) | ||
558 | return SVC_DENIED; | ||
559 | flavor = ntohl(svc_getu32(argv)); | ||
560 | if (flavor != RPC_AUTH_GSS) | ||
561 | return SVC_DENIED; | ||
562 | if (svc_safe_getnetobj(argv, &checksum)) | ||
563 | return SVC_DENIED; | ||
564 | |||
565 | if (rqstp->rq_deferred) /* skip verification of revisited request */ | ||
566 | return SVC_OK; | ||
567 | if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) | ||
568 | != GSS_S_COMPLETE) { | ||
569 | *authp = rpcsec_gsserr_credproblem; | ||
570 | return SVC_DENIED; | ||
571 | } | ||
572 | |||
573 | if (gc->gc_seq > MAXSEQ) { | ||
574 | dprintk("RPC: svcauth_gss: discarding request with large sequence number %d\n", | ||
575 | gc->gc_seq); | ||
576 | *authp = rpcsec_gsserr_ctxproblem; | ||
577 | return SVC_DENIED; | ||
578 | } | ||
579 | if (!gss_check_seq_num(rsci, gc->gc_seq)) { | ||
580 | dprintk("RPC: svcauth_gss: discarding request with old sequence number %d\n", | ||
581 | gc->gc_seq); | ||
582 | return SVC_DROP; | ||
583 | } | ||
584 | return SVC_OK; | ||
585 | } | ||
586 | |||
587 | static int | ||
588 | gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) | ||
589 | { | ||
590 | u32 xdr_seq; | ||
591 | u32 maj_stat; | ||
592 | struct xdr_buf verf_data; | ||
593 | struct xdr_netobj mic; | ||
594 | u32 *p; | ||
595 | struct kvec iov; | ||
596 | |||
597 | svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS)); | ||
598 | xdr_seq = htonl(seq); | ||
599 | |||
600 | iov.iov_base = &xdr_seq; | ||
601 | iov.iov_len = sizeof(xdr_seq); | ||
602 | xdr_buf_from_iov(&iov, &verf_data); | ||
603 | p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; | ||
604 | mic.data = (u8 *)(p + 1); | ||
605 | maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); | ||
606 | if (maj_stat != GSS_S_COMPLETE) | ||
607 | return -1; | ||
608 | *p++ = htonl(mic.len); | ||
609 | memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len); | ||
610 | p += XDR_QUADLEN(mic.len); | ||
611 | if (!xdr_ressize_check(rqstp, p)) | ||
612 | return -1; | ||
613 | return 0; | ||
614 | } | ||
615 | |||
616 | struct gss_domain { | ||
617 | struct auth_domain h; | ||
618 | u32 pseudoflavor; | ||
619 | }; | ||
620 | |||
621 | static struct auth_domain * | ||
622 | find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) | ||
623 | { | ||
624 | char *name; | ||
625 | |||
626 | name = gss_service_to_auth_domain_name(ctx->mech_type, svc); | ||
627 | if (!name) | ||
628 | return NULL; | ||
629 | return auth_domain_find(name); | ||
630 | } | ||
631 | |||
632 | int | ||
633 | svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) | ||
634 | { | ||
635 | struct gss_domain *new; | ||
636 | struct auth_domain *test; | ||
637 | int stat = -ENOMEM; | ||
638 | |||
639 | new = kmalloc(sizeof(*new), GFP_KERNEL); | ||
640 | if (!new) | ||
641 | goto out; | ||
642 | cache_init(&new->h.h); | ||
643 | new->h.name = kmalloc(strlen(name) + 1, GFP_KERNEL); | ||
644 | if (!new->h.name) | ||
645 | goto out_free_dom; | ||
646 | strcpy(new->h.name, name); | ||
647 | new->h.flavour = RPC_AUTH_GSS; | ||
648 | new->pseudoflavor = pseudoflavor; | ||
649 | new->h.h.expiry_time = NEVER; | ||
650 | |||
651 | test = auth_domain_lookup(&new->h, 1); | ||
652 | if (test == &new->h) { | ||
653 | BUG_ON(atomic_dec_and_test(&new->h.h.refcnt)); | ||
654 | } else { /* XXX Duplicate registration? */ | ||
655 | auth_domain_put(&new->h); | ||
656 | goto out; | ||
657 | } | ||
658 | return 0; | ||
659 | |||
660 | out_free_dom: | ||
661 | kfree(new); | ||
662 | out: | ||
663 | return stat; | ||
664 | } | ||
665 | |||
666 | EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor); | ||
667 | |||
668 | static inline int | ||
669 | read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) | ||
670 | { | ||
671 | u32 raw; | ||
672 | int status; | ||
673 | |||
674 | status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); | ||
675 | if (status) | ||
676 | return status; | ||
677 | *obj = ntohl(raw); | ||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | /* It would be nice if this bit of code could be shared with the client. | ||
682 | * Obstacles: | ||
683 | * The client shouldn't malloc(), would have to pass in own memory. | ||
684 | * The server uses base of head iovec as read pointer, while the | ||
685 | * client uses separate pointer. */ | ||
686 | static int | ||
687 | unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) | ||
688 | { | ||
689 | int stat = -EINVAL; | ||
690 | u32 integ_len, maj_stat; | ||
691 | struct xdr_netobj mic; | ||
692 | struct xdr_buf integ_buf; | ||
693 | |||
694 | integ_len = ntohl(svc_getu32(&buf->head[0])); | ||
695 | if (integ_len & 3) | ||
696 | goto out; | ||
697 | if (integ_len > buf->len) | ||
698 | goto out; | ||
699 | if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) | ||
700 | BUG(); | ||
701 | /* copy out mic... */ | ||
702 | if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) | ||
703 | BUG(); | ||
704 | if (mic.len > RPC_MAX_AUTH_SIZE) | ||
705 | goto out; | ||
706 | mic.data = kmalloc(mic.len, GFP_KERNEL); | ||
707 | if (!mic.data) | ||
708 | goto out; | ||
709 | if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) | ||
710 | goto out; | ||
711 | maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); | ||
712 | if (maj_stat != GSS_S_COMPLETE) | ||
713 | goto out; | ||
714 | if (ntohl(svc_getu32(&buf->head[0])) != seq) | ||
715 | goto out; | ||
716 | stat = 0; | ||
717 | out: | ||
718 | return stat; | ||
719 | } | ||
720 | |||
721 | struct gss_svc_data { | ||
722 | /* decoded gss client cred: */ | ||
723 | struct rpc_gss_wire_cred clcred; | ||
724 | /* pointer to the beginning of the procedure-specific results, | ||
725 | * which may be encrypted/checksummed in svcauth_gss_release: */ | ||
726 | u32 *body_start; | ||
727 | struct rsc *rsci; | ||
728 | }; | ||
729 | |||
730 | static int | ||
731 | svcauth_gss_set_client(struct svc_rqst *rqstp) | ||
732 | { | ||
733 | struct gss_svc_data *svcdata = rqstp->rq_auth_data; | ||
734 | struct rsc *rsci = svcdata->rsci; | ||
735 | struct rpc_gss_wire_cred *gc = &svcdata->clcred; | ||
736 | |||
737 | rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc); | ||
738 | if (rqstp->rq_client == NULL) | ||
739 | return SVC_DENIED; | ||
740 | return SVC_OK; | ||
741 | } | ||
742 | |||
743 | /* | ||
744 | * Accept an rpcsec packet. | ||
745 | * If context establishment, punt to user space | ||
746 | * If data exchange, verify/decrypt | ||
747 | * If context destruction, handle here | ||
748 | * In the context establishment and destruction case we encode | ||
749 | * response here and return SVC_COMPLETE. | ||
750 | */ | ||
751 | static int | ||
752 | svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) | ||
753 | { | ||
754 | struct kvec *argv = &rqstp->rq_arg.head[0]; | ||
755 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
756 | u32 crlen; | ||
757 | struct xdr_netobj tmpobj; | ||
758 | struct gss_svc_data *svcdata = rqstp->rq_auth_data; | ||
759 | struct rpc_gss_wire_cred *gc; | ||
760 | struct rsc *rsci = NULL; | ||
761 | struct rsi *rsip, rsikey; | ||
762 | u32 *rpcstart; | ||
763 | u32 *reject_stat = resv->iov_base + resv->iov_len; | ||
764 | int ret; | ||
765 | |||
766 | dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",argv->iov_len); | ||
767 | |||
768 | *authp = rpc_autherr_badcred; | ||
769 | if (!svcdata) | ||
770 | svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); | ||
771 | if (!svcdata) | ||
772 | goto auth_err; | ||
773 | rqstp->rq_auth_data = svcdata; | ||
774 | svcdata->body_start = NULL; | ||
775 | svcdata->rsci = NULL; | ||
776 | gc = &svcdata->clcred; | ||
777 | |||
778 | /* start of rpc packet is 7 u32's back from here: | ||
779 | * xid direction rpcversion prog vers proc flavour | ||
780 | */ | ||
781 | rpcstart = argv->iov_base; | ||
782 | rpcstart -= 7; | ||
783 | |||
784 | /* credential is: | ||
785 | * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle | ||
786 | * at least 5 u32s, and is preceeded by length, so that makes 6. | ||
787 | */ | ||
788 | |||
789 | if (argv->iov_len < 5 * 4) | ||
790 | goto auth_err; | ||
791 | crlen = ntohl(svc_getu32(argv)); | ||
792 | if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION) | ||
793 | goto auth_err; | ||
794 | gc->gc_proc = ntohl(svc_getu32(argv)); | ||
795 | gc->gc_seq = ntohl(svc_getu32(argv)); | ||
796 | gc->gc_svc = ntohl(svc_getu32(argv)); | ||
797 | if (svc_safe_getnetobj(argv, &gc->gc_ctx)) | ||
798 | goto auth_err; | ||
799 | if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4) | ||
800 | goto auth_err; | ||
801 | |||
802 | if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0)) | ||
803 | goto auth_err; | ||
804 | |||
805 | /* | ||
806 | * We've successfully parsed the credential. Let's check out the | ||
807 | * verifier. An AUTH_NULL verifier is allowed (and required) for | ||
808 | * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for | ||
809 | * PROC_DATA and PROC_DESTROY. | ||
810 | * | ||
811 | * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length). | ||
812 | * AUTH_RPCSEC_GSS verifier is: | ||
813 | * 6 (AUTH_RPCSEC_GSS), length, checksum. | ||
814 | * checksum is calculated over rpcheader from xid up to here. | ||
815 | */ | ||
816 | *authp = rpc_autherr_badverf; | ||
817 | switch (gc->gc_proc) { | ||
818 | case RPC_GSS_PROC_INIT: | ||
819 | case RPC_GSS_PROC_CONTINUE_INIT: | ||
820 | if (argv->iov_len < 2 * 4) | ||
821 | goto auth_err; | ||
822 | if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL) | ||
823 | goto auth_err; | ||
824 | if (ntohl(svc_getu32(argv)) != 0) | ||
825 | goto auth_err; | ||
826 | break; | ||
827 | case RPC_GSS_PROC_DATA: | ||
828 | case RPC_GSS_PROC_DESTROY: | ||
829 | *authp = rpcsec_gsserr_credproblem; | ||
830 | rsci = gss_svc_searchbyctx(&gc->gc_ctx); | ||
831 | if (!rsci) | ||
832 | goto auth_err; | ||
833 | switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) { | ||
834 | case SVC_OK: | ||
835 | break; | ||
836 | case SVC_DENIED: | ||
837 | goto auth_err; | ||
838 | case SVC_DROP: | ||
839 | goto drop; | ||
840 | } | ||
841 | break; | ||
842 | default: | ||
843 | *authp = rpc_autherr_rejectedcred; | ||
844 | goto auth_err; | ||
845 | } | ||
846 | |||
847 | /* now act upon the command: */ | ||
848 | switch (gc->gc_proc) { | ||
849 | case RPC_GSS_PROC_INIT: | ||
850 | case RPC_GSS_PROC_CONTINUE_INIT: | ||
851 | *authp = rpc_autherr_badcred; | ||
852 | if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) | ||
853 | goto auth_err; | ||
854 | memset(&rsikey, 0, sizeof(rsikey)); | ||
855 | if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) | ||
856 | goto drop; | ||
857 | *authp = rpc_autherr_badverf; | ||
858 | if (svc_safe_getnetobj(argv, &tmpobj)) { | ||
859 | kfree(rsikey.in_handle.data); | ||
860 | goto auth_err; | ||
861 | } | ||
862 | if (dup_netobj(&rsikey.in_token, &tmpobj)) { | ||
863 | kfree(rsikey.in_handle.data); | ||
864 | goto drop; | ||
865 | } | ||
866 | |||
867 | rsip = rsi_lookup(&rsikey, 0); | ||
868 | rsi_free(&rsikey); | ||
869 | if (!rsip) { | ||
870 | goto drop; | ||
871 | } | ||
872 | switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { | ||
873 | case -EAGAIN: | ||
874 | goto drop; | ||
875 | case -ENOENT: | ||
876 | goto drop; | ||
877 | case 0: | ||
878 | rsci = gss_svc_searchbyctx(&rsip->out_handle); | ||
879 | if (!rsci) { | ||
880 | goto drop; | ||
881 | } | ||
882 | if (gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN)) | ||
883 | goto drop; | ||
884 | if (resv->iov_len + 4 > PAGE_SIZE) | ||
885 | goto drop; | ||
886 | svc_putu32(resv, rpc_success); | ||
887 | if (svc_safe_putnetobj(resv, &rsip->out_handle)) | ||
888 | goto drop; | ||
889 | if (resv->iov_len + 3 * 4 > PAGE_SIZE) | ||
890 | goto drop; | ||
891 | svc_putu32(resv, htonl(rsip->major_status)); | ||
892 | svc_putu32(resv, htonl(rsip->minor_status)); | ||
893 | svc_putu32(resv, htonl(GSS_SEQ_WIN)); | ||
894 | if (svc_safe_putnetobj(resv, &rsip->out_token)) | ||
895 | goto drop; | ||
896 | rqstp->rq_client = NULL; | ||
897 | } | ||
898 | goto complete; | ||
899 | case RPC_GSS_PROC_DESTROY: | ||
900 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); | ||
901 | if (resv->iov_len + 4 > PAGE_SIZE) | ||
902 | goto drop; | ||
903 | svc_putu32(resv, rpc_success); | ||
904 | goto complete; | ||
905 | case RPC_GSS_PROC_DATA: | ||
906 | *authp = rpcsec_gsserr_ctxproblem; | ||
907 | if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) | ||
908 | goto auth_err; | ||
909 | rqstp->rq_cred = rsci->cred; | ||
910 | get_group_info(rsci->cred.cr_group_info); | ||
911 | *authp = rpc_autherr_badcred; | ||
912 | switch (gc->gc_svc) { | ||
913 | case RPC_GSS_SVC_NONE: | ||
914 | break; | ||
915 | case RPC_GSS_SVC_INTEGRITY: | ||
916 | if (unwrap_integ_data(&rqstp->rq_arg, | ||
917 | gc->gc_seq, rsci->mechctx)) | ||
918 | goto auth_err; | ||
919 | /* placeholders for length and seq. number: */ | ||
920 | svcdata->body_start = resv->iov_base + resv->iov_len; | ||
921 | svc_putu32(resv, 0); | ||
922 | svc_putu32(resv, 0); | ||
923 | break; | ||
924 | case RPC_GSS_SVC_PRIVACY: | ||
925 | /* currently unsupported */ | ||
926 | default: | ||
927 | goto auth_err; | ||
928 | } | ||
929 | svcdata->rsci = rsci; | ||
930 | cache_get(&rsci->h); | ||
931 | ret = SVC_OK; | ||
932 | goto out; | ||
933 | } | ||
934 | auth_err: | ||
935 | /* Restore write pointer to original value: */ | ||
936 | xdr_ressize_check(rqstp, reject_stat); | ||
937 | ret = SVC_DENIED; | ||
938 | goto out; | ||
939 | complete: | ||
940 | ret = SVC_COMPLETE; | ||
941 | goto out; | ||
942 | drop: | ||
943 | ret = SVC_DROP; | ||
944 | out: | ||
945 | if (rsci) | ||
946 | rsc_put(&rsci->h, &rsc_cache); | ||
947 | return ret; | ||
948 | } | ||
949 | |||
950 | static int | ||
951 | svcauth_gss_release(struct svc_rqst *rqstp) | ||
952 | { | ||
953 | struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; | ||
954 | struct rpc_gss_wire_cred *gc = &gsd->clcred; | ||
955 | struct xdr_buf *resbuf = &rqstp->rq_res; | ||
956 | struct xdr_buf integ_buf; | ||
957 | struct xdr_netobj mic; | ||
958 | struct kvec *resv; | ||
959 | u32 *p; | ||
960 | int integ_offset, integ_len; | ||
961 | int stat = -EINVAL; | ||
962 | |||
963 | if (gc->gc_proc != RPC_GSS_PROC_DATA) | ||
964 | goto out; | ||
965 | /* Release can be called twice, but we only wrap once. */ | ||
966 | if (gsd->body_start == NULL) | ||
967 | goto out; | ||
968 | /* normally not set till svc_send, but we need it here: */ | ||
969 | resbuf->len = resbuf->head[0].iov_len | ||
970 | + resbuf->page_len + resbuf->tail[0].iov_len; | ||
971 | switch (gc->gc_svc) { | ||
972 | case RPC_GSS_SVC_NONE: | ||
973 | break; | ||
974 | case RPC_GSS_SVC_INTEGRITY: | ||
975 | p = gsd->body_start; | ||
976 | gsd->body_start = NULL; | ||
977 | /* move accept_stat to right place: */ | ||
978 | memcpy(p, p + 2, 4); | ||
979 | /* don't wrap in failure case: */ | ||
980 | /* Note: counting on not getting here if call was not even | ||
981 | * accepted! */ | ||
982 | if (*p != rpc_success) { | ||
983 | resbuf->head[0].iov_len -= 2 * 4; | ||
984 | goto out; | ||
985 | } | ||
986 | p++; | ||
987 | integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; | ||
988 | integ_len = resbuf->len - integ_offset; | ||
989 | BUG_ON(integ_len % 4); | ||
990 | *p++ = htonl(integ_len); | ||
991 | *p++ = htonl(gc->gc_seq); | ||
992 | if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, | ||
993 | integ_len)) | ||
994 | BUG(); | ||
995 | if (resbuf->page_len == 0 | ||
996 | && resbuf->tail[0].iov_len + RPC_MAX_AUTH_SIZE | ||
997 | < PAGE_SIZE) { | ||
998 | BUG_ON(resbuf->tail[0].iov_len); | ||
999 | /* Use head for everything */ | ||
1000 | resv = &resbuf->head[0]; | ||
1001 | } else if (resbuf->tail[0].iov_base == NULL) { | ||
1002 | /* copied from nfsd4_encode_read */ | ||
1003 | svc_take_page(rqstp); | ||
1004 | resbuf->tail[0].iov_base = page_address(rqstp | ||
1005 | ->rq_respages[rqstp->rq_resused-1]); | ||
1006 | rqstp->rq_restailpage = rqstp->rq_resused-1; | ||
1007 | resbuf->tail[0].iov_len = 0; | ||
1008 | resv = &resbuf->tail[0]; | ||
1009 | } else { | ||
1010 | resv = &resbuf->tail[0]; | ||
1011 | } | ||
1012 | mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; | ||
1013 | if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) | ||
1014 | goto out_err; | ||
1015 | svc_putu32(resv, htonl(mic.len)); | ||
1016 | memset(mic.data + mic.len, 0, | ||
1017 | round_up_to_quad(mic.len) - mic.len); | ||
1018 | resv->iov_len += XDR_QUADLEN(mic.len) << 2; | ||
1019 | /* not strictly required: */ | ||
1020 | resbuf->len += XDR_QUADLEN(mic.len) << 2; | ||
1021 | BUG_ON(resv->iov_len > PAGE_SIZE); | ||
1022 | break; | ||
1023 | case RPC_GSS_SVC_PRIVACY: | ||
1024 | default: | ||
1025 | goto out_err; | ||
1026 | } | ||
1027 | |||
1028 | out: | ||
1029 | stat = 0; | ||
1030 | out_err: | ||
1031 | if (rqstp->rq_client) | ||
1032 | auth_domain_put(rqstp->rq_client); | ||
1033 | rqstp->rq_client = NULL; | ||
1034 | if (rqstp->rq_cred.cr_group_info) | ||
1035 | put_group_info(rqstp->rq_cred.cr_group_info); | ||
1036 | rqstp->rq_cred.cr_group_info = NULL; | ||
1037 | if (gsd->rsci) | ||
1038 | rsc_put(&gsd->rsci->h, &rsc_cache); | ||
1039 | gsd->rsci = NULL; | ||
1040 | |||
1041 | return stat; | ||
1042 | } | ||
1043 | |||
1044 | static void | ||
1045 | svcauth_gss_domain_release(struct auth_domain *dom) | ||
1046 | { | ||
1047 | struct gss_domain *gd = container_of(dom, struct gss_domain, h); | ||
1048 | |||
1049 | kfree(dom->name); | ||
1050 | kfree(gd); | ||
1051 | } | ||
1052 | |||
1053 | static struct auth_ops svcauthops_gss = { | ||
1054 | .name = "rpcsec_gss", | ||
1055 | .owner = THIS_MODULE, | ||
1056 | .flavour = RPC_AUTH_GSS, | ||
1057 | .accept = svcauth_gss_accept, | ||
1058 | .release = svcauth_gss_release, | ||
1059 | .domain_release = svcauth_gss_domain_release, | ||
1060 | .set_client = svcauth_gss_set_client, | ||
1061 | }; | ||
1062 | |||
1063 | int | ||
1064 | gss_svc_init(void) | ||
1065 | { | ||
1066 | int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); | ||
1067 | if (rv == 0) { | ||
1068 | cache_register(&rsc_cache); | ||
1069 | cache_register(&rsi_cache); | ||
1070 | } | ||
1071 | return rv; | ||
1072 | } | ||
1073 | |||
1074 | void | ||
1075 | gss_svc_shutdown(void) | ||
1076 | { | ||
1077 | cache_unregister(&rsc_cache); | ||
1078 | cache_unregister(&rsi_cache); | ||
1079 | svc_auth_unregister(RPC_AUTH_GSS); | ||
1080 | } | ||
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c new file mode 100644 index 000000000000..9b72d3abf823 --- /dev/null +++ b/net/sunrpc/auth_null.c | |||
@@ -0,0 +1,143 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_null.c | ||
3 | * | ||
4 | * AUTH_NULL authentication. Really :-) | ||
5 | * | ||
6 | * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/socket.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/in.h> | ||
13 | #include <linux/utsname.h> | ||
14 | #include <linux/sunrpc/clnt.h> | ||
15 | #include <linux/sched.h> | ||
16 | |||
17 | #ifdef RPC_DEBUG | ||
18 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
19 | #endif | ||
20 | |||
21 | static struct rpc_auth null_auth; | ||
22 | static struct rpc_cred null_cred; | ||
23 | |||
24 | static struct rpc_auth * | ||
25 | nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) | ||
26 | { | ||
27 | atomic_inc(&null_auth.au_count); | ||
28 | return &null_auth; | ||
29 | } | ||
30 | |||
/* Nothing to tear down: null_auth is statically allocated. */
static void
nul_destroy(struct rpc_auth *auth)
{
}
35 | |||
36 | /* | ||
37 | * Lookup NULL creds for current process | ||
38 | */ | ||
39 | static struct rpc_cred * | ||
40 | nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | ||
41 | { | ||
42 | return get_rpccred(&null_cred); | ||
43 | } | ||
44 | |||
/*
 * Destroy cred handle.  A no-op: null_cred is statically allocated.
 */
static void
nul_destroy_cred(struct rpc_cred *cred)
{
}
52 | |||
/*
 * Match cred handle against current process.  Always reports a match
 * (returns 1) without looking at its arguments.
 */
static int
nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
{
	return 1;
}
61 | |||
62 | /* | ||
63 | * Marshal credential. | ||
64 | */ | ||
65 | static u32 * | ||
66 | nul_marshal(struct rpc_task *task, u32 *p) | ||
67 | { | ||
68 | *p++ = htonl(RPC_AUTH_NULL); | ||
69 | *p++ = 0; | ||
70 | *p++ = htonl(RPC_AUTH_NULL); | ||
71 | *p++ = 0; | ||
72 | |||
73 | return p; | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * Refresh credential. This is a no-op for AUTH_NULL | ||
78 | */ | ||
79 | static int | ||
80 | nul_refresh(struct rpc_task *task) | ||
81 | { | ||
82 | task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static u32 * | ||
87 | nul_validate(struct rpc_task *task, u32 *p) | ||
88 | { | ||
89 | rpc_authflavor_t flavor; | ||
90 | u32 size; | ||
91 | |||
92 | flavor = ntohl(*p++); | ||
93 | if (flavor != RPC_AUTH_NULL) { | ||
94 | printk("RPC: bad verf flavor: %u\n", flavor); | ||
95 | return NULL; | ||
96 | } | ||
97 | |||
98 | size = ntohl(*p++); | ||
99 | if (size != 0) { | ||
100 | printk("RPC: bad verf size: %u\n", size); | ||
101 | return NULL; | ||
102 | } | ||
103 | |||
104 | return p; | ||
105 | } | ||
106 | |||
107 | struct rpc_authops authnull_ops = { | ||
108 | .owner = THIS_MODULE, | ||
109 | .au_flavor = RPC_AUTH_NULL, | ||
110 | #ifdef RPC_DEBUG | ||
111 | .au_name = "NULL", | ||
112 | #endif | ||
113 | .create = nul_create, | ||
114 | .destroy = nul_destroy, | ||
115 | .lookup_cred = nul_lookup_cred, | ||
116 | }; | ||
117 | |||
118 | static | ||
119 | struct rpc_auth null_auth = { | ||
120 | .au_cslack = 4, | ||
121 | .au_rslack = 2, | ||
122 | .au_ops = &authnull_ops, | ||
123 | }; | ||
124 | |||
125 | static | ||
126 | struct rpc_credops null_credops = { | ||
127 | .cr_name = "AUTH_NULL", | ||
128 | .crdestroy = nul_destroy_cred, | ||
129 | .crmatch = nul_match, | ||
130 | .crmarshal = nul_marshal, | ||
131 | .crrefresh = nul_refresh, | ||
132 | .crvalidate = nul_validate, | ||
133 | }; | ||
134 | |||
135 | static | ||
136 | struct rpc_cred null_cred = { | ||
137 | .cr_ops = &null_credops, | ||
138 | .cr_count = ATOMIC_INIT(1), | ||
139 | .cr_flags = RPCAUTH_CRED_UPTODATE, | ||
140 | #ifdef RPC_DEBUG | ||
141 | .cr_magic = RPCAUTH_CRED_MAGIC, | ||
142 | #endif | ||
143 | }; | ||
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c new file mode 100644 index 000000000000..4ff297a9b15b --- /dev/null +++ b/net/sunrpc/auth_unix.c | |||
@@ -0,0 +1,242 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/auth_unix.c | ||
3 | * | ||
4 | * UNIX-style authentication; no AUTH_SHORT support | ||
5 | * | ||
6 | * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/socket.h> | ||
13 | #include <linux/in.h> | ||
14 | #include <linux/sunrpc/clnt.h> | ||
15 | #include <linux/sunrpc/auth.h> | ||
16 | |||
17 | #define NFS_NGROUPS 16 | ||
18 | |||
19 | struct unx_cred { | ||
20 | struct rpc_cred uc_base; | ||
21 | gid_t uc_gid; | ||
22 | gid_t uc_gids[NFS_NGROUPS]; | ||
23 | }; | ||
24 | #define uc_uid uc_base.cr_uid | ||
25 | #define uc_count uc_base.cr_count | ||
26 | #define uc_flags uc_base.cr_flags | ||
27 | #define uc_expire uc_base.cr_expire | ||
28 | |||
29 | #define UNX_CRED_EXPIRE (60 * HZ) | ||
30 | |||
31 | #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) | ||
32 | |||
33 | #ifdef RPC_DEBUG | ||
34 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
35 | #endif | ||
36 | |||
37 | static struct rpc_auth unix_auth; | ||
38 | static struct rpc_cred_cache unix_cred_cache; | ||
39 | static struct rpc_credops unix_credops; | ||
40 | |||
41 | static struct rpc_auth * | ||
42 | unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) | ||
43 | { | ||
44 | dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); | ||
45 | if (atomic_inc_return(&unix_auth.au_count) == 0) | ||
46 | unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1); | ||
47 | return &unix_auth; | ||
48 | } | ||
49 | |||
/*
 * Tear down an AUTH_UNIX authenticator: log the event and release the
 * contents of its credential cache via rpcauth_free_credcache().
 */
static void
unx_destroy(struct rpc_auth *auth)
{
	dprintk("RPC: destroying UNIX authenticator %p\n", auth);

	rpcauth_free_credcache(auth);
}
56 | |||
/*
 * lookup_cred method for AUTH_UNIX.
 *
 * Delegates entirely to the generic credential cache lookup, passing
 * @auth, @acred and @flags through unchanged and returning its result.
 */
static struct rpc_cred *
unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
	struct rpc_cred *found;

	found = rpcauth_lookup_credcache(auth, acred, flags);
	return found;
}
65 | |||
66 | static struct rpc_cred * | ||
67 | unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) | ||
68 | { | ||
69 | struct unx_cred *cred; | ||
70 | int i; | ||
71 | |||
72 | dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", | ||
73 | acred->uid, acred->gid); | ||
74 | |||
75 | if (!(cred = (struct unx_cred *) kmalloc(sizeof(*cred), GFP_KERNEL))) | ||
76 | return ERR_PTR(-ENOMEM); | ||
77 | |||
78 | atomic_set(&cred->uc_count, 1); | ||
79 | cred->uc_flags = RPCAUTH_CRED_UPTODATE; | ||
80 | if (flags & RPC_TASK_ROOTCREDS) { | ||
81 | cred->uc_uid = 0; | ||
82 | cred->uc_gid = 0; | ||
83 | cred->uc_gids[0] = NOGROUP; | ||
84 | } else { | ||
85 | int groups = acred->group_info->ngroups; | ||
86 | if (groups > NFS_NGROUPS) | ||
87 | groups = NFS_NGROUPS; | ||
88 | |||
89 | cred->uc_uid = acred->uid; | ||
90 | cred->uc_gid = acred->gid; | ||
91 | for (i = 0; i < groups; i++) | ||
92 | cred->uc_gids[i] = GROUP_AT(acred->group_info, i); | ||
93 | if (i < NFS_NGROUPS) | ||
94 | cred->uc_gids[i] = NOGROUP; | ||
95 | } | ||
96 | cred->uc_base.cr_ops = &unix_credops; | ||
97 | |||
98 | return (struct rpc_cred *) cred; | ||
99 | } | ||
100 | |||
/*
 * crdestroy method for AUTH_UNIX: the credential is a single kmalloc'ed
 * object (see unx_create_cred), so freeing it is all that is needed.
 */
static void
unx_destroy_cred(struct rpc_cred *cred)
{
	kfree(cred);
}
106 | |||
107 | /* | ||
108 | * Match credentials against current process creds. | ||
109 | * The root_override argument takes care of cases where the caller may | ||
110 | * request root creds (e.g. for NFS swapping). | ||
111 | */ | ||
112 | static int | ||
113 | unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int taskflags) | ||
114 | { | ||
115 | struct unx_cred *cred = (struct unx_cred *) rcred; | ||
116 | int i; | ||
117 | |||
118 | if (!(taskflags & RPC_TASK_ROOTCREDS)) { | ||
119 | int groups; | ||
120 | |||
121 | if (cred->uc_uid != acred->uid | ||
122 | || cred->uc_gid != acred->gid) | ||
123 | return 0; | ||
124 | |||
125 | groups = acred->group_info->ngroups; | ||
126 | if (groups > NFS_NGROUPS) | ||
127 | groups = NFS_NGROUPS; | ||
128 | for (i = 0; i < groups ; i++) | ||
129 | if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) | ||
130 | return 0; | ||
131 | return 1; | ||
132 | } | ||
133 | return (cred->uc_uid == 0 | ||
134 | && cred->uc_gid == 0 | ||
135 | && cred->uc_gids[0] == (gid_t) NOGROUP); | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Marshal credentials. | ||
140 | * Maybe we should keep a cached credential for performance reasons. | ||
141 | */ | ||
142 | static u32 * | ||
143 | unx_marshal(struct rpc_task *task, u32 *p) | ||
144 | { | ||
145 | struct rpc_clnt *clnt = task->tk_client; | ||
146 | struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; | ||
147 | u32 *base, *hold; | ||
148 | int i; | ||
149 | |||
150 | *p++ = htonl(RPC_AUTH_UNIX); | ||
151 | base = p++; | ||
152 | *p++ = htonl(jiffies/HZ); | ||
153 | |||
154 | /* | ||
155 | * Copy the UTS nodename captured when the client was created. | ||
156 | */ | ||
157 | p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); | ||
158 | |||
159 | *p++ = htonl((u32) cred->uc_uid); | ||
160 | *p++ = htonl((u32) cred->uc_gid); | ||
161 | hold = p++; | ||
162 | for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) | ||
163 | *p++ = htonl((u32) cred->uc_gids[i]); | ||
164 | *hold = htonl(p - hold - 1); /* gid array length */ | ||
165 | *base = htonl((p - base - 1) << 2); /* cred length */ | ||
166 | |||
167 | *p++ = htonl(RPC_AUTH_NULL); | ||
168 | *p++ = htonl(0); | ||
169 | |||
170 | return p; | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * Refresh credentials. This is a no-op for AUTH_UNIX | ||
175 | */ | ||
176 | static int | ||
177 | unx_refresh(struct rpc_task *task) | ||
178 | { | ||
179 | task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; | ||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | static u32 * | ||
184 | unx_validate(struct rpc_task *task, u32 *p) | ||
185 | { | ||
186 | rpc_authflavor_t flavor; | ||
187 | u32 size; | ||
188 | |||
189 | flavor = ntohl(*p++); | ||
190 | if (flavor != RPC_AUTH_NULL && | ||
191 | flavor != RPC_AUTH_UNIX && | ||
192 | flavor != RPC_AUTH_SHORT) { | ||
193 | printk("RPC: bad verf flavor: %u\n", flavor); | ||
194 | return NULL; | ||
195 | } | ||
196 | |||
197 | size = ntohl(*p++); | ||
198 | if (size > RPC_MAX_AUTH_SIZE) { | ||
199 | printk("RPC: giant verf size: %u\n", size); | ||
200 | return NULL; | ||
201 | } | ||
202 | task->tk_auth->au_rslack = (size >> 2) + 2; | ||
203 | p += (size >> 2); | ||
204 | |||
205 | return p; | ||
206 | } | ||
207 | |||
208 | struct rpc_authops authunix_ops = { | ||
209 | .owner = THIS_MODULE, | ||
210 | .au_flavor = RPC_AUTH_UNIX, | ||
211 | #ifdef RPC_DEBUG | ||
212 | .au_name = "UNIX", | ||
213 | #endif | ||
214 | .create = unx_create, | ||
215 | .destroy = unx_destroy, | ||
216 | .lookup_cred = unx_lookup_cred, | ||
217 | .crcreate = unx_create_cred, | ||
218 | }; | ||
219 | |||
220 | static | ||
221 | struct rpc_cred_cache unix_cred_cache = { | ||
222 | .expire = UNX_CRED_EXPIRE, | ||
223 | }; | ||
224 | |||
225 | static | ||
226 | struct rpc_auth unix_auth = { | ||
227 | .au_cslack = UNX_WRITESLACK, | ||
228 | .au_rslack = 2, /* assume AUTH_NULL verf */ | ||
229 | .au_ops = &authunix_ops, | ||
230 | .au_count = ATOMIC_INIT(0), | ||
231 | .au_credcache = &unix_cred_cache, | ||
232 | }; | ||
233 | |||
234 | static | ||
235 | struct rpc_credops unix_credops = { | ||
236 | .cr_name = "AUTH_UNIX", | ||
237 | .crdestroy = unx_destroy_cred, | ||
238 | .crmatch = unx_match, | ||
239 | .crmarshal = unx_marshal, | ||
240 | .crrefresh = unx_refresh, | ||
241 | .crvalidate = unx_validate, | ||
242 | }; | ||
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c new file mode 100644 index 000000000000..900f5bc7e336 --- /dev/null +++ b/net/sunrpc/cache.c | |||
@@ -0,0 +1,1189 @@ | |||
1 | /* | ||
2 | * net/sunrpc/cache.c | ||
3 | * | ||
4 | * Generic code for various authentication-related caches | ||
5 | * used by sunrpc clients and servers. | ||
6 | * | ||
7 | * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> | ||
8 | * | ||
9 | * Released under terms in GPL version 2. See COPYING. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/fs.h> | ||
15 | #include <linux/file.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/signal.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/kmod.h> | ||
20 | #include <linux/list.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/ctype.h> | ||
23 | #include <asm/uaccess.h> | ||
24 | #include <linux/poll.h> | ||
25 | #include <linux/seq_file.h> | ||
26 | #include <linux/proc_fs.h> | ||
27 | #include <linux/net.h> | ||
28 | #include <linux/workqueue.h> | ||
29 | #include <asm/ioctls.h> | ||
30 | #include <linux/sunrpc/types.h> | ||
31 | #include <linux/sunrpc/cache.h> | ||
32 | #include <linux/sunrpc/stats.h> | ||
33 | |||
34 | #define RPCDBG_FACILITY RPCDBG_CACHE | ||
35 | |||
36 | static void cache_defer_req(struct cache_req *req, struct cache_head *item); | ||
37 | static void cache_revisit_request(struct cache_head *item); | ||
38 | |||
39 | void cache_init(struct cache_head *h) | ||
40 | { | ||
41 | time_t now = get_seconds(); | ||
42 | h->next = NULL; | ||
43 | h->flags = 0; | ||
44 | atomic_set(&h->refcnt, 1); | ||
45 | h->expiry_time = now + CACHE_NEW_EXPIRY; | ||
46 | h->last_refresh = now; | ||
47 | } | ||
48 | |||
49 | |||
50 | static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); | ||
51 | /* | ||
52 | * This is the generic cache management routine for all | ||
53 | * the authentication caches. | ||
54 | * It checks the currency of a cache item and will (later) | ||
55 | * initiate an upcall to fill it if needed. | ||
56 | * | ||
57 | * | ||
58 | * Returns 0 if the cache_head can be used, or cache_puts it and returns | ||
59 | * -EAGAIN if upcall is pending, | ||
60 | * -ENOENT if cache entry was negative | ||
61 | */ | ||
62 | int cache_check(struct cache_detail *detail, | ||
63 | struct cache_head *h, struct cache_req *rqstp) | ||
64 | { | ||
65 | int rv; | ||
66 | long refresh_age, age; | ||
67 | |||
68 | /* First decide return status as best we can */ | ||
69 | if (!test_bit(CACHE_VALID, &h->flags) || | ||
70 | h->expiry_time < get_seconds()) | ||
71 | rv = -EAGAIN; | ||
72 | else if (detail->flush_time > h->last_refresh) | ||
73 | rv = -EAGAIN; | ||
74 | else { | ||
75 | /* entry is valid */ | ||
76 | if (test_bit(CACHE_NEGATIVE, &h->flags)) | ||
77 | rv = -ENOENT; | ||
78 | else rv = 0; | ||
79 | } | ||
80 | |||
81 | /* now see if we want to start an upcall */ | ||
82 | refresh_age = (h->expiry_time - h->last_refresh); | ||
83 | age = get_seconds() - h->last_refresh; | ||
84 | |||
85 | if (rqstp == NULL) { | ||
86 | if (rv == -EAGAIN) | ||
87 | rv = -ENOENT; | ||
88 | } else if (rv == -EAGAIN || age > refresh_age/2) { | ||
89 | dprintk("Want update, refage=%ld, age=%ld\n", refresh_age, age); | ||
90 | if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { | ||
91 | switch (cache_make_upcall(detail, h)) { | ||
92 | case -EINVAL: | ||
93 | clear_bit(CACHE_PENDING, &h->flags); | ||
94 | if (rv == -EAGAIN) { | ||
95 | set_bit(CACHE_NEGATIVE, &h->flags); | ||
96 | cache_fresh(detail, h, get_seconds()+CACHE_NEW_EXPIRY); | ||
97 | rv = -ENOENT; | ||
98 | } | ||
99 | break; | ||
100 | |||
101 | case -EAGAIN: | ||
102 | clear_bit(CACHE_PENDING, &h->flags); | ||
103 | cache_revisit_request(h); | ||
104 | break; | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | |||
109 | if (rv == -EAGAIN) | ||
110 | cache_defer_req(rqstp, h); | ||
111 | |||
112 | if (rv && h) | ||
113 | detail->cache_put(h, detail); | ||
114 | return rv; | ||
115 | } | ||
116 | |||
117 | static void queue_loose(struct cache_detail *detail, struct cache_head *ch); | ||
118 | |||
119 | void cache_fresh(struct cache_detail *detail, | ||
120 | struct cache_head *head, time_t expiry) | ||
121 | { | ||
122 | |||
123 | head->expiry_time = expiry; | ||
124 | head->last_refresh = get_seconds(); | ||
125 | if (!test_and_set_bit(CACHE_VALID, &head->flags)) | ||
126 | cache_revisit_request(head); | ||
127 | if (test_and_clear_bit(CACHE_PENDING, &head->flags)) | ||
128 | queue_loose(detail, head); | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * caches need to be periodically cleaned. | ||
133 | * For this we maintain a list of cache_detail and | ||
134 | * a current pointer into that list and into the table | ||
135 | * for that entry. | ||
136 | * | ||
137 | * Each time clean_cache is called it finds the next non-empty entry | ||
138 | * in the current table and walks the list in that entry | ||
139 | * looking for entries that can be removed. | ||
140 | * | ||
141 | * An entry gets removed if: | ||
142 | * - The expiry is before current time | ||
143 | * - The last_refresh time is before the flush_time for that cache | ||
144 | * | ||
145 | * later we might drop old entries with non-NEVER expiry if that table | ||
146 | * is getting 'full' for some definition of 'full' | ||
147 | * | ||
148 | * The question of "how often to scan a table" is an interesting one | ||
149 | * and is answered in part by the use of the "nextcheck" field in the | ||
150 | * cache_detail. | ||
151 | * When a scan of a table begins, the nextcheck field is set to a time | ||
152 | * that is well into the future. | ||
153 | * While scanning, if an expiry time is found that is earlier than the | ||
154 | * current nextcheck time, nextcheck is set to that expiry time. | ||
155 | * If the flush_time is ever set to a time earlier than the nextcheck | ||
156 | * time, the nextcheck time is then set to that flush_time. | ||
157 | * | ||
158 | * A table is then only scanned if the current time is at least | ||
159 | * the nextcheck time. | ||
160 | * | ||
161 | */ | ||
162 | |||
163 | static LIST_HEAD(cache_list); | ||
164 | static DEFINE_SPINLOCK(cache_list_lock); | ||
165 | static struct cache_detail *current_detail; | ||
166 | static int current_index; | ||
167 | |||
168 | static struct file_operations cache_file_operations; | ||
169 | static struct file_operations content_file_operations; | ||
170 | static struct file_operations cache_flush_operations; | ||
171 | |||
172 | static void do_cache_clean(void *data); | ||
173 | static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL); | ||
174 | |||
175 | void cache_register(struct cache_detail *cd) | ||
176 | { | ||
177 | cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); | ||
178 | if (cd->proc_ent) { | ||
179 | struct proc_dir_entry *p; | ||
180 | cd->proc_ent->owner = THIS_MODULE; | ||
181 | cd->channel_ent = cd->content_ent = NULL; | ||
182 | |||
183 | p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, | ||
184 | cd->proc_ent); | ||
185 | cd->flush_ent = p; | ||
186 | if (p) { | ||
187 | p->proc_fops = &cache_flush_operations; | ||
188 | p->owner = THIS_MODULE; | ||
189 | p->data = cd; | ||
190 | } | ||
191 | |||
192 | if (cd->cache_request || cd->cache_parse) { | ||
193 | p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, | ||
194 | cd->proc_ent); | ||
195 | cd->channel_ent = p; | ||
196 | if (p) { | ||
197 | p->proc_fops = &cache_file_operations; | ||
198 | p->owner = THIS_MODULE; | ||
199 | p->data = cd; | ||
200 | } | ||
201 | } | ||
202 | if (cd->cache_show) { | ||
203 | p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR, | ||
204 | cd->proc_ent); | ||
205 | cd->content_ent = p; | ||
206 | if (p) { | ||
207 | p->proc_fops = &content_file_operations; | ||
208 | p->owner = THIS_MODULE; | ||
209 | p->data = cd; | ||
210 | } | ||
211 | } | ||
212 | } | ||
213 | rwlock_init(&cd->hash_lock); | ||
214 | INIT_LIST_HEAD(&cd->queue); | ||
215 | spin_lock(&cache_list_lock); | ||
216 | cd->nextcheck = 0; | ||
217 | cd->entries = 0; | ||
218 | atomic_set(&cd->readers, 0); | ||
219 | cd->last_close = 0; | ||
220 | cd->last_warn = -1; | ||
221 | list_add(&cd->others, &cache_list); | ||
222 | spin_unlock(&cache_list_lock); | ||
223 | |||
224 | /* start the cleaning process */ | ||
225 | schedule_work(&cache_cleaner); | ||
226 | } | ||
227 | |||
228 | int cache_unregister(struct cache_detail *cd) | ||
229 | { | ||
230 | cache_purge(cd); | ||
231 | spin_lock(&cache_list_lock); | ||
232 | write_lock(&cd->hash_lock); | ||
233 | if (cd->entries || atomic_read(&cd->inuse)) { | ||
234 | write_unlock(&cd->hash_lock); | ||
235 | spin_unlock(&cache_list_lock); | ||
236 | return -EBUSY; | ||
237 | } | ||
238 | if (current_detail == cd) | ||
239 | current_detail = NULL; | ||
240 | list_del_init(&cd->others); | ||
241 | write_unlock(&cd->hash_lock); | ||
242 | spin_unlock(&cache_list_lock); | ||
243 | if (cd->proc_ent) { | ||
244 | if (cd->flush_ent) | ||
245 | remove_proc_entry("flush", cd->proc_ent); | ||
246 | if (cd->channel_ent) | ||
247 | remove_proc_entry("channel", cd->proc_ent); | ||
248 | if (cd->content_ent) | ||
249 | remove_proc_entry("content", cd->proc_ent); | ||
250 | |||
251 | cd->proc_ent = NULL; | ||
252 | remove_proc_entry(cd->name, proc_net_rpc); | ||
253 | } | ||
254 | if (list_empty(&cache_list)) { | ||
255 | /* module must be being unloaded so its safe to kill the worker */ | ||
256 | cancel_delayed_work(&cache_cleaner); | ||
257 | flush_scheduled_work(); | ||
258 | } | ||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | /* clean cache tries to find something to clean | ||
263 | * and cleans it. | ||
264 | * It returns 1 if it cleaned something, | ||
265 | * 0 if it didn't find anything this time | ||
266 | * -1 if it fell off the end of the list. | ||
267 | */ | ||
268 | static int cache_clean(void) | ||
269 | { | ||
270 | int rv = 0; | ||
271 | struct list_head *next; | ||
272 | |||
273 | spin_lock(&cache_list_lock); | ||
274 | |||
275 | /* find a suitable table if we don't already have one */ | ||
276 | while (current_detail == NULL || | ||
277 | current_index >= current_detail->hash_size) { | ||
278 | if (current_detail) | ||
279 | next = current_detail->others.next; | ||
280 | else | ||
281 | next = cache_list.next; | ||
282 | if (next == &cache_list) { | ||
283 | current_detail = NULL; | ||
284 | spin_unlock(&cache_list_lock); | ||
285 | return -1; | ||
286 | } | ||
287 | current_detail = list_entry(next, struct cache_detail, others); | ||
288 | if (current_detail->nextcheck > get_seconds()) | ||
289 | current_index = current_detail->hash_size; | ||
290 | else { | ||
291 | current_index = 0; | ||
292 | current_detail->nextcheck = get_seconds()+30*60; | ||
293 | } | ||
294 | } | ||
295 | |||
296 | /* find a non-empty bucket in the table */ | ||
297 | while (current_detail && | ||
298 | current_index < current_detail->hash_size && | ||
299 | current_detail->hash_table[current_index] == NULL) | ||
300 | current_index++; | ||
301 | |||
302 | /* find a cleanable entry in the bucket and clean it, or set to next bucket */ | ||
303 | |||
304 | if (current_detail && current_index < current_detail->hash_size) { | ||
305 | struct cache_head *ch, **cp; | ||
306 | struct cache_detail *d; | ||
307 | |||
308 | write_lock(¤t_detail->hash_lock); | ||
309 | |||
310 | /* Ok, now to clean this strand */ | ||
311 | |||
312 | cp = & current_detail->hash_table[current_index]; | ||
313 | ch = *cp; | ||
314 | for (; ch; cp= & ch->next, ch= *cp) { | ||
315 | if (current_detail->nextcheck > ch->expiry_time) | ||
316 | current_detail->nextcheck = ch->expiry_time+1; | ||
317 | if (ch->expiry_time >= get_seconds() | ||
318 | && ch->last_refresh >= current_detail->flush_time | ||
319 | ) | ||
320 | continue; | ||
321 | if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) | ||
322 | queue_loose(current_detail, ch); | ||
323 | |||
324 | if (atomic_read(&ch->refcnt) == 1) | ||
325 | break; | ||
326 | } | ||
327 | if (ch) { | ||
328 | *cp = ch->next; | ||
329 | ch->next = NULL; | ||
330 | current_detail->entries--; | ||
331 | rv = 1; | ||
332 | } | ||
333 | write_unlock(¤t_detail->hash_lock); | ||
334 | d = current_detail; | ||
335 | if (!ch) | ||
336 | current_index ++; | ||
337 | spin_unlock(&cache_list_lock); | ||
338 | if (ch) | ||
339 | d->cache_put(ch, d); | ||
340 | } else | ||
341 | spin_unlock(&cache_list_lock); | ||
342 | |||
343 | return rv; | ||
344 | } | ||
345 | |||
346 | /* | ||
347 | * We want to regularly clean the cache, so we need to schedule some work ... | ||
348 | */ | ||
349 | static void do_cache_clean(void *data) | ||
350 | { | ||
351 | int delay = 5; | ||
352 | if (cache_clean() == -1) | ||
353 | delay = 30*HZ; | ||
354 | |||
355 | if (list_empty(&cache_list)) | ||
356 | delay = 0; | ||
357 | |||
358 | if (delay) | ||
359 | schedule_delayed_work(&cache_cleaner, delay); | ||
360 | } | ||
361 | |||
362 | |||
/*
 * Clean all caches promptly.
 *
 * cache_clean() is driven until it reports having fallen off the end
 * of the cache list, and then the whole thing is done a second time, so
 * that every cache has had a chance to be fully cleaned.  The scheduler
 * is given a chance to run between steps.
 */
void cache_flush(void)
{
	int pass;

	for (pass = 0; pass < 2; pass++) {
		while (cache_clean() != -1)
			cond_resched();
	}
}
375 | |||
376 | void cache_purge(struct cache_detail *detail) | ||
377 | { | ||
378 | detail->flush_time = LONG_MAX; | ||
379 | detail->nextcheck = get_seconds(); | ||
380 | cache_flush(); | ||
381 | detail->flush_time = 1; | ||
382 | } | ||
383 | |||
384 | |||
385 | |||
386 | /* | ||
387 | * Deferral and Revisiting of Requests. | ||
388 | * | ||
389 | * If a cache lookup finds a pending entry, we | ||
390 | * need to defer the request and revisit it later. | ||
391 | * All deferred requests are stored in a hash table, | ||
392 | * indexed by "struct cache_head *". | ||
393 | * As it may be wasteful to store a whole request | ||
394 | * structure, we allow the request to provide a | ||
395 | * deferred form, which must contain a | ||
396 | * 'struct cache_deferred_req' | ||
397 | * This cache_deferred_req contains a method to allow | ||
398 | * it to be revisited when cache info is available | ||
399 | */ | ||
400 | |||
401 | #define DFR_HASHSIZE (PAGE_SIZE/sizeof(struct list_head)) | ||
402 | #define DFR_HASH(item) ((((long)item)>>4 ^ (((long)item)>>13)) % DFR_HASHSIZE) | ||
403 | |||
404 | #define DFR_MAX 300 /* ??? */ | ||
405 | |||
406 | static DEFINE_SPINLOCK(cache_defer_lock); | ||
407 | static LIST_HEAD(cache_defer_list); | ||
408 | static struct list_head cache_defer_hash[DFR_HASHSIZE]; | ||
409 | static int cache_defer_cnt; | ||
410 | |||
411 | static void cache_defer_req(struct cache_req *req, struct cache_head *item) | ||
412 | { | ||
413 | struct cache_deferred_req *dreq; | ||
414 | int hash = DFR_HASH(item); | ||
415 | |||
416 | dreq = req->defer(req); | ||
417 | if (dreq == NULL) | ||
418 | return; | ||
419 | |||
420 | dreq->item = item; | ||
421 | dreq->recv_time = get_seconds(); | ||
422 | |||
423 | spin_lock(&cache_defer_lock); | ||
424 | |||
425 | list_add(&dreq->recent, &cache_defer_list); | ||
426 | |||
427 | if (cache_defer_hash[hash].next == NULL) | ||
428 | INIT_LIST_HEAD(&cache_defer_hash[hash]); | ||
429 | list_add(&dreq->hash, &cache_defer_hash[hash]); | ||
430 | |||
431 | /* it is in, now maybe clean up */ | ||
432 | dreq = NULL; | ||
433 | if (++cache_defer_cnt > DFR_MAX) { | ||
434 | /* too much in the cache, randomly drop | ||
435 | * first or last | ||
436 | */ | ||
437 | if (net_random()&1) | ||
438 | dreq = list_entry(cache_defer_list.next, | ||
439 | struct cache_deferred_req, | ||
440 | recent); | ||
441 | else | ||
442 | dreq = list_entry(cache_defer_list.prev, | ||
443 | struct cache_deferred_req, | ||
444 | recent); | ||
445 | list_del(&dreq->recent); | ||
446 | list_del(&dreq->hash); | ||
447 | cache_defer_cnt--; | ||
448 | } | ||
449 | spin_unlock(&cache_defer_lock); | ||
450 | |||
451 | if (dreq) { | ||
452 | /* there was one too many */ | ||
453 | dreq->revisit(dreq, 1); | ||
454 | } | ||
455 | if (test_bit(CACHE_VALID, &item->flags)) { | ||
456 | /* must have just been validated... */ | ||
457 | cache_revisit_request(item); | ||
458 | } | ||
459 | } | ||
460 | |||
461 | static void cache_revisit_request(struct cache_head *item) | ||
462 | { | ||
463 | struct cache_deferred_req *dreq; | ||
464 | struct list_head pending; | ||
465 | |||
466 | struct list_head *lp; | ||
467 | int hash = DFR_HASH(item); | ||
468 | |||
469 | INIT_LIST_HEAD(&pending); | ||
470 | spin_lock(&cache_defer_lock); | ||
471 | |||
472 | lp = cache_defer_hash[hash].next; | ||
473 | if (lp) { | ||
474 | while (lp != &cache_defer_hash[hash]) { | ||
475 | dreq = list_entry(lp, struct cache_deferred_req, hash); | ||
476 | lp = lp->next; | ||
477 | if (dreq->item == item) { | ||
478 | list_del(&dreq->hash); | ||
479 | list_move(&dreq->recent, &pending); | ||
480 | cache_defer_cnt--; | ||
481 | } | ||
482 | } | ||
483 | } | ||
484 | spin_unlock(&cache_defer_lock); | ||
485 | |||
486 | while (!list_empty(&pending)) { | ||
487 | dreq = list_entry(pending.next, struct cache_deferred_req, recent); | ||
488 | list_del_init(&dreq->recent); | ||
489 | dreq->revisit(dreq, 0); | ||
490 | } | ||
491 | } | ||
492 | |||
493 | void cache_clean_deferred(void *owner) | ||
494 | { | ||
495 | struct cache_deferred_req *dreq, *tmp; | ||
496 | struct list_head pending; | ||
497 | |||
498 | |||
499 | INIT_LIST_HEAD(&pending); | ||
500 | spin_lock(&cache_defer_lock); | ||
501 | |||
502 | list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { | ||
503 | if (dreq->owner == owner) { | ||
504 | list_del(&dreq->hash); | ||
505 | list_move(&dreq->recent, &pending); | ||
506 | cache_defer_cnt--; | ||
507 | } | ||
508 | } | ||
509 | spin_unlock(&cache_defer_lock); | ||
510 | |||
511 | while (!list_empty(&pending)) { | ||
512 | dreq = list_entry(pending.next, struct cache_deferred_req, recent); | ||
513 | list_del_init(&dreq->recent); | ||
514 | dreq->revisit(dreq, 1); | ||
515 | } | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | * communicate with user-space | ||
520 | * | ||
521 | * We have a magic /proc file - /proc/sunrpc/cache | ||
522 | * On read, you get a full request, or block | ||
523 | * On write, an update request is processed | ||
524 | * Poll works if anything to read, and always allows write | ||
525 | * | ||
526 | * Implemented by linked list of requests. Each open file has | ||
527 | * a ->private that also exists in this list. New request are added | ||
528 | * to the end and may wakeup and preceding readers. | ||
529 | * New readers are added to the head. If, on read, an item is found with | ||
530 | * CACHE_UPCALLING clear, we free it from the list. | ||
531 | * | ||
532 | */ | ||
533 | |||
534 | static DEFINE_SPINLOCK(queue_lock); | ||
535 | static DECLARE_MUTEX(queue_io_sem); | ||
536 | |||
537 | struct cache_queue { | ||
538 | struct list_head list; | ||
539 | int reader; /* if 0, then request */ | ||
540 | }; | ||
541 | struct cache_request { | ||
542 | struct cache_queue q; | ||
543 | struct cache_head *item; | ||
544 | char * buf; | ||
545 | int len; | ||
546 | int readers; | ||
547 | }; | ||
548 | struct cache_reader { | ||
549 | struct cache_queue q; | ||
550 | int offset; /* if non-0, we have a refcnt on next request */ | ||
551 | }; | ||
552 | |||
553 | static ssize_t | ||
554 | cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) | ||
555 | { | ||
556 | struct cache_reader *rp = filp->private_data; | ||
557 | struct cache_request *rq; | ||
558 | struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; | ||
559 | int err; | ||
560 | |||
561 | if (count == 0) | ||
562 | return 0; | ||
563 | |||
564 | down(&queue_io_sem); /* protect against multiple concurrent | ||
565 | * readers on this file */ | ||
566 | again: | ||
567 | spin_lock(&queue_lock); | ||
568 | /* need to find next request */ | ||
569 | while (rp->q.list.next != &cd->queue && | ||
570 | list_entry(rp->q.list.next, struct cache_queue, list) | ||
571 | ->reader) { | ||
572 | struct list_head *next = rp->q.list.next; | ||
573 | list_move(&rp->q.list, next); | ||
574 | } | ||
575 | if (rp->q.list.next == &cd->queue) { | ||
576 | spin_unlock(&queue_lock); | ||
577 | up(&queue_io_sem); | ||
578 | if (rp->offset) | ||
579 | BUG(); | ||
580 | return 0; | ||
581 | } | ||
582 | rq = container_of(rp->q.list.next, struct cache_request, q.list); | ||
583 | if (rq->q.reader) BUG(); | ||
584 | if (rp->offset == 0) | ||
585 | rq->readers++; | ||
586 | spin_unlock(&queue_lock); | ||
587 | |||
588 | if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { | ||
589 | err = -EAGAIN; | ||
590 | spin_lock(&queue_lock); | ||
591 | list_move(&rp->q.list, &rq->q.list); | ||
592 | spin_unlock(&queue_lock); | ||
593 | } else { | ||
594 | if (rp->offset + count > rq->len) | ||
595 | count = rq->len - rp->offset; | ||
596 | err = -EFAULT; | ||
597 | if (copy_to_user(buf, rq->buf + rp->offset, count)) | ||
598 | goto out; | ||
599 | rp->offset += count; | ||
600 | if (rp->offset >= rq->len) { | ||
601 | rp->offset = 0; | ||
602 | spin_lock(&queue_lock); | ||
603 | list_move(&rp->q.list, &rq->q.list); | ||
604 | spin_unlock(&queue_lock); | ||
605 | } | ||
606 | err = 0; | ||
607 | } | ||
608 | out: | ||
609 | if (rp->offset == 0) { | ||
610 | /* need to release rq */ | ||
611 | spin_lock(&queue_lock); | ||
612 | rq->readers--; | ||
613 | if (rq->readers == 0 && | ||
614 | !test_bit(CACHE_PENDING, &rq->item->flags)) { | ||
615 | list_del(&rq->q.list); | ||
616 | spin_unlock(&queue_lock); | ||
617 | cd->cache_put(rq->item, cd); | ||
618 | kfree(rq->buf); | ||
619 | kfree(rq); | ||
620 | } else | ||
621 | spin_unlock(&queue_lock); | ||
622 | } | ||
623 | if (err == -EAGAIN) | ||
624 | goto again; | ||
625 | up(&queue_io_sem); | ||
626 | return err ? err : count; | ||
627 | } | ||
628 | |||
629 | static char write_buf[8192]; /* protected by queue_io_sem */ | ||
630 | |||
631 | static ssize_t | ||
632 | cache_write(struct file *filp, const char __user *buf, size_t count, | ||
633 | loff_t *ppos) | ||
634 | { | ||
635 | int err; | ||
636 | struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; | ||
637 | |||
638 | if (count == 0) | ||
639 | return 0; | ||
640 | if (count >= sizeof(write_buf)) | ||
641 | return -EINVAL; | ||
642 | |||
643 | down(&queue_io_sem); | ||
644 | |||
645 | if (copy_from_user(write_buf, buf, count)) { | ||
646 | up(&queue_io_sem); | ||
647 | return -EFAULT; | ||
648 | } | ||
649 | write_buf[count] = '\0'; | ||
650 | if (cd->cache_parse) | ||
651 | err = cd->cache_parse(cd, write_buf, count); | ||
652 | else | ||
653 | err = -EINVAL; | ||
654 | |||
655 | up(&queue_io_sem); | ||
656 | return err ? err : count; | ||
657 | } | ||
658 | |||
659 | static DECLARE_WAIT_QUEUE_HEAD(queue_wait); | ||
660 | |||
661 | static unsigned int | ||
662 | cache_poll(struct file *filp, poll_table *wait) | ||
663 | { | ||
664 | unsigned int mask; | ||
665 | struct cache_reader *rp = filp->private_data; | ||
666 | struct cache_queue *cq; | ||
667 | struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data; | ||
668 | |||
669 | poll_wait(filp, &queue_wait, wait); | ||
670 | |||
671 | /* alway allow write */ | ||
672 | mask = POLL_OUT | POLLWRNORM; | ||
673 | |||
674 | if (!rp) | ||
675 | return mask; | ||
676 | |||
677 | spin_lock(&queue_lock); | ||
678 | |||
679 | for (cq= &rp->q; &cq->list != &cd->queue; | ||
680 | cq = list_entry(cq->list.next, struct cache_queue, list)) | ||
681 | if (!cq->reader) { | ||
682 | mask |= POLLIN | POLLRDNORM; | ||
683 | break; | ||
684 | } | ||
685 | spin_unlock(&queue_lock); | ||
686 | return mask; | ||
687 | } | ||
688 | |||
689 | static int | ||
690 | cache_ioctl(struct inode *ino, struct file *filp, | ||
691 | unsigned int cmd, unsigned long arg) | ||
692 | { | ||
693 | int len = 0; | ||
694 | struct cache_reader *rp = filp->private_data; | ||
695 | struct cache_queue *cq; | ||
696 | struct cache_detail *cd = PDE(ino)->data; | ||
697 | |||
698 | if (cmd != FIONREAD || !rp) | ||
699 | return -EINVAL; | ||
700 | |||
701 | spin_lock(&queue_lock); | ||
702 | |||
703 | /* only find the length remaining in current request, | ||
704 | * or the length of the next request | ||
705 | */ | ||
706 | for (cq= &rp->q; &cq->list != &cd->queue; | ||
707 | cq = list_entry(cq->list.next, struct cache_queue, list)) | ||
708 | if (!cq->reader) { | ||
709 | struct cache_request *cr = | ||
710 | container_of(cq, struct cache_request, q); | ||
711 | len = cr->len - rp->offset; | ||
712 | break; | ||
713 | } | ||
714 | spin_unlock(&queue_lock); | ||
715 | |||
716 | return put_user(len, (int __user *)arg); | ||
717 | } | ||
718 | |||
719 | static int | ||
720 | cache_open(struct inode *inode, struct file *filp) | ||
721 | { | ||
722 | struct cache_reader *rp = NULL; | ||
723 | |||
724 | nonseekable_open(inode, filp); | ||
725 | if (filp->f_mode & FMODE_READ) { | ||
726 | struct cache_detail *cd = PDE(inode)->data; | ||
727 | |||
728 | rp = kmalloc(sizeof(*rp), GFP_KERNEL); | ||
729 | if (!rp) | ||
730 | return -ENOMEM; | ||
731 | rp->offset = 0; | ||
732 | rp->q.reader = 1; | ||
733 | atomic_inc(&cd->readers); | ||
734 | spin_lock(&queue_lock); | ||
735 | list_add(&rp->q.list, &cd->queue); | ||
736 | spin_unlock(&queue_lock); | ||
737 | } | ||
738 | filp->private_data = rp; | ||
739 | return 0; | ||
740 | } | ||
741 | |||
742 | static int | ||
743 | cache_release(struct inode *inode, struct file *filp) | ||
744 | { | ||
745 | struct cache_reader *rp = filp->private_data; | ||
746 | struct cache_detail *cd = PDE(inode)->data; | ||
747 | |||
748 | if (rp) { | ||
749 | spin_lock(&queue_lock); | ||
750 | if (rp->offset) { | ||
751 | struct cache_queue *cq; | ||
752 | for (cq= &rp->q; &cq->list != &cd->queue; | ||
753 | cq = list_entry(cq->list.next, struct cache_queue, list)) | ||
754 | if (!cq->reader) { | ||
755 | container_of(cq, struct cache_request, q) | ||
756 | ->readers--; | ||
757 | break; | ||
758 | } | ||
759 | rp->offset = 0; | ||
760 | } | ||
761 | list_del(&rp->q.list); | ||
762 | spin_unlock(&queue_lock); | ||
763 | |||
764 | filp->private_data = NULL; | ||
765 | kfree(rp); | ||
766 | |||
767 | cd->last_close = get_seconds(); | ||
768 | atomic_dec(&cd->readers); | ||
769 | } | ||
770 | return 0; | ||
771 | } | ||
772 | |||
773 | |||
774 | |||
/*
 * File operations for a cache channel file: reads return queued
 * requests, writes are handed to the cache's ->cache_parse method,
 * and the file is not seekable.
 */
static struct file_operations cache_file_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.read		= cache_read,
	.write		= cache_write,
	.poll		= cache_poll,
	.ioctl		= cache_ioctl, /* for FIONREAD */
	.open		= cache_open,
	.release	= cache_release,
};
785 | |||
786 | |||
787 | static void queue_loose(struct cache_detail *detail, struct cache_head *ch) | ||
788 | { | ||
789 | struct cache_queue *cq; | ||
790 | spin_lock(&queue_lock); | ||
791 | list_for_each_entry(cq, &detail->queue, list) | ||
792 | if (!cq->reader) { | ||
793 | struct cache_request *cr = container_of(cq, struct cache_request, q); | ||
794 | if (cr->item != ch) | ||
795 | continue; | ||
796 | if (cr->readers != 0) | ||
797 | break; | ||
798 | list_del(&cr->q.list); | ||
799 | spin_unlock(&queue_lock); | ||
800 | detail->cache_put(cr->item, detail); | ||
801 | kfree(cr->buf); | ||
802 | kfree(cr); | ||
803 | return; | ||
804 | } | ||
805 | spin_unlock(&queue_lock); | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * Support routines for text-based upcalls. | ||
810 | * Fields are separated by spaces. | ||
811 | * Fields are either mangled, quoting space, tab, newline and slosh (backslash) | ||
812 | * as \ooo octal escapes, or are hexified with a leading \x | ||
813 | * Record is terminated with newline. | ||
814 | * | ||
815 | */ | ||
816 | |||
/*
 * Append 'str' as one quoted field, followed by a single space, to the
 * output buffer.
 *
 * @bpp: in/out pointer to the current write position
 * @lp:  in/out number of bytes still available at *bpp; a negative
 *       value means an earlier call overflowed, and nothing is done
 * @str: NUL-terminated text to append
 *
 * Space, tab, newline and backslash are written as a backslash
 * followed by three octal digits; every other byte is copied as is.
 * If the field (including the trailing space) does not fit, *lp is set
 * to -1.
 */
void qword_add(char **bpp, int *lp, char *str)
{
	char *bp = *bpp;
	int len = *lp;
	char c;

	if (len < 0) return;

	/*
	 * Test "len > 0", not just "len": an escape that does not fit
	 * drives len negative, and the loop must stop there instead of
	 * copying further plain characters past the end of the buffer.
	 */
	while ((c=*str++) && len > 0)
		switch(c) {
		case ' ':
		case '\t':
		case '\n':
		case '\\':
			if (len >= 4) {
				*bp++ = '\\';
				*bp++ = '0' + ((c & 0300)>>6);
				*bp++ = '0' + ((c & 0070)>>3);
				*bp++ = '0' + ((c & 0007)>>0);
			}
			len -= 4;
			break;
		default:
			*bp++ = c;
			len--;
		}
	/* input left over, or no room for the separator: flag overflow */
	if (c || len <1) len = -1;
	else {
		*bp++ = ' ';
		len--;
	}
	*bpp = bp;
	*lp = len;
}
851 | |||
/*
 * Append 'blen' bytes from 'buf' as one hex-encoded field: a "\x"
 * prefix, two lower-case hex digits per byte, then a single space.
 *
 * @bpp:  in/out pointer to the current write position
 * @lp:   in/out number of bytes still available at *bpp; a negative
 *        value means an earlier call overflowed, and nothing is done
 * @buf:  bytes to encode
 * @blen: number of bytes in 'buf'
 *
 * If the field does not fit completely, *lp is set to -1.
 */
void qword_addhex(char **bpp, int *lp, char *buf, int blen)
{
	static const char hexdigits[] = "0123456789abcdef";
	char *out = *bpp;
	int room = *lp;

	if (room < 0)
		return;

	if (room > 2) {
		*out++ = '\\';
		*out++ = 'x';
		room -= 2;
		for (; blen && room >= 2; blen--, room -= 2) {
			unsigned char byte = *buf++;

			*out++ = hexdigits[byte >> 4];
			*out++ = hexdigits[byte & 0x0f];
		}
	}

	/* bytes left over, or no room for the separator: flag overflow */
	if (blen || room < 1) {
		room = -1;
	} else {
		*out++ = ' ';
		room--;
	}
	*bpp = out;
	*lp = room;
}
879 | |||
880 | static void warn_no_listener(struct cache_detail *detail) | ||
881 | { | ||
882 | if (detail->last_warn != detail->last_close) { | ||
883 | detail->last_warn = detail->last_close; | ||
884 | if (detail->warn_no_listener) | ||
885 | detail->warn_no_listener(detail); | ||
886 | } | ||
887 | } | ||
888 | |||
889 | /* | ||
890 | * register an upcall request to user-space. | ||
891 | * Each request is at most one page long. | ||
892 | */ | ||
893 | static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) | ||
894 | { | ||
895 | |||
896 | char *buf; | ||
897 | struct cache_request *crq; | ||
898 | char *bp; | ||
899 | int len; | ||
900 | |||
901 | if (detail->cache_request == NULL) | ||
902 | return -EINVAL; | ||
903 | |||
904 | if (atomic_read(&detail->readers) == 0 && | ||
905 | detail->last_close < get_seconds() - 30) { | ||
906 | warn_no_listener(detail); | ||
907 | return -EINVAL; | ||
908 | } | ||
909 | |||
910 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
911 | if (!buf) | ||
912 | return -EAGAIN; | ||
913 | |||
914 | crq = kmalloc(sizeof (*crq), GFP_KERNEL); | ||
915 | if (!crq) { | ||
916 | kfree(buf); | ||
917 | return -EAGAIN; | ||
918 | } | ||
919 | |||
920 | bp = buf; len = PAGE_SIZE; | ||
921 | |||
922 | detail->cache_request(detail, h, &bp, &len); | ||
923 | |||
924 | if (len < 0) { | ||
925 | kfree(buf); | ||
926 | kfree(crq); | ||
927 | return -EAGAIN; | ||
928 | } | ||
929 | crq->q.reader = 0; | ||
930 | crq->item = cache_get(h); | ||
931 | crq->buf = buf; | ||
932 | crq->len = PAGE_SIZE - len; | ||
933 | crq->readers = 0; | ||
934 | spin_lock(&queue_lock); | ||
935 | list_add_tail(&crq->q.list, &detail->queue); | ||
936 | spin_unlock(&queue_lock); | ||
937 | wake_up(&queue_wait); | ||
938 | return 0; | ||
939 | } | ||
940 | |||
941 | /* | ||
942 | * parse a message from user-space and pass it | ||
943 | * to an appropriate cache | ||
944 | * Messages are, like requests, separated into fields by | ||
945 | * spaces and dequotes as \xHEXSTRING or embedded \nnn octal | ||
946 | * | ||
947 | * Message is | ||
948 | * reply cachename expiry key ... content.... | ||
949 | * | ||
950 | * key and content are both parsed by cache | ||
951 | */ | ||
952 | |||
/* true for the characters '0'..'7'; argument parenthesized for safety */
#define isodigit(c) (isdigit(c) && (c) <= '7')

/*
 * Extract the next field from a space-separated message.
 *
 * @bpp:     in/out pointer into the message; on success it is advanced
 *           past the field and any following spaces
 * @dest:    buffer that receives the decoded, NUL-terminated field
 * @bufsize: size of 'dest' in bytes, including room for the NUL
 *
 * A field starting with "\x" is decoded as pairs of hex digits;
 * otherwise the text is copied with \nnn octal escapes (first digit
 * 0-3) decoded.
 *
 * Returns the number of bytes stored (not counting the NUL), or -1 if
 * the field is not terminated by space, newline or end of string once
 * decoding stops - which includes the case where it does not fit in
 * 'dest'.  On -1, *bpp is left unchanged and 'dest' is not terminated.
 */
int qword_get(char **bpp, char *dest, int bufsize)
{
	/* return bytes copied, or -1 on error */
	char *bp = *bpp;
	int len = 0;

	while (*bp == ' ') bp++;

	if (bp[0] == '\\' && bp[1] == 'x') {
		/* HEX STRING */
		bp += 2;
		/*
		 * Stop at bufsize-1, like the text branch below, so the
		 * terminating NUL written at the end stays inside dest.
		 */
		while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize - 1) {
			int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
			bp++;
			byte <<= 4;
			byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
			*dest++ = byte;
			bp++;
			len++;
		}
	} else {
		/* text with \nnn octal quoting */
		while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) {
			if (*bp == '\\' &&
			    isodigit(bp[1]) && (bp[1] <= '3') &&
			    isodigit(bp[2]) &&
			    isodigit(bp[3])) {
				int byte = (*++bp -'0');
				bp++;
				byte = (byte << 3) | (*bp++ - '0');
				byte = (byte << 3) | (*bp++ - '0');
				*dest++ = byte;
				len++;
			} else {
				*dest++ = *bp++;
				len++;
			}
		}
	}

	/* anything other than a field terminator here is an error */
	if (*bp != ' ' && *bp != '\n' && *bp != '\0')
		return -1;
	while (*bp == ' ') bp++;
	*bpp = bp;
	*dest = '\0';
	return len;
}
1001 | |||
1002 | |||
1003 | /* | ||
1004 | * support /proc/sunrpc/cache/$CACHENAME/content | ||
1005 | * as a seqfile. | ||
1006 | * We call ->cache_show passing NULL for the item to | ||
1007 | * get a header, then pass each real item in the cache | ||
1008 | */ | ||
1009 | |||
/* Per-open state for the "content" seq_file, stored in seq_file->private. */
struct handle {
	struct cache_detail *cd;	/* cache whose hash table is being listed */
};
1013 | |||
/*
 * seq_file ->start: take cd->hash_lock for reading and return the
 * element at position *pos.
 *
 * Position 0 is the header (SEQ_START_TOKEN).  For any other position,
 * *pos - 1 holds the hash bucket number in its upper 32 bits and the
 * index within that bucket's chain in its lower 32 bits.  If the chain
 * is shorter than the index, the search moves on to the first entry of
 * the next non-empty bucket and *pos is updated to match.
 *
 * Returns NULL when there are no more entries.  The read lock is taken
 * on every path, including the NULL return; c_stop() drops it.
 */
static void *c_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;
	unsigned hash, entry;
	struct cache_head *ch;
	struct cache_detail *cd = ((struct handle*)m->private)->cd;


	read_lock(&cd->hash_lock);
	/* position 0 is the header; n becomes the zero-based entry position */
	if (!n--)
		return SEQ_START_TOKEN;
	hash = n >> 32;
	entry = n & ((1LL<<32) - 1);

	/* walk 'entry' links down the chain of bucket 'hash' */
	for (ch=cd->hash_table[hash]; ch; ch=ch->next)
		if (!entry--)
			return ch;
	/* chain exhausted: clear the entry index and find the next non-empty bucket */
	n &= ~((1LL<<32) - 1);
	do {
		hash++;
		n += 1LL<<32;
	} while(hash < cd->hash_size &&
		cd->hash_table[hash]==NULL);
	if (hash >= cd->hash_size)
		return NULL;
	/* +1 undoes the header offset removed above */
	*pos = n+1;
	return cd->hash_table[hash];
}
1042 | |||
/*
 * seq_file ->next: return the entry following 'p' and advance *pos.
 *
 * After the header, iteration starts at bucket 0.  Within a chain the
 * position is simply incremented; at the end of a chain the bucket
 * number (upper 32 bits of *pos) is advanced to the next non-empty
 * bucket and the low 32 bits are reset, then incremented once for the
 * entry being returned.
 *
 * Returns NULL once every bucket has been visited.
 */
static void *c_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct cache_head *ch = p;
	int hash = (*pos >> 32);
	struct cache_detail *cd = ((struct handle*)m->private)->cd;

	if (p == SEQ_START_TOKEN)
		hash = 0;
	else if (ch->next == NULL) {
		/* end of this chain: move to the following bucket */
		hash++;
		*pos += 1LL<<32;
	} else {
		++*pos;
		return ch->next;
	}
	/* starting a new bucket: clear the in-chain index */
	*pos &= ~((1LL<<32) - 1);
	while (hash < cd->hash_size &&
	       cd->hash_table[hash] == NULL) {
		hash++;
		*pos += 1LL<<32;
	}
	if (hash >= cd->hash_size)
		return NULL;
	++*pos;
	return cd->hash_table[hash];
}
1069 | |||
1070 | static void c_stop(struct seq_file *m, void *p) | ||
1071 | { | ||
1072 | struct cache_detail *cd = ((struct handle*)m->private)->cd; | ||
1073 | read_unlock(&cd->hash_lock); | ||
1074 | } | ||
1075 | |||
1076 | static int c_show(struct seq_file *m, void *p) | ||
1077 | { | ||
1078 | struct cache_head *cp = p; | ||
1079 | struct cache_detail *cd = ((struct handle*)m->private)->cd; | ||
1080 | |||
1081 | if (p == SEQ_START_TOKEN) | ||
1082 | return cd->cache_show(m, cd, NULL); | ||
1083 | |||
1084 | ifdebug(CACHE) | ||
1085 | seq_printf(m, "# expiry=%ld refcnt=%d\n", | ||
1086 | cp->expiry_time, atomic_read(&cp->refcnt)); | ||
1087 | cache_get(cp); | ||
1088 | if (cache_check(cd, cp, NULL)) | ||
1089 | /* cache_check does a cache_put on failure */ | ||
1090 | seq_printf(m, "# "); | ||
1091 | else | ||
1092 | cache_put(cp, cd); | ||
1093 | |||
1094 | return cd->cache_show(m, cd, cp); | ||
1095 | } | ||
1096 | |||
/* seq_file iterator used to list the contents of a cache. */
static struct seq_operations cache_content_op = {
	.start	= c_start,
	.next	= c_next,
	.stop	= c_stop,
	.show	= c_show,
};
1103 | |||
1104 | static int content_open(struct inode *inode, struct file *file) | ||
1105 | { | ||
1106 | int res; | ||
1107 | struct handle *han; | ||
1108 | struct cache_detail *cd = PDE(inode)->data; | ||
1109 | |||
1110 | han = kmalloc(sizeof(*han), GFP_KERNEL); | ||
1111 | if (han == NULL) | ||
1112 | return -ENOMEM; | ||
1113 | |||
1114 | han->cd = cd; | ||
1115 | |||
1116 | res = seq_open(file, &cache_content_op); | ||
1117 | if (res) | ||
1118 | kfree(han); | ||
1119 | else | ||
1120 | ((struct seq_file *)file->private_data)->private = han; | ||
1121 | |||
1122 | return res; | ||
1123 | } | ||
1124 | static int content_release(struct inode *inode, struct file *file) | ||
1125 | { | ||
1126 | struct seq_file *m = (struct seq_file *)file->private_data; | ||
1127 | struct handle *han = m->private; | ||
1128 | kfree(han); | ||
1129 | m->private = NULL; | ||
1130 | return seq_release(inode, file); | ||
1131 | } | ||
1132 | |||
/* File operations for the seq_file based cache "content" listing. */
static struct file_operations content_file_operations = {
	.open		= content_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= content_release,
};
1139 | |||
1140 | static ssize_t read_flush(struct file *file, char __user *buf, | ||
1141 | size_t count, loff_t *ppos) | ||
1142 | { | ||
1143 | struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data; | ||
1144 | char tbuf[20]; | ||
1145 | unsigned long p = *ppos; | ||
1146 | int len; | ||
1147 | |||
1148 | sprintf(tbuf, "%lu\n", cd->flush_time); | ||
1149 | len = strlen(tbuf); | ||
1150 | if (p >= len) | ||
1151 | return 0; | ||
1152 | len -= p; | ||
1153 | if (len > count) len = count; | ||
1154 | if (copy_to_user(buf, (void*)(tbuf+p), len)) | ||
1155 | len = -EFAULT; | ||
1156 | else | ||
1157 | *ppos += len; | ||
1158 | return len; | ||
1159 | } | ||
1160 | |||
1161 | static ssize_t write_flush(struct file * file, const char __user * buf, | ||
1162 | size_t count, loff_t *ppos) | ||
1163 | { | ||
1164 | struct cache_detail *cd = PDE(file->f_dentry->d_inode)->data; | ||
1165 | char tbuf[20]; | ||
1166 | char *ep; | ||
1167 | long flushtime; | ||
1168 | if (*ppos || count > sizeof(tbuf)-1) | ||
1169 | return -EINVAL; | ||
1170 | if (copy_from_user(tbuf, buf, count)) | ||
1171 | return -EFAULT; | ||
1172 | tbuf[count] = 0; | ||
1173 | flushtime = simple_strtoul(tbuf, &ep, 0); | ||
1174 | if (*ep && *ep != '\n') | ||
1175 | return -EINVAL; | ||
1176 | |||
1177 | cd->flush_time = flushtime; | ||
1178 | cd->nextcheck = get_seconds(); | ||
1179 | cache_flush(); | ||
1180 | |||
1181 | *ppos += count; | ||
1182 | return count; | ||
1183 | } | ||
1184 | |||
/* File operations for the per-cache "flush" file (non-seekable). */
static struct file_operations cache_flush_operations = {
	.open		= nonseekable_open,
	.read		= read_flush,
	.write		= write_flush,
};
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c new file mode 100644 index 000000000000..02bc029d46fe --- /dev/null +++ b/net/sunrpc/clnt.c | |||
@@ -0,0 +1,1085 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/clnt.c | ||
3 | * | ||
4 | * This file contains the high-level RPC interface. | ||
5 | * It is modeled as a finite state machine to support both synchronous | ||
6 | * and asynchronous requests. | ||
7 | * | ||
8 | * - RPC header generation and argument serialization. | ||
9 | * - Credential refresh. | ||
10 | * - TCP connect handling. | ||
11 | * - Retry of operation when it is suspected the operation failed because | ||
12 | * of uid squashing on the server, or when the credentials were stale | ||
13 | * and need to be refreshed, or when a packet was damaged in transit. | ||
14 | * This may have to be moved to the VFS layer. | ||
15 | * | ||
16 | * NB: BSD uses a more intelligent approach to guessing when a request | ||
17 | * or reply has been lost by keeping the RTO estimate for each procedure. | ||
18 | * We currently make do with a constant timeout value. | ||
19 | * | ||
20 | * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com> | ||
21 | * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> | ||
22 | */ | ||
23 | |||
24 | #include <asm/system.h> | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <linux/in.h> | ||
31 | #include <linux/utsname.h> | ||
32 | |||
33 | #include <linux/sunrpc/clnt.h> | ||
34 | #include <linux/workqueue.h> | ||
35 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
36 | |||
37 | #include <linux/nfs.h> | ||
38 | |||
39 | |||
40 | #define RPC_SLACK_SPACE (1024) /* total overkill */ | ||
41 | |||
42 | #ifdef RPC_DEBUG | ||
43 | # define RPCDBG_FACILITY RPCDBG_CALL | ||
44 | #endif | ||
45 | |||
46 | static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); | ||
47 | |||
48 | |||
49 | static void call_start(struct rpc_task *task); | ||
50 | static void call_reserve(struct rpc_task *task); | ||
51 | static void call_reserveresult(struct rpc_task *task); | ||
52 | static void call_allocate(struct rpc_task *task); | ||
53 | static void call_encode(struct rpc_task *task); | ||
54 | static void call_decode(struct rpc_task *task); | ||
55 | static void call_bind(struct rpc_task *task); | ||
56 | static void call_transmit(struct rpc_task *task); | ||
57 | static void call_status(struct rpc_task *task); | ||
58 | static void call_refresh(struct rpc_task *task); | ||
59 | static void call_refreshresult(struct rpc_task *task); | ||
60 | static void call_timeout(struct rpc_task *task); | ||
61 | static void call_connect(struct rpc_task *task); | ||
62 | static void call_connect_status(struct rpc_task *task); | ||
63 | static u32 * call_header(struct rpc_task *task); | ||
64 | static u32 * call_verify(struct rpc_task *task); | ||
65 | |||
66 | |||
67 | static int | ||
68 | rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) | ||
69 | { | ||
70 | static uint32_t clntid; | ||
71 | int error; | ||
72 | |||
73 | if (dir_name == NULL) | ||
74 | return 0; | ||
75 | for (;;) { | ||
76 | snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), | ||
77 | "%s/clnt%x", dir_name, | ||
78 | (unsigned int)clntid++); | ||
79 | clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; | ||
80 | clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); | ||
81 | if (!IS_ERR(clnt->cl_dentry)) | ||
82 | return 0; | ||
83 | error = PTR_ERR(clnt->cl_dentry); | ||
84 | if (error != -EEXIST) { | ||
85 | printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", | ||
86 | clnt->cl_pathname, error); | ||
87 | return error; | ||
88 | } | ||
89 | } | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * Create an RPC client | ||
94 | * FIXME: This should also take a flags argument (as in task->tk_flags). | ||
95 | * It's called (among others) from pmap_create_client, which may in | ||
96 | * turn be called by an async task. In this case, rpciod should not be | ||
97 | * made to sleep too long. | ||
98 | */ | ||
99 | struct rpc_clnt * | ||
100 | rpc_create_client(struct rpc_xprt *xprt, char *servname, | ||
101 | struct rpc_program *program, u32 vers, | ||
102 | rpc_authflavor_t flavor) | ||
103 | { | ||
104 | struct rpc_version *version; | ||
105 | struct rpc_clnt *clnt = NULL; | ||
106 | int err; | ||
107 | int len; | ||
108 | |||
109 | dprintk("RPC: creating %s client for %s (xprt %p)\n", | ||
110 | program->name, servname, xprt); | ||
111 | |||
112 | err = -EINVAL; | ||
113 | if (!xprt) | ||
114 | goto out_err; | ||
115 | if (vers >= program->nrvers || !(version = program->version[vers])) | ||
116 | goto out_err; | ||
117 | |||
118 | err = -ENOMEM; | ||
119 | clnt = (struct rpc_clnt *) kmalloc(sizeof(*clnt), GFP_KERNEL); | ||
120 | if (!clnt) | ||
121 | goto out_err; | ||
122 | memset(clnt, 0, sizeof(*clnt)); | ||
123 | atomic_set(&clnt->cl_users, 0); | ||
124 | atomic_set(&clnt->cl_count, 1); | ||
125 | clnt->cl_parent = clnt; | ||
126 | |||
127 | clnt->cl_server = clnt->cl_inline_name; | ||
128 | len = strlen(servname) + 1; | ||
129 | if (len > sizeof(clnt->cl_inline_name)) { | ||
130 | char *buf = kmalloc(len, GFP_KERNEL); | ||
131 | if (buf != 0) | ||
132 | clnt->cl_server = buf; | ||
133 | else | ||
134 | len = sizeof(clnt->cl_inline_name); | ||
135 | } | ||
136 | strlcpy(clnt->cl_server, servname, len); | ||
137 | |||
138 | clnt->cl_xprt = xprt; | ||
139 | clnt->cl_procinfo = version->procs; | ||
140 | clnt->cl_maxproc = version->nrprocs; | ||
141 | clnt->cl_protname = program->name; | ||
142 | clnt->cl_pmap = &clnt->cl_pmap_default; | ||
143 | clnt->cl_port = xprt->addr.sin_port; | ||
144 | clnt->cl_prog = program->number; | ||
145 | clnt->cl_vers = version->number; | ||
146 | clnt->cl_prot = xprt->prot; | ||
147 | clnt->cl_stats = program->stats; | ||
148 | rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); | ||
149 | |||
150 | if (!clnt->cl_port) | ||
151 | clnt->cl_autobind = 1; | ||
152 | |||
153 | clnt->cl_rtt = &clnt->cl_rtt_default; | ||
154 | rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); | ||
155 | |||
156 | err = rpc_setup_pipedir(clnt, program->pipe_dir_name); | ||
157 | if (err < 0) | ||
158 | goto out_no_path; | ||
159 | |||
160 | err = -ENOMEM; | ||
161 | if (!rpcauth_create(flavor, clnt)) { | ||
162 | printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", | ||
163 | flavor); | ||
164 | goto out_no_auth; | ||
165 | } | ||
166 | |||
167 | /* save the nodename */ | ||
168 | clnt->cl_nodelen = strlen(system_utsname.nodename); | ||
169 | if (clnt->cl_nodelen > UNX_MAXNODENAME) | ||
170 | clnt->cl_nodelen = UNX_MAXNODENAME; | ||
171 | memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen); | ||
172 | return clnt; | ||
173 | |||
174 | out_no_auth: | ||
175 | rpc_rmdir(clnt->cl_pathname); | ||
176 | out_no_path: | ||
177 | if (clnt->cl_server != clnt->cl_inline_name) | ||
178 | kfree(clnt->cl_server); | ||
179 | kfree(clnt); | ||
180 | out_err: | ||
181 | return ERR_PTR(err); | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * This function clones the RPC client structure. It allows us to share the | ||
186 | * same transport while varying parameters such as the authentication | ||
187 | * flavour. | ||
188 | */ | ||
189 | struct rpc_clnt * | ||
190 | rpc_clone_client(struct rpc_clnt *clnt) | ||
191 | { | ||
192 | struct rpc_clnt *new; | ||
193 | |||
194 | new = (struct rpc_clnt *)kmalloc(sizeof(*new), GFP_KERNEL); | ||
195 | if (!new) | ||
196 | goto out_no_clnt; | ||
197 | memcpy(new, clnt, sizeof(*new)); | ||
198 | atomic_set(&new->cl_count, 1); | ||
199 | atomic_set(&new->cl_users, 0); | ||
200 | new->cl_parent = clnt; | ||
201 | atomic_inc(&clnt->cl_count); | ||
202 | /* Duplicate portmapper */ | ||
203 | rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); | ||
204 | /* Turn off autobind on clones */ | ||
205 | new->cl_autobind = 0; | ||
206 | new->cl_oneshot = 0; | ||
207 | new->cl_dead = 0; | ||
208 | rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); | ||
209 | if (new->cl_auth) | ||
210 | atomic_inc(&new->cl_auth->au_count); | ||
211 | return new; | ||
212 | out_no_clnt: | ||
213 | printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); | ||
214 | return ERR_PTR(-ENOMEM); | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Properly shut down an RPC client, terminating all outstanding | ||
219 | * requests. Note that we must be certain that cl_oneshot and | ||
220 | * cl_dead are cleared, or else the client would be destroyed | ||
221 | * when the last task releases it. | ||
222 | */ | ||
223 | int | ||
224 | rpc_shutdown_client(struct rpc_clnt *clnt) | ||
225 | { | ||
226 | dprintk("RPC: shutting down %s client for %s, tasks=%d\n", | ||
227 | clnt->cl_protname, clnt->cl_server, | ||
228 | atomic_read(&clnt->cl_users)); | ||
229 | |||
230 | while (atomic_read(&clnt->cl_users) > 0) { | ||
231 | /* Don't let rpc_release_client destroy us */ | ||
232 | clnt->cl_oneshot = 0; | ||
233 | clnt->cl_dead = 0; | ||
234 | rpc_killall_tasks(clnt); | ||
235 | sleep_on_timeout(&destroy_wait, 1*HZ); | ||
236 | } | ||
237 | |||
238 | if (atomic_read(&clnt->cl_users) < 0) { | ||
239 | printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n", | ||
240 | clnt, atomic_read(&clnt->cl_users)); | ||
241 | #ifdef RPC_DEBUG | ||
242 | rpc_show_tasks(); | ||
243 | #endif | ||
244 | BUG(); | ||
245 | } | ||
246 | |||
247 | return rpc_destroy_client(clnt); | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Delete an RPC client | ||
252 | */ | ||
253 | int | ||
254 | rpc_destroy_client(struct rpc_clnt *clnt) | ||
255 | { | ||
256 | if (!atomic_dec_and_test(&clnt->cl_count)) | ||
257 | return 1; | ||
258 | BUG_ON(atomic_read(&clnt->cl_users) != 0); | ||
259 | |||
260 | dprintk("RPC: destroying %s client for %s\n", | ||
261 | clnt->cl_protname, clnt->cl_server); | ||
262 | if (clnt->cl_auth) { | ||
263 | rpcauth_destroy(clnt->cl_auth); | ||
264 | clnt->cl_auth = NULL; | ||
265 | } | ||
266 | if (clnt->cl_parent != clnt) { | ||
267 | rpc_destroy_client(clnt->cl_parent); | ||
268 | goto out_free; | ||
269 | } | ||
270 | if (clnt->cl_pathname[0]) | ||
271 | rpc_rmdir(clnt->cl_pathname); | ||
272 | if (clnt->cl_xprt) { | ||
273 | xprt_destroy(clnt->cl_xprt); | ||
274 | clnt->cl_xprt = NULL; | ||
275 | } | ||
276 | if (clnt->cl_server != clnt->cl_inline_name) | ||
277 | kfree(clnt->cl_server); | ||
278 | out_free: | ||
279 | kfree(clnt); | ||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Release an RPC client | ||
285 | */ | ||
286 | void | ||
287 | rpc_release_client(struct rpc_clnt *clnt) | ||
288 | { | ||
289 | dprintk("RPC: rpc_release_client(%p, %d)\n", | ||
290 | clnt, atomic_read(&clnt->cl_users)); | ||
291 | |||
292 | if (!atomic_dec_and_test(&clnt->cl_users)) | ||
293 | return; | ||
294 | wake_up(&destroy_wait); | ||
295 | if (clnt->cl_oneshot || clnt->cl_dead) | ||
296 | rpc_destroy_client(clnt); | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * Default callback for async RPC calls | ||
301 | */ | ||
/*
 * Intentionally empty: rpc_call_async() installs this as the task
 * callback when the caller passes none.
 */
static void
rpc_default_callback(struct rpc_task *task)
{
}
306 | |||
307 | /* | ||
308 | * Export the signal mask handling for asynchronous code that | ||
309 | * sleeps on RPC calls | ||
310 | */ | ||
311 | |||
312 | void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset) | ||
313 | { | ||
314 | unsigned long sigallow = sigmask(SIGKILL); | ||
315 | unsigned long irqflags; | ||
316 | |||
317 | /* Turn off various signals */ | ||
318 | if (clnt->cl_intr) { | ||
319 | struct k_sigaction *action = current->sighand->action; | ||
320 | if (action[SIGINT-1].sa.sa_handler == SIG_DFL) | ||
321 | sigallow |= sigmask(SIGINT); | ||
322 | if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL) | ||
323 | sigallow |= sigmask(SIGQUIT); | ||
324 | } | ||
325 | spin_lock_irqsave(¤t->sighand->siglock, irqflags); | ||
326 | *oldset = current->blocked; | ||
327 | siginitsetinv(¤t->blocked, sigallow & ~oldset->sig[0]); | ||
328 | recalc_sigpending(); | ||
329 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); | ||
330 | } | ||
331 | |||
332 | void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset) | ||
333 | { | ||
334 | unsigned long irqflags; | ||
335 | |||
336 | spin_lock_irqsave(¤t->sighand->siglock, irqflags); | ||
337 | current->blocked = *oldset; | ||
338 | recalc_sigpending(); | ||
339 | spin_unlock_irqrestore(¤t->sighand->siglock, irqflags); | ||
340 | } | ||
341 | |||
342 | /* | ||
343 | * New rpc_call implementation | ||
344 | */ | ||
345 | int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) | ||
346 | { | ||
347 | struct rpc_task *task; | ||
348 | sigset_t oldset; | ||
349 | int status; | ||
350 | |||
351 | /* If this client is slain all further I/O fails */ | ||
352 | if (clnt->cl_dead) | ||
353 | return -EIO; | ||
354 | |||
355 | BUG_ON(flags & RPC_TASK_ASYNC); | ||
356 | |||
357 | rpc_clnt_sigmask(clnt, &oldset); | ||
358 | |||
359 | status = -ENOMEM; | ||
360 | task = rpc_new_task(clnt, NULL, flags); | ||
361 | if (task == NULL) | ||
362 | goto out; | ||
363 | |||
364 | rpc_call_setup(task, msg, 0); | ||
365 | |||
366 | /* Set up the call info struct and execute the task */ | ||
367 | if (task->tk_status == 0) | ||
368 | status = rpc_execute(task); | ||
369 | else { | ||
370 | status = task->tk_status; | ||
371 | rpc_release_task(task); | ||
372 | } | ||
373 | |||
374 | out: | ||
375 | rpc_clnt_sigunmask(clnt, &oldset); | ||
376 | |||
377 | return status; | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * New rpc_call implementation | ||
382 | */ | ||
383 | int | ||
384 | rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, | ||
385 | rpc_action callback, void *data) | ||
386 | { | ||
387 | struct rpc_task *task; | ||
388 | sigset_t oldset; | ||
389 | int status; | ||
390 | |||
391 | /* If this client is slain all further I/O fails */ | ||
392 | if (clnt->cl_dead) | ||
393 | return -EIO; | ||
394 | |||
395 | flags |= RPC_TASK_ASYNC; | ||
396 | |||
397 | rpc_clnt_sigmask(clnt, &oldset); | ||
398 | |||
399 | /* Create/initialize a new RPC task */ | ||
400 | if (!callback) | ||
401 | callback = rpc_default_callback; | ||
402 | status = -ENOMEM; | ||
403 | if (!(task = rpc_new_task(clnt, callback, flags))) | ||
404 | goto out; | ||
405 | task->tk_calldata = data; | ||
406 | |||
407 | rpc_call_setup(task, msg, 0); | ||
408 | |||
409 | /* Set up the call info struct and execute the task */ | ||
410 | status = task->tk_status; | ||
411 | if (status == 0) | ||
412 | rpc_execute(task); | ||
413 | else | ||
414 | rpc_release_task(task); | ||
415 | |||
416 | out: | ||
417 | rpc_clnt_sigunmask(clnt, &oldset); | ||
418 | |||
419 | return status; | ||
420 | } | ||
421 | |||
422 | |||
423 | void | ||
424 | rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) | ||
425 | { | ||
426 | task->tk_msg = *msg; | ||
427 | task->tk_flags |= flags; | ||
428 | /* Bind the user cred */ | ||
429 | if (task->tk_msg.rpc_cred != NULL) | ||
430 | rpcauth_holdcred(task); | ||
431 | else | ||
432 | rpcauth_bindcred(task); | ||
433 | |||
434 | if (task->tk_status == 0) | ||
435 | task->tk_action = call_start; | ||
436 | else | ||
437 | task->tk_action = NULL; | ||
438 | } | ||
439 | |||
440 | void | ||
441 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) | ||
442 | { | ||
443 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
444 | |||
445 | xprt->sndsize = 0; | ||
446 | if (sndsize) | ||
447 | xprt->sndsize = sndsize + RPC_SLACK_SPACE; | ||
448 | xprt->rcvsize = 0; | ||
449 | if (rcvsize) | ||
450 | xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; | ||
451 | if (xprt_connected(xprt)) | ||
452 | xprt_sock_setbufsize(xprt); | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * Return size of largest payload RPC client can support, in bytes | ||
457 | * | ||
458 | * For stream transports, this is one RPC record fragment (see RFC | ||
459 | * 1831), as we don't support multi-record requests yet. For datagram | ||
460 | * transports, this is the size of an IP packet minus the IP, UDP, and | ||
461 | * RPC header sizes. | ||
462 | */ | ||
463 | size_t rpc_max_payload(struct rpc_clnt *clnt) | ||
464 | { | ||
465 | return clnt->cl_xprt->max_payload; | ||
466 | } | ||
467 | EXPORT_SYMBOL(rpc_max_payload); | ||
468 | |||
469 | /* | ||
470 | * Restart an (async) RPC call. Usually called from within the | ||
471 | * exit handler. | ||
472 | */ | ||
473 | void | ||
474 | rpc_restart_call(struct rpc_task *task) | ||
475 | { | ||
476 | if (RPC_ASSASSINATED(task)) | ||
477 | return; | ||
478 | |||
479 | task->tk_action = call_start; | ||
480 | } | ||
481 | |||
482 | /* | ||
483 | * 0. Initial state | ||
484 | * | ||
485 | * Other FSM states can be visited zero or more times, but | ||
486 | * this state is visited exactly once for each RPC. | ||
487 | */ | ||
488 | static void | ||
489 | call_start(struct rpc_task *task) | ||
490 | { | ||
491 | struct rpc_clnt *clnt = task->tk_client; | ||
492 | |||
493 | dprintk("RPC: %4d call_start %s%d proc %d (%s)\n", task->tk_pid, | ||
494 | clnt->cl_protname, clnt->cl_vers, task->tk_msg.rpc_proc->p_proc, | ||
495 | (RPC_IS_ASYNC(task) ? "async" : "sync")); | ||
496 | |||
497 | /* Increment call count */ | ||
498 | task->tk_msg.rpc_proc->p_count++; | ||
499 | clnt->cl_stats->rpccnt++; | ||
500 | task->tk_action = call_reserve; | ||
501 | } | ||
502 | |||
503 | /* | ||
504 | * 1. Reserve an RPC call slot | ||
505 | */ | ||
506 | static void | ||
507 | call_reserve(struct rpc_task *task) | ||
508 | { | ||
509 | dprintk("RPC: %4d call_reserve\n", task->tk_pid); | ||
510 | |||
511 | if (!rpcauth_uptodatecred(task)) { | ||
512 | task->tk_action = call_refresh; | ||
513 | return; | ||
514 | } | ||
515 | |||
516 | task->tk_status = 0; | ||
517 | task->tk_action = call_reserveresult; | ||
518 | xprt_reserve(task); | ||
519 | } | ||
520 | |||
521 | /* | ||
522 | * 1b. Grok the result of xprt_reserve() | ||
523 | */ | ||
524 | static void | ||
525 | call_reserveresult(struct rpc_task *task) | ||
526 | { | ||
527 | int status = task->tk_status; | ||
528 | |||
529 | dprintk("RPC: %4d call_reserveresult (status %d)\n", | ||
530 | task->tk_pid, task->tk_status); | ||
531 | |||
532 | /* | ||
533 | * After a call to xprt_reserve(), we must have either | ||
534 | * a request slot or else an error status. | ||
535 | */ | ||
536 | task->tk_status = 0; | ||
537 | if (status >= 0) { | ||
538 | if (task->tk_rqstp) { | ||
539 | task->tk_action = call_allocate; | ||
540 | return; | ||
541 | } | ||
542 | |||
543 | printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n", | ||
544 | __FUNCTION__, status); | ||
545 | rpc_exit(task, -EIO); | ||
546 | return; | ||
547 | } | ||
548 | |||
549 | /* | ||
550 | * Even though there was an error, we may have acquired | ||
551 | * a request slot somehow. Make sure not to leak it. | ||
552 | */ | ||
553 | if (task->tk_rqstp) { | ||
554 | printk(KERN_ERR "%s: status=%d, request allocated anyway\n", | ||
555 | __FUNCTION__, status); | ||
556 | xprt_release(task); | ||
557 | } | ||
558 | |||
559 | switch (status) { | ||
560 | case -EAGAIN: /* woken up; retry */ | ||
561 | task->tk_action = call_reserve; | ||
562 | return; | ||
563 | case -EIO: /* probably a shutdown */ | ||
564 | break; | ||
565 | default: | ||
566 | printk(KERN_ERR "%s: unrecognized error %d, exiting\n", | ||
567 | __FUNCTION__, status); | ||
568 | break; | ||
569 | } | ||
570 | rpc_exit(task, status); | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. | ||
575 | * (Note: buffer memory is freed in rpc_task_release). | ||
576 | */ | ||
577 | static void | ||
578 | call_allocate(struct rpc_task *task) | ||
579 | { | ||
580 | unsigned int bufsiz; | ||
581 | |||
582 | dprintk("RPC: %4d call_allocate (status %d)\n", | ||
583 | task->tk_pid, task->tk_status); | ||
584 | task->tk_action = call_bind; | ||
585 | if (task->tk_buffer) | ||
586 | return; | ||
587 | |||
588 | /* FIXME: compute buffer requirements more exactly using | ||
589 | * auth->au_wslack */ | ||
590 | bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; | ||
591 | |||
592 | if (rpc_malloc(task, bufsiz << 1) != NULL) | ||
593 | return; | ||
594 | printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); | ||
595 | |||
596 | if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) { | ||
597 | xprt_release(task); | ||
598 | task->tk_action = call_reserve; | ||
599 | rpc_delay(task, HZ>>4); | ||
600 | return; | ||
601 | } | ||
602 | |||
603 | rpc_exit(task, -ERESTARTSYS); | ||
604 | } | ||
605 | |||
606 | /* | ||
607 | * 3. Encode arguments of an RPC call | ||
608 | */ | ||
609 | static void | ||
610 | call_encode(struct rpc_task *task) | ||
611 | { | ||
612 | struct rpc_clnt *clnt = task->tk_client; | ||
613 | struct rpc_rqst *req = task->tk_rqstp; | ||
614 | struct xdr_buf *sndbuf = &req->rq_snd_buf; | ||
615 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | ||
616 | unsigned int bufsiz; | ||
617 | kxdrproc_t encode; | ||
618 | int status; | ||
619 | u32 *p; | ||
620 | |||
621 | dprintk("RPC: %4d call_encode (status %d)\n", | ||
622 | task->tk_pid, task->tk_status); | ||
623 | |||
624 | /* Default buffer setup */ | ||
625 | bufsiz = task->tk_bufsize >> 1; | ||
626 | sndbuf->head[0].iov_base = (void *)task->tk_buffer; | ||
627 | sndbuf->head[0].iov_len = bufsiz; | ||
628 | sndbuf->tail[0].iov_len = 0; | ||
629 | sndbuf->page_len = 0; | ||
630 | sndbuf->len = 0; | ||
631 | sndbuf->buflen = bufsiz; | ||
632 | rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); | ||
633 | rcvbuf->head[0].iov_len = bufsiz; | ||
634 | rcvbuf->tail[0].iov_len = 0; | ||
635 | rcvbuf->page_len = 0; | ||
636 | rcvbuf->len = 0; | ||
637 | rcvbuf->buflen = bufsiz; | ||
638 | |||
639 | /* Encode header and provided arguments */ | ||
640 | encode = task->tk_msg.rpc_proc->p_encode; | ||
641 | if (!(p = call_header(task))) { | ||
642 | printk(KERN_INFO "RPC: call_header failed, exit EIO\n"); | ||
643 | rpc_exit(task, -EIO); | ||
644 | return; | ||
645 | } | ||
646 | if (encode && (status = rpcauth_wrap_req(task, encode, req, p, | ||
647 | task->tk_msg.rpc_argp)) < 0) { | ||
648 | printk(KERN_WARNING "%s: can't encode arguments: %d\n", | ||
649 | clnt->cl_protname, -status); | ||
650 | rpc_exit(task, status); | ||
651 | } | ||
652 | } | ||
653 | |||
654 | /* | ||
655 | * 4. Get the server port number if not yet set | ||
656 | */ | ||
657 | static void | ||
658 | call_bind(struct rpc_task *task) | ||
659 | { | ||
660 | struct rpc_clnt *clnt = task->tk_client; | ||
661 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
662 | |||
663 | dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid, | ||
664 | xprt, (xprt_connected(xprt) ? "is" : "is not")); | ||
665 | |||
666 | task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect; | ||
667 | |||
668 | if (!clnt->cl_port) { | ||
669 | task->tk_action = call_connect; | ||
670 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | ||
671 | rpc_getport(task, clnt); | ||
672 | } | ||
673 | } | ||
674 | |||
675 | /* | ||
676 | * 4a. Connect to the RPC server (TCP case) | ||
677 | */ | ||
678 | static void | ||
679 | call_connect(struct rpc_task *task) | ||
680 | { | ||
681 | struct rpc_clnt *clnt = task->tk_client; | ||
682 | |||
683 | dprintk("RPC: %4d call_connect status %d\n", | ||
684 | task->tk_pid, task->tk_status); | ||
685 | |||
686 | if (xprt_connected(clnt->cl_xprt)) { | ||
687 | task->tk_action = call_transmit; | ||
688 | return; | ||
689 | } | ||
690 | task->tk_action = call_connect_status; | ||
691 | if (task->tk_status < 0) | ||
692 | return; | ||
693 | xprt_connect(task); | ||
694 | } | ||
695 | |||
696 | /* | ||
697 | * 4b. Sort out connect result | ||
698 | */ | ||
699 | static void | ||
700 | call_connect_status(struct rpc_task *task) | ||
701 | { | ||
702 | struct rpc_clnt *clnt = task->tk_client; | ||
703 | int status = task->tk_status; | ||
704 | |||
705 | task->tk_status = 0; | ||
706 | if (status >= 0) { | ||
707 | clnt->cl_stats->netreconn++; | ||
708 | task->tk_action = call_transmit; | ||
709 | return; | ||
710 | } | ||
711 | |||
712 | /* Something failed: we may have to rebind */ | ||
713 | if (clnt->cl_autobind) | ||
714 | clnt->cl_port = 0; | ||
715 | switch (status) { | ||
716 | case -ENOTCONN: | ||
717 | case -ETIMEDOUT: | ||
718 | case -EAGAIN: | ||
719 | task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect; | ||
720 | break; | ||
721 | default: | ||
722 | rpc_exit(task, -EIO); | ||
723 | } | ||
724 | } | ||
725 | |||
726 | /* | ||
727 | * 5. Transmit the RPC request, and wait for reply | ||
728 | */ | ||
729 | static void | ||
730 | call_transmit(struct rpc_task *task) | ||
731 | { | ||
732 | dprintk("RPC: %4d call_transmit (status %d)\n", | ||
733 | task->tk_pid, task->tk_status); | ||
734 | |||
735 | task->tk_action = call_status; | ||
736 | if (task->tk_status < 0) | ||
737 | return; | ||
738 | task->tk_status = xprt_prepare_transmit(task); | ||
739 | if (task->tk_status != 0) | ||
740 | return; | ||
741 | /* Encode here so that rpcsec_gss can use correct sequence number. */ | ||
742 | if (!task->tk_rqstp->rq_bytes_sent) | ||
743 | call_encode(task); | ||
744 | if (task->tk_status < 0) | ||
745 | return; | ||
746 | xprt_transmit(task); | ||
747 | if (task->tk_status < 0) | ||
748 | return; | ||
749 | if (!task->tk_msg.rpc_proc->p_decode) { | ||
750 | task->tk_action = NULL; | ||
751 | rpc_wake_up_task(task); | ||
752 | } | ||
753 | } | ||
754 | |||
/*
 * 6.	Sort out the RPC call status
 *
 *	A reply that arrived while nothing was left to send overrides the
 *	task status with the received length.  Non-negative status goes to
 *	call_decode; errors are dispatched to the timeout, rebind or
 *	retransmit states, or terminate the task.
 */
static void
call_status(struct rpc_task *task)
{
	struct rpc_clnt	*client = task->tk_client;
	struct rpc_rqst	*rqst = task->tk_rqstp;
	int		result;

	if (!rqst->rq_bytes_sent && rqst->rq_received > 0)
		task->tk_status = rqst->rq_received;

	dprintk("RPC: %4d call_status (status %d)\n",
				task->tk_pid, task->tk_status);

	result = task->tk_status;
	if (result >= 0) {
		task->tk_action = call_decode;
		return;
	}

	task->tk_status = 0;
	switch (result) {
	case -EAGAIN:
		task->tk_action = call_transmit;
		break;
	case -ETIMEDOUT:
		task->tk_action = call_timeout;
		break;
	case -ENOTCONN:
	case -ECONNREFUSED:
		/* Start over: resend everything, possibly to a new port */
		rqst->rq_bytes_sent = 0;
		if (client->cl_autobind)
			client->cl_port = 0;
		task->tk_action = call_bind;
		break;
	default:
		/* -EIO (shutdown or soft timeout) is not reported */
		if (result != -EIO && client->cl_chatty)
			printk("%s: RPC call returned error %d\n",
			       client->cl_protname, -result);
		rpc_exit(task, result);
		break;
	}
}
804 | |||
805 | /* | ||
806 | * 6a. Handle RPC timeout | ||
807 | * We do not release the request slot, so we keep using the | ||
808 | * same XID for all retransmits. | ||
809 | */ | ||
810 | static void | ||
811 | call_timeout(struct rpc_task *task) | ||
812 | { | ||
813 | struct rpc_clnt *clnt = task->tk_client; | ||
814 | |||
815 | if (xprt_adjust_timeout(task->tk_rqstp) == 0) { | ||
816 | dprintk("RPC: %4d call_timeout (minor)\n", task->tk_pid); | ||
817 | goto retry; | ||
818 | } | ||
819 | |||
820 | dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); | ||
821 | if (RPC_IS_SOFT(task)) { | ||
822 | if (clnt->cl_chatty) | ||
823 | printk(KERN_NOTICE "%s: server %s not responding, timed out\n", | ||
824 | clnt->cl_protname, clnt->cl_server); | ||
825 | rpc_exit(task, -EIO); | ||
826 | return; | ||
827 | } | ||
828 | |||
829 | if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { | ||
830 | task->tk_flags |= RPC_CALL_MAJORSEEN; | ||
831 | printk(KERN_NOTICE "%s: server %s not responding, still trying\n", | ||
832 | clnt->cl_protname, clnt->cl_server); | ||
833 | } | ||
834 | if (clnt->cl_autobind) | ||
835 | clnt->cl_port = 0; | ||
836 | |||
837 | retry: | ||
838 | clnt->cl_stats->rpcretrans++; | ||
839 | task->tk_action = call_bind; | ||
840 | task->tk_status = 0; | ||
841 | } | ||
842 | |||
843 | /* | ||
844 | * 7. Decode the RPC reply | ||
845 | */ | ||
846 | static void | ||
847 | call_decode(struct rpc_task *task) | ||
848 | { | ||
849 | struct rpc_clnt *clnt = task->tk_client; | ||
850 | struct rpc_rqst *req = task->tk_rqstp; | ||
851 | kxdrproc_t decode = task->tk_msg.rpc_proc->p_decode; | ||
852 | u32 *p; | ||
853 | |||
854 | dprintk("RPC: %4d call_decode (status %d)\n", | ||
855 | task->tk_pid, task->tk_status); | ||
856 | |||
857 | if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) { | ||
858 | printk(KERN_NOTICE "%s: server %s OK\n", | ||
859 | clnt->cl_protname, clnt->cl_server); | ||
860 | task->tk_flags &= ~RPC_CALL_MAJORSEEN; | ||
861 | } | ||
862 | |||
863 | if (task->tk_status < 12) { | ||
864 | if (!RPC_IS_SOFT(task)) { | ||
865 | task->tk_action = call_bind; | ||
866 | clnt->cl_stats->rpcretrans++; | ||
867 | goto out_retry; | ||
868 | } | ||
869 | printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n", | ||
870 | clnt->cl_protname, task->tk_status); | ||
871 | rpc_exit(task, -EIO); | ||
872 | return; | ||
873 | } | ||
874 | |||
875 | req->rq_rcv_buf.len = req->rq_private_buf.len; | ||
876 | |||
877 | /* Check that the softirq receive buffer is valid */ | ||
878 | WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf, | ||
879 | sizeof(req->rq_rcv_buf)) != 0); | ||
880 | |||
881 | /* Verify the RPC header */ | ||
882 | if (!(p = call_verify(task))) { | ||
883 | if (task->tk_action == NULL) | ||
884 | return; | ||
885 | goto out_retry; | ||
886 | } | ||
887 | |||
888 | task->tk_action = NULL; | ||
889 | |||
890 | if (decode) | ||
891 | task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, | ||
892 | task->tk_msg.rpc_resp); | ||
893 | dprintk("RPC: %4d call_decode result %d\n", task->tk_pid, | ||
894 | task->tk_status); | ||
895 | return; | ||
896 | out_retry: | ||
897 | req->rq_received = req->rq_private_buf.len = 0; | ||
898 | task->tk_status = 0; | ||
899 | } | ||
900 | |||
901 | /* | ||
902 | * 8. Refresh the credentials if rejected by the server | ||
903 | */ | ||
904 | static void | ||
905 | call_refresh(struct rpc_task *task) | ||
906 | { | ||
907 | dprintk("RPC: %4d call_refresh\n", task->tk_pid); | ||
908 | |||
909 | xprt_release(task); /* Must do to obtain new XID */ | ||
910 | task->tk_action = call_refreshresult; | ||
911 | task->tk_status = 0; | ||
912 | task->tk_client->cl_stats->rpcauthrefresh++; | ||
913 | rpcauth_refreshcred(task); | ||
914 | } | ||
915 | |||
916 | /* | ||
917 | * 8a. Process the results of a credential refresh | ||
918 | */ | ||
919 | static void | ||
920 | call_refreshresult(struct rpc_task *task) | ||
921 | { | ||
922 | int status = task->tk_status; | ||
923 | dprintk("RPC: %4d call_refreshresult (status %d)\n", | ||
924 | task->tk_pid, task->tk_status); | ||
925 | |||
926 | task->tk_status = 0; | ||
927 | task->tk_action = call_reserve; | ||
928 | if (status >= 0 && rpcauth_uptodatecred(task)) | ||
929 | return; | ||
930 | if (status == -EACCES) { | ||
931 | rpc_exit(task, -EACCES); | ||
932 | return; | ||
933 | } | ||
934 | task->tk_action = call_refresh; | ||
935 | if (status != -ETIMEDOUT) | ||
936 | rpc_delay(task, 3*HZ); | ||
937 | return; | ||
938 | } | ||
939 | |||
940 | /* | ||
941 | * Call header serialization | ||
942 | */ | ||
943 | static u32 * | ||
944 | call_header(struct rpc_task *task) | ||
945 | { | ||
946 | struct rpc_clnt *clnt = task->tk_client; | ||
947 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
948 | struct rpc_rqst *req = task->tk_rqstp; | ||
949 | u32 *p = req->rq_svec[0].iov_base; | ||
950 | |||
951 | /* FIXME: check buffer size? */ | ||
952 | if (xprt->stream) | ||
953 | *p++ = 0; /* fill in later */ | ||
954 | *p++ = req->rq_xid; /* XID */ | ||
955 | *p++ = htonl(RPC_CALL); /* CALL */ | ||
956 | *p++ = htonl(RPC_VERSION); /* RPC version */ | ||
957 | *p++ = htonl(clnt->cl_prog); /* program number */ | ||
958 | *p++ = htonl(clnt->cl_vers); /* program version */ | ||
959 | *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */ | ||
960 | return rpcauth_marshcred(task, p); | ||
961 | } | ||
962 | |||
963 | /* | ||
964 | * Reply header verification | ||
965 | */ | ||
966 | static u32 * | ||
967 | call_verify(struct rpc_task *task) | ||
968 | { | ||
969 | struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; | ||
970 | int len = task->tk_rqstp->rq_rcv_buf.len >> 2; | ||
971 | u32 *p = iov->iov_base, n; | ||
972 | int error = -EACCES; | ||
973 | |||
974 | if ((len -= 3) < 0) | ||
975 | goto out_overflow; | ||
976 | p += 1; /* skip XID */ | ||
977 | |||
978 | if ((n = ntohl(*p++)) != RPC_REPLY) { | ||
979 | printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); | ||
980 | goto out_retry; | ||
981 | } | ||
982 | if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { | ||
983 | if (--len < 0) | ||
984 | goto out_overflow; | ||
985 | switch ((n = ntohl(*p++))) { | ||
986 | case RPC_AUTH_ERROR: | ||
987 | break; | ||
988 | case RPC_MISMATCH: | ||
989 | printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__); | ||
990 | goto out_eio; | ||
991 | default: | ||
992 | printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n); | ||
993 | goto out_eio; | ||
994 | } | ||
995 | if (--len < 0) | ||
996 | goto out_overflow; | ||
997 | switch ((n = ntohl(*p++))) { | ||
998 | case RPC_AUTH_REJECTEDCRED: | ||
999 | case RPC_AUTH_REJECTEDVERF: | ||
1000 | case RPCSEC_GSS_CREDPROBLEM: | ||
1001 | case RPCSEC_GSS_CTXPROBLEM: | ||
1002 | if (!task->tk_cred_retry) | ||
1003 | break; | ||
1004 | task->tk_cred_retry--; | ||
1005 | dprintk("RPC: %4d call_verify: retry stale creds\n", | ||
1006 | task->tk_pid); | ||
1007 | rpcauth_invalcred(task); | ||
1008 | task->tk_action = call_refresh; | ||
1009 | return NULL; | ||
1010 | case RPC_AUTH_BADCRED: | ||
1011 | case RPC_AUTH_BADVERF: | ||
1012 | /* possibly garbled cred/verf? */ | ||
1013 | if (!task->tk_garb_retry) | ||
1014 | break; | ||
1015 | task->tk_garb_retry--; | ||
1016 | dprintk("RPC: %4d call_verify: retry garbled creds\n", | ||
1017 | task->tk_pid); | ||
1018 | task->tk_action = call_bind; | ||
1019 | return NULL; | ||
1020 | case RPC_AUTH_TOOWEAK: | ||
1021 | printk(KERN_NOTICE "call_verify: server requires stronger " | ||
1022 | "authentication.\n"); | ||
1023 | break; | ||
1024 | default: | ||
1025 | printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n); | ||
1026 | error = -EIO; | ||
1027 | } | ||
1028 | dprintk("RPC: %4d call_verify: call rejected %d\n", | ||
1029 | task->tk_pid, n); | ||
1030 | goto out_err; | ||
1031 | } | ||
1032 | if (!(p = rpcauth_checkverf(task, p))) { | ||
1033 | printk(KERN_WARNING "call_verify: auth check failed\n"); | ||
1034 | goto out_retry; /* bad verifier, retry */ | ||
1035 | } | ||
1036 | len = p - (u32 *)iov->iov_base - 1; | ||
1037 | if (len < 0) | ||
1038 | goto out_overflow; | ||
1039 | switch ((n = ntohl(*p++))) { | ||
1040 | case RPC_SUCCESS: | ||
1041 | return p; | ||
1042 | case RPC_PROG_UNAVAIL: | ||
1043 | printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n", | ||
1044 | (unsigned int)task->tk_client->cl_prog, | ||
1045 | task->tk_client->cl_server); | ||
1046 | goto out_eio; | ||
1047 | case RPC_PROG_MISMATCH: | ||
1048 | printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n", | ||
1049 | (unsigned int)task->tk_client->cl_prog, | ||
1050 | (unsigned int)task->tk_client->cl_vers, | ||
1051 | task->tk_client->cl_server); | ||
1052 | goto out_eio; | ||
1053 | case RPC_PROC_UNAVAIL: | ||
1054 | printk(KERN_WARNING "RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n", | ||
1055 | task->tk_msg.rpc_proc, | ||
1056 | task->tk_client->cl_prog, | ||
1057 | task->tk_client->cl_vers, | ||
1058 | task->tk_client->cl_server); | ||
1059 | goto out_eio; | ||
1060 | case RPC_GARBAGE_ARGS: | ||
1061 | dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__); | ||
1062 | break; /* retry */ | ||
1063 | default: | ||
1064 | printk(KERN_WARNING "call_verify: server accept status: %x\n", n); | ||
1065 | /* Also retry */ | ||
1066 | } | ||
1067 | |||
1068 | out_retry: | ||
1069 | task->tk_client->cl_stats->rpcgarbage++; | ||
1070 | if (task->tk_garb_retry) { | ||
1071 | task->tk_garb_retry--; | ||
1072 | dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); | ||
1073 | task->tk_action = call_bind; | ||
1074 | return NULL; | ||
1075 | } | ||
1076 | printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__); | ||
1077 | out_eio: | ||
1078 | error = -EIO; | ||
1079 | out_err: | ||
1080 | rpc_exit(task, error); | ||
1081 | return NULL; | ||
1082 | out_overflow: | ||
1083 | printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); | ||
1084 | goto out_retry; | ||
1085 | } | ||
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c new file mode 100644 index 000000000000..d0b1d2c34a4d --- /dev/null +++ b/net/sunrpc/pmap_clnt.c | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/pmap_clnt.c | ||
3 | * | ||
4 | * Portmapper client. | ||
5 | * | ||
6 | * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/config.h> | ||
10 | #include <linux/types.h> | ||
11 | #include <linux/socket.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/uio.h> | ||
15 | #include <linux/in.h> | ||
16 | #include <linux/sunrpc/clnt.h> | ||
17 | #include <linux/sunrpc/xprt.h> | ||
18 | #include <linux/sunrpc/sched.h> | ||
19 | |||
20 | #ifdef RPC_DEBUG | ||
21 | # define RPCDBG_FACILITY RPCDBG_PMAP | ||
22 | #endif | ||
23 | |||
24 | #define PMAP_SET 1 | ||
25 | #define PMAP_UNSET 2 | ||
26 | #define PMAP_GETPORT 3 | ||
27 | |||
28 | static struct rpc_procinfo pmap_procedures[]; | ||
29 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); | ||
30 | static void pmap_getport_done(struct rpc_task *); | ||
31 | static struct rpc_program pmap_program; | ||
32 | static DEFINE_SPINLOCK(pmap_lock); | ||
33 | |||
34 | /* | ||
35 | * Obtain the port for a given RPC service on a given host. This one can | ||
36 | * be called for an ongoing RPC request. | ||
37 | */ | ||
38 | void | ||
39 | rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) | ||
40 | { | ||
41 | struct rpc_portmap *map = clnt->cl_pmap; | ||
42 | struct sockaddr_in *sap = &clnt->cl_xprt->addr; | ||
43 | struct rpc_message msg = { | ||
44 | .rpc_proc = &pmap_procedures[PMAP_GETPORT], | ||
45 | .rpc_argp = map, | ||
46 | .rpc_resp = &clnt->cl_port, | ||
47 | .rpc_cred = NULL | ||
48 | }; | ||
49 | struct rpc_clnt *pmap_clnt; | ||
50 | struct rpc_task *child; | ||
51 | |||
52 | dprintk("RPC: %4d rpc_getport(%s, %d, %d, %d)\n", | ||
53 | task->tk_pid, clnt->cl_server, | ||
54 | map->pm_prog, map->pm_vers, map->pm_prot); | ||
55 | |||
56 | spin_lock(&pmap_lock); | ||
57 | if (map->pm_binding) { | ||
58 | rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); | ||
59 | spin_unlock(&pmap_lock); | ||
60 | return; | ||
61 | } | ||
62 | map->pm_binding = 1; | ||
63 | spin_unlock(&pmap_lock); | ||
64 | |||
65 | pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot); | ||
66 | if (IS_ERR(pmap_clnt)) { | ||
67 | task->tk_status = PTR_ERR(pmap_clnt); | ||
68 | goto bailout; | ||
69 | } | ||
70 | task->tk_status = 0; | ||
71 | |||
72 | /* | ||
73 | * Note: rpc_new_child will release client after a failure. | ||
74 | */ | ||
75 | if (!(child = rpc_new_child(pmap_clnt, task))) | ||
76 | goto bailout; | ||
77 | |||
78 | /* Setup the call info struct */ | ||
79 | rpc_call_setup(child, &msg, 0); | ||
80 | |||
81 | /* ... and run the child task */ | ||
82 | rpc_run_child(task, child, pmap_getport_done); | ||
83 | return; | ||
84 | |||
85 | bailout: | ||
86 | spin_lock(&pmap_lock); | ||
87 | map->pm_binding = 0; | ||
88 | rpc_wake_up(&map->pm_bindwait); | ||
89 | spin_unlock(&pmap_lock); | ||
90 | task->tk_status = -EIO; | ||
91 | task->tk_action = NULL; | ||
92 | } | ||
93 | |||
#ifdef CONFIG_ROOT_NFS
/*
 * Synchronously ask the portmapper at @sin for the port of the given
 * program/version/protocol.
 *
 * Returns the port reported by the portmapper on success, -EACCES when
 * the portmapper answered with port 0, or the negative error from
 * pmap_create() / rpc_call().
 */
int
rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
{
	struct rpc_portmap	query = {
		.pm_prog	= prog,
		.pm_vers	= vers,
		.pm_prot	= prot,
		.pm_port	= 0
	};
	struct rpc_clnt		*pmapper;
	char			name[32];
	int			result;

	dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %d, %d, %d)\n",
			NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);

	/* The dotted-quad address doubles as the server name */
	sprintf(name, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr));
	pmapper = pmap_create(name, sin, prot);
	if (IS_ERR(pmapper))
		return PTR_ERR(pmapper);

	/* Setup the call info struct */
	result = rpc_call(pmapper, PMAP_GETPORT, &query, &query.pm_port, 0);
	if (result < 0)
		return result;

	if (query.pm_port == 0)
		return -EACCES;
	return query.pm_port;
}
#endif
127 | |||
128 | static void | ||
129 | pmap_getport_done(struct rpc_task *task) | ||
130 | { | ||
131 | struct rpc_clnt *clnt = task->tk_client; | ||
132 | struct rpc_portmap *map = clnt->cl_pmap; | ||
133 | |||
134 | dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", | ||
135 | task->tk_pid, task->tk_status, clnt->cl_port); | ||
136 | if (task->tk_status < 0) { | ||
137 | /* Make the calling task exit with an error */ | ||
138 | task->tk_action = NULL; | ||
139 | } else if (clnt->cl_port == 0) { | ||
140 | /* Program not registered */ | ||
141 | task->tk_status = -EACCES; | ||
142 | task->tk_action = NULL; | ||
143 | } else { | ||
144 | /* byte-swap port number first */ | ||
145 | clnt->cl_port = htons(clnt->cl_port); | ||
146 | clnt->cl_xprt->addr.sin_port = clnt->cl_port; | ||
147 | } | ||
148 | spin_lock(&pmap_lock); | ||
149 | map->pm_binding = 0; | ||
150 | rpc_wake_up(&map->pm_bindwait); | ||
151 | spin_unlock(&pmap_lock); | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * Set or unset a port registration with the local portmapper. | ||
156 | * port == 0 means unregister, port != 0 means register. | ||
157 | */ | ||
158 | int | ||
159 | rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | ||
160 | { | ||
161 | struct sockaddr_in sin; | ||
162 | struct rpc_portmap map; | ||
163 | struct rpc_clnt *pmap_clnt; | ||
164 | int error = 0; | ||
165 | |||
166 | dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n", | ||
167 | prog, vers, prot, port); | ||
168 | |||
169 | sin.sin_family = AF_INET; | ||
170 | sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); | ||
171 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP); | ||
172 | if (IS_ERR(pmap_clnt)) { | ||
173 | error = PTR_ERR(pmap_clnt); | ||
174 | dprintk("RPC: couldn't create pmap client. Error = %d\n", error); | ||
175 | return error; | ||
176 | } | ||
177 | |||
178 | map.pm_prog = prog; | ||
179 | map.pm_vers = vers; | ||
180 | map.pm_prot = prot; | ||
181 | map.pm_port = port; | ||
182 | |||
183 | error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET, | ||
184 | &map, okay, 0); | ||
185 | |||
186 | if (error < 0) { | ||
187 | printk(KERN_WARNING | ||
188 | "RPC: failed to contact portmap (errno %d).\n", | ||
189 | error); | ||
190 | } | ||
191 | dprintk("RPC: registration status %d/%d\n", error, *okay); | ||
192 | |||
193 | /* Client deleted automatically because cl_oneshot == 1 */ | ||
194 | return error; | ||
195 | } | ||
196 | |||
197 | static struct rpc_clnt * | ||
198 | pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) | ||
199 | { | ||
200 | struct rpc_xprt *xprt; | ||
201 | struct rpc_clnt *clnt; | ||
202 | |||
203 | /* printk("pmap: create xprt\n"); */ | ||
204 | xprt = xprt_create_proto(proto, srvaddr, NULL); | ||
205 | if (IS_ERR(xprt)) | ||
206 | return (struct rpc_clnt *)xprt; | ||
207 | xprt->addr.sin_port = htons(RPC_PMAP_PORT); | ||
208 | |||
209 | /* printk("pmap: create clnt\n"); */ | ||
210 | clnt = rpc_create_client(xprt, hostname, | ||
211 | &pmap_program, RPC_PMAP_VERSION, | ||
212 | RPC_AUTH_UNIX); | ||
213 | if (IS_ERR(clnt)) { | ||
214 | xprt_destroy(xprt); | ||
215 | } else { | ||
216 | clnt->cl_softrtry = 1; | ||
217 | clnt->cl_chatty = 1; | ||
218 | clnt->cl_oneshot = 1; | ||
219 | } | ||
220 | return clnt; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * XDR encode/decode functions for PMAP | ||
225 | */ | ||
226 | static int | ||
227 | xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct rpc_portmap *map) | ||
228 | { | ||
229 | dprintk("RPC: xdr_encode_mapping(%d, %d, %d, %d)\n", | ||
230 | map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port); | ||
231 | *p++ = htonl(map->pm_prog); | ||
232 | *p++ = htonl(map->pm_vers); | ||
233 | *p++ = htonl(map->pm_prot); | ||
234 | *p++ = htonl(map->pm_port); | ||
235 | |||
236 | req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | static int | ||
241 | xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp) | ||
242 | { | ||
243 | *portp = (unsigned short) ntohl(*p++); | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | static int | ||
248 | xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp) | ||
249 | { | ||
250 | *boolp = (unsigned int) ntohl(*p++); | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | static struct rpc_procinfo pmap_procedures[] = { | ||
255 | [PMAP_SET] = { | ||
256 | .p_proc = PMAP_SET, | ||
257 | .p_encode = (kxdrproc_t) xdr_encode_mapping, | ||
258 | .p_decode = (kxdrproc_t) xdr_decode_bool, | ||
259 | .p_bufsiz = 4, | ||
260 | .p_count = 1, | ||
261 | }, | ||
262 | [PMAP_UNSET] = { | ||
263 | .p_proc = PMAP_UNSET, | ||
264 | .p_encode = (kxdrproc_t) xdr_encode_mapping, | ||
265 | .p_decode = (kxdrproc_t) xdr_decode_bool, | ||
266 | .p_bufsiz = 4, | ||
267 | .p_count = 1, | ||
268 | }, | ||
269 | [PMAP_GETPORT] = { | ||
270 | .p_proc = PMAP_GETPORT, | ||
271 | .p_encode = (kxdrproc_t) xdr_encode_mapping, | ||
272 | .p_decode = (kxdrproc_t) xdr_decode_port, | ||
273 | .p_bufsiz = 4, | ||
274 | .p_count = 1, | ||
275 | }, | ||
276 | }; | ||
277 | |||
278 | static struct rpc_version pmap_version2 = { | ||
279 | .number = 2, | ||
280 | .nrprocs = 4, | ||
281 | .procs = pmap_procedures | ||
282 | }; | ||
283 | |||
284 | static struct rpc_version * pmap_version[] = { | ||
285 | NULL, | ||
286 | NULL, | ||
287 | &pmap_version2 | ||
288 | }; | ||
289 | |||
290 | static struct rpc_stat pmap_stats; | ||
291 | |||
292 | static struct rpc_program pmap_program = { | ||
293 | .name = "portmap", | ||
294 | .number = RPC_PMAP_PROGRAM, | ||
295 | .nrvers = ARRAY_SIZE(pmap_version), | ||
296 | .version = pmap_version, | ||
297 | .stats = &pmap_stats, | ||
298 | }; | ||
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c new file mode 100644 index 000000000000..554f224c0445 --- /dev/null +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -0,0 +1,838 @@ | |||
1 | /* | ||
2 | * net/sunrpc/rpc_pipe.c | ||
3 | * | ||
4 | * Userland/kernel interface for rpcauth_gss. | ||
5 | * Code shamelessly plagiarized from fs/nfsd/nfsctl.c | ||
6 | * and fs/driverfs/inode.c | ||
7 | * | ||
8 | * Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no> | ||
9 | * | ||
10 | */ | ||
11 | #include <linux/config.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/pagemap.h> | ||
16 | #include <linux/mount.h> | ||
17 | #include <linux/namei.h> | ||
18 | #include <linux/dnotify.h> | ||
19 | #include <linux/kernel.h> | ||
20 | |||
21 | #include <asm/ioctls.h> | ||
22 | #include <linux/fs.h> | ||
23 | #include <linux/poll.h> | ||
24 | #include <linux/wait.h> | ||
25 | #include <linux/seq_file.h> | ||
26 | |||
27 | #include <linux/sunrpc/clnt.h> | ||
28 | #include <linux/workqueue.h> | ||
29 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
30 | |||
31 | static struct vfsmount *rpc_mount; | ||
32 | static int rpc_mount_count; | ||
33 | |||
34 | static struct file_system_type rpc_pipe_fs_type; | ||
35 | |||
36 | |||
37 | static kmem_cache_t *rpc_inode_cachep; | ||
38 | |||
39 | #define RPC_UPCALL_TIMEOUT (30*HZ) | ||
40 | |||
41 | static void | ||
42 | __rpc_purge_upcall(struct inode *inode, int err) | ||
43 | { | ||
44 | struct rpc_inode *rpci = RPC_I(inode); | ||
45 | struct rpc_pipe_msg *msg; | ||
46 | |||
47 | while (!list_empty(&rpci->pipe)) { | ||
48 | msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list); | ||
49 | list_del_init(&msg->list); | ||
50 | msg->errno = err; | ||
51 | rpci->ops->destroy_msg(msg); | ||
52 | } | ||
53 | while (!list_empty(&rpci->in_upcall)) { | ||
54 | msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list); | ||
55 | list_del_init(&msg->list); | ||
56 | msg->errno = err; | ||
57 | rpci->ops->destroy_msg(msg); | ||
58 | } | ||
59 | rpci->pipelen = 0; | ||
60 | wake_up(&rpci->waitq); | ||
61 | } | ||
62 | |||
63 | static void | ||
64 | rpc_timeout_upcall_queue(void *data) | ||
65 | { | ||
66 | struct rpc_inode *rpci = (struct rpc_inode *)data; | ||
67 | struct inode *inode = &rpci->vfs_inode; | ||
68 | |||
69 | down(&inode->i_sem); | ||
70 | if (rpci->nreaders == 0 && !list_empty(&rpci->pipe)) | ||
71 | __rpc_purge_upcall(inode, -ETIMEDOUT); | ||
72 | up(&inode->i_sem); | ||
73 | } | ||
74 | |||
75 | int | ||
76 | rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) | ||
77 | { | ||
78 | struct rpc_inode *rpci = RPC_I(inode); | ||
79 | int res = 0; | ||
80 | |||
81 | down(&inode->i_sem); | ||
82 | if (rpci->nreaders) { | ||
83 | list_add_tail(&msg->list, &rpci->pipe); | ||
84 | rpci->pipelen += msg->len; | ||
85 | } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { | ||
86 | if (list_empty(&rpci->pipe)) | ||
87 | schedule_delayed_work(&rpci->queue_timeout, | ||
88 | RPC_UPCALL_TIMEOUT); | ||
89 | list_add_tail(&msg->list, &rpci->pipe); | ||
90 | rpci->pipelen += msg->len; | ||
91 | } else | ||
92 | res = -EPIPE; | ||
93 | up(&inode->i_sem); | ||
94 | wake_up(&rpci->waitq); | ||
95 | return res; | ||
96 | } | ||
97 | |||
98 | static void | ||
99 | rpc_close_pipes(struct inode *inode) | ||
100 | { | ||
101 | struct rpc_inode *rpci = RPC_I(inode); | ||
102 | |||
103 | cancel_delayed_work(&rpci->queue_timeout); | ||
104 | flush_scheduled_work(); | ||
105 | down(&inode->i_sem); | ||
106 | if (rpci->ops != NULL) { | ||
107 | rpci->nreaders = 0; | ||
108 | __rpc_purge_upcall(inode, -EPIPE); | ||
109 | rpci->nwriters = 0; | ||
110 | if (rpci->ops->release_pipe) | ||
111 | rpci->ops->release_pipe(inode); | ||
112 | rpci->ops = NULL; | ||
113 | } | ||
114 | up(&inode->i_sem); | ||
115 | } | ||
116 | |||
117 | static inline void | ||
118 | rpc_inode_setowner(struct inode *inode, void *private) | ||
119 | { | ||
120 | RPC_I(inode)->private = private; | ||
121 | } | ||
122 | |||
123 | static struct inode * | ||
124 | rpc_alloc_inode(struct super_block *sb) | ||
125 | { | ||
126 | struct rpc_inode *rpci; | ||
127 | rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL); | ||
128 | if (!rpci) | ||
129 | return NULL; | ||
130 | return &rpci->vfs_inode; | ||
131 | } | ||
132 | |||
133 | static void | ||
134 | rpc_destroy_inode(struct inode *inode) | ||
135 | { | ||
136 | kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); | ||
137 | } | ||
138 | |||
139 | static int | ||
140 | rpc_pipe_open(struct inode *inode, struct file *filp) | ||
141 | { | ||
142 | struct rpc_inode *rpci = RPC_I(inode); | ||
143 | int res = -ENXIO; | ||
144 | |||
145 | down(&inode->i_sem); | ||
146 | if (rpci->ops != NULL) { | ||
147 | if (filp->f_mode & FMODE_READ) | ||
148 | rpci->nreaders ++; | ||
149 | if (filp->f_mode & FMODE_WRITE) | ||
150 | rpci->nwriters ++; | ||
151 | res = 0; | ||
152 | } | ||
153 | up(&inode->i_sem); | ||
154 | return res; | ||
155 | } | ||
156 | |||
157 | static int | ||
158 | rpc_pipe_release(struct inode *inode, struct file *filp) | ||
159 | { | ||
160 | struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); | ||
161 | struct rpc_pipe_msg *msg; | ||
162 | |||
163 | down(&inode->i_sem); | ||
164 | if (rpci->ops == NULL) | ||
165 | goto out; | ||
166 | msg = (struct rpc_pipe_msg *)filp->private_data; | ||
167 | if (msg != NULL) { | ||
168 | msg->errno = -EPIPE; | ||
169 | list_del_init(&msg->list); | ||
170 | rpci->ops->destroy_msg(msg); | ||
171 | } | ||
172 | if (filp->f_mode & FMODE_WRITE) | ||
173 | rpci->nwriters --; | ||
174 | if (filp->f_mode & FMODE_READ) | ||
175 | rpci->nreaders --; | ||
176 | if (!rpci->nreaders) | ||
177 | __rpc_purge_upcall(inode, -EPIPE); | ||
178 | if (rpci->ops->release_pipe) | ||
179 | rpci->ops->release_pipe(inode); | ||
180 | out: | ||
181 | up(&inode->i_sem); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | static ssize_t | ||
186 | rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) | ||
187 | { | ||
188 | struct inode *inode = filp->f_dentry->d_inode; | ||
189 | struct rpc_inode *rpci = RPC_I(inode); | ||
190 | struct rpc_pipe_msg *msg; | ||
191 | int res = 0; | ||
192 | |||
193 | down(&inode->i_sem); | ||
194 | if (rpci->ops == NULL) { | ||
195 | res = -EPIPE; | ||
196 | goto out_unlock; | ||
197 | } | ||
198 | msg = filp->private_data; | ||
199 | if (msg == NULL) { | ||
200 | if (!list_empty(&rpci->pipe)) { | ||
201 | msg = list_entry(rpci->pipe.next, | ||
202 | struct rpc_pipe_msg, | ||
203 | list); | ||
204 | list_move(&msg->list, &rpci->in_upcall); | ||
205 | rpci->pipelen -= msg->len; | ||
206 | filp->private_data = msg; | ||
207 | msg->copied = 0; | ||
208 | } | ||
209 | if (msg == NULL) | ||
210 | goto out_unlock; | ||
211 | } | ||
212 | /* NOTE: it is up to the callback to update msg->copied */ | ||
213 | res = rpci->ops->upcall(filp, msg, buf, len); | ||
214 | if (res < 0 || msg->len == msg->copied) { | ||
215 | filp->private_data = NULL; | ||
216 | list_del_init(&msg->list); | ||
217 | rpci->ops->destroy_msg(msg); | ||
218 | } | ||
219 | out_unlock: | ||
220 | up(&inode->i_sem); | ||
221 | return res; | ||
222 | } | ||
223 | |||
224 | static ssize_t | ||
225 | rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset) | ||
226 | { | ||
227 | struct inode *inode = filp->f_dentry->d_inode; | ||
228 | struct rpc_inode *rpci = RPC_I(inode); | ||
229 | int res; | ||
230 | |||
231 | down(&inode->i_sem); | ||
232 | res = -EPIPE; | ||
233 | if (rpci->ops != NULL) | ||
234 | res = rpci->ops->downcall(filp, buf, len); | ||
235 | up(&inode->i_sem); | ||
236 | return res; | ||
237 | } | ||
238 | |||
239 | static unsigned int | ||
240 | rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) | ||
241 | { | ||
242 | struct rpc_inode *rpci; | ||
243 | unsigned int mask = 0; | ||
244 | |||
245 | rpci = RPC_I(filp->f_dentry->d_inode); | ||
246 | poll_wait(filp, &rpci->waitq, wait); | ||
247 | |||
248 | mask = POLLOUT | POLLWRNORM; | ||
249 | if (rpci->ops == NULL) | ||
250 | mask |= POLLERR | POLLHUP; | ||
251 | if (!list_empty(&rpci->pipe)) | ||
252 | mask |= POLLIN | POLLRDNORM; | ||
253 | return mask; | ||
254 | } | ||
255 | |||
256 | static int | ||
257 | rpc_pipe_ioctl(struct inode *ino, struct file *filp, | ||
258 | unsigned int cmd, unsigned long arg) | ||
259 | { | ||
260 | struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); | ||
261 | int len; | ||
262 | |||
263 | switch (cmd) { | ||
264 | case FIONREAD: | ||
265 | if (rpci->ops == NULL) | ||
266 | return -EPIPE; | ||
267 | len = rpci->pipelen; | ||
268 | if (filp->private_data) { | ||
269 | struct rpc_pipe_msg *msg; | ||
270 | msg = (struct rpc_pipe_msg *)filp->private_data; | ||
271 | len += msg->len - msg->copied; | ||
272 | } | ||
273 | return put_user(len, (int __user *)arg); | ||
274 | default: | ||
275 | return -EINVAL; | ||
276 | } | ||
277 | } | ||
278 | |||
279 | static struct file_operations rpc_pipe_fops = { | ||
280 | .owner = THIS_MODULE, | ||
281 | .llseek = no_llseek, | ||
282 | .read = rpc_pipe_read, | ||
283 | .write = rpc_pipe_write, | ||
284 | .poll = rpc_pipe_poll, | ||
285 | .ioctl = rpc_pipe_ioctl, | ||
286 | .open = rpc_pipe_open, | ||
287 | .release = rpc_pipe_release, | ||
288 | }; | ||
289 | |||
290 | static int | ||
291 | rpc_show_info(struct seq_file *m, void *v) | ||
292 | { | ||
293 | struct rpc_clnt *clnt = m->private; | ||
294 | |||
295 | seq_printf(m, "RPC server: %s\n", clnt->cl_server); | ||
296 | seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, | ||
297 | clnt->cl_prog, clnt->cl_vers); | ||
298 | seq_printf(m, "address: %u.%u.%u.%u\n", | ||
299 | NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); | ||
300 | seq_printf(m, "protocol: %s\n", | ||
301 | clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); | ||
302 | return 0; | ||
303 | } | ||
304 | |||
305 | static int | ||
306 | rpc_info_open(struct inode *inode, struct file *file) | ||
307 | { | ||
308 | struct rpc_clnt *clnt; | ||
309 | int ret = single_open(file, rpc_show_info, NULL); | ||
310 | |||
311 | if (!ret) { | ||
312 | struct seq_file *m = file->private_data; | ||
313 | down(&inode->i_sem); | ||
314 | clnt = RPC_I(inode)->private; | ||
315 | if (clnt) { | ||
316 | atomic_inc(&clnt->cl_users); | ||
317 | m->private = clnt; | ||
318 | } else { | ||
319 | single_release(inode, file); | ||
320 | ret = -EINVAL; | ||
321 | } | ||
322 | up(&inode->i_sem); | ||
323 | } | ||
324 | return ret; | ||
325 | } | ||
326 | |||
327 | static int | ||
328 | rpc_info_release(struct inode *inode, struct file *file) | ||
329 | { | ||
330 | struct seq_file *m = file->private_data; | ||
331 | struct rpc_clnt *clnt = (struct rpc_clnt *)m->private; | ||
332 | |||
333 | if (clnt) | ||
334 | rpc_release_client(clnt); | ||
335 | return single_release(inode, file); | ||
336 | } | ||
337 | |||
338 | static struct file_operations rpc_info_operations = { | ||
339 | .owner = THIS_MODULE, | ||
340 | .open = rpc_info_open, | ||
341 | .read = seq_read, | ||
342 | .llseek = seq_lseek, | ||
343 | .release = rpc_info_release, | ||
344 | }; | ||
345 | |||
346 | |||
/*
 * Indices into files[] for the fixed top-level directories.
 * rpc_populate() also uses the index as the inode number.
 * RPCAUTH_Root has no files[] entry of its own: rpc_fill_super()
 * populates from RPCAUTH_Root + 1 up to, but excluding, RPCAUTH_RootEOF.
 */
enum {
	RPCAUTH_Root = 1,
	RPCAUTH_lockd,		/* "lockd" */
	RPCAUTH_mount,		/* "mount" */
	RPCAUTH_nfs,		/* "nfs" */
	RPCAUTH_portmap,	/* "portmap" */
	RPCAUTH_statd,		/* "statd" */
	RPCAUTH_RootEOF		/* end marker */
};
359 | |||
/*
 * Description of fs contents: one entry per node that rpc_populate()
 * creates.
 */
struct rpc_filelist {
	char *name;			/* dentry name */
	struct file_operations *i_fop;	/* replaces the default i_fop when non-NULL */
	int mode;			/* inode mode, including the file type bits */
};
368 | |||
369 | static struct rpc_filelist files[] = { | ||
370 | [RPCAUTH_lockd] = { | ||
371 | .name = "lockd", | ||
372 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, | ||
373 | }, | ||
374 | [RPCAUTH_mount] = { | ||
375 | .name = "mount", | ||
376 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, | ||
377 | }, | ||
378 | [RPCAUTH_nfs] = { | ||
379 | .name = "nfs", | ||
380 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, | ||
381 | }, | ||
382 | [RPCAUTH_portmap] = { | ||
383 | .name = "portmap", | ||
384 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, | ||
385 | }, | ||
386 | [RPCAUTH_statd] = { | ||
387 | .name = "statd", | ||
388 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, | ||
389 | }, | ||
390 | }; | ||
391 | |||
/* Indices into authfiles[]; rpc_mkdir() populates [RPCAUTH_info, RPCAUTH_EOF). */
enum {
	RPCAUTH_info = 2,	/* "info" */
	RPCAUTH_EOF		/* end marker */
};
396 | |||
397 | static struct rpc_filelist authfiles[] = { | ||
398 | [RPCAUTH_info] = { | ||
399 | .name = "info", | ||
400 | .i_fop = &rpc_info_operations, | ||
401 | .mode = S_IFREG | S_IRUSR, | ||
402 | }, | ||
403 | }; | ||
404 | |||
405 | static int | ||
406 | rpc_get_mount(void) | ||
407 | { | ||
408 | return simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count); | ||
409 | } | ||
410 | |||
411 | static void | ||
412 | rpc_put_mount(void) | ||
413 | { | ||
414 | simple_release_fs(&rpc_mount, &rpc_mount_count); | ||
415 | } | ||
416 | |||
417 | static int | ||
418 | rpc_lookup_parent(char *path, struct nameidata *nd) | ||
419 | { | ||
420 | if (path[0] == '\0') | ||
421 | return -ENOENT; | ||
422 | if (rpc_get_mount()) { | ||
423 | printk(KERN_WARNING "%s: %s failed to mount " | ||
424 | "pseudofilesystem \n", __FILE__, __FUNCTION__); | ||
425 | return -ENODEV; | ||
426 | } | ||
427 | nd->mnt = mntget(rpc_mount); | ||
428 | nd->dentry = dget(rpc_mount->mnt_root); | ||
429 | nd->last_type = LAST_ROOT; | ||
430 | nd->flags = LOOKUP_PARENT; | ||
431 | nd->depth = 0; | ||
432 | |||
433 | if (path_walk(path, nd)) { | ||
434 | printk(KERN_WARNING "%s: %s failed to find path %s\n", | ||
435 | __FILE__, __FUNCTION__, path); | ||
436 | rpc_put_mount(); | ||
437 | return -ENOENT; | ||
438 | } | ||
439 | return 0; | ||
440 | } | ||
441 | |||
/*
 * Undo a successful rpc_lookup_parent(): release the path held in @nd
 * and drop the mount pin.
 */
static void rpc_release_path(struct nameidata *nd)
{
	path_release(nd);
	rpc_put_mount();
}
448 | |||
449 | static struct inode * | ||
450 | rpc_get_inode(struct super_block *sb, int mode) | ||
451 | { | ||
452 | struct inode *inode = new_inode(sb); | ||
453 | if (!inode) | ||
454 | return NULL; | ||
455 | inode->i_mode = mode; | ||
456 | inode->i_uid = inode->i_gid = 0; | ||
457 | inode->i_blksize = PAGE_CACHE_SIZE; | ||
458 | inode->i_blocks = 0; | ||
459 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
460 | switch(mode & S_IFMT) { | ||
461 | case S_IFDIR: | ||
462 | inode->i_fop = &simple_dir_operations; | ||
463 | inode->i_op = &simple_dir_inode_operations; | ||
464 | inode->i_nlink++; | ||
465 | default: | ||
466 | break; | ||
467 | } | ||
468 | return inode; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * FIXME: This probably has races. | ||
473 | */ | ||
474 | static void | ||
475 | rpc_depopulate(struct dentry *parent) | ||
476 | { | ||
477 | struct inode *dir = parent->d_inode; | ||
478 | struct list_head *pos, *next; | ||
479 | struct dentry *dentry, *dvec[10]; | ||
480 | int n = 0; | ||
481 | |||
482 | down(&dir->i_sem); | ||
483 | repeat: | ||
484 | spin_lock(&dcache_lock); | ||
485 | list_for_each_safe(pos, next, &parent->d_subdirs) { | ||
486 | dentry = list_entry(pos, struct dentry, d_child); | ||
487 | spin_lock(&dentry->d_lock); | ||
488 | if (!d_unhashed(dentry)) { | ||
489 | dget_locked(dentry); | ||
490 | __d_drop(dentry); | ||
491 | spin_unlock(&dentry->d_lock); | ||
492 | dvec[n++] = dentry; | ||
493 | if (n == ARRAY_SIZE(dvec)) | ||
494 | break; | ||
495 | } else | ||
496 | spin_unlock(&dentry->d_lock); | ||
497 | } | ||
498 | spin_unlock(&dcache_lock); | ||
499 | if (n) { | ||
500 | do { | ||
501 | dentry = dvec[--n]; | ||
502 | if (dentry->d_inode) { | ||
503 | rpc_close_pipes(dentry->d_inode); | ||
504 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
505 | simple_unlink(dir, dentry); | ||
506 | } | ||
507 | dput(dentry); | ||
508 | } while (n); | ||
509 | goto repeat; | ||
510 | } | ||
511 | up(&dir->i_sem); | ||
512 | } | ||
513 | |||
514 | static int | ||
515 | rpc_populate(struct dentry *parent, | ||
516 | struct rpc_filelist *files, | ||
517 | int start, int eof) | ||
518 | { | ||
519 | struct inode *inode, *dir = parent->d_inode; | ||
520 | void *private = RPC_I(dir)->private; | ||
521 | struct dentry *dentry; | ||
522 | int mode, i; | ||
523 | |||
524 | down(&dir->i_sem); | ||
525 | for (i = start; i < eof; i++) { | ||
526 | dentry = d_alloc_name(parent, files[i].name); | ||
527 | if (!dentry) | ||
528 | goto out_bad; | ||
529 | mode = files[i].mode; | ||
530 | inode = rpc_get_inode(dir->i_sb, mode); | ||
531 | if (!inode) { | ||
532 | dput(dentry); | ||
533 | goto out_bad; | ||
534 | } | ||
535 | inode->i_ino = i; | ||
536 | if (files[i].i_fop) | ||
537 | inode->i_fop = files[i].i_fop; | ||
538 | if (private) | ||
539 | rpc_inode_setowner(inode, private); | ||
540 | if (S_ISDIR(mode)) | ||
541 | dir->i_nlink++; | ||
542 | d_add(dentry, inode); | ||
543 | } | ||
544 | up(&dir->i_sem); | ||
545 | return 0; | ||
546 | out_bad: | ||
547 | up(&dir->i_sem); | ||
548 | printk(KERN_WARNING "%s: %s failed to populate directory %s\n", | ||
549 | __FILE__, __FUNCTION__, parent->d_name.name); | ||
550 | return -ENOMEM; | ||
551 | } | ||
552 | |||
553 | static int | ||
554 | __rpc_mkdir(struct inode *dir, struct dentry *dentry) | ||
555 | { | ||
556 | struct inode *inode; | ||
557 | |||
558 | inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR); | ||
559 | if (!inode) | ||
560 | goto out_err; | ||
561 | inode->i_ino = iunique(dir->i_sb, 100); | ||
562 | d_instantiate(dentry, inode); | ||
563 | dir->i_nlink++; | ||
564 | inode_dir_notify(dir, DN_CREATE); | ||
565 | rpc_get_mount(); | ||
566 | return 0; | ||
567 | out_err: | ||
568 | printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", | ||
569 | __FILE__, __FUNCTION__, dentry->d_name.name); | ||
570 | return -ENOMEM; | ||
571 | } | ||
572 | |||
573 | static int | ||
574 | __rpc_rmdir(struct inode *dir, struct dentry *dentry) | ||
575 | { | ||
576 | int error; | ||
577 | |||
578 | shrink_dcache_parent(dentry); | ||
579 | if (dentry->d_inode) { | ||
580 | rpc_close_pipes(dentry->d_inode); | ||
581 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
582 | } | ||
583 | if ((error = simple_rmdir(dir, dentry)) != 0) | ||
584 | return error; | ||
585 | if (!error) { | ||
586 | inode_dir_notify(dir, DN_DELETE); | ||
587 | d_drop(dentry); | ||
588 | rpc_put_mount(); | ||
589 | } | ||
590 | return 0; | ||
591 | } | ||
592 | |||
593 | static struct dentry * | ||
594 | rpc_lookup_negative(char *path, struct nameidata *nd) | ||
595 | { | ||
596 | struct dentry *dentry; | ||
597 | struct inode *dir; | ||
598 | int error; | ||
599 | |||
600 | if ((error = rpc_lookup_parent(path, nd)) != 0) | ||
601 | return ERR_PTR(error); | ||
602 | dir = nd->dentry->d_inode; | ||
603 | down(&dir->i_sem); | ||
604 | dentry = lookup_hash(&nd->last, nd->dentry); | ||
605 | if (IS_ERR(dentry)) | ||
606 | goto out_err; | ||
607 | if (dentry->d_inode) { | ||
608 | dput(dentry); | ||
609 | dentry = ERR_PTR(-EEXIST); | ||
610 | goto out_err; | ||
611 | } | ||
612 | return dentry; | ||
613 | out_err: | ||
614 | up(&dir->i_sem); | ||
615 | rpc_release_path(nd); | ||
616 | return dentry; | ||
617 | } | ||
618 | |||
619 | |||
620 | struct dentry * | ||
621 | rpc_mkdir(char *path, struct rpc_clnt *rpc_client) | ||
622 | { | ||
623 | struct nameidata nd; | ||
624 | struct dentry *dentry; | ||
625 | struct inode *dir; | ||
626 | int error; | ||
627 | |||
628 | dentry = rpc_lookup_negative(path, &nd); | ||
629 | if (IS_ERR(dentry)) | ||
630 | return dentry; | ||
631 | dir = nd.dentry->d_inode; | ||
632 | if ((error = __rpc_mkdir(dir, dentry)) != 0) | ||
633 | goto err_dput; | ||
634 | RPC_I(dentry->d_inode)->private = rpc_client; | ||
635 | error = rpc_populate(dentry, authfiles, | ||
636 | RPCAUTH_info, RPCAUTH_EOF); | ||
637 | if (error) | ||
638 | goto err_depopulate; | ||
639 | out: | ||
640 | up(&dir->i_sem); | ||
641 | rpc_release_path(&nd); | ||
642 | return dentry; | ||
643 | err_depopulate: | ||
644 | rpc_depopulate(dentry); | ||
645 | __rpc_rmdir(dir, dentry); | ||
646 | err_dput: | ||
647 | dput(dentry); | ||
648 | printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n", | ||
649 | __FILE__, __FUNCTION__, path, error); | ||
650 | dentry = ERR_PTR(error); | ||
651 | goto out; | ||
652 | } | ||
653 | |||
654 | int | ||
655 | rpc_rmdir(char *path) | ||
656 | { | ||
657 | struct nameidata nd; | ||
658 | struct dentry *dentry; | ||
659 | struct inode *dir; | ||
660 | int error; | ||
661 | |||
662 | if ((error = rpc_lookup_parent(path, &nd)) != 0) | ||
663 | return error; | ||
664 | dir = nd.dentry->d_inode; | ||
665 | down(&dir->i_sem); | ||
666 | dentry = lookup_hash(&nd.last, nd.dentry); | ||
667 | if (IS_ERR(dentry)) { | ||
668 | error = PTR_ERR(dentry); | ||
669 | goto out_release; | ||
670 | } | ||
671 | rpc_depopulate(dentry); | ||
672 | error = __rpc_rmdir(dir, dentry); | ||
673 | dput(dentry); | ||
674 | out_release: | ||
675 | up(&dir->i_sem); | ||
676 | rpc_release_path(&nd); | ||
677 | return error; | ||
678 | } | ||
679 | |||
680 | struct dentry * | ||
681 | rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) | ||
682 | { | ||
683 | struct nameidata nd; | ||
684 | struct dentry *dentry; | ||
685 | struct inode *dir, *inode; | ||
686 | struct rpc_inode *rpci; | ||
687 | |||
688 | dentry = rpc_lookup_negative(path, &nd); | ||
689 | if (IS_ERR(dentry)) | ||
690 | return dentry; | ||
691 | dir = nd.dentry->d_inode; | ||
692 | inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR); | ||
693 | if (!inode) | ||
694 | goto err_dput; | ||
695 | inode->i_ino = iunique(dir->i_sb, 100); | ||
696 | inode->i_fop = &rpc_pipe_fops; | ||
697 | d_instantiate(dentry, inode); | ||
698 | rpci = RPC_I(inode); | ||
699 | rpci->private = private; | ||
700 | rpci->flags = flags; | ||
701 | rpci->ops = ops; | ||
702 | inode_dir_notify(dir, DN_CREATE); | ||
703 | out: | ||
704 | up(&dir->i_sem); | ||
705 | rpc_release_path(&nd); | ||
706 | return dentry; | ||
707 | err_dput: | ||
708 | dput(dentry); | ||
709 | dentry = ERR_PTR(-ENOMEM); | ||
710 | printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n", | ||
711 | __FILE__, __FUNCTION__, path, -ENOMEM); | ||
712 | goto out; | ||
713 | } | ||
714 | |||
715 | int | ||
716 | rpc_unlink(char *path) | ||
717 | { | ||
718 | struct nameidata nd; | ||
719 | struct dentry *dentry; | ||
720 | struct inode *dir; | ||
721 | int error; | ||
722 | |||
723 | if ((error = rpc_lookup_parent(path, &nd)) != 0) | ||
724 | return error; | ||
725 | dir = nd.dentry->d_inode; | ||
726 | down(&dir->i_sem); | ||
727 | dentry = lookup_hash(&nd.last, nd.dentry); | ||
728 | if (IS_ERR(dentry)) { | ||
729 | error = PTR_ERR(dentry); | ||
730 | goto out_release; | ||
731 | } | ||
732 | d_drop(dentry); | ||
733 | if (dentry->d_inode) { | ||
734 | rpc_close_pipes(dentry->d_inode); | ||
735 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
736 | error = simple_unlink(dir, dentry); | ||
737 | } | ||
738 | dput(dentry); | ||
739 | inode_dir_notify(dir, DN_DELETE); | ||
740 | out_release: | ||
741 | up(&dir->i_sem); | ||
742 | rpc_release_path(&nd); | ||
743 | return error; | ||
744 | } | ||
745 | |||
746 | /* | ||
747 | * populate the filesystem | ||
748 | */ | ||
749 | static struct super_operations s_ops = { | ||
750 | .alloc_inode = rpc_alloc_inode, | ||
751 | .destroy_inode = rpc_destroy_inode, | ||
752 | .statfs = simple_statfs, | ||
753 | }; | ||
754 | |||
755 | #define RPCAUTH_GSSMAGIC 0x67596969 | ||
756 | |||
757 | static int | ||
758 | rpc_fill_super(struct super_block *sb, void *data, int silent) | ||
759 | { | ||
760 | struct inode *inode; | ||
761 | struct dentry *root; | ||
762 | |||
763 | sb->s_blocksize = PAGE_CACHE_SIZE; | ||
764 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | ||
765 | sb->s_magic = RPCAUTH_GSSMAGIC; | ||
766 | sb->s_op = &s_ops; | ||
767 | sb->s_time_gran = 1; | ||
768 | |||
769 | inode = rpc_get_inode(sb, S_IFDIR | 0755); | ||
770 | if (!inode) | ||
771 | return -ENOMEM; | ||
772 | root = d_alloc_root(inode); | ||
773 | if (!root) { | ||
774 | iput(inode); | ||
775 | return -ENOMEM; | ||
776 | } | ||
777 | if (rpc_populate(root, files, RPCAUTH_Root + 1, RPCAUTH_RootEOF)) | ||
778 | goto out; | ||
779 | sb->s_root = root; | ||
780 | return 0; | ||
781 | out: | ||
782 | d_genocide(root); | ||
783 | dput(root); | ||
784 | return -ENOMEM; | ||
785 | } | ||
786 | |||
787 | static struct super_block * | ||
788 | rpc_get_sb(struct file_system_type *fs_type, | ||
789 | int flags, const char *dev_name, void *data) | ||
790 | { | ||
791 | return get_sb_single(fs_type, flags, data, rpc_fill_super); | ||
792 | } | ||
793 | |||
794 | static struct file_system_type rpc_pipe_fs_type = { | ||
795 | .owner = THIS_MODULE, | ||
796 | .name = "rpc_pipefs", | ||
797 | .get_sb = rpc_get_sb, | ||
798 | .kill_sb = kill_litter_super, | ||
799 | }; | ||
800 | |||
801 | static void | ||
802 | init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) | ||
803 | { | ||
804 | struct rpc_inode *rpci = (struct rpc_inode *) foo; | ||
805 | |||
806 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
807 | SLAB_CTOR_CONSTRUCTOR) { | ||
808 | inode_init_once(&rpci->vfs_inode); | ||
809 | rpci->private = NULL; | ||
810 | rpci->nreaders = 0; | ||
811 | rpci->nwriters = 0; | ||
812 | INIT_LIST_HEAD(&rpci->in_upcall); | ||
813 | INIT_LIST_HEAD(&rpci->pipe); | ||
814 | rpci->pipelen = 0; | ||
815 | init_waitqueue_head(&rpci->waitq); | ||
816 | INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci); | ||
817 | rpci->ops = NULL; | ||
818 | } | ||
819 | } | ||
820 | |||
821 | int register_rpc_pipefs(void) | ||
822 | { | ||
823 | rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", | ||
824 | sizeof(struct rpc_inode), | ||
825 | 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, | ||
826 | init_once, NULL); | ||
827 | if (!rpc_inode_cachep) | ||
828 | return -ENOMEM; | ||
829 | register_filesystem(&rpc_pipe_fs_type); | ||
830 | return 0; | ||
831 | } | ||
832 | |||
833 | void unregister_rpc_pipefs(void) | ||
834 | { | ||
835 | if (kmem_cache_destroy(rpc_inode_cachep)) | ||
836 | printk(KERN_WARNING "RPC: unable to free inode cache\n"); | ||
837 | unregister_filesystem(&rpc_pipe_fs_type); | ||
838 | } | ||
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c new file mode 100644 index 000000000000..c06614d0e31d --- /dev/null +++ b/net/sunrpc/sched.c | |||
@@ -0,0 +1,1119 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/sched.c | ||
3 | * | ||
4 | * Scheduling for synchronous and asynchronous RPC requests. | ||
5 | * | ||
6 | * Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de> | ||
7 | * | ||
8 | * TCP NFS related read + write fixes | ||
9 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | |||
14 | #include <linux/sched.h> | ||
15 | #include <linux/interrupt.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/mempool.h> | ||
18 | #include <linux/smp.h> | ||
19 | #include <linux/smp_lock.h> | ||
20 | #include <linux/spinlock.h> | ||
21 | |||
22 | #include <linux/sunrpc/clnt.h> | ||
23 | #include <linux/sunrpc/xprt.h> | ||
24 | |||
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY		RPCDBG_SCHED
/* Stored in tk_magic by rpc_init_task() and checked with BUG_ON() later */
#define RPC_TASK_MAGIC_ID	0xf00baa
/* Source of tk_pid values; post-incremented by rpc_init_task() */
static int			rpc_task_id;
#endif

/*
 * RPC slabs and memory pools
 *
 * RPC_BUFFER_MAXSIZE is the threshold used by rpc_malloc(): requests up
 * to this size are served from rpc_buffer_mempool, larger ones by
 * kmalloc().
 * NOTE(review): the two *_POOLSIZE constants are not referenced in this
 * part of the file; presumably they size the mempools at creation time.
 */
#define RPC_BUFFER_MAXSIZE	(2048)
#define RPC_BUFFER_POOLSIZE	(8)
#define RPC_TASK_POOLSIZE	(8)
static kmem_cache_t	*rpc_task_slabp;
static kmem_cache_t	*rpc_buffer_slabp;
static mempool_t	*rpc_task_mempool;
static mempool_t	*rpc_buffer_mempool;

/* Forward declarations for functions defined further down in this file */
static void			__rpc_default_timer(struct rpc_task *task);
static void			rpciod_killall(void);
static void			rpc_free(struct rpc_task *task);

static void			rpc_async_schedule(void *);

/*
 * RPC tasks that create another task (e.g. for contacting the portmapper)
 * will wait on this queue for their child's completion
 */
static RPC_WAITQ(childq, "childq");

/*
 * RPC tasks sit here while waiting for conditions to improve.
 */
static RPC_WAITQ(delay_queue, "delayq");

/*
 * All RPC tasks are linked into this list
 * (added by rpc_init_task(), removed by rpc_release_task(), both under
 * rpc_sched_lock).
 */
static LIST_HEAD(all_tasks);

/*
 * rpciod-related stuff
 *
 * rpciod_up() modifies rpciod_users while holding rpciod_sema.
 */
static DECLARE_MUTEX(rpciod_sema);
static unsigned int		rpciod_users;
static struct workqueue_struct *rpciod_workqueue;

/*
 * Spinlock for other critical sections of code.
 */
static DEFINE_SPINLOCK(rpc_sched_lock);
75 | |||
/*
 * Disable the timer for a given RPC task. Should be called with
 * queue->lock and bh_disabled in order to avoid races within
 * rpc_run_timer().
 *
 * Only the handler and the interval are cleared here; the kernel timer
 * itself is not deleted.  rpc_run_timer() finds tk_timeout_fn == NULL
 * and therefore calls nothing when the timer eventually fires.
 */
static inline void
__rpc_disable_timer(struct rpc_task *task)
{
	dprintk("RPC: %4d disabling timer\n", task->tk_pid);
	task->tk_timeout_fn = NULL;
	task->tk_timeout = 0;
}
88 | |||
89 | /* | ||
90 | * Run a timeout function. | ||
91 | * We use the callback in order to allow __rpc_wake_up_task() | ||
92 | * and friends to disable the timer synchronously on SMP systems | ||
93 | * without calling del_timer_sync(). The latter could cause a | ||
94 | * deadlock if called while we're holding spinlocks... | ||
95 | */ | ||
96 | static void rpc_run_timer(struct rpc_task *task) | ||
97 | { | ||
98 | void (*callback)(struct rpc_task *); | ||
99 | |||
100 | callback = task->tk_timeout_fn; | ||
101 | task->tk_timeout_fn = NULL; | ||
102 | if (callback && RPC_IS_QUEUED(task)) { | ||
103 | dprintk("RPC: %4d running timer\n", task->tk_pid); | ||
104 | callback(task); | ||
105 | } | ||
106 | smp_mb__before_clear_bit(); | ||
107 | clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); | ||
108 | smp_mb__after_clear_bit(); | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * Set up a timer for the current task. | ||
113 | */ | ||
114 | static inline void | ||
115 | __rpc_add_timer(struct rpc_task *task, rpc_action timer) | ||
116 | { | ||
117 | if (!task->tk_timeout) | ||
118 | return; | ||
119 | |||
120 | dprintk("RPC: %4d setting alarm for %lu ms\n", | ||
121 | task->tk_pid, task->tk_timeout * 1000 / HZ); | ||
122 | |||
123 | if (timer) | ||
124 | task->tk_timeout_fn = timer; | ||
125 | else | ||
126 | task->tk_timeout_fn = __rpc_default_timer; | ||
127 | set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); | ||
128 | mod_timer(&task->tk_timer, jiffies + task->tk_timeout); | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Delete any timer for the current task. Because we use del_timer_sync(), | ||
133 | * this function should never be called while holding queue->lock. | ||
134 | */ | ||
135 | static void | ||
136 | rpc_delete_timer(struct rpc_task *task) | ||
137 | { | ||
138 | if (RPC_IS_QUEUED(task)) | ||
139 | return; | ||
140 | if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { | ||
141 | del_singleshot_timer_sync(&task->tk_timer); | ||
142 | dprintk("RPC: %4d deleting timer\n", task->tk_pid); | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * Add new request to a priority queue. | ||
148 | */ | ||
149 | static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task) | ||
150 | { | ||
151 | struct list_head *q; | ||
152 | struct rpc_task *t; | ||
153 | |||
154 | INIT_LIST_HEAD(&task->u.tk_wait.links); | ||
155 | q = &queue->tasks[task->tk_priority]; | ||
156 | if (unlikely(task->tk_priority > queue->maxpriority)) | ||
157 | q = &queue->tasks[queue->maxpriority]; | ||
158 | list_for_each_entry(t, q, u.tk_wait.list) { | ||
159 | if (t->tk_cookie == task->tk_cookie) { | ||
160 | list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); | ||
161 | return; | ||
162 | } | ||
163 | } | ||
164 | list_add_tail(&task->u.tk_wait.list, q); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * Add new request to wait queue. | ||
169 | * | ||
170 | * Swapper tasks always get inserted at the head of the queue. | ||
171 | * This should avoid many nasty memory deadlocks and hopefully | ||
172 | * improve overall performance. | ||
173 | * Everyone else gets appended to the queue to ensure proper FIFO behavior. | ||
174 | */ | ||
175 | static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) | ||
176 | { | ||
177 | BUG_ON (RPC_IS_QUEUED(task)); | ||
178 | |||
179 | if (RPC_IS_PRIORITY(queue)) | ||
180 | __rpc_add_wait_queue_priority(queue, task); | ||
181 | else if (RPC_IS_SWAPPER(task)) | ||
182 | list_add(&task->u.tk_wait.list, &queue->tasks[0]); | ||
183 | else | ||
184 | list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); | ||
185 | task->u.tk_wait.rpc_waitq = queue; | ||
186 | rpc_set_queued(task); | ||
187 | |||
188 | dprintk("RPC: %4d added to queue %p \"%s\"\n", | ||
189 | task->tk_pid, queue, rpc_qname(queue)); | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * Remove request from a priority queue. | ||
194 | */ | ||
195 | static void __rpc_remove_wait_queue_priority(struct rpc_task *task) | ||
196 | { | ||
197 | struct rpc_task *t; | ||
198 | |||
199 | if (!list_empty(&task->u.tk_wait.links)) { | ||
200 | t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); | ||
201 | list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); | ||
202 | list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); | ||
203 | } | ||
204 | list_del(&task->u.tk_wait.list); | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * Remove request from queue. | ||
209 | * Note: must be called with spin lock held. | ||
210 | */ | ||
211 | static void __rpc_remove_wait_queue(struct rpc_task *task) | ||
212 | { | ||
213 | struct rpc_wait_queue *queue; | ||
214 | queue = task->u.tk_wait.rpc_waitq; | ||
215 | |||
216 | if (RPC_IS_PRIORITY(queue)) | ||
217 | __rpc_remove_wait_queue_priority(task); | ||
218 | else | ||
219 | list_del(&task->u.tk_wait.list); | ||
220 | dprintk("RPC: %4d removed from queue %p \"%s\"\n", | ||
221 | task->tk_pid, queue, rpc_qname(queue)); | ||
222 | } | ||
223 | |||
224 | static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) | ||
225 | { | ||
226 | queue->priority = priority; | ||
227 | queue->count = 1 << (priority * 2); | ||
228 | } | ||
229 | |||
230 | static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie) | ||
231 | { | ||
232 | queue->cookie = cookie; | ||
233 | queue->nr = RPC_BATCH_COUNT; | ||
234 | } | ||
235 | |||
/*
 * Return the queue to its initial servicing state: current priority set
 * to queue->maxpriority and current cookie set to 0, with both
 * countdowns reloaded by the helpers.
 */
static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
{
	rpc_set_waitqueue_priority(queue, queue->maxpriority);
	rpc_set_waitqueue_cookie(queue, 0);
}
241 | |||
242 | static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio) | ||
243 | { | ||
244 | int i; | ||
245 | |||
246 | spin_lock_init(&queue->lock); | ||
247 | for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) | ||
248 | INIT_LIST_HEAD(&queue->tasks[i]); | ||
249 | queue->maxpriority = maxprio; | ||
250 | rpc_reset_waitqueue_priority(queue); | ||
251 | #ifdef RPC_DEBUG | ||
252 | queue->name = qname; | ||
253 | #endif | ||
254 | } | ||
255 | |||
/*
 * Initialise @queue with RPC_PRIORITY_HIGH as its maximum priority.
 * @qname is the queue's debug name.
 */
void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
	__rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH);
}
260 | |||
/*
 * Initialise @queue with a maximum priority of 0.
 * @qname is the queue's debug name.
 */
void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
	__rpc_init_priority_wait_queue(queue, qname, 0);
}
EXPORT_SYMBOL(rpc_init_wait_queue);
266 | |||
267 | /* | ||
268 | * Make an RPC task runnable. | ||
269 | * | ||
270 | * Note: If the task is ASYNC, this must be called with | ||
271 | * the spinlock held to protect the wait queue operation. | ||
272 | */ | ||
273 | static void rpc_make_runnable(struct rpc_task *task) | ||
274 | { | ||
275 | int do_ret; | ||
276 | |||
277 | BUG_ON(task->tk_timeout_fn); | ||
278 | do_ret = rpc_test_and_set_running(task); | ||
279 | rpc_clear_queued(task); | ||
280 | if (do_ret) | ||
281 | return; | ||
282 | if (RPC_IS_ASYNC(task)) { | ||
283 | int status; | ||
284 | |||
285 | INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task); | ||
286 | status = queue_work(task->tk_workqueue, &task->u.tk_work); | ||
287 | if (status < 0) { | ||
288 | printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); | ||
289 | task->tk_status = status; | ||
290 | return; | ||
291 | } | ||
292 | } else | ||
293 | wake_up(&task->u.tk_wait.waitq); | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * Place a newly initialized task on the workqueue. | ||
298 | */ | ||
299 | static inline void | ||
300 | rpc_schedule_run(struct rpc_task *task) | ||
301 | { | ||
302 | /* Don't run a child twice! */ | ||
303 | if (RPC_IS_ACTIVATED(task)) | ||
304 | return; | ||
305 | task->tk_active = 1; | ||
306 | rpc_make_runnable(task); | ||
307 | } | ||
308 | |||
309 | /* | ||
310 | * Prepare for sleeping on a wait queue. | ||
311 | * By always appending tasks to the list we ensure FIFO behavior. | ||
312 | * NB: An RPC task will only receive interrupt-driven events as long | ||
313 | * as it's on a wait queue. | ||
314 | */ | ||
315 | static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, | ||
316 | rpc_action action, rpc_action timer) | ||
317 | { | ||
318 | dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid, | ||
319 | rpc_qname(q), jiffies); | ||
320 | |||
321 | if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) { | ||
322 | printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n"); | ||
323 | return; | ||
324 | } | ||
325 | |||
326 | /* Mark the task as being activated if so needed */ | ||
327 | if (!RPC_IS_ACTIVATED(task)) | ||
328 | task->tk_active = 1; | ||
329 | |||
330 | __rpc_add_wait_queue(q, task); | ||
331 | |||
332 | BUG_ON(task->tk_callback != NULL); | ||
333 | task->tk_callback = action; | ||
334 | __rpc_add_timer(task, timer); | ||
335 | } | ||
336 | |||
/*
 * Locked wrapper around __rpc_sleep_on(): puts @task to sleep on @q
 * with @action as wake-up callback and @timer as timeout handler,
 * holding q->lock with bottom halves disabled for the duration.
 */
void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
				rpc_action action, rpc_action timer)
{
	/*
	 * Protect the queue operations.
	 */
	spin_lock_bh(&q->lock);
	__rpc_sleep_on(q, task, action, timer);
	spin_unlock_bh(&q->lock);
}
347 | |||
/**
 * __rpc_do_wake_up_task - wake up a single rpc_task
 * @task: task to be woken up
 *
 * Caller must hold queue->lock, and have cleared the task queued flag.
 *
 * NOTE(review): the callers visible in this file invoke this only while
 * RPC_IS_QUEUED(task) is still true, and the queued flag is cleared
 * further down in rpc_make_runnable(); the "have cleared" wording above
 * looks stale - confirm.
 *
 * Disables the task's timer, unlinks it from its wait queue and makes
 * it runnable.  A task that was never activated is not woken; an error
 * is logged instead.
 */
static void __rpc_do_wake_up_task(struct rpc_task *task)
{
	dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);

#ifdef RPC_DEBUG
	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
#endif
	/* Has the task been executed yet? If not, we cannot wake it up! */
	if (!RPC_IS_ACTIVATED(task)) {
		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
		return;
	}

	__rpc_disable_timer(task);
	__rpc_remove_wait_queue(task);

	rpc_make_runnable(task);

	dprintk("RPC: __rpc_wake_up_task done\n");
}
374 | |||
/*
 * Wake up the specified task.  This variant takes no lock itself; the
 * callers in this file already hold the lock of the queue the task is
 * sleeping on.
 *
 * The wake-up is attempted only if rpc_start_wakeup() succeeds, and the
 * task is actually woken only if it is still queued.
 */
static void __rpc_wake_up_task(struct rpc_task *task)
{
	if (!rpc_start_wakeup(task))
		return;

	if (RPC_IS_QUEUED(task))
		__rpc_do_wake_up_task(task);
	rpc_finish_wakeup(task);
}
386 | |||
/*
 * Default timeout handler if none specified by user
 * (installed by __rpc_add_timer() when its timer argument is NULL).
 * Sets the task status to -ETIMEDOUT and wakes the task up.
 */
static void
__rpc_default_timer(struct rpc_task *task)
{
	dprintk("RPC: %d timeout (default timer)\n", task->tk_pid);
	task->tk_status = -ETIMEDOUT;
	rpc_wake_up_task(task);
}
397 | |||
398 | /* | ||
399 | * Wake up the specified task | ||
400 | */ | ||
401 | void rpc_wake_up_task(struct rpc_task *task) | ||
402 | { | ||
403 | if (rpc_start_wakeup(task)) { | ||
404 | if (RPC_IS_QUEUED(task)) { | ||
405 | struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; | ||
406 | |||
407 | spin_lock_bh(&queue->lock); | ||
408 | __rpc_do_wake_up_task(task); | ||
409 | spin_unlock_bh(&queue->lock); | ||
410 | } | ||
411 | rpc_finish_wakeup(task); | ||
412 | } | ||
413 | } | ||
414 | |||
/*
 * Wake up the next task on a priority queue.
 *
 * Called by rpc_wake_up_next() with queue->lock held.  Returns the task
 * that was selected for wake-up, or NULL when every priority list is
 * empty (in which case the servicing state is reset).
 *
 * Two countdowns drive the selection:
 *   queue->nr    - wake-ups left in the batch for the current cookie
 *   queue->count - wake-ups left before moving on to another priority
 *                  list
 */
static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
{
	struct list_head *q;
	struct rpc_task *task;

	/*
	 * Service a batch of tasks from a single cookie.
	 */
	q = &queue->tasks[queue->priority];
	if (!list_empty(q)) {
		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
		if (queue->cookie == task->tk_cookie) {
			/* Batch not yet exhausted: wake this task as is */
			if (--queue->nr)
				goto out;
			/* Batch exhausted: rotate the task to the tail of its list */
			list_move_tail(&task->u.tk_wait.list, q);
		}
		/*
		 * Check if we need to switch queues.
		 * (On the new_cookie path, task still refers to the entry
		 * examined above, even if it was just moved to the tail.)
		 */
		if (--queue->count)
			goto new_cookie;
	}

	/*
	 * Service the next queue.
	 * Walk downwards from the current priority, wrapping from tasks[0]
	 * to tasks[maxpriority]; the current priority's own list is looked
	 * at last.
	 */
	do {
		if (q == &queue->tasks[0])
			q = &queue->tasks[queue->maxpriority];
		else
			q = q - 1;
		if (!list_empty(q)) {
			task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
			goto new_queue;
		}
	} while (q != &queue->tasks[queue->priority]);

	/* Nothing is queued at any priority */
	rpc_reset_waitqueue_priority(queue);
	return NULL;

new_queue:
	/* The index of q within queue->tasks is the new current priority */
	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
new_cookie:
	rpc_set_waitqueue_cookie(queue, task->tk_cookie);
out:
	__rpc_wake_up_task(task);
	return task;
}
466 | |||
/*
 * Wake up the next task on the wait queue.
 *
 * Takes queue->lock (bottom halves disabled).  Priority queues are
 * handled by __rpc_wake_up_next_priority().  Returns the task chosen
 * for wake-up; task starts out NULL and, on a priority queue with
 * nothing waiting, stays NULL.
 *
 * NOTE(review): task_for_first() is defined outside this part of the
 * file; presumably it points task at the first entry of the list and
 * executes the following statement only when the list is non-empty -
 * confirm.
 */
struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
{
	struct rpc_task	*task = NULL;

	dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
	spin_lock_bh(&queue->lock);
	if (RPC_IS_PRIORITY(queue))
		task = __rpc_wake_up_next_priority(queue);
	else {
		task_for_first(task, &queue->tasks[0])
			__rpc_wake_up_task(task);
	}
	spin_unlock_bh(&queue->lock);

	return task;
}
486 | |||
487 | /** | ||
488 | * rpc_wake_up - wake up all rpc_tasks | ||
489 | * @queue: rpc_wait_queue on which the tasks are sleeping | ||
490 | * | ||
491 | * Grabs queue->lock | ||
492 | */ | ||
493 | void rpc_wake_up(struct rpc_wait_queue *queue) | ||
494 | { | ||
495 | struct rpc_task *task; | ||
496 | |||
497 | struct list_head *head; | ||
498 | spin_lock_bh(&queue->lock); | ||
499 | head = &queue->tasks[queue->maxpriority]; | ||
500 | for (;;) { | ||
501 | while (!list_empty(head)) { | ||
502 | task = list_entry(head->next, struct rpc_task, u.tk_wait.list); | ||
503 | __rpc_wake_up_task(task); | ||
504 | } | ||
505 | if (head == &queue->tasks[0]) | ||
506 | break; | ||
507 | head--; | ||
508 | } | ||
509 | spin_unlock_bh(&queue->lock); | ||
510 | } | ||
511 | |||
512 | /** | ||
513 | * rpc_wake_up_status - wake up all rpc_tasks and set their status value. | ||
514 | * @queue: rpc_wait_queue on which the tasks are sleeping | ||
515 | * @status: status value to set | ||
516 | * | ||
517 | * Grabs queue->lock | ||
518 | */ | ||
519 | void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) | ||
520 | { | ||
521 | struct list_head *head; | ||
522 | struct rpc_task *task; | ||
523 | |||
524 | spin_lock_bh(&queue->lock); | ||
525 | head = &queue->tasks[queue->maxpriority]; | ||
526 | for (;;) { | ||
527 | while (!list_empty(head)) { | ||
528 | task = list_entry(head->next, struct rpc_task, u.tk_wait.list); | ||
529 | task->tk_status = status; | ||
530 | __rpc_wake_up_task(task); | ||
531 | } | ||
532 | if (head == &queue->tasks[0]) | ||
533 | break; | ||
534 | head--; | ||
535 | } | ||
536 | spin_unlock_bh(&queue->lock); | ||
537 | } | ||
538 | |||
/*
 * Run a task at a later time
 *
 * The task is put to sleep on delay_queue with a timeout of @delay
 * (added to jiffies by __rpc_add_timer()).  It has no wake-up callback;
 * __rpc_atrun() is its timeout handler.
 */
static void	__rpc_atrun(struct rpc_task *);
void
rpc_delay(struct rpc_task *task, unsigned long delay)
{
	task->tk_timeout = delay;
	rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}
549 | |||
/*
 * Timeout handler used by rpc_delay(): clears the task status and wakes
 * the task up.
 */
static void
__rpc_atrun(struct rpc_task *task)
{
	task->tk_status = 0;
	rpc_wake_up_task(task);
}
556 | |||
/*
 * This is the RPC `scheduler' (or rather, the finite state machine).
 *
 * Repeatedly runs the pending wake-up callback and the next tk_action
 * step until tk_action becomes NULL.  Whenever a step leaves the task
 * queued on a wait queue:
 *   - an async task returns 0 from here (unless it was already woken
 *     again, in which case the loop simply continues);
 *   - a sync task sleeps in this function until it is no longer queued.
 *
 * When the loop ends, tk_exit (if any) is called; if it installs a new
 * tk_action on a task that has not been killed, the request slot and
 * buffer are released and the state machine restarts.  Otherwise the
 * task is released and its final tk_status is returned.
 *
 * The task must not be queued on entry.
 */
static int __rpc_execute(struct rpc_task *task)
{
	int		status = 0;

	dprintk("RPC: %4d rpc_execute flgs %x\n",
				task->tk_pid, task->tk_flags);

	BUG_ON(RPC_IS_QUEUED(task));

 restarted:
	while (1) {
		/*
		 * Garbage collection of pending timers...
		 */
		rpc_delete_timer(task);

		/*
		 * Execute any pending callback.
		 */
		if (RPC_DO_CALLBACK(task)) {
			/* Define a callback save pointer */
			void (*save_callback)(struct rpc_task *);

			/*
			 * If a callback exists, save it, reset it,
			 * call it.
			 * The save is needed to stop from resetting
			 * another callback set within the callback handler
			 * - Dave
			 */
			save_callback=task->tk_callback;
			task->tk_callback=NULL;
			lock_kernel();
			save_callback(task);
			unlock_kernel();
		}

		/*
		 * Perform the next FSM step.
		 * tk_action may be NULL when the task has been killed
		 * by someone else.
		 */
		if (!RPC_IS_QUEUED(task)) {
			if (!task->tk_action)
				break;
			lock_kernel();
			task->tk_action(task);
			unlock_kernel();
		}

		/*
		 * Lockless check for whether task is sleeping or not.
		 */
		if (!RPC_IS_QUEUED(task))
			continue;
		rpc_clear_running(task);
		if (RPC_IS_ASYNC(task)) {
			/* Careful! we may have raced... */
			/* Still queued: leave; a wake-up will reschedule the task */
			if (RPC_IS_QUEUED(task))
				return 0;
			/* Woken meanwhile and already marked running elsewhere */
			if (rpc_test_and_set_running(task))
				return 0;
			continue;
		}

		/* sync task: sleep here */
		dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
		if (RPC_TASK_UNINTERRUPTIBLE(task)) {
			__wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
		} else {
			__wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
			/*
			 * When a sync task receives a signal, it exits with
			 * -ERESTARTSYS. In order to catch any callbacks that
			 * clean up after sleeping on some queue, we don't
			 * break the loop here, but go around once more.
			 */
			if (status == -ERESTARTSYS) {
				dprintk("RPC: %4d got signal\n", task->tk_pid);
				task->tk_flags |= RPC_TASK_KILLED;
				rpc_exit(task, -ERESTARTSYS);
				rpc_wake_up_task(task);
			}
		}
		rpc_set_running(task);
		dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
	}

	if (task->tk_exit) {
		lock_kernel();
		task->tk_exit(task);
		unlock_kernel();
		/* If tk_action is non-null, the user wants us to restart */
		if (task->tk_action) {
			if (!RPC_ASSASSINATED(task)) {
				/* Release RPC slot and buffer memory */
				if (task->tk_rqstp)
					xprt_release(task);
				rpc_free(task);
				goto restarted;
			}
			printk(KERN_ERR "RPC: dead task tries to walk away.\n");
		}
	}

	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
	status = task->tk_status;

	/* Release all resources associated with the task */
	rpc_release_task(task);
	return status;
}
672 | |||
/*
 * User-visible entry point to the scheduler.
 *
 * Marks the task activated and running, then runs the state machine and
 * returns whatever __rpc_execute() returns.  The task must not have
 * been activated before (enforced with BUG_ON).
 *
 * This may be called recursively if e.g. an async NFS task updates
 * the attributes and finds that dirty pages must be flushed.
 * NOTE: Upon exit of this function the task is guaranteed to be
 *	 released. In particular note that tk_release() will have
 *	 been called, so your task memory may have been freed.
 */
int
rpc_execute(struct rpc_task *task)
{
	BUG_ON(task->tk_active);

	task->tk_active = 1;
	rpc_set_running(task);
	return __rpc_execute(task);
}
691 | |||
/*
 * Work function for async tasks, installed by rpc_make_runnable().
 * @arg is the rpc_task to run; the state machine's return value is
 * discarded.
 */
static void rpc_async_schedule(void *arg)
{
	struct rpc_task *task = (struct rpc_task *)arg;

	__rpc_execute(task);
}
696 | |||
697 | /* | ||
698 | * Allocate memory for RPC purposes. | ||
699 | * | ||
700 | * We try to ensure that some NFS reads and writes can always proceed | ||
701 | * by using a mempool when allocating 'small' buffers. | ||
702 | * In order to avoid memory starvation triggering more writebacks of | ||
703 | * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. | ||
704 | */ | ||
705 | void * | ||
706 | rpc_malloc(struct rpc_task *task, size_t size) | ||
707 | { | ||
708 | int gfp; | ||
709 | |||
710 | if (task->tk_flags & RPC_TASK_SWAPPER) | ||
711 | gfp = GFP_ATOMIC; | ||
712 | else | ||
713 | gfp = GFP_NOFS; | ||
714 | |||
715 | if (size > RPC_BUFFER_MAXSIZE) { | ||
716 | task->tk_buffer = kmalloc(size, gfp); | ||
717 | if (task->tk_buffer) | ||
718 | task->tk_bufsize = size; | ||
719 | } else { | ||
720 | task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); | ||
721 | if (task->tk_buffer) | ||
722 | task->tk_bufsize = RPC_BUFFER_MAXSIZE; | ||
723 | } | ||
724 | return task->tk_buffer; | ||
725 | } | ||
726 | |||
727 | static void | ||
728 | rpc_free(struct rpc_task *task) | ||
729 | { | ||
730 | if (task->tk_buffer) { | ||
731 | if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) | ||
732 | mempool_free(task->tk_buffer, rpc_buffer_mempool); | ||
733 | else | ||
734 | kfree(task->tk_buffer); | ||
735 | task->tk_buffer = NULL; | ||
736 | task->tk_bufsize = 0; | ||
737 | } | ||
738 | } | ||
739 | |||
740 | /* | ||
741 | * Creation and deletion of RPC task structures | ||
742 | */ | ||
743 | void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags) | ||
744 | { | ||
745 | memset(task, 0, sizeof(*task)); | ||
746 | init_timer(&task->tk_timer); | ||
747 | task->tk_timer.data = (unsigned long) task; | ||
748 | task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; | ||
749 | task->tk_client = clnt; | ||
750 | task->tk_flags = flags; | ||
751 | task->tk_exit = callback; | ||
752 | |||
753 | /* Initialize retry counters */ | ||
754 | task->tk_garb_retry = 2; | ||
755 | task->tk_cred_retry = 2; | ||
756 | |||
757 | task->tk_priority = RPC_PRIORITY_NORMAL; | ||
758 | task->tk_cookie = (unsigned long)current; | ||
759 | |||
760 | /* Initialize workqueue for async tasks */ | ||
761 | task->tk_workqueue = rpciod_workqueue; | ||
762 | if (!RPC_IS_ASYNC(task)) | ||
763 | init_waitqueue_head(&task->u.tk_wait.waitq); | ||
764 | |||
765 | if (clnt) { | ||
766 | atomic_inc(&clnt->cl_users); | ||
767 | if (clnt->cl_softrtry) | ||
768 | task->tk_flags |= RPC_TASK_SOFT; | ||
769 | if (!clnt->cl_intr) | ||
770 | task->tk_flags |= RPC_TASK_NOINTR; | ||
771 | } | ||
772 | |||
773 | #ifdef RPC_DEBUG | ||
774 | task->tk_magic = RPC_TASK_MAGIC_ID; | ||
775 | task->tk_pid = rpc_task_id++; | ||
776 | #endif | ||
777 | /* Add to global list of all tasks */ | ||
778 | spin_lock(&rpc_sched_lock); | ||
779 | list_add_tail(&task->tk_task, &all_tasks); | ||
780 | spin_unlock(&rpc_sched_lock); | ||
781 | |||
782 | dprintk("RPC: %4d new task procpid %d\n", task->tk_pid, | ||
783 | current->pid); | ||
784 | } | ||
785 | |||
786 | static struct rpc_task * | ||
787 | rpc_alloc_task(void) | ||
788 | { | ||
789 | return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); | ||
790 | } | ||
791 | |||
/*
 * tk_release handler installed by rpc_new_task(): returns the task
 * structure to rpc_task_mempool.
 */
static void
rpc_default_free_task(struct rpc_task *task)
{
	dprintk("RPC: %4d freeing task\n", task->tk_pid);
	mempool_free(task, rpc_task_mempool);
}
798 | |||
799 | /* | ||
800 | * Create a new task for the specified client. We have to | ||
801 | * clean up after an allocation failure, as the client may | ||
802 | * have specified "oneshot". | ||
803 | */ | ||
804 | struct rpc_task * | ||
805 | rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) | ||
806 | { | ||
807 | struct rpc_task *task; | ||
808 | |||
809 | task = rpc_alloc_task(); | ||
810 | if (!task) | ||
811 | goto cleanup; | ||
812 | |||
813 | rpc_init_task(task, clnt, callback, flags); | ||
814 | |||
815 | /* Replace tk_release */ | ||
816 | task->tk_release = rpc_default_free_task; | ||
817 | |||
818 | dprintk("RPC: %4d allocated task\n", task->tk_pid); | ||
819 | task->tk_flags |= RPC_TASK_DYNAMIC; | ||
820 | out: | ||
821 | return task; | ||
822 | |||
823 | cleanup: | ||
824 | /* Check whether to release the client */ | ||
825 | if (clnt) { | ||
826 | printk("rpc_new_task: failed, users=%d, oneshot=%d\n", | ||
827 | atomic_read(&clnt->cl_users), clnt->cl_oneshot); | ||
828 | atomic_inc(&clnt->cl_users); /* pretend we were used ... */ | ||
829 | rpc_release_client(clnt); | ||
830 | } | ||
831 | goto out; | ||
832 | } | ||
833 | |||
/*
 * Tear down a task: unlink it from all_tasks, delete its timer, release
 * its transport request, credential, buffer and client reference, and
 * finally call tk_release (if set).  The task must not be queued.
 */
void rpc_release_task(struct rpc_task *task)
{
	dprintk("RPC: %4d release task\n", task->tk_pid);

#ifdef RPC_DEBUG
	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
#endif

	/* Remove from global task list */
	spin_lock(&rpc_sched_lock);
	list_del(&task->tk_task);
	spin_unlock(&rpc_sched_lock);

	BUG_ON (RPC_IS_QUEUED(task));
	task->tk_active = 0;

	/* Synchronously delete any running timer */
	rpc_delete_timer(task);

	/* Release resources */
	if (task->tk_rqstp)
		xprt_release(task);
	if (task->tk_msg.rpc_cred)
		rpcauth_unbindcred(task);
	rpc_free(task);
	if (task->tk_client) {
		rpc_release_client(task->tk_client);
		task->tk_client = NULL;
	}

#ifdef RPC_DEBUG
	/* Invalidate the magic so a later use trips the BUG_ON checks */
	task->tk_magic = 0;
#endif
	/* Must be last: tk_release may free the task structure itself */
	if (task->tk_release)
		task->tk_release(task);
}
870 | |||
/**
 * rpc_find_parent - find the parent of a child task.
 * @child: child task
 *
 * Checks that the parent task is still sleeping on the
 * queue 'childq'. If so returns a pointer to the parent.
 * Upon failure returns NULL.
 *
 * The parent pointer is taken from child->tk_calldata (stored there by
 * rpc_new_child()) and is only compared against the queue entries; it
 * is never dereferenced here.
 *
 * Caller must hold childq.lock
 */
static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
{
	struct rpc_task	*task, *parent;
	struct list_head *le;

	parent = (struct rpc_task *) child->tk_calldata;
	/* NOTE(review): task_for_each() is defined outside this part of the file */
	task_for_each(task, le, &childq.tasks[0])
		if (task == parent)
			return parent;

	return NULL;
}
893 | |||
894 | static void rpc_child_exit(struct rpc_task *child) | ||
895 | { | ||
896 | struct rpc_task *parent; | ||
897 | |||
898 | spin_lock_bh(&childq.lock); | ||
899 | if ((parent = rpc_find_parent(child)) != NULL) { | ||
900 | parent->tk_status = child->tk_status; | ||
901 | __rpc_wake_up_task(parent); | ||
902 | } | ||
903 | spin_unlock_bh(&childq.lock); | ||
904 | } | ||
905 | |||
906 | /* | ||
907 | * Note: rpc_new_task releases the client after a failure. | ||
908 | */ | ||
909 | struct rpc_task * | ||
910 | rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) | ||
911 | { | ||
912 | struct rpc_task *task; | ||
913 | |||
914 | task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD); | ||
915 | if (!task) | ||
916 | goto fail; | ||
917 | task->tk_exit = rpc_child_exit; | ||
918 | task->tk_calldata = parent; | ||
919 | return task; | ||
920 | |||
921 | fail: | ||
922 | parent->tk_status = -ENOMEM; | ||
923 | return NULL; | ||
924 | } | ||
925 | |||
/*
 * Put the parent @task to sleep on childq, with @func as its wake-up
 * callback and no timeout handler, then start @child.  Both steps are
 * done under childq.lock, the lock rpc_child_exit() takes before it
 * looks the parent up on childq.
 */
void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{
	spin_lock_bh(&childq.lock);
	/* N.B. Is it possible for the child to have already finished? */
	__rpc_sleep_on(&childq, task, func, NULL);
	rpc_schedule_run(child);
	spin_unlock_bh(&childq.lock);
}
934 | |||
935 | /* | ||
936 | * Kill all tasks for the given client. | ||
937 | * XXX: kill their descendants as well? | ||
938 | */ | ||
939 | void rpc_killall_tasks(struct rpc_clnt *clnt) | ||
940 | { | ||
941 | struct rpc_task *rovr; | ||
942 | struct list_head *le; | ||
943 | |||
944 | dprintk("RPC: killing all tasks for client %p\n", clnt); | ||
945 | |||
946 | /* | ||
947 | * Spin lock all_tasks to prevent changes... | ||
948 | */ | ||
949 | spin_lock(&rpc_sched_lock); | ||
950 | alltask_for_each(rovr, le, &all_tasks) { | ||
951 | if (! RPC_IS_ACTIVATED(rovr)) | ||
952 | continue; | ||
953 | if (!clnt || rovr->tk_client == clnt) { | ||
954 | rovr->tk_flags |= RPC_TASK_KILLED; | ||
955 | rpc_exit(rovr, -EIO); | ||
956 | rpc_wake_up_task(rovr); | ||
957 | } | ||
958 | } | ||
959 | spin_unlock(&rpc_sched_lock); | ||
960 | } | ||
961 | |||
962 | static DECLARE_MUTEX_LOCKED(rpciod_running); | ||
963 | |||
964 | static void rpciod_killall(void) | ||
965 | { | ||
966 | unsigned long flags; | ||
967 | |||
968 | while (!list_empty(&all_tasks)) { | ||
969 | clear_thread_flag(TIF_SIGPENDING); | ||
970 | rpc_killall_tasks(NULL); | ||
971 | flush_workqueue(rpciod_workqueue); | ||
972 | if (!list_empty(&all_tasks)) { | ||
973 | dprintk("rpciod_killall: waiting for tasks to exit\n"); | ||
974 | yield(); | ||
975 | } | ||
976 | } | ||
977 | |||
978 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
979 | recalc_sigpending(); | ||
980 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
981 | } | ||
982 | |||
983 | /* | ||
984 | * Start up the rpciod process if it's not already running. | ||
985 | */ | ||
986 | int | ||
987 | rpciod_up(void) | ||
988 | { | ||
989 | struct workqueue_struct *wq; | ||
990 | int error = 0; | ||
991 | |||
992 | down(&rpciod_sema); | ||
993 | dprintk("rpciod_up: users %d\n", rpciod_users); | ||
994 | rpciod_users++; | ||
995 | if (rpciod_workqueue) | ||
996 | goto out; | ||
997 | /* | ||
998 | * If there's no pid, we should be the first user. | ||
999 | */ | ||
1000 | if (rpciod_users > 1) | ||
1001 | printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users); | ||
1002 | /* | ||
1003 | * Create the rpciod thread and wait for it to start. | ||
1004 | */ | ||
1005 | error = -ENOMEM; | ||
1006 | wq = create_workqueue("rpciod"); | ||
1007 | if (wq == NULL) { | ||
1008 | printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); | ||
1009 | rpciod_users--; | ||
1010 | goto out; | ||
1011 | } | ||
1012 | rpciod_workqueue = wq; | ||
1013 | error = 0; | ||
1014 | out: | ||
1015 | up(&rpciod_sema); | ||
1016 | return error; | ||
1017 | } | ||
1018 | |||
1019 | void | ||
1020 | rpciod_down(void) | ||
1021 | { | ||
1022 | down(&rpciod_sema); | ||
1023 | dprintk("rpciod_down sema %d\n", rpciod_users); | ||
1024 | if (rpciod_users) { | ||
1025 | if (--rpciod_users) | ||
1026 | goto out; | ||
1027 | } else | ||
1028 | printk(KERN_WARNING "rpciod_down: no users??\n"); | ||
1029 | |||
1030 | if (!rpciod_workqueue) { | ||
1031 | dprintk("rpciod_down: Nothing to do!\n"); | ||
1032 | goto out; | ||
1033 | } | ||
1034 | rpciod_killall(); | ||
1035 | |||
1036 | destroy_workqueue(rpciod_workqueue); | ||
1037 | rpciod_workqueue = NULL; | ||
1038 | out: | ||
1039 | up(&rpciod_sema); | ||
1040 | } | ||
1041 | |||
#ifdef RPC_DEBUG
/*
 * Debugging aid: print one line per entry of all_tasks, preceded by a
 * column header.  Nothing is printed when the list is empty.  The list
 * is walked under rpc_sched_lock.
 */
void rpc_show_tasks(void)
{
	struct list_head *pos;
	struct rpc_task *task;

	spin_lock(&rpc_sched_lock);
	if (list_empty(&all_tasks))
		goto out_unlock;

	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
		"-rpcwait -action- --exit--\n");
	alltask_for_each(task, pos, &all_tasks) {
		const char *qname;

		/* Only queued tasks have a wait queue to name. */
		if (RPC_IS_QUEUED(task))
			qname = rpc_qname(task->u.tk_wait.rpc_waitq);
		else
			qname = "none";

		printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
			task->tk_pid,
			(task->tk_msg.rpc_proc ? task->tk_msg.rpc_proc->p_proc : -1),
			task->tk_flags, task->tk_status,
			task->tk_client,
			(task->tk_client ? task->tk_client->cl_prog : 0),
			task->tk_rqstp, task->tk_timeout,
			qname,
			task->tk_action, task->tk_exit);
	}
out_unlock:
	spin_unlock(&rpc_sched_lock);
}
#endif
1074 | |||
1075 | void | ||
1076 | rpc_destroy_mempool(void) | ||
1077 | { | ||
1078 | if (rpc_buffer_mempool) | ||
1079 | mempool_destroy(rpc_buffer_mempool); | ||
1080 | if (rpc_task_mempool) | ||
1081 | mempool_destroy(rpc_task_mempool); | ||
1082 | if (rpc_task_slabp && kmem_cache_destroy(rpc_task_slabp)) | ||
1083 | printk(KERN_INFO "rpc_task: not all structures were freed\n"); | ||
1084 | if (rpc_buffer_slabp && kmem_cache_destroy(rpc_buffer_slabp)) | ||
1085 | printk(KERN_INFO "rpc_buffers: not all structures were freed\n"); | ||
1086 | } | ||
1087 | |||
1088 | int | ||
1089 | rpc_init_mempool(void) | ||
1090 | { | ||
1091 | rpc_task_slabp = kmem_cache_create("rpc_tasks", | ||
1092 | sizeof(struct rpc_task), | ||
1093 | 0, SLAB_HWCACHE_ALIGN, | ||
1094 | NULL, NULL); | ||
1095 | if (!rpc_task_slabp) | ||
1096 | goto err_nomem; | ||
1097 | rpc_buffer_slabp = kmem_cache_create("rpc_buffers", | ||
1098 | RPC_BUFFER_MAXSIZE, | ||
1099 | 0, SLAB_HWCACHE_ALIGN, | ||
1100 | NULL, NULL); | ||
1101 | if (!rpc_buffer_slabp) | ||
1102 | goto err_nomem; | ||
1103 | rpc_task_mempool = mempool_create(RPC_TASK_POOLSIZE, | ||
1104 | mempool_alloc_slab, | ||
1105 | mempool_free_slab, | ||
1106 | rpc_task_slabp); | ||
1107 | if (!rpc_task_mempool) | ||
1108 | goto err_nomem; | ||
1109 | rpc_buffer_mempool = mempool_create(RPC_BUFFER_POOLSIZE, | ||
1110 | mempool_alloc_slab, | ||
1111 | mempool_free_slab, | ||
1112 | rpc_buffer_slabp); | ||
1113 | if (!rpc_buffer_mempool) | ||
1114 | goto err_nomem; | ||
1115 | return 0; | ||
1116 | err_nomem: | ||
1117 | rpc_destroy_mempool(); | ||
1118 | return -ENOMEM; | ||
1119 | } | ||
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c new file mode 100644 index 000000000000..9b67dc19944c --- /dev/null +++ b/net/sunrpc/stats.c | |||
@@ -0,0 +1,175 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/stats.c | ||
3 | * | ||
4 | * procfs-based user access to generic RPC statistics. The stats files | ||
5 | * reside in /proc/net/rpc. | ||
6 | * | ||
7 | * The read routines assume that the buffer passed in is just big enough. | ||
8 | * If you implement an RPC service that has its own stats routine which | ||
9 | * appends the generic RPC stats, make sure you don't exceed the PAGE_SIZE | ||
10 | * limit. | ||
11 | * | ||
12 | * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> | ||
13 | */ | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include <linux/init.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/sched.h> | ||
20 | #include <linux/proc_fs.h> | ||
21 | #include <linux/seq_file.h> | ||
22 | #include <linux/sunrpc/clnt.h> | ||
23 | #include <linux/sunrpc/svcsock.h> | ||
24 | |||
25 | #define RPCDBG_FACILITY RPCDBG_MISC | ||
26 | |||
27 | struct proc_dir_entry *proc_net_rpc = NULL; | ||
28 | |||
29 | /* | ||
30 | * Get RPC client stats | ||
31 | */ | ||
32 | static int rpc_proc_show(struct seq_file *seq, void *v) { | ||
33 | const struct rpc_stat *statp = seq->private; | ||
34 | const struct rpc_program *prog = statp->program; | ||
35 | int i, j; | ||
36 | |||
37 | seq_printf(seq, | ||
38 | "net %d %d %d %d\n", | ||
39 | statp->netcnt, | ||
40 | statp->netudpcnt, | ||
41 | statp->nettcpcnt, | ||
42 | statp->nettcpconn); | ||
43 | seq_printf(seq, | ||
44 | "rpc %d %d %d\n", | ||
45 | statp->rpccnt, | ||
46 | statp->rpcretrans, | ||
47 | statp->rpcauthrefresh); | ||
48 | |||
49 | for (i = 0; i < prog->nrvers; i++) { | ||
50 | const struct rpc_version *vers = prog->version[i]; | ||
51 | if (!vers) | ||
52 | continue; | ||
53 | seq_printf(seq, "proc%d %d", | ||
54 | vers->number, vers->nrprocs); | ||
55 | for (j = 0; j < vers->nrprocs; j++) | ||
56 | seq_printf(seq, " %d", | ||
57 | vers->procs[j].p_count); | ||
58 | seq_putc(seq, '\n'); | ||
59 | } | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | static int rpc_proc_open(struct inode *inode, struct file *file) | ||
64 | { | ||
65 | return single_open(file, rpc_proc_show, PDE(inode)->data); | ||
66 | } | ||
67 | |||
68 | static struct file_operations rpc_proc_fops = { | ||
69 | .owner = THIS_MODULE, | ||
70 | .open = rpc_proc_open, | ||
71 | .read = seq_read, | ||
72 | .llseek = seq_lseek, | ||
73 | .release = single_release, | ||
74 | }; | ||
75 | |||
76 | /* | ||
77 | * Get RPC server stats | ||
78 | */ | ||
79 | void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { | ||
80 | const struct svc_program *prog = statp->program; | ||
81 | const struct svc_procedure *proc; | ||
82 | const struct svc_version *vers; | ||
83 | int i, j; | ||
84 | |||
85 | seq_printf(seq, | ||
86 | "net %d %d %d %d\n", | ||
87 | statp->netcnt, | ||
88 | statp->netudpcnt, | ||
89 | statp->nettcpcnt, | ||
90 | statp->nettcpconn); | ||
91 | seq_printf(seq, | ||
92 | "rpc %d %d %d %d %d\n", | ||
93 | statp->rpccnt, | ||
94 | statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt, | ||
95 | statp->rpcbadfmt, | ||
96 | statp->rpcbadauth, | ||
97 | statp->rpcbadclnt); | ||
98 | |||
99 | for (i = 0; i < prog->pg_nvers; i++) { | ||
100 | if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc)) | ||
101 | continue; | ||
102 | seq_printf(seq, "proc%d %d", i, vers->vs_nproc); | ||
103 | for (j = 0; j < vers->vs_nproc; j++, proc++) | ||
104 | seq_printf(seq, " %d", proc->pc_count); | ||
105 | seq_putc(seq, '\n'); | ||
106 | } | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * Register/unregister RPC proc files | ||
111 | */ | ||
112 | static inline struct proc_dir_entry * | ||
113 | do_register(const char *name, void *data, struct file_operations *fops) | ||
114 | { | ||
115 | struct proc_dir_entry *ent; | ||
116 | |||
117 | rpc_proc_init(); | ||
118 | dprintk("RPC: registering /proc/net/rpc/%s\n", name); | ||
119 | |||
120 | ent = create_proc_entry(name, 0, proc_net_rpc); | ||
121 | if (ent) { | ||
122 | ent->proc_fops = fops; | ||
123 | ent->data = data; | ||
124 | } | ||
125 | return ent; | ||
126 | } | ||
127 | |||
128 | struct proc_dir_entry * | ||
129 | rpc_proc_register(struct rpc_stat *statp) | ||
130 | { | ||
131 | return do_register(statp->program->name, statp, &rpc_proc_fops); | ||
132 | } | ||
133 | |||
134 | void | ||
135 | rpc_proc_unregister(const char *name) | ||
136 | { | ||
137 | remove_proc_entry(name, proc_net_rpc); | ||
138 | } | ||
139 | |||
140 | struct proc_dir_entry * | ||
141 | svc_proc_register(struct svc_stat *statp, struct file_operations *fops) | ||
142 | { | ||
143 | return do_register(statp->program->pg_name, statp, fops); | ||
144 | } | ||
145 | |||
146 | void | ||
147 | svc_proc_unregister(const char *name) | ||
148 | { | ||
149 | remove_proc_entry(name, proc_net_rpc); | ||
150 | } | ||
151 | |||
152 | void | ||
153 | rpc_proc_init(void) | ||
154 | { | ||
155 | dprintk("RPC: registering /proc/net/rpc\n"); | ||
156 | if (!proc_net_rpc) { | ||
157 | struct proc_dir_entry *ent; | ||
158 | ent = proc_mkdir("rpc", proc_net); | ||
159 | if (ent) { | ||
160 | ent->owner = THIS_MODULE; | ||
161 | proc_net_rpc = ent; | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | |||
166 | void | ||
167 | rpc_proc_exit(void) | ||
168 | { | ||
169 | dprintk("RPC: unregistering /proc/net/rpc\n"); | ||
170 | if (proc_net_rpc) { | ||
171 | proc_net_rpc = NULL; | ||
172 | remove_proc_entry("net/rpc", NULL); | ||
173 | } | ||
174 | } | ||
175 | |||
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c new file mode 100644 index 000000000000..d4f26bf9e732 --- /dev/null +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -0,0 +1,185 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/sunrpc_syms.c | ||
3 | * | ||
4 | * Symbols exported by the sunrpc module. | ||
5 | * | ||
6 | * Copyright (C) 1997 Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/config.h> | ||
10 | #include <linux/module.h> | ||
11 | |||
12 | #include <linux/types.h> | ||
13 | #include <linux/socket.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/uio.h> | ||
16 | #include <linux/unistd.h> | ||
17 | #include <linux/init.h> | ||
18 | |||
19 | #include <linux/sunrpc/sched.h> | ||
20 | #include <linux/sunrpc/clnt.h> | ||
21 | #include <linux/sunrpc/svc.h> | ||
22 | #include <linux/sunrpc/svcsock.h> | ||
23 | #include <linux/sunrpc/auth.h> | ||
24 | #include <linux/workqueue.h> | ||
25 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
26 | |||
27 | |||
28 | /* RPC scheduler */ | ||
29 | EXPORT_SYMBOL(rpc_execute); | ||
30 | EXPORT_SYMBOL(rpc_init_task); | ||
31 | EXPORT_SYMBOL(rpc_sleep_on); | ||
32 | EXPORT_SYMBOL(rpc_wake_up_next); | ||
33 | EXPORT_SYMBOL(rpc_wake_up_task); | ||
34 | EXPORT_SYMBOL(rpc_new_child); | ||
35 | EXPORT_SYMBOL(rpc_run_child); | ||
36 | EXPORT_SYMBOL(rpciod_down); | ||
37 | EXPORT_SYMBOL(rpciod_up); | ||
38 | EXPORT_SYMBOL(rpc_new_task); | ||
39 | EXPORT_SYMBOL(rpc_wake_up_status); | ||
40 | EXPORT_SYMBOL(rpc_release_task); | ||
41 | |||
42 | /* RPC client functions */ | ||
43 | EXPORT_SYMBOL(rpc_create_client); | ||
44 | EXPORT_SYMBOL(rpc_clone_client); | ||
45 | EXPORT_SYMBOL(rpc_destroy_client); | ||
46 | EXPORT_SYMBOL(rpc_shutdown_client); | ||
47 | EXPORT_SYMBOL(rpc_release_client); | ||
48 | EXPORT_SYMBOL(rpc_killall_tasks); | ||
49 | EXPORT_SYMBOL(rpc_call_sync); | ||
50 | EXPORT_SYMBOL(rpc_call_async); | ||
51 | EXPORT_SYMBOL(rpc_call_setup); | ||
52 | EXPORT_SYMBOL(rpc_clnt_sigmask); | ||
53 | EXPORT_SYMBOL(rpc_clnt_sigunmask); | ||
54 | EXPORT_SYMBOL(rpc_delay); | ||
55 | EXPORT_SYMBOL(rpc_restart_call); | ||
56 | EXPORT_SYMBOL(rpc_setbufsize); | ||
57 | EXPORT_SYMBOL(rpc_unlink); | ||
58 | EXPORT_SYMBOL(rpc_wake_up); | ||
59 | EXPORT_SYMBOL(rpc_queue_upcall); | ||
60 | EXPORT_SYMBOL(rpc_mkpipe); | ||
61 | |||
62 | /* Client transport */ | ||
63 | EXPORT_SYMBOL(xprt_create_proto); | ||
64 | EXPORT_SYMBOL(xprt_destroy); | ||
65 | EXPORT_SYMBOL(xprt_set_timeout); | ||
66 | EXPORT_SYMBOL(xprt_udp_slot_table_entries); | ||
67 | EXPORT_SYMBOL(xprt_tcp_slot_table_entries); | ||
68 | |||
69 | /* Client credential cache */ | ||
70 | EXPORT_SYMBOL(rpcauth_register); | ||
71 | EXPORT_SYMBOL(rpcauth_unregister); | ||
72 | EXPORT_SYMBOL(rpcauth_create); | ||
73 | EXPORT_SYMBOL(rpcauth_lookupcred); | ||
74 | EXPORT_SYMBOL(rpcauth_lookup_credcache); | ||
75 | EXPORT_SYMBOL(rpcauth_free_credcache); | ||
76 | EXPORT_SYMBOL(rpcauth_init_credcache); | ||
77 | EXPORT_SYMBOL(put_rpccred); | ||
78 | |||
79 | /* RPC server stuff */ | ||
80 | EXPORT_SYMBOL(svc_create); | ||
81 | EXPORT_SYMBOL(svc_create_thread); | ||
82 | EXPORT_SYMBOL(svc_exit_thread); | ||
83 | EXPORT_SYMBOL(svc_destroy); | ||
84 | EXPORT_SYMBOL(svc_drop); | ||
85 | EXPORT_SYMBOL(svc_process); | ||
86 | EXPORT_SYMBOL(svc_recv); | ||
87 | EXPORT_SYMBOL(svc_wake_up); | ||
88 | EXPORT_SYMBOL(svc_makesock); | ||
89 | EXPORT_SYMBOL(svc_reserve); | ||
90 | EXPORT_SYMBOL(svc_auth_register); | ||
91 | EXPORT_SYMBOL(auth_domain_lookup); | ||
92 | EXPORT_SYMBOL(svc_authenticate); | ||
93 | EXPORT_SYMBOL(svc_set_client); | ||
94 | |||
95 | /* RPC statistics */ | ||
96 | #ifdef CONFIG_PROC_FS | ||
97 | EXPORT_SYMBOL(rpc_proc_register); | ||
98 | EXPORT_SYMBOL(rpc_proc_unregister); | ||
99 | EXPORT_SYMBOL(svc_proc_register); | ||
100 | EXPORT_SYMBOL(svc_proc_unregister); | ||
101 | EXPORT_SYMBOL(svc_seq_show); | ||
102 | #endif | ||
103 | |||
104 | /* caching... */ | ||
105 | EXPORT_SYMBOL(auth_domain_find); | ||
106 | EXPORT_SYMBOL(auth_domain_put); | ||
107 | EXPORT_SYMBOL(auth_unix_add_addr); | ||
108 | EXPORT_SYMBOL(auth_unix_forget_old); | ||
109 | EXPORT_SYMBOL(auth_unix_lookup); | ||
110 | EXPORT_SYMBOL(cache_check); | ||
111 | EXPORT_SYMBOL(cache_flush); | ||
112 | EXPORT_SYMBOL(cache_purge); | ||
113 | EXPORT_SYMBOL(cache_fresh); | ||
114 | EXPORT_SYMBOL(cache_init); | ||
115 | EXPORT_SYMBOL(cache_register); | ||
116 | EXPORT_SYMBOL(cache_unregister); | ||
117 | EXPORT_SYMBOL(qword_add); | ||
118 | EXPORT_SYMBOL(qword_addhex); | ||
119 | EXPORT_SYMBOL(qword_get); | ||
120 | EXPORT_SYMBOL(svcauth_unix_purge); | ||
121 | EXPORT_SYMBOL(unix_domain_find); | ||
122 | |||
123 | /* Generic XDR */ | ||
124 | EXPORT_SYMBOL(xdr_encode_string); | ||
125 | EXPORT_SYMBOL(xdr_decode_string); | ||
126 | EXPORT_SYMBOL(xdr_decode_string_inplace); | ||
127 | EXPORT_SYMBOL(xdr_decode_netobj); | ||
128 | EXPORT_SYMBOL(xdr_encode_netobj); | ||
129 | EXPORT_SYMBOL(xdr_encode_pages); | ||
130 | EXPORT_SYMBOL(xdr_inline_pages); | ||
131 | EXPORT_SYMBOL(xdr_shift_buf); | ||
132 | EXPORT_SYMBOL(xdr_buf_from_iov); | ||
133 | EXPORT_SYMBOL(xdr_buf_subsegment); | ||
134 | EXPORT_SYMBOL(xdr_buf_read_netobj); | ||
135 | EXPORT_SYMBOL(read_bytes_from_xdr_buf); | ||
136 | |||
137 | /* Debugging symbols */ | ||
138 | #ifdef RPC_DEBUG | ||
139 | EXPORT_SYMBOL(rpc_debug); | ||
140 | EXPORT_SYMBOL(nfs_debug); | ||
141 | EXPORT_SYMBOL(nfsd_debug); | ||
142 | EXPORT_SYMBOL(nlm_debug); | ||
143 | #endif | ||
144 | |||
145 | extern int register_rpc_pipefs(void); | ||
146 | extern void unregister_rpc_pipefs(void); | ||
147 | |||
148 | static int __init | ||
149 | init_sunrpc(void) | ||
150 | { | ||
151 | int err = register_rpc_pipefs(); | ||
152 | if (err) | ||
153 | goto out; | ||
154 | err = rpc_init_mempool() != 0; | ||
155 | if (err) | ||
156 | goto out; | ||
157 | #ifdef RPC_DEBUG | ||
158 | rpc_register_sysctl(); | ||
159 | #endif | ||
160 | #ifdef CONFIG_PROC_FS | ||
161 | rpc_proc_init(); | ||
162 | #endif | ||
163 | cache_register(&auth_domain_cache); | ||
164 | cache_register(&ip_map_cache); | ||
165 | out: | ||
166 | return err; | ||
167 | } | ||
168 | |||
169 | static void __exit | ||
170 | cleanup_sunrpc(void) | ||
171 | { | ||
172 | unregister_rpc_pipefs(); | ||
173 | rpc_destroy_mempool(); | ||
174 | cache_unregister(&auth_domain_cache); | ||
175 | cache_unregister(&ip_map_cache); | ||
176 | #ifdef RPC_DEBUG | ||
177 | rpc_unregister_sysctl(); | ||
178 | #endif | ||
179 | #ifdef CONFIG_PROC_FS | ||
180 | rpc_proc_exit(); | ||
181 | #endif | ||
182 | } | ||
183 | MODULE_LICENSE("GPL"); | ||
184 | module_init(init_sunrpc); | ||
185 | module_exit(cleanup_sunrpc); | ||
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c new file mode 100644 index 000000000000..bb2d99f33315 --- /dev/null +++ b/net/sunrpc/svc.c | |||
@@ -0,0 +1,490 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/svc.c | ||
3 | * | ||
4 | * High-level RPC service routines | ||
5 | * | ||
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/linkage.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/errno.h> | ||
12 | #include <linux/net.h> | ||
13 | #include <linux/in.h> | ||
14 | #include <linux/mm.h> | ||
15 | |||
16 | #include <linux/sunrpc/types.h> | ||
17 | #include <linux/sunrpc/xdr.h> | ||
18 | #include <linux/sunrpc/stats.h> | ||
19 | #include <linux/sunrpc/svcsock.h> | ||
20 | #include <linux/sunrpc/clnt.h> | ||
21 | |||
22 | #define RPCDBG_FACILITY RPCDBG_SVCDSP | ||
23 | #define RPC_PARANOIA 1 | ||
24 | |||
25 | /* | ||
26 | * Create an RPC service | ||
27 | */ | ||
28 | struct svc_serv * | ||
29 | svc_create(struct svc_program *prog, unsigned int bufsize) | ||
30 | { | ||
31 | struct svc_serv *serv; | ||
32 | int vers; | ||
33 | unsigned int xdrsize; | ||
34 | |||
35 | if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL))) | ||
36 | return NULL; | ||
37 | memset(serv, 0, sizeof(*serv)); | ||
38 | serv->sv_program = prog; | ||
39 | serv->sv_nrthreads = 1; | ||
40 | serv->sv_stats = prog->pg_stats; | ||
41 | serv->sv_bufsz = bufsize? bufsize : 4096; | ||
42 | prog->pg_lovers = prog->pg_nvers-1; | ||
43 | xdrsize = 0; | ||
44 | for (vers=0; vers<prog->pg_nvers ; vers++) | ||
45 | if (prog->pg_vers[vers]) { | ||
46 | prog->pg_hivers = vers; | ||
47 | if (prog->pg_lovers > vers) | ||
48 | prog->pg_lovers = vers; | ||
49 | if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) | ||
50 | xdrsize = prog->pg_vers[vers]->vs_xdrsize; | ||
51 | } | ||
52 | serv->sv_xdrsize = xdrsize; | ||
53 | INIT_LIST_HEAD(&serv->sv_threads); | ||
54 | INIT_LIST_HEAD(&serv->sv_sockets); | ||
55 | INIT_LIST_HEAD(&serv->sv_tempsocks); | ||
56 | INIT_LIST_HEAD(&serv->sv_permsocks); | ||
57 | spin_lock_init(&serv->sv_lock); | ||
58 | |||
59 | serv->sv_name = prog->pg_name; | ||
60 | |||
61 | /* Remove any stale portmap registrations */ | ||
62 | svc_register(serv, 0, 0); | ||
63 | |||
64 | return serv; | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * Destroy an RPC service | ||
69 | */ | ||
70 | void | ||
71 | svc_destroy(struct svc_serv *serv) | ||
72 | { | ||
73 | struct svc_sock *svsk; | ||
74 | |||
75 | dprintk("RPC: svc_destroy(%s, %d)\n", | ||
76 | serv->sv_program->pg_name, | ||
77 | serv->sv_nrthreads); | ||
78 | |||
79 | if (serv->sv_nrthreads) { | ||
80 | if (--(serv->sv_nrthreads) != 0) { | ||
81 | svc_sock_update_bufs(serv); | ||
82 | return; | ||
83 | } | ||
84 | } else | ||
85 | printk("svc_destroy: no threads for serv=%p!\n", serv); | ||
86 | |||
87 | while (!list_empty(&serv->sv_tempsocks)) { | ||
88 | svsk = list_entry(serv->sv_tempsocks.next, | ||
89 | struct svc_sock, | ||
90 | sk_list); | ||
91 | svc_delete_socket(svsk); | ||
92 | } | ||
93 | while (!list_empty(&serv->sv_permsocks)) { | ||
94 | svsk = list_entry(serv->sv_permsocks.next, | ||
95 | struct svc_sock, | ||
96 | sk_list); | ||
97 | svc_delete_socket(svsk); | ||
98 | } | ||
99 | |||
100 | cache_clean_deferred(serv); | ||
101 | |||
102 | /* Unregister service with the portmapper */ | ||
103 | svc_register(serv, 0, 0); | ||
104 | kfree(serv); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Allocate an RPC server's buffer space. | ||
109 | * We allocate pages and place them in rq_argpages. | ||
110 | */ | ||
111 | static int | ||
112 | svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) | ||
113 | { | ||
114 | int pages; | ||
115 | int arghi; | ||
116 | |||
117 | if (size > RPCSVC_MAXPAYLOAD) | ||
118 | size = RPCSVC_MAXPAYLOAD; | ||
119 | pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE; | ||
120 | rqstp->rq_argused = 0; | ||
121 | rqstp->rq_resused = 0; | ||
122 | arghi = 0; | ||
123 | if (pages > RPCSVC_MAXPAGES) | ||
124 | BUG(); | ||
125 | while (pages) { | ||
126 | struct page *p = alloc_page(GFP_KERNEL); | ||
127 | if (!p) | ||
128 | break; | ||
129 | rqstp->rq_argpages[arghi++] = p; | ||
130 | pages--; | ||
131 | } | ||
132 | rqstp->rq_arghi = arghi; | ||
133 | return ! pages; | ||
134 | } | ||
135 | |||
136 | /* | ||
137 | * Release an RPC server buffer | ||
138 | */ | ||
139 | static void | ||
140 | svc_release_buffer(struct svc_rqst *rqstp) | ||
141 | { | ||
142 | while (rqstp->rq_arghi) | ||
143 | put_page(rqstp->rq_argpages[--rqstp->rq_arghi]); | ||
144 | while (rqstp->rq_resused) { | ||
145 | if (rqstp->rq_respages[--rqstp->rq_resused] == NULL) | ||
146 | continue; | ||
147 | put_page(rqstp->rq_respages[rqstp->rq_resused]); | ||
148 | } | ||
149 | rqstp->rq_argused = 0; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * Create a server thread | ||
154 | */ | ||
155 | int | ||
156 | svc_create_thread(svc_thread_fn func, struct svc_serv *serv) | ||
157 | { | ||
158 | struct svc_rqst *rqstp; | ||
159 | int error = -ENOMEM; | ||
160 | |||
161 | rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL); | ||
162 | if (!rqstp) | ||
163 | goto out; | ||
164 | |||
165 | memset(rqstp, 0, sizeof(*rqstp)); | ||
166 | init_waitqueue_head(&rqstp->rq_wait); | ||
167 | |||
168 | if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL)) | ||
169 | || !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL)) | ||
170 | || !svc_init_buffer(rqstp, serv->sv_bufsz)) | ||
171 | goto out_thread; | ||
172 | |||
173 | serv->sv_nrthreads++; | ||
174 | rqstp->rq_server = serv; | ||
175 | error = kernel_thread((int (*)(void *)) func, rqstp, 0); | ||
176 | if (error < 0) | ||
177 | goto out_thread; | ||
178 | svc_sock_update_bufs(serv); | ||
179 | error = 0; | ||
180 | out: | ||
181 | return error; | ||
182 | |||
183 | out_thread: | ||
184 | svc_exit_thread(rqstp); | ||
185 | goto out; | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * Destroy an RPC server thread | ||
190 | */ | ||
191 | void | ||
192 | svc_exit_thread(struct svc_rqst *rqstp) | ||
193 | { | ||
194 | struct svc_serv *serv = rqstp->rq_server; | ||
195 | |||
196 | svc_release_buffer(rqstp); | ||
197 | if (rqstp->rq_resp) | ||
198 | kfree(rqstp->rq_resp); | ||
199 | if (rqstp->rq_argp) | ||
200 | kfree(rqstp->rq_argp); | ||
201 | if (rqstp->rq_auth_data) | ||
202 | kfree(rqstp->rq_auth_data); | ||
203 | kfree(rqstp); | ||
204 | |||
205 | /* Release the server */ | ||
206 | if (serv) | ||
207 | svc_destroy(serv); | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Register an RPC service with the local portmapper. | ||
212 | * To unregister a service, call this routine with | ||
213 | * proto and port == 0. | ||
214 | */ | ||
215 | int | ||
216 | svc_register(struct svc_serv *serv, int proto, unsigned short port) | ||
217 | { | ||
218 | struct svc_program *progp; | ||
219 | unsigned long flags; | ||
220 | int i, error = 0, dummy; | ||
221 | |||
222 | progp = serv->sv_program; | ||
223 | |||
224 | dprintk("RPC: svc_register(%s, %s, %d)\n", | ||
225 | progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port); | ||
226 | |||
227 | if (!port) | ||
228 | clear_thread_flag(TIF_SIGPENDING); | ||
229 | |||
230 | for (i = 0; i < progp->pg_nvers; i++) { | ||
231 | if (progp->pg_vers[i] == NULL) | ||
232 | continue; | ||
233 | error = rpc_register(progp->pg_prog, i, proto, port, &dummy); | ||
234 | if (error < 0) | ||
235 | break; | ||
236 | if (port && !dummy) { | ||
237 | error = -EACCES; | ||
238 | break; | ||
239 | } | ||
240 | } | ||
241 | |||
242 | if (!port) { | ||
243 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
244 | recalc_sigpending(); | ||
245 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
246 | } | ||
247 | |||
248 | return error; | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Process the RPC request. | ||
253 | */ | ||
254 | int | ||
255 | svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) | ||
256 | { | ||
257 | struct svc_program *progp; | ||
258 | struct svc_version *versp = NULL; /* compiler food */ | ||
259 | struct svc_procedure *procp = NULL; | ||
260 | struct kvec * argv = &rqstp->rq_arg.head[0]; | ||
261 | struct kvec * resv = &rqstp->rq_res.head[0]; | ||
262 | kxdrproc_t xdr; | ||
263 | u32 *statp; | ||
264 | u32 dir, prog, vers, proc, | ||
265 | auth_stat, rpc_stat; | ||
266 | int auth_res; | ||
267 | u32 *accept_statp; | ||
268 | |||
269 | rpc_stat = rpc_success; | ||
270 | |||
271 | if (argv->iov_len < 6*4) | ||
272 | goto err_short_len; | ||
273 | |||
274 | /* setup response xdr_buf. | ||
275 | * Initially it has just one page | ||
276 | */ | ||
277 | svc_take_page(rqstp); /* must succeed */ | ||
278 | resv->iov_base = page_address(rqstp->rq_respages[0]); | ||
279 | resv->iov_len = 0; | ||
280 | rqstp->rq_res.pages = rqstp->rq_respages+1; | ||
281 | rqstp->rq_res.len = 0; | ||
282 | rqstp->rq_res.page_base = 0; | ||
283 | rqstp->rq_res.page_len = 0; | ||
284 | rqstp->rq_res.tail[0].iov_len = 0; | ||
285 | /* tcp needs a space for the record length... */ | ||
286 | if (rqstp->rq_prot == IPPROTO_TCP) | ||
287 | svc_putu32(resv, 0); | ||
288 | |||
289 | rqstp->rq_xid = svc_getu32(argv); | ||
290 | svc_putu32(resv, rqstp->rq_xid); | ||
291 | |||
292 | dir = ntohl(svc_getu32(argv)); | ||
293 | vers = ntohl(svc_getu32(argv)); | ||
294 | |||
295 | /* First words of reply: */ | ||
296 | svc_putu32(resv, xdr_one); /* REPLY */ | ||
297 | |||
298 | if (dir != 0) /* direction != CALL */ | ||
299 | goto err_bad_dir; | ||
300 | if (vers != 2) /* RPC version number */ | ||
301 | goto err_bad_rpc; | ||
302 | |||
303 | /* Save position in case we later decide to reject: */ | ||
304 | accept_statp = resv->iov_base + resv->iov_len; | ||
305 | |||
306 | svc_putu32(resv, xdr_zero); /* ACCEPT */ | ||
307 | |||
308 | rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */ | ||
309 | rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */ | ||
310 | rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ | ||
311 | |||
312 | progp = serv->sv_program; | ||
313 | /* | ||
314 | * Decode auth data, and add verifier to reply buffer. | ||
315 | * We do this before anything else in order to get a decent | ||
316 | * auth verifier. | ||
317 | */ | ||
318 | auth_res = svc_authenticate(rqstp, &auth_stat); | ||
319 | /* Also give the program a chance to reject this call: */ | ||
320 | if (auth_res == SVC_OK) { | ||
321 | auth_stat = rpc_autherr_badcred; | ||
322 | auth_res = progp->pg_authenticate(rqstp); | ||
323 | } | ||
324 | switch (auth_res) { | ||
325 | case SVC_OK: | ||
326 | break; | ||
327 | case SVC_GARBAGE: | ||
328 | rpc_stat = rpc_garbage_args; | ||
329 | goto err_bad; | ||
330 | case SVC_SYSERR: | ||
331 | rpc_stat = rpc_system_err; | ||
332 | goto err_bad; | ||
333 | case SVC_DENIED: | ||
334 | goto err_bad_auth; | ||
335 | case SVC_DROP: | ||
336 | goto dropit; | ||
337 | case SVC_COMPLETE: | ||
338 | goto sendit; | ||
339 | } | ||
340 | |||
341 | if (prog != progp->pg_prog) | ||
342 | goto err_bad_prog; | ||
343 | |||
344 | if (vers >= progp->pg_nvers || | ||
345 | !(versp = progp->pg_vers[vers])) | ||
346 | goto err_bad_vers; | ||
347 | |||
348 | procp = versp->vs_proc + proc; | ||
349 | if (proc >= versp->vs_nproc || !procp->pc_func) | ||
350 | goto err_bad_proc; | ||
351 | rqstp->rq_server = serv; | ||
352 | rqstp->rq_procinfo = procp; | ||
353 | |||
354 | /* Syntactic check complete */ | ||
355 | serv->sv_stats->rpccnt++; | ||
356 | |||
357 | /* Build the reply header. */ | ||
358 | statp = resv->iov_base +resv->iov_len; | ||
359 | svc_putu32(resv, rpc_success); /* RPC_SUCCESS */ | ||
360 | |||
361 | /* Bump per-procedure stats counter */ | ||
362 | procp->pc_count++; | ||
363 | |||
364 | /* Initialize storage for argp and resp */ | ||
365 | memset(rqstp->rq_argp, 0, procp->pc_argsize); | ||
366 | memset(rqstp->rq_resp, 0, procp->pc_ressize); | ||
367 | |||
368 | /* un-reserve some of the out-queue now that we have a | ||
369 | * better idea of reply size | ||
370 | */ | ||
371 | if (procp->pc_xdrressize) | ||
372 | svc_reserve(rqstp, procp->pc_xdrressize<<2); | ||
373 | |||
374 | /* Call the function that processes the request. */ | ||
375 | if (!versp->vs_dispatch) { | ||
376 | /* Decode arguments */ | ||
377 | xdr = procp->pc_decode; | ||
378 | if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp)) | ||
379 | goto err_garbage; | ||
380 | |||
381 | *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); | ||
382 | |||
383 | /* Encode reply */ | ||
384 | if (*statp == rpc_success && (xdr = procp->pc_encode) | ||
385 | && !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { | ||
386 | dprintk("svc: failed to encode reply\n"); | ||
387 | /* serv->sv_stats->rpcsystemerr++; */ | ||
388 | *statp = rpc_system_err; | ||
389 | } | ||
390 | } else { | ||
391 | dprintk("svc: calling dispatcher\n"); | ||
392 | if (!versp->vs_dispatch(rqstp, statp)) { | ||
393 | /* Release reply info */ | ||
394 | if (procp->pc_release) | ||
395 | procp->pc_release(rqstp, NULL, rqstp->rq_resp); | ||
396 | goto dropit; | ||
397 | } | ||
398 | } | ||
399 | |||
400 | /* Check RPC status result */ | ||
401 | if (*statp != rpc_success) | ||
402 | resv->iov_len = ((void*)statp) - resv->iov_base + 4; | ||
403 | |||
404 | /* Release reply info */ | ||
405 | if (procp->pc_release) | ||
406 | procp->pc_release(rqstp, NULL, rqstp->rq_resp); | ||
407 | |||
408 | if (procp->pc_encode == NULL) | ||
409 | goto dropit; | ||
410 | |||
411 | sendit: | ||
412 | if (svc_authorise(rqstp)) | ||
413 | goto dropit; | ||
414 | return svc_send(rqstp); | ||
415 | |||
416 | dropit: | ||
417 | svc_authorise(rqstp); /* doesn't hurt to call this twice */ | ||
418 | dprintk("svc: svc_process dropit\n"); | ||
419 | svc_drop(rqstp); | ||
420 | return 0; | ||
421 | |||
422 | err_short_len: | ||
423 | #ifdef RPC_PARANOIA | ||
424 | printk("svc: short len %Zd, dropping request\n", argv->iov_len); | ||
425 | #endif | ||
426 | goto dropit; /* drop request */ | ||
427 | |||
428 | err_bad_dir: | ||
429 | #ifdef RPC_PARANOIA | ||
430 | printk("svc: bad direction %d, dropping request\n", dir); | ||
431 | #endif | ||
432 | serv->sv_stats->rpcbadfmt++; | ||
433 | goto dropit; /* drop request */ | ||
434 | |||
435 | err_bad_rpc: | ||
436 | serv->sv_stats->rpcbadfmt++; | ||
437 | svc_putu32(resv, xdr_one); /* REJECT */ | ||
438 | svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */ | ||
439 | svc_putu32(resv, xdr_two); /* Only RPCv2 supported */ | ||
440 | svc_putu32(resv, xdr_two); | ||
441 | goto sendit; | ||
442 | |||
443 | err_bad_auth: | ||
444 | dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); | ||
445 | serv->sv_stats->rpcbadauth++; | ||
446 | /* Restore write pointer to location of accept status: */ | ||
447 | xdr_ressize_check(rqstp, accept_statp); | ||
448 | svc_putu32(resv, xdr_one); /* REJECT */ | ||
449 | svc_putu32(resv, xdr_one); /* AUTH_ERROR */ | ||
450 | svc_putu32(resv, auth_stat); /* status */ | ||
451 | goto sendit; | ||
452 | |||
453 | err_bad_prog: | ||
454 | #ifdef RPC_PARANOIA | ||
455 | if (prog != 100227 || progp->pg_prog != 100003) | ||
456 | printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog); | ||
457 | /* else it is just a Solaris client seeing if ACLs are supported */ | ||
458 | #endif | ||
459 | serv->sv_stats->rpcbadfmt++; | ||
460 | svc_putu32(resv, rpc_prog_unavail); | ||
461 | goto sendit; | ||
462 | |||
463 | err_bad_vers: | ||
464 | #ifdef RPC_PARANOIA | ||
465 | printk("svc: unknown version (%d)\n", vers); | ||
466 | #endif | ||
467 | serv->sv_stats->rpcbadfmt++; | ||
468 | svc_putu32(resv, rpc_prog_mismatch); | ||
469 | svc_putu32(resv, htonl(progp->pg_lovers)); | ||
470 | svc_putu32(resv, htonl(progp->pg_hivers)); | ||
471 | goto sendit; | ||
472 | |||
473 | err_bad_proc: | ||
474 | #ifdef RPC_PARANOIA | ||
475 | printk("svc: unknown procedure (%d)\n", proc); | ||
476 | #endif | ||
477 | serv->sv_stats->rpcbadfmt++; | ||
478 | svc_putu32(resv, rpc_proc_unavail); | ||
479 | goto sendit; | ||
480 | |||
481 | err_garbage: | ||
482 | #ifdef RPC_PARANOIA | ||
483 | printk("svc: failed to decode args\n"); | ||
484 | #endif | ||
485 | rpc_stat = rpc_garbage_args; | ||
486 | err_bad: | ||
487 | serv->sv_stats->rpcbadfmt++; | ||
488 | svc_putu32(resv, rpc_stat); | ||
489 | goto sendit; | ||
490 | } | ||
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c new file mode 100644 index 000000000000..bde8147ef2db --- /dev/null +++ b/net/sunrpc/svcauth.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/svcauth.c | ||
3 | * | ||
4 | * The generic interface for RPC authentication on the server side. | ||
5 | * | ||
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | ||
7 | * | ||
8 | * CHANGES | ||
9 | * 19-Apr-2000 Chris Evans - Security fix | ||
10 | */ | ||
11 | |||
12 | #include <linux/types.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/sunrpc/types.h> | ||
16 | #include <linux/sunrpc/xdr.h> | ||
17 | #include <linux/sunrpc/svcsock.h> | ||
18 | #include <linux/sunrpc/svcauth.h> | ||
19 | #include <linux/err.h> | ||
20 | #include <linux/hash.h> | ||
21 | |||
22 | #define RPCDBG_FACILITY RPCDBG_AUTH | ||
23 | |||
24 | |||
25 | /* | ||
26 | * Table of authenticators | ||
27 | */ | ||
28 | extern struct auth_ops svcauth_null; | ||
29 | extern struct auth_ops svcauth_unix; | ||
30 | |||
31 | static DEFINE_SPINLOCK(authtab_lock); | ||
32 | static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = { | ||
33 | [0] = &svcauth_null, | ||
34 | [1] = &svcauth_unix, | ||
35 | }; | ||
36 | |||
37 | int | ||
38 | svc_authenticate(struct svc_rqst *rqstp, u32 *authp) | ||
39 | { | ||
40 | rpc_authflavor_t flavor; | ||
41 | struct auth_ops *aops; | ||
42 | |||
43 | *authp = rpc_auth_ok; | ||
44 | |||
45 | flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0])); | ||
46 | |||
47 | dprintk("svc: svc_authenticate (%d)\n", flavor); | ||
48 | |||
49 | spin_lock(&authtab_lock); | ||
50 | if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) | ||
51 | || !try_module_get(aops->owner)) { | ||
52 | spin_unlock(&authtab_lock); | ||
53 | *authp = rpc_autherr_badcred; | ||
54 | return SVC_DENIED; | ||
55 | } | ||
56 | spin_unlock(&authtab_lock); | ||
57 | |||
58 | rqstp->rq_authop = aops; | ||
59 | return aops->accept(rqstp, authp); | ||
60 | } | ||
61 | |||
62 | int svc_set_client(struct svc_rqst *rqstp) | ||
63 | { | ||
64 | return rqstp->rq_authop->set_client(rqstp); | ||
65 | } | ||
66 | |||
67 | /* A request, which was authenticated, has now executed. | ||
68 | * Time to finalise the the credentials and verifier | ||
69 | * and release and resources | ||
70 | */ | ||
71 | int svc_authorise(struct svc_rqst *rqstp) | ||
72 | { | ||
73 | struct auth_ops *aops = rqstp->rq_authop; | ||
74 | int rv = 0; | ||
75 | |||
76 | rqstp->rq_authop = NULL; | ||
77 | |||
78 | if (aops) { | ||
79 | rv = aops->release(rqstp); | ||
80 | module_put(aops->owner); | ||
81 | } | ||
82 | return rv; | ||
83 | } | ||
84 | |||
85 | int | ||
86 | svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops) | ||
87 | { | ||
88 | int rv = -EINVAL; | ||
89 | spin_lock(&authtab_lock); | ||
90 | if (flavor < RPC_AUTH_MAXFLAVOR && authtab[flavor] == NULL) { | ||
91 | authtab[flavor] = aops; | ||
92 | rv = 0; | ||
93 | } | ||
94 | spin_unlock(&authtab_lock); | ||
95 | return rv; | ||
96 | } | ||
97 | |||
98 | void | ||
99 | svc_auth_unregister(rpc_authflavor_t flavor) | ||
100 | { | ||
101 | spin_lock(&authtab_lock); | ||
102 | if (flavor < RPC_AUTH_MAXFLAVOR) | ||
103 | authtab[flavor] = NULL; | ||
104 | spin_unlock(&authtab_lock); | ||
105 | } | ||
106 | EXPORT_SYMBOL(svc_auth_unregister); | ||
107 | |||
108 | /************************************************** | ||
109 | * cache for domain name to auth_domain | ||
110 | * Entries are only added by flavours which will normally | ||
111 | * have a structure that 'inherits' from auth_domain. | ||
112 | * e.g. when an IP -> domainname is given to auth_unix, | ||
113 | * and the domain name doesn't exist, it will create a | ||
114 | * auth_unix_domain and add it to this hash table. | ||
115 | * If it finds the name does exist, but isn't AUTH_UNIX, | ||
116 | * it will complain. | ||
117 | */ | ||
118 | |||
119 | /* | ||
120 | * Auth auth_domain cache is somewhat different to other caches, | ||
121 | * largely because the entries are possibly of different types: | ||
122 | * each auth flavour has its own type. | ||
123 | * One consequence of this is that DefineCacheLookup cannot | ||
124 | * allocate a new structure as it cannot know the size. | ||
125 | * Notice that the "INIT" code fragment is quite different | ||
126 | * from other caches. When auth_domain_lookup might be | ||
127 | * creating a new domain, the new domain is passed in | ||
128 | * complete and it is used as-is rather than being copied into | ||
129 | * another structure. | ||
130 | */ | ||
/* The auth_domain hash table has 2^DN_HASHBITS buckets. */
#define	DN_HASHBITS	6
#define	DN_HASHMAX	(1<<DN_HASHBITS)
#define	DN_HASHMASK	(DN_HASHMAX-1)

/* Bucket heads used by auth_domain_cache. */
static struct cache_head	*auth_domain_table[DN_HASHMAX];
136 | |||
137 | static void auth_domain_drop(struct cache_head *item, struct cache_detail *cd) | ||
138 | { | ||
139 | struct auth_domain *dom = container_of(item, struct auth_domain, h); | ||
140 | if (cache_put(item,cd)) | ||
141 | authtab[dom->flavour]->domain_release(dom); | ||
142 | } | ||
143 | |||
144 | |||
145 | struct cache_detail auth_domain_cache = { | ||
146 | .hash_size = DN_HASHMAX, | ||
147 | .hash_table = auth_domain_table, | ||
148 | .name = "auth.domain", | ||
149 | .cache_put = auth_domain_drop, | ||
150 | }; | ||
151 | |||
152 | void auth_domain_put(struct auth_domain *dom) | ||
153 | { | ||
154 | auth_domain_drop(&dom->h, &auth_domain_cache); | ||
155 | } | ||
156 | |||
157 | static inline int auth_domain_hash(struct auth_domain *item) | ||
158 | { | ||
159 | return hash_str(item->name, DN_HASHBITS); | ||
160 | } | ||
161 | static inline int auth_domain_match(struct auth_domain *tmp, struct auth_domain *item) | ||
162 | { | ||
163 | return strcmp(tmp->name, item->name) == 0; | ||
164 | } | ||
165 | |||
166 | struct auth_domain * | ||
167 | auth_domain_lookup(struct auth_domain *item, int set) | ||
168 | { | ||
169 | struct auth_domain *tmp = NULL; | ||
170 | struct cache_head **hp, **head; | ||
171 | head = &auth_domain_cache.hash_table[auth_domain_hash(item)]; | ||
172 | |||
173 | if (set) | ||
174 | write_lock(&auth_domain_cache.hash_lock); | ||
175 | else | ||
176 | read_lock(&auth_domain_cache.hash_lock); | ||
177 | for (hp=head; *hp != NULL; hp = &tmp->h.next) { | ||
178 | tmp = container_of(*hp, struct auth_domain, h); | ||
179 | if (!auth_domain_match(tmp, item)) | ||
180 | continue; | ||
181 | if (!set) { | ||
182 | cache_get(&tmp->h); | ||
183 | goto out_noset; | ||
184 | } | ||
185 | *hp = tmp->h.next; | ||
186 | tmp->h.next = NULL; | ||
187 | auth_domain_drop(&tmp->h, &auth_domain_cache); | ||
188 | goto out_set; | ||
189 | } | ||
190 | /* Didn't find anything */ | ||
191 | if (!set) | ||
192 | goto out_nada; | ||
193 | auth_domain_cache.entries++; | ||
194 | out_set: | ||
195 | item->h.next = *head; | ||
196 | *head = &item->h; | ||
197 | cache_get(&item->h); | ||
198 | write_unlock(&auth_domain_cache.hash_lock); | ||
199 | cache_fresh(&auth_domain_cache, &item->h, item->h.expiry_time); | ||
200 | cache_get(&item->h); | ||
201 | return item; | ||
202 | out_nada: | ||
203 | tmp = NULL; | ||
204 | out_noset: | ||
205 | read_unlock(&auth_domain_cache.hash_lock); | ||
206 | return tmp; | ||
207 | } | ||
208 | |||
209 | struct auth_domain *auth_domain_find(char *name) | ||
210 | { | ||
211 | struct auth_domain *rv, ad; | ||
212 | |||
213 | ad.name = name; | ||
214 | rv = auth_domain_lookup(&ad, 0); | ||
215 | return rv; | ||
216 | } | ||
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c new file mode 100644 index 000000000000..2b99b4028d31 --- /dev/null +++ b/net/sunrpc/svcauth_unix.c | |||
@@ -0,0 +1,502 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/sunrpc/types.h> | ||
5 | #include <linux/sunrpc/xdr.h> | ||
6 | #include <linux/sunrpc/svcsock.h> | ||
7 | #include <linux/sunrpc/svcauth.h> | ||
8 | #include <linux/err.h> | ||
9 | #include <linux/seq_file.h> | ||
10 | #include <linux/hash.h> | ||
11 | |||
12 | #define RPCDBG_FACILITY RPCDBG_AUTH | ||
13 | |||
14 | |||
15 | /* | ||
16 | * AUTHUNIX and AUTHNULL credentials are both handled here. | ||
17 | * AUTHNULL is treated just like AUTHUNIX except that the uid/gid | ||
18 | * are always nobody (-2). i.e. we do the same IP address checks for | ||
19 | * AUTHNULL as for AUTHUNIX, and that is done here. | ||
20 | */ | ||
21 | |||
22 | |||
23 | static char *strdup(char *s) | ||
24 | { | ||
25 | char *rv = kmalloc(strlen(s)+1, GFP_KERNEL); | ||
26 | if (rv) | ||
27 | strcpy(rv, s); | ||
28 | return rv; | ||
29 | } | ||
30 | |||
31 | struct unix_domain { | ||
32 | struct auth_domain h; | ||
33 | int addr_changes; | ||
34 | /* other stuff later */ | ||
35 | }; | ||
36 | |||
37 | struct auth_domain *unix_domain_find(char *name) | ||
38 | { | ||
39 | struct auth_domain *rv, ud; | ||
40 | struct unix_domain *new; | ||
41 | |||
42 | ud.name = name; | ||
43 | |||
44 | rv = auth_domain_lookup(&ud, 0); | ||
45 | |||
46 | foundit: | ||
47 | if (rv && rv->flavour != RPC_AUTH_UNIX) { | ||
48 | auth_domain_put(rv); | ||
49 | return NULL; | ||
50 | } | ||
51 | if (rv) | ||
52 | return rv; | ||
53 | |||
54 | new = kmalloc(sizeof(*new), GFP_KERNEL); | ||
55 | if (new == NULL) | ||
56 | return NULL; | ||
57 | cache_init(&new->h.h); | ||
58 | new->h.name = strdup(name); | ||
59 | new->h.flavour = RPC_AUTH_UNIX; | ||
60 | new->addr_changes = 0; | ||
61 | new->h.h.expiry_time = NEVER; | ||
62 | |||
63 | rv = auth_domain_lookup(&new->h, 2); | ||
64 | if (rv == &new->h) { | ||
65 | if (atomic_dec_and_test(&new->h.h.refcnt)) BUG(); | ||
66 | } else { | ||
67 | auth_domain_put(&new->h); | ||
68 | goto foundit; | ||
69 | } | ||
70 | |||
71 | return rv; | ||
72 | } | ||
73 | |||
74 | static void svcauth_unix_domain_release(struct auth_domain *dom) | ||
75 | { | ||
76 | struct unix_domain *ud = container_of(dom, struct unix_domain, h); | ||
77 | |||
78 | kfree(dom->name); | ||
79 | kfree(ud); | ||
80 | } | ||
81 | |||
82 | |||
83 | /************************************************** | ||
84 | * cache for IP address to unix_domain | ||
85 | * as needed by AUTH_UNIX | ||
86 | */ | ||
87 | #define IP_HASHBITS 8 | ||
88 | #define IP_HASHMAX (1<<IP_HASHBITS) | ||
89 | #define IP_HASHMASK (IP_HASHMAX-1) | ||
90 | |||
91 | struct ip_map { | ||
92 | struct cache_head h; | ||
93 | char m_class[8]; /* e.g. "nfsd" */ | ||
94 | struct in_addr m_addr; | ||
95 | struct unix_domain *m_client; | ||
96 | int m_add_change; | ||
97 | }; | ||
98 | static struct cache_head *ip_table[IP_HASHMAX]; | ||
99 | |||
100 | static void ip_map_put(struct cache_head *item, struct cache_detail *cd) | ||
101 | { | ||
102 | struct ip_map *im = container_of(item, struct ip_map,h); | ||
103 | if (cache_put(item, cd)) { | ||
104 | if (test_bit(CACHE_VALID, &item->flags) && | ||
105 | !test_bit(CACHE_NEGATIVE, &item->flags)) | ||
106 | auth_domain_put(&im->m_client->h); | ||
107 | kfree(im); | ||
108 | } | ||
109 | } | ||
110 | |||
111 | static inline int ip_map_hash(struct ip_map *item) | ||
112 | { | ||
113 | return hash_str(item->m_class, IP_HASHBITS) ^ | ||
114 | hash_long((unsigned long)item->m_addr.s_addr, IP_HASHBITS); | ||
115 | } | ||
116 | static inline int ip_map_match(struct ip_map *item, struct ip_map *tmp) | ||
117 | { | ||
118 | return strcmp(tmp->m_class, item->m_class) == 0 | ||
119 | && tmp->m_addr.s_addr == item->m_addr.s_addr; | ||
120 | } | ||
121 | static inline void ip_map_init(struct ip_map *new, struct ip_map *item) | ||
122 | { | ||
123 | strcpy(new->m_class, item->m_class); | ||
124 | new->m_addr.s_addr = item->m_addr.s_addr; | ||
125 | } | ||
126 | static inline void ip_map_update(struct ip_map *new, struct ip_map *item) | ||
127 | { | ||
128 | cache_get(&item->m_client->h.h); | ||
129 | new->m_client = item->m_client; | ||
130 | new->m_add_change = item->m_add_change; | ||
131 | } | ||
132 | |||
133 | static void ip_map_request(struct cache_detail *cd, | ||
134 | struct cache_head *h, | ||
135 | char **bpp, int *blen) | ||
136 | { | ||
137 | char text_addr[20]; | ||
138 | struct ip_map *im = container_of(h, struct ip_map, h); | ||
139 | __u32 addr = im->m_addr.s_addr; | ||
140 | |||
141 | snprintf(text_addr, 20, "%u.%u.%u.%u", | ||
142 | ntohl(addr) >> 24 & 0xff, | ||
143 | ntohl(addr) >> 16 & 0xff, | ||
144 | ntohl(addr) >> 8 & 0xff, | ||
145 | ntohl(addr) >> 0 & 0xff); | ||
146 | |||
147 | qword_add(bpp, blen, im->m_class); | ||
148 | qword_add(bpp, blen, text_addr); | ||
149 | (*bpp)[-1] = '\n'; | ||
150 | } | ||
151 | |||
152 | static struct ip_map *ip_map_lookup(struct ip_map *, int); | ||
153 | |||
154 | static int ip_map_parse(struct cache_detail *cd, | ||
155 | char *mesg, int mlen) | ||
156 | { | ||
157 | /* class ipaddress [domainname] */ | ||
158 | /* should be safe just to use the start of the input buffer | ||
159 | * for scratch: */ | ||
160 | char *buf = mesg; | ||
161 | int len; | ||
162 | int b1,b2,b3,b4; | ||
163 | char c; | ||
164 | struct ip_map ipm, *ipmp; | ||
165 | struct auth_domain *dom; | ||
166 | time_t expiry; | ||
167 | |||
168 | if (mesg[mlen-1] != '\n') | ||
169 | return -EINVAL; | ||
170 | mesg[mlen-1] = 0; | ||
171 | |||
172 | /* class */ | ||
173 | len = qword_get(&mesg, ipm.m_class, sizeof(ipm.m_class)); | ||
174 | if (len <= 0) return -EINVAL; | ||
175 | |||
176 | /* ip address */ | ||
177 | len = qword_get(&mesg, buf, mlen); | ||
178 | if (len <= 0) return -EINVAL; | ||
179 | |||
180 | if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) | ||
181 | return -EINVAL; | ||
182 | |||
183 | expiry = get_expiry(&mesg); | ||
184 | if (expiry ==0) | ||
185 | return -EINVAL; | ||
186 | |||
187 | /* domainname, or empty for NEGATIVE */ | ||
188 | len = qword_get(&mesg, buf, mlen); | ||
189 | if (len < 0) return -EINVAL; | ||
190 | |||
191 | if (len) { | ||
192 | dom = unix_domain_find(buf); | ||
193 | if (dom == NULL) | ||
194 | return -ENOENT; | ||
195 | } else | ||
196 | dom = NULL; | ||
197 | |||
198 | ipm.m_addr.s_addr = | ||
199 | htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); | ||
200 | ipm.h.flags = 0; | ||
201 | if (dom) { | ||
202 | ipm.m_client = container_of(dom, struct unix_domain, h); | ||
203 | ipm.m_add_change = ipm.m_client->addr_changes; | ||
204 | } else | ||
205 | set_bit(CACHE_NEGATIVE, &ipm.h.flags); | ||
206 | ipm.h.expiry_time = expiry; | ||
207 | |||
208 | ipmp = ip_map_lookup(&ipm, 1); | ||
209 | if (ipmp) | ||
210 | ip_map_put(&ipmp->h, &ip_map_cache); | ||
211 | if (dom) | ||
212 | auth_domain_put(dom); | ||
213 | if (!ipmp) | ||
214 | return -ENOMEM; | ||
215 | cache_flush(); | ||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static int ip_map_show(struct seq_file *m, | ||
220 | struct cache_detail *cd, | ||
221 | struct cache_head *h) | ||
222 | { | ||
223 | struct ip_map *im; | ||
224 | struct in_addr addr; | ||
225 | char *dom = "-no-domain-"; | ||
226 | |||
227 | if (h == NULL) { | ||
228 | seq_puts(m, "#class IP domain\n"); | ||
229 | return 0; | ||
230 | } | ||
231 | im = container_of(h, struct ip_map, h); | ||
232 | /* class addr domain */ | ||
233 | addr = im->m_addr; | ||
234 | |||
235 | if (test_bit(CACHE_VALID, &h->flags) && | ||
236 | !test_bit(CACHE_NEGATIVE, &h->flags)) | ||
237 | dom = im->m_client->h.name; | ||
238 | |||
239 | seq_printf(m, "%s %d.%d.%d.%d %s\n", | ||
240 | im->m_class, | ||
241 | htonl(addr.s_addr) >> 24 & 0xff, | ||
242 | htonl(addr.s_addr) >> 16 & 0xff, | ||
243 | htonl(addr.s_addr) >> 8 & 0xff, | ||
244 | htonl(addr.s_addr) >> 0 & 0xff, | ||
245 | dom | ||
246 | ); | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | |||
251 | struct cache_detail ip_map_cache = { | ||
252 | .hash_size = IP_HASHMAX, | ||
253 | .hash_table = ip_table, | ||
254 | .name = "auth.unix.ip", | ||
255 | .cache_put = ip_map_put, | ||
256 | .cache_request = ip_map_request, | ||
257 | .cache_parse = ip_map_parse, | ||
258 | .cache_show = ip_map_show, | ||
259 | }; | ||
260 | |||
261 | static DefineSimpleCacheLookup(ip_map, 0) | ||
262 | |||
263 | |||
264 | int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) | ||
265 | { | ||
266 | struct unix_domain *udom; | ||
267 | struct ip_map ip, *ipmp; | ||
268 | |||
269 | if (dom->flavour != RPC_AUTH_UNIX) | ||
270 | return -EINVAL; | ||
271 | udom = container_of(dom, struct unix_domain, h); | ||
272 | strcpy(ip.m_class, "nfsd"); | ||
273 | ip.m_addr = addr; | ||
274 | ip.m_client = udom; | ||
275 | ip.m_add_change = udom->addr_changes+1; | ||
276 | ip.h.flags = 0; | ||
277 | ip.h.expiry_time = NEVER; | ||
278 | |||
279 | ipmp = ip_map_lookup(&ip, 1); | ||
280 | |||
281 | if (ipmp) { | ||
282 | ip_map_put(&ipmp->h, &ip_map_cache); | ||
283 | return 0; | ||
284 | } else | ||
285 | return -ENOMEM; | ||
286 | } | ||
287 | |||
288 | int auth_unix_forget_old(struct auth_domain *dom) | ||
289 | { | ||
290 | struct unix_domain *udom; | ||
291 | |||
292 | if (dom->flavour != RPC_AUTH_UNIX) | ||
293 | return -EINVAL; | ||
294 | udom = container_of(dom, struct unix_domain, h); | ||
295 | udom->addr_changes++; | ||
296 | return 0; | ||
297 | } | ||
298 | |||
299 | struct auth_domain *auth_unix_lookup(struct in_addr addr) | ||
300 | { | ||
301 | struct ip_map key, *ipm; | ||
302 | struct auth_domain *rv; | ||
303 | |||
304 | strcpy(key.m_class, "nfsd"); | ||
305 | key.m_addr = addr; | ||
306 | |||
307 | ipm = ip_map_lookup(&key, 0); | ||
308 | |||
309 | if (!ipm) | ||
310 | return NULL; | ||
311 | if (cache_check(&ip_map_cache, &ipm->h, NULL)) | ||
312 | return NULL; | ||
313 | |||
314 | if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { | ||
315 | if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0) | ||
316 | auth_domain_put(&ipm->m_client->h); | ||
317 | rv = NULL; | ||
318 | } else { | ||
319 | rv = &ipm->m_client->h; | ||
320 | cache_get(&rv->h); | ||
321 | } | ||
322 | ip_map_put(&ipm->h, &ip_map_cache); | ||
323 | return rv; | ||
324 | } | ||
325 | |||
326 | void svcauth_unix_purge(void) | ||
327 | { | ||
328 | cache_purge(&ip_map_cache); | ||
329 | cache_purge(&auth_domain_cache); | ||
330 | } | ||
331 | |||
332 | static int | ||
333 | svcauth_unix_set_client(struct svc_rqst *rqstp) | ||
334 | { | ||
335 | struct ip_map key, *ipm; | ||
336 | |||
337 | rqstp->rq_client = NULL; | ||
338 | if (rqstp->rq_proc == 0) | ||
339 | return SVC_OK; | ||
340 | |||
341 | strcpy(key.m_class, rqstp->rq_server->sv_program->pg_class); | ||
342 | key.m_addr = rqstp->rq_addr.sin_addr; | ||
343 | |||
344 | ipm = ip_map_lookup(&key, 0); | ||
345 | |||
346 | if (ipm == NULL) | ||
347 | return SVC_DENIED; | ||
348 | |||
349 | switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { | ||
350 | default: | ||
351 | BUG(); | ||
352 | case -EAGAIN: | ||
353 | return SVC_DROP; | ||
354 | case -ENOENT: | ||
355 | return SVC_DENIED; | ||
356 | case 0: | ||
357 | rqstp->rq_client = &ipm->m_client->h; | ||
358 | cache_get(&rqstp->rq_client->h); | ||
359 | ip_map_put(&ipm->h, &ip_map_cache); | ||
360 | break; | ||
361 | } | ||
362 | return SVC_OK; | ||
363 | } | ||
364 | |||
365 | static int | ||
366 | svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp) | ||
367 | { | ||
368 | struct kvec *argv = &rqstp->rq_arg.head[0]; | ||
369 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
370 | struct svc_cred *cred = &rqstp->rq_cred; | ||
371 | |||
372 | cred->cr_group_info = NULL; | ||
373 | rqstp->rq_client = NULL; | ||
374 | |||
375 | if (argv->iov_len < 3*4) | ||
376 | return SVC_GARBAGE; | ||
377 | |||
378 | if (svc_getu32(argv) != 0) { | ||
379 | dprintk("svc: bad null cred\n"); | ||
380 | *authp = rpc_autherr_badcred; | ||
381 | return SVC_DENIED; | ||
382 | } | ||
383 | if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { | ||
384 | dprintk("svc: bad null verf\n"); | ||
385 | *authp = rpc_autherr_badverf; | ||
386 | return SVC_DENIED; | ||
387 | } | ||
388 | |||
389 | /* Signal that mapping to nobody uid/gid is required */ | ||
390 | cred->cr_uid = (uid_t) -1; | ||
391 | cred->cr_gid = (gid_t) -1; | ||
392 | cred->cr_group_info = groups_alloc(0); | ||
393 | if (cred->cr_group_info == NULL) | ||
394 | return SVC_DROP; /* kmalloc failure - client must retry */ | ||
395 | |||
396 | /* Put NULL verifier */ | ||
397 | svc_putu32(resv, RPC_AUTH_NULL); | ||
398 | svc_putu32(resv, 0); | ||
399 | |||
400 | return SVC_OK; | ||
401 | } | ||
402 | |||
403 | static int | ||
404 | svcauth_null_release(struct svc_rqst *rqstp) | ||
405 | { | ||
406 | if (rqstp->rq_client) | ||
407 | auth_domain_put(rqstp->rq_client); | ||
408 | rqstp->rq_client = NULL; | ||
409 | if (rqstp->rq_cred.cr_group_info) | ||
410 | put_group_info(rqstp->rq_cred.cr_group_info); | ||
411 | rqstp->rq_cred.cr_group_info = NULL; | ||
412 | |||
413 | return 0; /* don't drop */ | ||
414 | } | ||
415 | |||
416 | |||
417 | struct auth_ops svcauth_null = { | ||
418 | .name = "null", | ||
419 | .owner = THIS_MODULE, | ||
420 | .flavour = RPC_AUTH_NULL, | ||
421 | .accept = svcauth_null_accept, | ||
422 | .release = svcauth_null_release, | ||
423 | .set_client = svcauth_unix_set_client, | ||
424 | }; | ||
425 | |||
426 | |||
427 | static int | ||
428 | svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp) | ||
429 | { | ||
430 | struct kvec *argv = &rqstp->rq_arg.head[0]; | ||
431 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
432 | struct svc_cred *cred = &rqstp->rq_cred; | ||
433 | u32 slen, i; | ||
434 | int len = argv->iov_len; | ||
435 | |||
436 | cred->cr_group_info = NULL; | ||
437 | rqstp->rq_client = NULL; | ||
438 | |||
439 | if ((len -= 3*4) < 0) | ||
440 | return SVC_GARBAGE; | ||
441 | |||
442 | svc_getu32(argv); /* length */ | ||
443 | svc_getu32(argv); /* time stamp */ | ||
444 | slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */ | ||
445 | if (slen > 64 || (len -= (slen + 3)*4) < 0) | ||
446 | goto badcred; | ||
447 | argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */ | ||
448 | argv->iov_len -= slen*4; | ||
449 | |||
450 | cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */ | ||
451 | cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */ | ||
452 | slen = ntohl(svc_getu32(argv)); /* gids length */ | ||
453 | if (slen > 16 || (len -= (slen + 2)*4) < 0) | ||
454 | goto badcred; | ||
455 | cred->cr_group_info = groups_alloc(slen); | ||
456 | if (cred->cr_group_info == NULL) | ||
457 | return SVC_DROP; | ||
458 | for (i = 0; i < slen; i++) | ||
459 | GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv)); | ||
460 | |||
461 | if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { | ||
462 | *authp = rpc_autherr_badverf; | ||
463 | return SVC_DENIED; | ||
464 | } | ||
465 | |||
466 | /* Put NULL verifier */ | ||
467 | svc_putu32(resv, RPC_AUTH_NULL); | ||
468 | svc_putu32(resv, 0); | ||
469 | |||
470 | return SVC_OK; | ||
471 | |||
472 | badcred: | ||
473 | *authp = rpc_autherr_badcred; | ||
474 | return SVC_DENIED; | ||
475 | } | ||
476 | |||
477 | static int | ||
478 | svcauth_unix_release(struct svc_rqst *rqstp) | ||
479 | { | ||
480 | /* Verifier (such as it is) is already in place. | ||
481 | */ | ||
482 | if (rqstp->rq_client) | ||
483 | auth_domain_put(rqstp->rq_client); | ||
484 | rqstp->rq_client = NULL; | ||
485 | if (rqstp->rq_cred.cr_group_info) | ||
486 | put_group_info(rqstp->rq_cred.cr_group_info); | ||
487 | rqstp->rq_cred.cr_group_info = NULL; | ||
488 | |||
489 | return 0; | ||
490 | } | ||
491 | |||
492 | |||
493 | struct auth_ops svcauth_unix = { | ||
494 | .name = "unix", | ||
495 | .owner = THIS_MODULE, | ||
496 | .flavour = RPC_AUTH_UNIX, | ||
497 | .accept = svcauth_unix_accept, | ||
498 | .release = svcauth_unix_release, | ||
499 | .domain_release = svcauth_unix_domain_release, | ||
500 | .set_client = svcauth_unix_set_client, | ||
501 | }; | ||
502 | |||
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c new file mode 100644 index 000000000000..05907035bc96 --- /dev/null +++ b/net/sunrpc/svcsock.c | |||
@@ -0,0 +1,1585 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/svcsock.c | ||
3 | * | ||
4 | * These are the RPC server socket internals. | ||
5 | * | ||
6 | * The server scheduling algorithm does not always distribute the load | ||
7 | * evenly when servicing a single client. May need to modify the | ||
8 | * svc_sock_enqueue procedure... | ||
9 | * | ||
10 | * TCP support is largely untested and may be a little slow. The problem | ||
11 | * is that we currently do two separate recvfrom's, one for the 4-byte | ||
12 | * record length, and the second for the actual record. This could possibly | ||
13 | * be improved by always reading a minimum size of around 100 bytes and | ||
14 | * tucking any superfluous bytes away in a temporary store. Still, that | ||
15 | * leaves write requests out in the rain. An alternative may be to peek at | ||
16 | * the first skb in the queue, and if it matches the next TCP sequence | ||
17 | * number, to extract the record marker. Yuck. | ||
18 | * | ||
19 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | ||
20 | */ | ||
21 | |||
22 | #include <linux/sched.h> | ||
23 | #include <linux/errno.h> | ||
24 | #include <linux/fcntl.h> | ||
25 | #include <linux/net.h> | ||
26 | #include <linux/in.h> | ||
27 | #include <linux/inet.h> | ||
28 | #include <linux/udp.h> | ||
29 | #include <linux/tcp.h> | ||
30 | #include <linux/unistd.h> | ||
31 | #include <linux/slab.h> | ||
32 | #include <linux/netdevice.h> | ||
33 | #include <linux/skbuff.h> | ||
34 | #include <net/sock.h> | ||
35 | #include <net/checksum.h> | ||
36 | #include <net/ip.h> | ||
37 | #include <net/tcp.h> | ||
38 | #include <asm/uaccess.h> | ||
39 | #include <asm/ioctls.h> | ||
40 | |||
41 | #include <linux/sunrpc/types.h> | ||
42 | #include <linux/sunrpc/xdr.h> | ||
43 | #include <linux/sunrpc/svcsock.h> | ||
44 | #include <linux/sunrpc/stats.h> | ||
45 | |||
46 | /* SMP locking strategy: | ||
47 | * | ||
48 | * svc_serv->sv_lock protects most stuff for that service. | ||
49 | * | ||
50 | * Some flags can be set to certain values at any time | ||
51 | * providing that certain rules are followed: | ||
52 | * | ||
53 | * SK_BUSY can be set to 0 at any time. | ||
54 | * svc_sock_enqueue must be called afterwards | ||
55 | * SK_CONN, SK_DATA, can be set or cleared at any time. | ||
56 | * after a set, svc_sock_enqueue must be called. | ||
57 | * after a clear, the socket must be read/accepted | ||
58 | * if this succeeds, it must be set again. | ||
59 | * SK_CLOSE can set at any time. It is never cleared. | ||
60 | * | ||
61 | */ | ||
62 | |||
63 | #define RPCDBG_FACILITY RPCDBG_SVCSOCK | ||
64 | |||
65 | |||
66 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, | ||
67 | int *errp, int pmap_reg); | ||
68 | static void svc_udp_data_ready(struct sock *, int); | ||
69 | static int svc_udp_recvfrom(struct svc_rqst *); | ||
70 | static int svc_udp_sendto(struct svc_rqst *); | ||
71 | |||
72 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); | ||
73 | static int svc_deferred_recv(struct svc_rqst *rqstp); | ||
74 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | ||
75 | |||
76 | /* | ||
77 | * Queue up an idle server thread. Must have serv->sv_lock held. | ||
78 | * Note: this is really a stack rather than a queue, so that we only | ||
79 | * use as many different threads as we need, and the rest don't polute | ||
80 | * the cache. | ||
81 | */ | ||
82 | static inline void | ||
83 | svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp) | ||
84 | { | ||
85 | list_add(&rqstp->rq_list, &serv->sv_threads); | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Dequeue an nfsd thread. Must have serv->sv_lock held. | ||
90 | */ | ||
91 | static inline void | ||
92 | svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp) | ||
93 | { | ||
94 | list_del(&rqstp->rq_list); | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Release an skbuff after use | ||
99 | */ | ||
100 | static inline void | ||
101 | svc_release_skb(struct svc_rqst *rqstp) | ||
102 | { | ||
103 | struct sk_buff *skb = rqstp->rq_skbuff; | ||
104 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
105 | |||
106 | if (skb) { | ||
107 | rqstp->rq_skbuff = NULL; | ||
108 | |||
109 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); | ||
110 | skb_free_datagram(rqstp->rq_sock->sk_sk, skb); | ||
111 | } | ||
112 | if (dr) { | ||
113 | rqstp->rq_deferred = NULL; | ||
114 | kfree(dr); | ||
115 | } | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * Any space to write? | ||
120 | */ | ||
121 | static inline unsigned long | ||
122 | svc_sock_wspace(struct svc_sock *svsk) | ||
123 | { | ||
124 | int wspace; | ||
125 | |||
126 | if (svsk->sk_sock->type == SOCK_STREAM) | ||
127 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
128 | else | ||
129 | wspace = sock_wspace(svsk->sk_sk); | ||
130 | |||
131 | return wspace; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * Queue up a socket with data pending. If there are idle nfsd | ||
136 | * processes, wake 'em up. | ||
137 | * | ||
138 | */ | ||
139 | static void | ||
140 | svc_sock_enqueue(struct svc_sock *svsk) | ||
141 | { | ||
142 | struct svc_serv *serv = svsk->sk_server; | ||
143 | struct svc_rqst *rqstp; | ||
144 | |||
145 | if (!(svsk->sk_flags & | ||
146 | ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) )) | ||
147 | return; | ||
148 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
149 | return; | ||
150 | |||
151 | spin_lock_bh(&serv->sv_lock); | ||
152 | |||
153 | if (!list_empty(&serv->sv_threads) && | ||
154 | !list_empty(&serv->sv_sockets)) | ||
155 | printk(KERN_ERR | ||
156 | "svc_sock_enqueue: threads and sockets both waiting??\n"); | ||
157 | |||
158 | if (test_bit(SK_DEAD, &svsk->sk_flags)) { | ||
159 | /* Don't enqueue dead sockets */ | ||
160 | dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk); | ||
161 | goto out_unlock; | ||
162 | } | ||
163 | |||
164 | if (test_bit(SK_BUSY, &svsk->sk_flags)) { | ||
165 | /* Don't enqueue socket while daemon is receiving */ | ||
166 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); | ||
167 | goto out_unlock; | ||
168 | } | ||
169 | |||
170 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
171 | if (((svsk->sk_reserved + serv->sv_bufsz)*2 | ||
172 | > svc_sock_wspace(svsk)) | ||
173 | && !test_bit(SK_CLOSE, &svsk->sk_flags) | ||
174 | && !test_bit(SK_CONN, &svsk->sk_flags)) { | ||
175 | /* Don't enqueue while not enough space for reply */ | ||
176 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", | ||
177 | svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz, | ||
178 | svc_sock_wspace(svsk)); | ||
179 | goto out_unlock; | ||
180 | } | ||
181 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
182 | |||
183 | /* Mark socket as busy. It will remain in this state until the | ||
184 | * server has processed all pending data and put the socket back | ||
185 | * on the idle list. | ||
186 | */ | ||
187 | set_bit(SK_BUSY, &svsk->sk_flags); | ||
188 | |||
189 | if (!list_empty(&serv->sv_threads)) { | ||
190 | rqstp = list_entry(serv->sv_threads.next, | ||
191 | struct svc_rqst, | ||
192 | rq_list); | ||
193 | dprintk("svc: socket %p served by daemon %p\n", | ||
194 | svsk->sk_sk, rqstp); | ||
195 | svc_serv_dequeue(serv, rqstp); | ||
196 | if (rqstp->rq_sock) | ||
197 | printk(KERN_ERR | ||
198 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", | ||
199 | rqstp, rqstp->rq_sock); | ||
200 | rqstp->rq_sock = svsk; | ||
201 | svsk->sk_inuse++; | ||
202 | rqstp->rq_reserved = serv->sv_bufsz; | ||
203 | svsk->sk_reserved += rqstp->rq_reserved; | ||
204 | wake_up(&rqstp->rq_wait); | ||
205 | } else { | ||
206 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); | ||
207 | list_add_tail(&svsk->sk_ready, &serv->sv_sockets); | ||
208 | } | ||
209 | |||
210 | out_unlock: | ||
211 | spin_unlock_bh(&serv->sv_lock); | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * Dequeue the first socket. Must be called with the serv->sv_lock held. | ||
216 | */ | ||
217 | static inline struct svc_sock * | ||
218 | svc_sock_dequeue(struct svc_serv *serv) | ||
219 | { | ||
220 | struct svc_sock *svsk; | ||
221 | |||
222 | if (list_empty(&serv->sv_sockets)) | ||
223 | return NULL; | ||
224 | |||
225 | svsk = list_entry(serv->sv_sockets.next, | ||
226 | struct svc_sock, sk_ready); | ||
227 | list_del_init(&svsk->sk_ready); | ||
228 | |||
229 | dprintk("svc: socket %p dequeued, inuse=%d\n", | ||
230 | svsk->sk_sk, svsk->sk_inuse); | ||
231 | |||
232 | return svsk; | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * Having read something from a socket, check whether it | ||
237 | * needs to be re-enqueued. | ||
238 | * Note: SK_DATA only gets cleared when a read-attempt finds | ||
239 | * no (or insufficient) data. | ||
240 | */ | ||
241 | static inline void | ||
242 | svc_sock_received(struct svc_sock *svsk) | ||
243 | { | ||
244 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
245 | svc_sock_enqueue(svsk); | ||
246 | } | ||
247 | |||
248 | |||
249 | /** | ||
250 | * svc_reserve - change the space reserved for the reply to a request. | ||
251 | * @rqstp: The request in question | ||
252 | * @space: new max space to reserve | ||
253 | * | ||
254 | * Each request reserves some space on the output queue of the socket | ||
255 | * to make sure the reply fits. This function reduces that reserved | ||
256 | * space to be the amount of space used already, plus @space. | ||
257 | * | ||
258 | */ | ||
259 | void svc_reserve(struct svc_rqst *rqstp, int space) | ||
260 | { | ||
261 | space += rqstp->rq_res.head[0].iov_len; | ||
262 | |||
263 | if (space < rqstp->rq_reserved) { | ||
264 | struct svc_sock *svsk = rqstp->rq_sock; | ||
265 | spin_lock_bh(&svsk->sk_server->sv_lock); | ||
266 | svsk->sk_reserved -= (rqstp->rq_reserved - space); | ||
267 | rqstp->rq_reserved = space; | ||
268 | spin_unlock_bh(&svsk->sk_server->sv_lock); | ||
269 | |||
270 | svc_sock_enqueue(svsk); | ||
271 | } | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Release a socket after use. | ||
276 | */ | ||
277 | static inline void | ||
278 | svc_sock_put(struct svc_sock *svsk) | ||
279 | { | ||
280 | struct svc_serv *serv = svsk->sk_server; | ||
281 | |||
282 | spin_lock_bh(&serv->sv_lock); | ||
283 | if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) { | ||
284 | spin_unlock_bh(&serv->sv_lock); | ||
285 | dprintk("svc: releasing dead socket\n"); | ||
286 | sock_release(svsk->sk_sock); | ||
287 | kfree(svsk); | ||
288 | } | ||
289 | else | ||
290 | spin_unlock_bh(&serv->sv_lock); | ||
291 | } | ||
292 | |||
293 | static void | ||
294 | svc_sock_release(struct svc_rqst *rqstp) | ||
295 | { | ||
296 | struct svc_sock *svsk = rqstp->rq_sock; | ||
297 | |||
298 | svc_release_skb(rqstp); | ||
299 | |||
300 | svc_free_allpages(rqstp); | ||
301 | rqstp->rq_res.page_len = 0; | ||
302 | rqstp->rq_res.page_base = 0; | ||
303 | |||
304 | |||
305 | /* Reset response buffer and release | ||
306 | * the reservation. | ||
307 | * But first, check that enough space was reserved | ||
308 | * for the reply, otherwise we have a bug! | ||
309 | */ | ||
310 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | ||
311 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | ||
312 | rqstp->rq_reserved, | ||
313 | rqstp->rq_res.len); | ||
314 | |||
315 | rqstp->rq_res.head[0].iov_len = 0; | ||
316 | svc_reserve(rqstp, 0); | ||
317 | rqstp->rq_sock = NULL; | ||
318 | |||
319 | svc_sock_put(svsk); | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * External function to wake up a server waiting for data | ||
324 | */ | ||
325 | void | ||
326 | svc_wake_up(struct svc_serv *serv) | ||
327 | { | ||
328 | struct svc_rqst *rqstp; | ||
329 | |||
330 | spin_lock_bh(&serv->sv_lock); | ||
331 | if (!list_empty(&serv->sv_threads)) { | ||
332 | rqstp = list_entry(serv->sv_threads.next, | ||
333 | struct svc_rqst, | ||
334 | rq_list); | ||
335 | dprintk("svc: daemon %p woken up.\n", rqstp); | ||
336 | /* | ||
337 | svc_serv_dequeue(serv, rqstp); | ||
338 | rqstp->rq_sock = NULL; | ||
339 | */ | ||
340 | wake_up(&rqstp->rq_wait); | ||
341 | } | ||
342 | spin_unlock_bh(&serv->sv_lock); | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * Generic sendto routine | ||
347 | */ | ||
348 | static int | ||
349 | svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | ||
350 | { | ||
351 | struct svc_sock *svsk = rqstp->rq_sock; | ||
352 | struct socket *sock = svsk->sk_sock; | ||
353 | int slen; | ||
354 | char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))]; | ||
355 | struct cmsghdr *cmh = (struct cmsghdr *)buffer; | ||
356 | struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh); | ||
357 | int len = 0; | ||
358 | int result; | ||
359 | int size; | ||
360 | struct page **ppage = xdr->pages; | ||
361 | size_t base = xdr->page_base; | ||
362 | unsigned int pglen = xdr->page_len; | ||
363 | unsigned int flags = MSG_MORE; | ||
364 | |||
365 | slen = xdr->len; | ||
366 | |||
367 | if (rqstp->rq_prot == IPPROTO_UDP) { | ||
368 | /* set the source and destination */ | ||
369 | struct msghdr msg; | ||
370 | msg.msg_name = &rqstp->rq_addr; | ||
371 | msg.msg_namelen = sizeof(rqstp->rq_addr); | ||
372 | msg.msg_iov = NULL; | ||
373 | msg.msg_iovlen = 0; | ||
374 | msg.msg_flags = MSG_MORE; | ||
375 | |||
376 | msg.msg_control = cmh; | ||
377 | msg.msg_controllen = sizeof(buffer); | ||
378 | cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); | ||
379 | cmh->cmsg_level = SOL_IP; | ||
380 | cmh->cmsg_type = IP_PKTINFO; | ||
381 | pki->ipi_ifindex = 0; | ||
382 | pki->ipi_spec_dst.s_addr = rqstp->rq_daddr; | ||
383 | |||
384 | if (sock_sendmsg(sock, &msg, 0) < 0) | ||
385 | goto out; | ||
386 | } | ||
387 | |||
388 | /* send head */ | ||
389 | if (slen == xdr->head[0].iov_len) | ||
390 | flags = 0; | ||
391 | len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); | ||
392 | if (len != xdr->head[0].iov_len) | ||
393 | goto out; | ||
394 | slen -= xdr->head[0].iov_len; | ||
395 | if (slen == 0) | ||
396 | goto out; | ||
397 | |||
398 | /* send page data */ | ||
399 | size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen; | ||
400 | while (pglen > 0) { | ||
401 | if (slen == size) | ||
402 | flags = 0; | ||
403 | result = sock->ops->sendpage(sock, *ppage, base, size, flags); | ||
404 | if (result > 0) | ||
405 | len += result; | ||
406 | if (result != size) | ||
407 | goto out; | ||
408 | slen -= size; | ||
409 | pglen -= size; | ||
410 | size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen; | ||
411 | base = 0; | ||
412 | ppage++; | ||
413 | } | ||
414 | /* send tail */ | ||
415 | if (xdr->tail[0].iov_len) { | ||
416 | result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], | ||
417 | ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), | ||
418 | xdr->tail[0].iov_len, 0); | ||
419 | |||
420 | if (result > 0) | ||
421 | len += result; | ||
422 | } | ||
423 | out: | ||
424 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %x)\n", | ||
425 | rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len, | ||
426 | rqstp->rq_addr.sin_addr.s_addr); | ||
427 | |||
428 | return len; | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Check input queue length | ||
433 | */ | ||
434 | static int | ||
435 | svc_recv_available(struct svc_sock *svsk) | ||
436 | { | ||
437 | mm_segment_t oldfs; | ||
438 | struct socket *sock = svsk->sk_sock; | ||
439 | int avail, err; | ||
440 | |||
441 | oldfs = get_fs(); set_fs(KERNEL_DS); | ||
442 | err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); | ||
443 | set_fs(oldfs); | ||
444 | |||
445 | return (err >= 0)? avail : err; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * Generic recvfrom routine. | ||
450 | */ | ||
451 | static int | ||
452 | svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) | ||
453 | { | ||
454 | struct msghdr msg; | ||
455 | struct socket *sock; | ||
456 | int len, alen; | ||
457 | |||
458 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); | ||
459 | sock = rqstp->rq_sock->sk_sock; | ||
460 | |||
461 | msg.msg_name = &rqstp->rq_addr; | ||
462 | msg.msg_namelen = sizeof(rqstp->rq_addr); | ||
463 | msg.msg_control = NULL; | ||
464 | msg.msg_controllen = 0; | ||
465 | |||
466 | msg.msg_flags = MSG_DONTWAIT; | ||
467 | |||
468 | len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT); | ||
469 | |||
470 | /* sock_recvmsg doesn't fill in the name/namelen, so we must.. | ||
471 | * possibly we should cache this in the svc_sock structure | ||
472 | * at accept time. FIXME | ||
473 | */ | ||
474 | alen = sizeof(rqstp->rq_addr); | ||
475 | sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); | ||
476 | |||
477 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", | ||
478 | rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); | ||
479 | |||
480 | return len; | ||
481 | } | ||
482 | |||
483 | /* | ||
484 | * Set socket snd and rcv buffer lengths | ||
485 | */ | ||
486 | static inline void | ||
487 | svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | ||
488 | { | ||
489 | #if 0 | ||
490 | mm_segment_t oldfs; | ||
491 | oldfs = get_fs(); set_fs(KERNEL_DS); | ||
492 | sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, | ||
493 | (char*)&snd, sizeof(snd)); | ||
494 | sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, | ||
495 | (char*)&rcv, sizeof(rcv)); | ||
496 | #else | ||
497 | /* sock_setsockopt limits use to sysctl_?mem_max, | ||
498 | * which isn't acceptable. Until that is made conditional | ||
499 | * on not having CAP_SYS_RESOURCE or similar, we go direct... | ||
500 | * DaveM said I could! | ||
501 | */ | ||
502 | lock_sock(sock->sk); | ||
503 | sock->sk->sk_sndbuf = snd * 2; | ||
504 | sock->sk->sk_rcvbuf = rcv * 2; | ||
505 | sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; | ||
506 | release_sock(sock->sk); | ||
507 | #endif | ||
508 | } | ||
509 | /* | ||
510 | * INET callback when data has been received on the socket. | ||
511 | */ | ||
512 | static void | ||
513 | svc_udp_data_ready(struct sock *sk, int count) | ||
514 | { | ||
515 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); | ||
516 | |||
517 | if (!svsk) | ||
518 | goto out; | ||
519 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", | ||
520 | svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); | ||
521 | set_bit(SK_DATA, &svsk->sk_flags); | ||
522 | svc_sock_enqueue(svsk); | ||
523 | out: | ||
524 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
525 | wake_up_interruptible(sk->sk_sleep); | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * INET callback when space is newly available on the socket. | ||
530 | */ | ||
531 | static void | ||
532 | svc_write_space(struct sock *sk) | ||
533 | { | ||
534 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); | ||
535 | |||
536 | if (svsk) { | ||
537 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", | ||
538 | svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); | ||
539 | svc_sock_enqueue(svsk); | ||
540 | } | ||
541 | |||
542 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { | ||
543 | printk(KERN_WARNING "RPC svc_write_space: some sleeping on %p\n", | ||
544 | svsk); | ||
545 | wake_up_interruptible(sk->sk_sleep); | ||
546 | } | ||
547 | } | ||
548 | |||
549 | /* | ||
550 | * Receive a datagram from a UDP socket. | ||
551 | */ | ||
552 | extern int | ||
553 | csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb); | ||
554 | |||
555 | static int | ||
556 | svc_udp_recvfrom(struct svc_rqst *rqstp) | ||
557 | { | ||
558 | struct svc_sock *svsk = rqstp->rq_sock; | ||
559 | struct svc_serv *serv = svsk->sk_server; | ||
560 | struct sk_buff *skb; | ||
561 | int err, len; | ||
562 | |||
563 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | ||
564 | /* udp sockets need large rcvbuf as all pending | ||
565 | * requests are still in that buffer. sndbuf must | ||
566 | * also be large enough that there is enough space | ||
567 | * for one reply per thread. | ||
568 | */ | ||
569 | svc_sock_setbufsize(svsk->sk_sock, | ||
570 | (serv->sv_nrthreads+3) * serv->sv_bufsz, | ||
571 | (serv->sv_nrthreads+3) * serv->sv_bufsz); | ||
572 | |||
573 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | ||
574 | svc_sock_received(svsk); | ||
575 | return svc_deferred_recv(rqstp); | ||
576 | } | ||
577 | |||
578 | clear_bit(SK_DATA, &svsk->sk_flags); | ||
579 | while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { | ||
580 | if (err == -EAGAIN) { | ||
581 | svc_sock_received(svsk); | ||
582 | return err; | ||
583 | } | ||
584 | /* possibly an icmp error */ | ||
585 | dprintk("svc: recvfrom returned error %d\n", -err); | ||
586 | } | ||
587 | if (skb->stamp.tv_sec == 0) { | ||
588 | skb->stamp.tv_sec = xtime.tv_sec; | ||
589 | skb->stamp.tv_usec = xtime.tv_nsec * 1000; | ||
590 | /* Don't enable netstamp, sunrpc doesn't | ||
591 | need that much accuracy */ | ||
592 | } | ||
593 | svsk->sk_sk->sk_stamp = skb->stamp; | ||
594 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ | ||
595 | |||
596 | /* | ||
597 | * Maybe more packets - kick another thread ASAP. | ||
598 | */ | ||
599 | svc_sock_received(svsk); | ||
600 | |||
601 | len = skb->len - sizeof(struct udphdr); | ||
602 | rqstp->rq_arg.len = len; | ||
603 | |||
604 | rqstp->rq_prot = IPPROTO_UDP; | ||
605 | |||
606 | /* Get sender address */ | ||
607 | rqstp->rq_addr.sin_family = AF_INET; | ||
608 | rqstp->rq_addr.sin_port = skb->h.uh->source; | ||
609 | rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr; | ||
610 | rqstp->rq_daddr = skb->nh.iph->daddr; | ||
611 | |||
612 | if (skb_is_nonlinear(skb)) { | ||
613 | /* we have to copy */ | ||
614 | local_bh_disable(); | ||
615 | if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { | ||
616 | local_bh_enable(); | ||
617 | /* checksum error */ | ||
618 | skb_free_datagram(svsk->sk_sk, skb); | ||
619 | return 0; | ||
620 | } | ||
621 | local_bh_enable(); | ||
622 | skb_free_datagram(svsk->sk_sk, skb); | ||
623 | } else { | ||
624 | /* we can use it in-place */ | ||
625 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); | ||
626 | rqstp->rq_arg.head[0].iov_len = len; | ||
627 | if (skb->ip_summed != CHECKSUM_UNNECESSARY) { | ||
628 | if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { | ||
629 | skb_free_datagram(svsk->sk_sk, skb); | ||
630 | return 0; | ||
631 | } | ||
632 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
633 | } | ||
634 | rqstp->rq_skbuff = skb; | ||
635 | } | ||
636 | |||
637 | rqstp->rq_arg.page_base = 0; | ||
638 | if (len <= rqstp->rq_arg.head[0].iov_len) { | ||
639 | rqstp->rq_arg.head[0].iov_len = len; | ||
640 | rqstp->rq_arg.page_len = 0; | ||
641 | } else { | ||
642 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; | ||
643 | rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; | ||
644 | } | ||
645 | |||
646 | if (serv->sv_stats) | ||
647 | serv->sv_stats->netudpcnt++; | ||
648 | |||
649 | return len; | ||
650 | } | ||
651 | |||
/*
 * Send the reply in rq_res over UDP.  If the first attempt reports
 * -ECONNREFUSED (an ICMP error left over from an earlier request), the
 * send is tried exactly once more.  Returns svc_sendto()'s result.
 */
static int
svc_udp_sendto(struct svc_rqst *rqstp)
{
	int rc = svc_sendto(rqstp, &rqstp->rq_res);

	if (rc != -ECONNREFUSED)
		return rc;

	/* ICMP error on earlier request. */
	return svc_sendto(rqstp, &rqstp->rq_res);
}
664 | |||
665 | static void | ||
666 | svc_udp_init(struct svc_sock *svsk) | ||
667 | { | ||
668 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; | ||
669 | svsk->sk_sk->sk_write_space = svc_write_space; | ||
670 | svsk->sk_recvfrom = svc_udp_recvfrom; | ||
671 | svsk->sk_sendto = svc_udp_sendto; | ||
672 | |||
673 | /* initialise setting must have enough space to | ||
674 | * receive and respond to one request. | ||
675 | * svc_udp_recvfrom will re-adjust if necessary | ||
676 | */ | ||
677 | svc_sock_setbufsize(svsk->sk_sock, | ||
678 | 3 * svsk->sk_server->sv_bufsz, | ||
679 | 3 * svsk->sk_server->sv_bufsz); | ||
680 | |||
681 | set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ | ||
682 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * A data_ready event on a listening socket means there's a connection | ||
687 | * pending. Do not use state_change as a substitute for it. | ||
688 | */ | ||
689 | static void | ||
690 | svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | ||
691 | { | ||
692 | struct svc_sock *svsk; | ||
693 | |||
694 | dprintk("svc: socket %p TCP (listen) state change %d\n", | ||
695 | sk, sk->sk_state); | ||
696 | |||
697 | if (sk->sk_state != TCP_LISTEN) { | ||
698 | /* | ||
699 | * This callback may called twice when a new connection | ||
700 | * is established as a child socket inherits everything | ||
701 | * from a parent LISTEN socket. | ||
702 | * 1) data_ready method of the parent socket will be called | ||
703 | * when one of child sockets become ESTABLISHED. | ||
704 | * 2) data_ready method of the child socket may be called | ||
705 | * when it receives data before the socket is accepted. | ||
706 | * In case of 2, we should ignore it silently. | ||
707 | */ | ||
708 | goto out; | ||
709 | } | ||
710 | if (!(svsk = (struct svc_sock *) sk->sk_user_data)) { | ||
711 | printk("svc: socket %p: no user data\n", sk); | ||
712 | goto out; | ||
713 | } | ||
714 | set_bit(SK_CONN, &svsk->sk_flags); | ||
715 | svc_sock_enqueue(svsk); | ||
716 | out: | ||
717 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
718 | wake_up_interruptible_all(sk->sk_sleep); | ||
719 | } | ||
720 | |||
721 | /* | ||
722 | * A state change on a connected socket means it's dying or dead. | ||
723 | */ | ||
724 | static void | ||
725 | svc_tcp_state_change(struct sock *sk) | ||
726 | { | ||
727 | struct svc_sock *svsk; | ||
728 | |||
729 | dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", | ||
730 | sk, sk->sk_state, sk->sk_user_data); | ||
731 | |||
732 | if (!(svsk = (struct svc_sock *) sk->sk_user_data)) { | ||
733 | printk("svc: socket %p: no user data\n", sk); | ||
734 | goto out; | ||
735 | } | ||
736 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
737 | svc_sock_enqueue(svsk); | ||
738 | out: | ||
739 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
740 | wake_up_interruptible_all(sk->sk_sleep); | ||
741 | } | ||
742 | |||
743 | static void | ||
744 | svc_tcp_data_ready(struct sock *sk, int count) | ||
745 | { | ||
746 | struct svc_sock * svsk; | ||
747 | |||
748 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", | ||
749 | sk, sk->sk_user_data); | ||
750 | if (!(svsk = (struct svc_sock *)(sk->sk_user_data))) | ||
751 | goto out; | ||
752 | set_bit(SK_DATA, &svsk->sk_flags); | ||
753 | svc_sock_enqueue(svsk); | ||
754 | out: | ||
755 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | ||
756 | wake_up_interruptible(sk->sk_sleep); | ||
757 | } | ||
758 | |||
759 | /* | ||
760 | * Accept a TCP connection | ||
761 | */ | ||
762 | static void | ||
763 | svc_tcp_accept(struct svc_sock *svsk) | ||
764 | { | ||
765 | struct sockaddr_in sin; | ||
766 | struct svc_serv *serv = svsk->sk_server; | ||
767 | struct socket *sock = svsk->sk_sock; | ||
768 | struct socket *newsock; | ||
769 | struct proto_ops *ops; | ||
770 | struct svc_sock *newsvsk; | ||
771 | int err, slen; | ||
772 | |||
773 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); | ||
774 | if (!sock) | ||
775 | return; | ||
776 | |||
777 | err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); | ||
778 | if (err) { | ||
779 | if (err == -ENOMEM) | ||
780 | printk(KERN_WARNING "%s: no more sockets!\n", | ||
781 | serv->sv_name); | ||
782 | return; | ||
783 | } | ||
784 | |||
785 | dprintk("svc: tcp_accept %p allocated\n", newsock); | ||
786 | newsock->ops = ops = sock->ops; | ||
787 | |||
788 | clear_bit(SK_CONN, &svsk->sk_flags); | ||
789 | if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) { | ||
790 | if (err != -EAGAIN && net_ratelimit()) | ||
791 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", | ||
792 | serv->sv_name, -err); | ||
793 | goto failed; /* aborted connection or whatever */ | ||
794 | } | ||
795 | set_bit(SK_CONN, &svsk->sk_flags); | ||
796 | svc_sock_enqueue(svsk); | ||
797 | |||
798 | slen = sizeof(sin); | ||
799 | err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1); | ||
800 | if (err < 0) { | ||
801 | if (net_ratelimit()) | ||
802 | printk(KERN_WARNING "%s: peername failed (err %d)!\n", | ||
803 | serv->sv_name, -err); | ||
804 | goto failed; /* aborted connection or whatever */ | ||
805 | } | ||
806 | |||
807 | /* Ideally, we would want to reject connections from unauthorized | ||
808 | * hosts here, but when we get encription, the IP of the host won't | ||
809 | * tell us anything. For now just warn about unpriv connections. | ||
810 | */ | ||
811 | if (ntohs(sin.sin_port) >= 1024) { | ||
812 | dprintk(KERN_WARNING | ||
813 | "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n", | ||
814 | serv->sv_name, | ||
815 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); | ||
816 | } | ||
817 | |||
818 | dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name, | ||
819 | NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); | ||
820 | |||
821 | /* make sure that a write doesn't block forever when | ||
822 | * low on memory | ||
823 | */ | ||
824 | newsock->sk->sk_sndtimeo = HZ*30; | ||
825 | |||
826 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0))) | ||
827 | goto failed; | ||
828 | |||
829 | |||
830 | /* make sure that we don't have too many active connections. | ||
831 | * If we have, something must be dropped. | ||
832 | * | ||
833 | * There's no point in trying to do random drop here for | ||
834 | * DoS prevention. The NFS clients does 1 reconnect in 15 | ||
835 | * seconds. An attacker can easily beat that. | ||
836 | * | ||
837 | * The only somewhat efficient mechanism would be if drop | ||
838 | * old connections from the same IP first. But right now | ||
839 | * we don't even record the client IP in svc_sock. | ||
840 | */ | ||
841 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | ||
842 | struct svc_sock *svsk = NULL; | ||
843 | spin_lock_bh(&serv->sv_lock); | ||
844 | if (!list_empty(&serv->sv_tempsocks)) { | ||
845 | if (net_ratelimit()) { | ||
846 | /* Try to help the admin */ | ||
847 | printk(KERN_NOTICE "%s: too many open TCP " | ||
848 | "sockets, consider increasing the " | ||
849 | "number of nfsd threads\n", | ||
850 | serv->sv_name); | ||
851 | printk(KERN_NOTICE "%s: last TCP connect from " | ||
852 | "%u.%u.%u.%u:%d\n", | ||
853 | serv->sv_name, | ||
854 | NIPQUAD(sin.sin_addr.s_addr), | ||
855 | ntohs(sin.sin_port)); | ||
856 | } | ||
857 | /* | ||
858 | * Always select the oldest socket. It's not fair, | ||
859 | * but so is life | ||
860 | */ | ||
861 | svsk = list_entry(serv->sv_tempsocks.prev, | ||
862 | struct svc_sock, | ||
863 | sk_list); | ||
864 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
865 | svsk->sk_inuse ++; | ||
866 | } | ||
867 | spin_unlock_bh(&serv->sv_lock); | ||
868 | |||
869 | if (svsk) { | ||
870 | svc_sock_enqueue(svsk); | ||
871 | svc_sock_put(svsk); | ||
872 | } | ||
873 | |||
874 | } | ||
875 | |||
876 | if (serv->sv_stats) | ||
877 | serv->sv_stats->nettcpconn++; | ||
878 | |||
879 | return; | ||
880 | |||
881 | failed: | ||
882 | sock_release(newsock); | ||
883 | return; | ||
884 | } | ||
885 | |||
886 | /* | ||
887 | * Receive data from a TCP socket. | ||
888 | */ | ||
889 | static int | ||
890 | svc_tcp_recvfrom(struct svc_rqst *rqstp) | ||
891 | { | ||
892 | struct svc_sock *svsk = rqstp->rq_sock; | ||
893 | struct svc_serv *serv = svsk->sk_server; | ||
894 | int len; | ||
895 | struct kvec vec[RPCSVC_MAXPAGES]; | ||
896 | int pnum, vlen; | ||
897 | |||
898 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", | ||
899 | svsk, test_bit(SK_DATA, &svsk->sk_flags), | ||
900 | test_bit(SK_CONN, &svsk->sk_flags), | ||
901 | test_bit(SK_CLOSE, &svsk->sk_flags)); | ||
902 | |||
903 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | ||
904 | svc_sock_received(svsk); | ||
905 | return svc_deferred_recv(rqstp); | ||
906 | } | ||
907 | |||
908 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
909 | svc_delete_socket(svsk); | ||
910 | return 0; | ||
911 | } | ||
912 | |||
913 | if (test_bit(SK_CONN, &svsk->sk_flags)) { | ||
914 | svc_tcp_accept(svsk); | ||
915 | svc_sock_received(svsk); | ||
916 | return 0; | ||
917 | } | ||
918 | |||
919 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | ||
920 | /* sndbuf needs to have room for one request | ||
921 | * per thread, otherwise we can stall even when the | ||
922 | * network isn't a bottleneck. | ||
923 | * rcvbuf just needs to be able to hold a few requests. | ||
924 | * Normally they will be removed from the queue | ||
925 | * as soon a a complete request arrives. | ||
926 | */ | ||
927 | svc_sock_setbufsize(svsk->sk_sock, | ||
928 | (serv->sv_nrthreads+3) * serv->sv_bufsz, | ||
929 | 3 * serv->sv_bufsz); | ||
930 | |||
931 | clear_bit(SK_DATA, &svsk->sk_flags); | ||
932 | |||
933 | /* Receive data. If we haven't got the record length yet, get | ||
934 | * the next four bytes. Otherwise try to gobble up as much as | ||
935 | * possible up to the complete record length. | ||
936 | */ | ||
937 | if (svsk->sk_tcplen < 4) { | ||
938 | unsigned long want = 4 - svsk->sk_tcplen; | ||
939 | struct kvec iov; | ||
940 | |||
941 | iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; | ||
942 | iov.iov_len = want; | ||
943 | if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) | ||
944 | goto error; | ||
945 | svsk->sk_tcplen += len; | ||
946 | |||
947 | if (len < want) { | ||
948 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", | ||
949 | len, want); | ||
950 | svc_sock_received(svsk); | ||
951 | return -EAGAIN; /* record header not complete */ | ||
952 | } | ||
953 | |||
954 | svsk->sk_reclen = ntohl(svsk->sk_reclen); | ||
955 | if (!(svsk->sk_reclen & 0x80000000)) { | ||
956 | /* FIXME: technically, a record can be fragmented, | ||
957 | * and non-terminal fragments will not have the top | ||
958 | * bit set in the fragment length header. | ||
959 | * But apparently no known nfs clients send fragmented | ||
960 | * records. */ | ||
961 | printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n", | ||
962 | (unsigned long) svsk->sk_reclen); | ||
963 | goto err_delete; | ||
964 | } | ||
965 | svsk->sk_reclen &= 0x7fffffff; | ||
966 | dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); | ||
967 | if (svsk->sk_reclen > serv->sv_bufsz) { | ||
968 | printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n", | ||
969 | (unsigned long) svsk->sk_reclen); | ||
970 | goto err_delete; | ||
971 | } | ||
972 | } | ||
973 | |||
974 | /* Check whether enough data is available */ | ||
975 | len = svc_recv_available(svsk); | ||
976 | if (len < 0) | ||
977 | goto error; | ||
978 | |||
979 | if (len < svsk->sk_reclen) { | ||
980 | dprintk("svc: incomplete TCP record (%d of %d)\n", | ||
981 | len, svsk->sk_reclen); | ||
982 | svc_sock_received(svsk); | ||
983 | return -EAGAIN; /* record not complete */ | ||
984 | } | ||
985 | len = svsk->sk_reclen; | ||
986 | set_bit(SK_DATA, &svsk->sk_flags); | ||
987 | |||
988 | vec[0] = rqstp->rq_arg.head[0]; | ||
989 | vlen = PAGE_SIZE; | ||
990 | pnum = 1; | ||
991 | while (vlen < len) { | ||
992 | vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]); | ||
993 | vec[pnum].iov_len = PAGE_SIZE; | ||
994 | pnum++; | ||
995 | vlen += PAGE_SIZE; | ||
996 | } | ||
997 | |||
998 | /* Now receive data */ | ||
999 | len = svc_recvfrom(rqstp, vec, pnum, len); | ||
1000 | if (len < 0) | ||
1001 | goto error; | ||
1002 | |||
1003 | dprintk("svc: TCP complete record (%d bytes)\n", len); | ||
1004 | rqstp->rq_arg.len = len; | ||
1005 | rqstp->rq_arg.page_base = 0; | ||
1006 | if (len <= rqstp->rq_arg.head[0].iov_len) { | ||
1007 | rqstp->rq_arg.head[0].iov_len = len; | ||
1008 | rqstp->rq_arg.page_len = 0; | ||
1009 | } else { | ||
1010 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; | ||
1011 | } | ||
1012 | |||
1013 | rqstp->rq_skbuff = NULL; | ||
1014 | rqstp->rq_prot = IPPROTO_TCP; | ||
1015 | |||
1016 | /* Reset TCP read info */ | ||
1017 | svsk->sk_reclen = 0; | ||
1018 | svsk->sk_tcplen = 0; | ||
1019 | |||
1020 | svc_sock_received(svsk); | ||
1021 | if (serv->sv_stats) | ||
1022 | serv->sv_stats->nettcpcnt++; | ||
1023 | |||
1024 | return len; | ||
1025 | |||
1026 | err_delete: | ||
1027 | svc_delete_socket(svsk); | ||
1028 | return -EAGAIN; | ||
1029 | |||
1030 | error: | ||
1031 | if (len == -EAGAIN) { | ||
1032 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); | ||
1033 | svc_sock_received(svsk); | ||
1034 | } else { | ||
1035 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", | ||
1036 | svsk->sk_server->sv_name, -len); | ||
1037 | svc_sock_received(svsk); | ||
1038 | } | ||
1039 | |||
1040 | return len; | ||
1041 | } | ||
1042 | |||
1043 | /* | ||
1044 | * Send out data on TCP socket. | ||
1045 | */ | ||
1046 | static int | ||
1047 | svc_tcp_sendto(struct svc_rqst *rqstp) | ||
1048 | { | ||
1049 | struct xdr_buf *xbufp = &rqstp->rq_res; | ||
1050 | int sent; | ||
1051 | u32 reclen; | ||
1052 | |||
1053 | /* Set up the first element of the reply kvec. | ||
1054 | * Any other kvecs that may be in use have been taken | ||
1055 | * care of by the server implementation itself. | ||
1056 | */ | ||
1057 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); | ||
1058 | memcpy(xbufp->head[0].iov_base, &reclen, 4); | ||
1059 | |||
1060 | if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) | ||
1061 | return -ENOTCONN; | ||
1062 | |||
1063 | sent = svc_sendto(rqstp, &rqstp->rq_res); | ||
1064 | if (sent != xbufp->len) { | ||
1065 | printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", | ||
1066 | rqstp->rq_sock->sk_server->sv_name, | ||
1067 | (sent<0)?"got error":"sent only", | ||
1068 | sent, xbufp->len); | ||
1069 | svc_delete_socket(rqstp->rq_sock); | ||
1070 | sent = -EAGAIN; | ||
1071 | } | ||
1072 | return sent; | ||
1073 | } | ||
1074 | |||
1075 | static void | ||
1076 | svc_tcp_init(struct svc_sock *svsk) | ||
1077 | { | ||
1078 | struct sock *sk = svsk->sk_sk; | ||
1079 | struct tcp_sock *tp = tcp_sk(sk); | ||
1080 | |||
1081 | svsk->sk_recvfrom = svc_tcp_recvfrom; | ||
1082 | svsk->sk_sendto = svc_tcp_sendto; | ||
1083 | |||
1084 | if (sk->sk_state == TCP_LISTEN) { | ||
1085 | dprintk("setting up TCP socket for listening\n"); | ||
1086 | sk->sk_data_ready = svc_tcp_listen_data_ready; | ||
1087 | set_bit(SK_CONN, &svsk->sk_flags); | ||
1088 | } else { | ||
1089 | dprintk("setting up TCP socket for reading\n"); | ||
1090 | sk->sk_state_change = svc_tcp_state_change; | ||
1091 | sk->sk_data_ready = svc_tcp_data_ready; | ||
1092 | sk->sk_write_space = svc_write_space; | ||
1093 | |||
1094 | svsk->sk_reclen = 0; | ||
1095 | svsk->sk_tcplen = 0; | ||
1096 | |||
1097 | tp->nonagle = 1; /* disable Nagle's algorithm */ | ||
1098 | |||
1099 | /* initialise setting must have enough space to | ||
1100 | * receive and respond to one request. | ||
1101 | * svc_tcp_recvfrom will re-adjust if necessary | ||
1102 | */ | ||
1103 | svc_sock_setbufsize(svsk->sk_sock, | ||
1104 | 3 * svsk->sk_server->sv_bufsz, | ||
1105 | 3 * svsk->sk_server->sv_bufsz); | ||
1106 | |||
1107 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | ||
1108 | set_bit(SK_DATA, &svsk->sk_flags); | ||
1109 | if (sk->sk_state != TCP_ESTABLISHED) | ||
1110 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1111 | } | ||
1112 | } | ||
1113 | |||
1114 | void | ||
1115 | svc_sock_update_bufs(struct svc_serv *serv) | ||
1116 | { | ||
1117 | /* | ||
1118 | * The number of server threads has changed. Update | ||
1119 | * rcvbuf and sndbuf accordingly on all sockets | ||
1120 | */ | ||
1121 | struct list_head *le; | ||
1122 | |||
1123 | spin_lock_bh(&serv->sv_lock); | ||
1124 | list_for_each(le, &serv->sv_permsocks) { | ||
1125 | struct svc_sock *svsk = | ||
1126 | list_entry(le, struct svc_sock, sk_list); | ||
1127 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | ||
1128 | } | ||
1129 | list_for_each(le, &serv->sv_tempsocks) { | ||
1130 | struct svc_sock *svsk = | ||
1131 | list_entry(le, struct svc_sock, sk_list); | ||
1132 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | ||
1133 | } | ||
1134 | spin_unlock_bh(&serv->sv_lock); | ||
1135 | } | ||
1136 | |||
1137 | /* | ||
1138 | * Receive the next request on any socket. | ||
1139 | */ | ||
1140 | int | ||
1141 | svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) | ||
1142 | { | ||
1143 | struct svc_sock *svsk =NULL; | ||
1144 | int len; | ||
1145 | int pages; | ||
1146 | struct xdr_buf *arg; | ||
1147 | DECLARE_WAITQUEUE(wait, current); | ||
1148 | |||
1149 | dprintk("svc: server %p waiting for data (to = %ld)\n", | ||
1150 | rqstp, timeout); | ||
1151 | |||
1152 | if (rqstp->rq_sock) | ||
1153 | printk(KERN_ERR | ||
1154 | "svc_recv: service %p, socket not NULL!\n", | ||
1155 | rqstp); | ||
1156 | if (waitqueue_active(&rqstp->rq_wait)) | ||
1157 | printk(KERN_ERR | ||
1158 | "svc_recv: service %p, wait queue active!\n", | ||
1159 | rqstp); | ||
1160 | |||
1161 | /* Initialize the buffers */ | ||
1162 | /* first reclaim pages that were moved to response list */ | ||
1163 | svc_pushback_allpages(rqstp); | ||
1164 | |||
1165 | /* now allocate needed pages. If we get a failure, sleep briefly */ | ||
1166 | pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE; | ||
1167 | while (rqstp->rq_arghi < pages) { | ||
1168 | struct page *p = alloc_page(GFP_KERNEL); | ||
1169 | if (!p) { | ||
1170 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1171 | schedule_timeout(HZ/2); | ||
1172 | continue; | ||
1173 | } | ||
1174 | rqstp->rq_argpages[rqstp->rq_arghi++] = p; | ||
1175 | } | ||
1176 | |||
1177 | /* Make arg->head point to first page and arg->pages point to rest */ | ||
1178 | arg = &rqstp->rq_arg; | ||
1179 | arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]); | ||
1180 | arg->head[0].iov_len = PAGE_SIZE; | ||
1181 | rqstp->rq_argused = 1; | ||
1182 | arg->pages = rqstp->rq_argpages + 1; | ||
1183 | arg->page_base = 0; | ||
1184 | /* save at least one page for response */ | ||
1185 | arg->page_len = (pages-2)*PAGE_SIZE; | ||
1186 | arg->len = (pages-1)*PAGE_SIZE; | ||
1187 | arg->tail[0].iov_len = 0; | ||
1188 | |||
1189 | try_to_freeze(PF_FREEZE); | ||
1190 | if (signalled()) | ||
1191 | return -EINTR; | ||
1192 | |||
1193 | spin_lock_bh(&serv->sv_lock); | ||
1194 | if (!list_empty(&serv->sv_tempsocks)) { | ||
1195 | svsk = list_entry(serv->sv_tempsocks.next, | ||
1196 | struct svc_sock, sk_list); | ||
1197 | /* apparently the "standard" is that clients close | ||
1198 | * idle connections after 5 minutes, servers after | ||
1199 | * 6 minutes | ||
1200 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
1201 | */ | ||
1202 | if (get_seconds() - svsk->sk_lastrecv < 6*60 | ||
1203 | || test_bit(SK_BUSY, &svsk->sk_flags)) | ||
1204 | svsk = NULL; | ||
1205 | } | ||
1206 | if (svsk) { | ||
1207 | set_bit(SK_BUSY, &svsk->sk_flags); | ||
1208 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1209 | rqstp->rq_sock = svsk; | ||
1210 | svsk->sk_inuse++; | ||
1211 | } else if ((svsk = svc_sock_dequeue(serv)) != NULL) { | ||
1212 | rqstp->rq_sock = svsk; | ||
1213 | svsk->sk_inuse++; | ||
1214 | rqstp->rq_reserved = serv->sv_bufsz; | ||
1215 | svsk->sk_reserved += rqstp->rq_reserved; | ||
1216 | } else { | ||
1217 | /* No data pending. Go to sleep */ | ||
1218 | svc_serv_enqueue(serv, rqstp); | ||
1219 | |||
1220 | /* | ||
1221 | * We have to be able to interrupt this wait | ||
1222 | * to bring down the daemons ... | ||
1223 | */ | ||
1224 | set_current_state(TASK_INTERRUPTIBLE); | ||
1225 | add_wait_queue(&rqstp->rq_wait, &wait); | ||
1226 | spin_unlock_bh(&serv->sv_lock); | ||
1227 | |||
1228 | schedule_timeout(timeout); | ||
1229 | |||
1230 | try_to_freeze(PF_FREEZE); | ||
1231 | |||
1232 | spin_lock_bh(&serv->sv_lock); | ||
1233 | remove_wait_queue(&rqstp->rq_wait, &wait); | ||
1234 | |||
1235 | if (!(svsk = rqstp->rq_sock)) { | ||
1236 | svc_serv_dequeue(serv, rqstp); | ||
1237 | spin_unlock_bh(&serv->sv_lock); | ||
1238 | dprintk("svc: server %p, no data yet\n", rqstp); | ||
1239 | return signalled()? -EINTR : -EAGAIN; | ||
1240 | } | ||
1241 | } | ||
1242 | spin_unlock_bh(&serv->sv_lock); | ||
1243 | |||
1244 | dprintk("svc: server %p, socket %p, inuse=%d\n", | ||
1245 | rqstp, svsk, svsk->sk_inuse); | ||
1246 | len = svsk->sk_recvfrom(rqstp); | ||
1247 | dprintk("svc: got len=%d\n", len); | ||
1248 | |||
1249 | /* No data, incomplete (TCP) read, or accept() */ | ||
1250 | if (len == 0 || len == -EAGAIN) { | ||
1251 | rqstp->rq_res.len = 0; | ||
1252 | svc_sock_release(rqstp); | ||
1253 | return -EAGAIN; | ||
1254 | } | ||
1255 | svsk->sk_lastrecv = get_seconds(); | ||
1256 | if (test_bit(SK_TEMP, &svsk->sk_flags)) { | ||
1257 | /* push active sockets to end of list */ | ||
1258 | spin_lock_bh(&serv->sv_lock); | ||
1259 | if (!list_empty(&svsk->sk_list)) | ||
1260 | list_move_tail(&svsk->sk_list, &serv->sv_tempsocks); | ||
1261 | spin_unlock_bh(&serv->sv_lock); | ||
1262 | } | ||
1263 | |||
1264 | rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; | ||
1265 | rqstp->rq_chandle.defer = svc_defer; | ||
1266 | |||
1267 | if (serv->sv_stats) | ||
1268 | serv->sv_stats->netcnt++; | ||
1269 | return len; | ||
1270 | } | ||
1271 | |||
1272 | /* | ||
1273 | * Drop request | ||
1274 | */ | ||
1275 | void | ||
1276 | svc_drop(struct svc_rqst *rqstp) | ||
1277 | { | ||
1278 | dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); | ||
1279 | svc_sock_release(rqstp); | ||
1280 | } | ||
1281 | |||
/*
 * Return the reply held in rqstp->rq_res to the client.
 *
 * Returns -EFAULT when the request has no socket.  Otherwise returns the
 * result of the socket's sk_sendto method, except that -ECONNREFUSED,
 * -ENOTCONN and -EAGAIN are reported to the caller as 0.  The socket is
 * released in every case where one was attached.
 */
int
svc_send(struct svc_rqst *rqstp)
{
	struct svc_sock	*svsk = rqstp->rq_sock;
	struct xdr_buf	*res = &rqstp->rq_res;
	int		sent;

	if (svsk == NULL) {
		printk(KERN_WARNING "NULL socket pointer in %s:%d\n",
				__FILE__, __LINE__);
		return -EFAULT;
	}

	/* the receive skb is released before the reply goes out */
	svc_release_skb(rqstp);

	/* over-all reply length = head + pages + tail */
	res->len = res->head[0].iov_len +
		   res->page_len +
		   res->tail[0].iov_len;

	/* svsk->sk_sem serializes outgoing data on this socket */
	down(&svsk->sk_sem);
	sent = test_bit(SK_DEAD, &svsk->sk_flags)
		? -ENOTCONN
		: svsk->sk_sendto(rqstp);
	up(&svsk->sk_sem);
	svc_sock_release(rqstp);

	switch (sent) {
	case -ECONNREFUSED:
	case -ENOTCONN:
	case -EAGAIN:
		return 0;
	default:
		return sent;
	}
}
1320 | |||
1321 | /* | ||
1322 | * Initialize socket for RPC use and create svc_sock struct | ||
1323 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. | ||
1324 | */ | ||
1325 | static struct svc_sock * | ||
1326 | svc_setup_socket(struct svc_serv *serv, struct socket *sock, | ||
1327 | int *errp, int pmap_register) | ||
1328 | { | ||
1329 | struct svc_sock *svsk; | ||
1330 | struct sock *inet; | ||
1331 | |||
1332 | dprintk("svc: svc_setup_socket %p\n", sock); | ||
1333 | if (!(svsk = kmalloc(sizeof(*svsk), GFP_KERNEL))) { | ||
1334 | *errp = -ENOMEM; | ||
1335 | return NULL; | ||
1336 | } | ||
1337 | memset(svsk, 0, sizeof(*svsk)); | ||
1338 | |||
1339 | inet = sock->sk; | ||
1340 | |||
1341 | /* Register socket with portmapper */ | ||
1342 | if (*errp >= 0 && pmap_register) | ||
1343 | *errp = svc_register(serv, inet->sk_protocol, | ||
1344 | ntohs(inet_sk(inet)->sport)); | ||
1345 | |||
1346 | if (*errp < 0) { | ||
1347 | kfree(svsk); | ||
1348 | return NULL; | ||
1349 | } | ||
1350 | |||
1351 | set_bit(SK_BUSY, &svsk->sk_flags); | ||
1352 | inet->sk_user_data = svsk; | ||
1353 | svsk->sk_sock = sock; | ||
1354 | svsk->sk_sk = inet; | ||
1355 | svsk->sk_ostate = inet->sk_state_change; | ||
1356 | svsk->sk_odata = inet->sk_data_ready; | ||
1357 | svsk->sk_owspace = inet->sk_write_space; | ||
1358 | svsk->sk_server = serv; | ||
1359 | svsk->sk_lastrecv = get_seconds(); | ||
1360 | INIT_LIST_HEAD(&svsk->sk_deferred); | ||
1361 | INIT_LIST_HEAD(&svsk->sk_ready); | ||
1362 | sema_init(&svsk->sk_sem, 1); | ||
1363 | |||
1364 | /* Initialize the socket */ | ||
1365 | if (sock->type == SOCK_DGRAM) | ||
1366 | svc_udp_init(svsk); | ||
1367 | else | ||
1368 | svc_tcp_init(svsk); | ||
1369 | |||
1370 | spin_lock_bh(&serv->sv_lock); | ||
1371 | if (!pmap_register) { | ||
1372 | set_bit(SK_TEMP, &svsk->sk_flags); | ||
1373 | list_add(&svsk->sk_list, &serv->sv_tempsocks); | ||
1374 | serv->sv_tmpcnt++; | ||
1375 | } else { | ||
1376 | clear_bit(SK_TEMP, &svsk->sk_flags); | ||
1377 | list_add(&svsk->sk_list, &serv->sv_permsocks); | ||
1378 | } | ||
1379 | spin_unlock_bh(&serv->sv_lock); | ||
1380 | |||
1381 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", | ||
1382 | svsk, svsk->sk_sk); | ||
1383 | |||
1384 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
1385 | svc_sock_enqueue(svsk); | ||
1386 | return svsk; | ||
1387 | } | ||
1388 | |||
1389 | /* | ||
1390 | * Create socket for RPC service. | ||
1391 | */ | ||
1392 | static int | ||
1393 | svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) | ||
1394 | { | ||
1395 | struct svc_sock *svsk; | ||
1396 | struct socket *sock; | ||
1397 | int error; | ||
1398 | int type; | ||
1399 | |||
1400 | dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n", | ||
1401 | serv->sv_program->pg_name, protocol, | ||
1402 | NIPQUAD(sin->sin_addr.s_addr), | ||
1403 | ntohs(sin->sin_port)); | ||
1404 | |||
1405 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { | ||
1406 | printk(KERN_WARNING "svc: only UDP and TCP " | ||
1407 | "sockets supported\n"); | ||
1408 | return -EINVAL; | ||
1409 | } | ||
1410 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | ||
1411 | |||
1412 | if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) | ||
1413 | return error; | ||
1414 | |||
1415 | if (sin != NULL) { | ||
1416 | if (type == SOCK_STREAM) | ||
1417 | sock->sk->sk_reuse = 1; /* allow address reuse */ | ||
1418 | error = sock->ops->bind(sock, (struct sockaddr *) sin, | ||
1419 | sizeof(*sin)); | ||
1420 | if (error < 0) | ||
1421 | goto bummer; | ||
1422 | } | ||
1423 | |||
1424 | if (protocol == IPPROTO_TCP) { | ||
1425 | if ((error = sock->ops->listen(sock, 64)) < 0) | ||
1426 | goto bummer; | ||
1427 | } | ||
1428 | |||
1429 | if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) | ||
1430 | return 0; | ||
1431 | |||
1432 | bummer: | ||
1433 | dprintk("svc: svc_create_socket error = %d\n", -error); | ||
1434 | sock_release(sock); | ||
1435 | return error; | ||
1436 | } | ||
1437 | |||
1438 | /* | ||
1439 | * Remove a dead socket | ||
1440 | */ | ||
1441 | void | ||
1442 | svc_delete_socket(struct svc_sock *svsk) | ||
1443 | { | ||
1444 | struct svc_serv *serv; | ||
1445 | struct sock *sk; | ||
1446 | |||
1447 | dprintk("svc: svc_delete_socket(%p)\n", svsk); | ||
1448 | |||
1449 | serv = svsk->sk_server; | ||
1450 | sk = svsk->sk_sk; | ||
1451 | |||
1452 | sk->sk_state_change = svsk->sk_ostate; | ||
1453 | sk->sk_data_ready = svsk->sk_odata; | ||
1454 | sk->sk_write_space = svsk->sk_owspace; | ||
1455 | |||
1456 | spin_lock_bh(&serv->sv_lock); | ||
1457 | |||
1458 | list_del_init(&svsk->sk_list); | ||
1459 | list_del_init(&svsk->sk_ready); | ||
1460 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) | ||
1461 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | ||
1462 | serv->sv_tmpcnt--; | ||
1463 | |||
1464 | if (!svsk->sk_inuse) { | ||
1465 | spin_unlock_bh(&serv->sv_lock); | ||
1466 | sock_release(svsk->sk_sock); | ||
1467 | kfree(svsk); | ||
1468 | } else { | ||
1469 | spin_unlock_bh(&serv->sv_lock); | ||
1470 | dprintk(KERN_NOTICE "svc: server socket destroy delayed\n"); | ||
1471 | /* svsk->sk_server = NULL; */ | ||
1472 | } | ||
1473 | } | ||
1474 | |||
1475 | /* | ||
1476 | * Make a socket for nfsd and lockd | ||
1477 | */ | ||
1478 | int | ||
1479 | svc_makesock(struct svc_serv *serv, int protocol, unsigned short port) | ||
1480 | { | ||
1481 | struct sockaddr_in sin; | ||
1482 | |||
1483 | dprintk("svc: creating socket proto = %d\n", protocol); | ||
1484 | sin.sin_family = AF_INET; | ||
1485 | sin.sin_addr.s_addr = INADDR_ANY; | ||
1486 | sin.sin_port = htons(port); | ||
1487 | return svc_create_socket(serv, protocol, &sin); | ||
1488 | } | ||
1489 | |||
1490 | /* | ||
1491 | * Handle defer and revisit of requests | ||
1492 | */ | ||
1493 | |||
1494 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | ||
1495 | { | ||
1496 | struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); | ||
1497 | struct svc_serv *serv = dreq->owner; | ||
1498 | struct svc_sock *svsk; | ||
1499 | |||
1500 | if (too_many) { | ||
1501 | svc_sock_put(dr->svsk); | ||
1502 | kfree(dr); | ||
1503 | return; | ||
1504 | } | ||
1505 | dprintk("revisit queued\n"); | ||
1506 | svsk = dr->svsk; | ||
1507 | dr->svsk = NULL; | ||
1508 | spin_lock_bh(&serv->sv_lock); | ||
1509 | list_add(&dr->handle.recent, &svsk->sk_deferred); | ||
1510 | spin_unlock_bh(&serv->sv_lock); | ||
1511 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1512 | svc_sock_enqueue(svsk); | ||
1513 | svc_sock_put(svsk); | ||
1514 | } | ||
1515 | |||
1516 | static struct cache_deferred_req * | ||
1517 | svc_defer(struct cache_req *req) | ||
1518 | { | ||
1519 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | ||
1520 | int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len); | ||
1521 | struct svc_deferred_req *dr; | ||
1522 | |||
1523 | if (rqstp->rq_arg.page_len) | ||
1524 | return NULL; /* if more than a page, give up FIXME */ | ||
1525 | if (rqstp->rq_deferred) { | ||
1526 | dr = rqstp->rq_deferred; | ||
1527 | rqstp->rq_deferred = NULL; | ||
1528 | } else { | ||
1529 | int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | ||
1530 | /* FIXME maybe discard if size too large */ | ||
1531 | dr = kmalloc(size, GFP_KERNEL); | ||
1532 | if (dr == NULL) | ||
1533 | return NULL; | ||
1534 | |||
1535 | dr->handle.owner = rqstp->rq_server; | ||
1536 | dr->prot = rqstp->rq_prot; | ||
1537 | dr->addr = rqstp->rq_addr; | ||
1538 | dr->argslen = rqstp->rq_arg.len >> 2; | ||
1539 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); | ||
1540 | } | ||
1541 | spin_lock_bh(&rqstp->rq_server->sv_lock); | ||
1542 | rqstp->rq_sock->sk_inuse++; | ||
1543 | dr->svsk = rqstp->rq_sock; | ||
1544 | spin_unlock_bh(&rqstp->rq_server->sv_lock); | ||
1545 | |||
1546 | dr->handle.revisit = svc_revisit; | ||
1547 | return &dr->handle; | ||
1548 | } | ||
1549 | |||
1550 | /* | ||
1551 | * recv data from a deferred request into an active one | ||
1552 | */ | ||
1553 | static int svc_deferred_recv(struct svc_rqst *rqstp) | ||
1554 | { | ||
1555 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
1556 | |||
1557 | rqstp->rq_arg.head[0].iov_base = dr->args; | ||
1558 | rqstp->rq_arg.head[0].iov_len = dr->argslen<<2; | ||
1559 | rqstp->rq_arg.page_len = 0; | ||
1560 | rqstp->rq_arg.len = dr->argslen<<2; | ||
1561 | rqstp->rq_prot = dr->prot; | ||
1562 | rqstp->rq_addr = dr->addr; | ||
1563 | return dr->argslen<<2; | ||
1564 | } | ||
1565 | |||
1566 | |||
1567 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) | ||
1568 | { | ||
1569 | struct svc_deferred_req *dr = NULL; | ||
1570 | struct svc_serv *serv = svsk->sk_server; | ||
1571 | |||
1572 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) | ||
1573 | return NULL; | ||
1574 | spin_lock_bh(&serv->sv_lock); | ||
1575 | clear_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1576 | if (!list_empty(&svsk->sk_deferred)) { | ||
1577 | dr = list_entry(svsk->sk_deferred.next, | ||
1578 | struct svc_deferred_req, | ||
1579 | handle.recent); | ||
1580 | list_del_init(&dr->handle.recent); | ||
1581 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1582 | } | ||
1583 | spin_unlock_bh(&serv->sv_lock); | ||
1584 | return dr; | ||
1585 | } | ||
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c new file mode 100644 index 000000000000..1b9616a12e24 --- /dev/null +++ b/net/sunrpc/sysctl.c | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/sysctl.c | ||
3 | * | ||
4 | * Sysctl interface to sunrpc module. | ||
5 | * | ||
6 | * I would prefer to register the sunrpc table below sys/net, but that's | ||
7 | * impossible at the moment. | ||
8 | */ | ||
9 | |||
10 | #include <linux/config.h> | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/linkage.h> | ||
13 | #include <linux/ctype.h> | ||
14 | #include <linux/fs.h> | ||
15 | #include <linux/sysctl.h> | ||
16 | #include <linux/module.h> | ||
17 | |||
18 | #include <asm/uaccess.h> | ||
19 | #include <linux/sunrpc/types.h> | ||
20 | #include <linux/sunrpc/sched.h> | ||
21 | #include <linux/sunrpc/stats.h> | ||
22 | #include <linux/sunrpc/xprt.h> | ||
23 | |||
/*
 * Debug flag words.  They are defined here unconditionally; the sysctl
 * entries that expose them are only built with RPC_DEBUG.
 */
unsigned int	rpc_debug;	/* "rpc_debug" sysctl */
unsigned int	nfs_debug;	/* "nfs_debug" sysctl */
unsigned int	nfsd_debug;	/* "nfsd_debug" sysctl */
unsigned int	nlm_debug;	/* "nlm_debug" sysctl */
31 | |||
32 | #ifdef RPC_DEBUG | ||
33 | |||
34 | static struct ctl_table_header *sunrpc_table_header; | ||
35 | static ctl_table sunrpc_table[]; | ||
36 | |||
37 | void | ||
38 | rpc_register_sysctl(void) | ||
39 | { | ||
40 | if (!sunrpc_table_header) { | ||
41 | sunrpc_table_header = register_sysctl_table(sunrpc_table, 1); | ||
42 | #ifdef CONFIG_PROC_FS | ||
43 | if (sunrpc_table[0].de) | ||
44 | sunrpc_table[0].de->owner = THIS_MODULE; | ||
45 | #endif | ||
46 | } | ||
47 | |||
48 | } | ||
49 | |||
50 | void | ||
51 | rpc_unregister_sysctl(void) | ||
52 | { | ||
53 | if (sunrpc_table_header) { | ||
54 | unregister_sysctl_table(sunrpc_table_header); | ||
55 | sunrpc_table_header = NULL; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | static int | ||
60 | proc_dodebug(ctl_table *table, int write, struct file *file, | ||
61 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
62 | { | ||
63 | char tmpbuf[20], c, *s; | ||
64 | char __user *p; | ||
65 | unsigned int value; | ||
66 | size_t left, len; | ||
67 | |||
68 | if ((*ppos && !write) || !*lenp) { | ||
69 | *lenp = 0; | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | left = *lenp; | ||
74 | |||
75 | if (write) { | ||
76 | if (!access_ok(VERIFY_READ, buffer, left)) | ||
77 | return -EFAULT; | ||
78 | p = buffer; | ||
79 | while (left && __get_user(c, p) >= 0 && isspace(c)) | ||
80 | left--, p++; | ||
81 | if (!left) | ||
82 | goto done; | ||
83 | |||
84 | if (left > sizeof(tmpbuf) - 1) | ||
85 | return -EINVAL; | ||
86 | if (copy_from_user(tmpbuf, p, left)) | ||
87 | return -EFAULT; | ||
88 | tmpbuf[left] = '\0'; | ||
89 | |||
90 | for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--) | ||
91 | value = 10 * value + (*s - '0'); | ||
92 | if (*s && !isspace(*s)) | ||
93 | return -EINVAL; | ||
94 | while (left && isspace(*s)) | ||
95 | left--, s++; | ||
96 | *(unsigned int *) table->data = value; | ||
97 | /* Display the RPC tasks on writing to rpc_debug */ | ||
98 | if (table->ctl_name == CTL_RPCDEBUG) { | ||
99 | rpc_show_tasks(); | ||
100 | } | ||
101 | } else { | ||
102 | if (!access_ok(VERIFY_WRITE, buffer, left)) | ||
103 | return -EFAULT; | ||
104 | len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data); | ||
105 | if (len > left) | ||
106 | len = left; | ||
107 | if (__copy_to_user(buffer, tmpbuf, len)) | ||
108 | return -EFAULT; | ||
109 | if ((left -= len) > 0) { | ||
110 | if (put_user('\n', (char __user *)buffer + len)) | ||
111 | return -EFAULT; | ||
112 | left--; | ||
113 | } | ||
114 | } | ||
115 | |||
116 | done: | ||
117 | *lenp -= left; | ||
118 | *ppos += *lenp; | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; | ||
123 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; | ||
124 | |||
125 | static ctl_table debug_table[] = { | ||
126 | { | ||
127 | .ctl_name = CTL_RPCDEBUG, | ||
128 | .procname = "rpc_debug", | ||
129 | .data = &rpc_debug, | ||
130 | .maxlen = sizeof(int), | ||
131 | .mode = 0644, | ||
132 | .proc_handler = &proc_dodebug | ||
133 | }, | ||
134 | { | ||
135 | .ctl_name = CTL_NFSDEBUG, | ||
136 | .procname = "nfs_debug", | ||
137 | .data = &nfs_debug, | ||
138 | .maxlen = sizeof(int), | ||
139 | .mode = 0644, | ||
140 | .proc_handler = &proc_dodebug | ||
141 | }, | ||
142 | { | ||
143 | .ctl_name = CTL_NFSDDEBUG, | ||
144 | .procname = "nfsd_debug", | ||
145 | .data = &nfsd_debug, | ||
146 | .maxlen = sizeof(int), | ||
147 | .mode = 0644, | ||
148 | .proc_handler = &proc_dodebug | ||
149 | }, | ||
150 | { | ||
151 | .ctl_name = CTL_NLMDEBUG, | ||
152 | .procname = "nlm_debug", | ||
153 | .data = &nlm_debug, | ||
154 | .maxlen = sizeof(int), | ||
155 | .mode = 0644, | ||
156 | .proc_handler = &proc_dodebug | ||
157 | }, | ||
158 | { | ||
159 | .ctl_name = CTL_SLOTTABLE_UDP, | ||
160 | .procname = "udp_slot_table_entries", | ||
161 | .data = &xprt_udp_slot_table_entries, | ||
162 | .maxlen = sizeof(unsigned int), | ||
163 | .mode = 0644, | ||
164 | .proc_handler = &proc_dointvec_minmax, | ||
165 | .strategy = &sysctl_intvec, | ||
166 | .extra1 = &min_slot_table_size, | ||
167 | .extra2 = &max_slot_table_size | ||
168 | }, | ||
169 | { | ||
170 | .ctl_name = CTL_SLOTTABLE_TCP, | ||
171 | .procname = "tcp_slot_table_entries", | ||
172 | .data = &xprt_tcp_slot_table_entries, | ||
173 | .maxlen = sizeof(unsigned int), | ||
174 | .mode = 0644, | ||
175 | .proc_handler = &proc_dointvec_minmax, | ||
176 | .strategy = &sysctl_intvec, | ||
177 | .extra1 = &min_slot_table_size, | ||
178 | .extra2 = &max_slot_table_size | ||
179 | }, | ||
180 | { .ctl_name = 0 } | ||
181 | }; | ||
182 | |||
183 | static ctl_table sunrpc_table[] = { | ||
184 | { | ||
185 | .ctl_name = CTL_SUNRPC, | ||
186 | .procname = "sunrpc", | ||
187 | .mode = 0555, | ||
188 | .child = debug_table | ||
189 | }, | ||
190 | { .ctl_name = 0 } | ||
191 | }; | ||
192 | |||
193 | #endif | ||
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c new file mode 100644 index 000000000000..bcbdf6430d5c --- /dev/null +++ b/net/sunrpc/timer.c | |||
@@ -0,0 +1,107 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/timer.c | ||
3 | * | ||
4 | * Estimate RPC request round trip time. | ||
5 | * | ||
6 | * Based on packet round-trip and variance estimator algorithms described | ||
7 | * in appendix A of "Congestion Avoidance and Control" by Van Jacobson | ||
8 | * and Michael J. Karels (ACM Computer Communication Review; Proceedings | ||
9 | * of the Sigcomm '88 Symposium in Stanford, CA, August, 1988). | ||
10 | * | ||
11 | * This RTT estimator is used only for RPC over datagram protocols. | ||
12 | * | ||
13 | * Copyright (C) 2002 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
14 | */ | ||
15 | |||
16 | #include <asm/param.h> | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <linux/unistd.h> | ||
20 | |||
21 | #include <linux/sunrpc/clnt.h> | ||
22 | #include <linux/sunrpc/xprt.h> | ||
23 | #include <linux/sunrpc/timer.h> | ||
24 | |||
25 | #define RPC_RTO_MAX (60*HZ) | ||
26 | #define RPC_RTO_INIT (HZ/5) | ||
27 | #define RPC_RTO_MIN (HZ/10) | ||
28 | |||
29 | void | ||
30 | rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) | ||
31 | { | ||
32 | unsigned long init = 0; | ||
33 | unsigned i; | ||
34 | |||
35 | rt->timeo = timeo; | ||
36 | |||
37 | if (timeo > RPC_RTO_INIT) | ||
38 | init = (timeo - RPC_RTO_INIT) << 3; | ||
39 | for (i = 0; i < 5; i++) { | ||
40 | rt->srtt[i] = init; | ||
41 | rt->sdrtt[i] = RPC_RTO_INIT; | ||
42 | rt->ntimeouts[i] = 0; | ||
43 | } | ||
44 | } | ||
45 | |||
46 | /* | ||
47 | * NB: When computing the smoothed RTT and standard deviation, | ||
48 | * be careful not to produce negative intermediate results. | ||
49 | */ | ||
50 | void | ||
51 | rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) | ||
52 | { | ||
53 | long *srtt, *sdrtt; | ||
54 | |||
55 | if (timer-- == 0) | ||
56 | return; | ||
57 | |||
58 | /* jiffies wrapped; ignore this one */ | ||
59 | if (m < 0) | ||
60 | return; | ||
61 | |||
62 | if (m == 0) | ||
63 | m = 1L; | ||
64 | |||
65 | srtt = (long *)&rt->srtt[timer]; | ||
66 | m -= *srtt >> 3; | ||
67 | *srtt += m; | ||
68 | |||
69 | if (m < 0) | ||
70 | m = -m; | ||
71 | |||
72 | sdrtt = (long *)&rt->sdrtt[timer]; | ||
73 | m -= *sdrtt >> 2; | ||
74 | *sdrtt += m; | ||
75 | |||
76 | /* Set lower bound on the variance */ | ||
77 | if (*sdrtt < RPC_RTO_MIN) | ||
78 | *sdrtt = RPC_RTO_MIN; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * Estimate rto for an nfs rpc sent via. an unreliable datagram. | ||
83 | * Use the mean and mean deviation of rtt for the appropriate type of rpc | ||
84 | * for the frequent rpcs and a default for the others. | ||
85 | * The justification for doing "other" this way is that these rpcs | ||
86 | * happen so infrequently that timer est. would probably be stale. | ||
87 | * Also, since many of these rpcs are | ||
88 | * non-idempotent, a conservative timeout is desired. | ||
89 | * getattr, lookup, | ||
90 | * read, write, commit - A+4D | ||
91 | * other - timeo | ||
92 | */ | ||
93 | |||
94 | unsigned long | ||
95 | rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) | ||
96 | { | ||
97 | unsigned long res; | ||
98 | |||
99 | if (timer-- == 0) | ||
100 | return rt->timeo; | ||
101 | |||
102 | res = ((rt->srtt[timer] + 7) >> 3) + rt->sdrtt[timer]; | ||
103 | if (res > RPC_RTO_MAX) | ||
104 | res = RPC_RTO_MAX; | ||
105 | |||
106 | return res; | ||
107 | } | ||
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c new file mode 100644 index 000000000000..4484931018eb --- /dev/null +++ b/net/sunrpc/xdr.c | |||
@@ -0,0 +1,917 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/xdr.c | ||
3 | * | ||
4 | * Generic XDR support. | ||
5 | * | ||
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/socket.h> | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/pagemap.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/in.h> | ||
16 | #include <linux/net.h> | ||
17 | #include <net/sock.h> | ||
18 | #include <linux/sunrpc/xdr.h> | ||
19 | #include <linux/sunrpc/msg_prot.h> | ||
20 | |||
21 | /* | ||
22 | * XDR functions for basic NFS types | ||
23 | */ | ||
24 | u32 * | ||
25 | xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj) | ||
26 | { | ||
27 | unsigned int quadlen = XDR_QUADLEN(obj->len); | ||
28 | |||
29 | p[quadlen] = 0; /* zero trailing bytes */ | ||
30 | *p++ = htonl(obj->len); | ||
31 | memcpy(p, obj->data, obj->len); | ||
32 | return p + XDR_QUADLEN(obj->len); | ||
33 | } | ||
34 | |||
35 | u32 * | ||
36 | xdr_decode_netobj(u32 *p, struct xdr_netobj *obj) | ||
37 | { | ||
38 | unsigned int len; | ||
39 | |||
40 | if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ) | ||
41 | return NULL; | ||
42 | obj->len = len; | ||
43 | obj->data = (u8 *) p; | ||
44 | return p + XDR_QUADLEN(len); | ||
45 | } | ||
46 | |||
47 | /** | ||
48 | * xdr_encode_opaque_fixed - Encode fixed length opaque data | ||
49 | * @p - pointer to current position in XDR buffer. | ||
50 | * @ptr - pointer to data to encode (or NULL) | ||
51 | * @nbytes - size of data. | ||
52 | * | ||
53 | * Copy the array of data of length nbytes at ptr to the XDR buffer | ||
54 | * at position p, then align to the next 32-bit boundary by padding | ||
55 | * with zero bytes (see RFC1832). | ||
56 | * Note: if ptr is NULL, only the padding is performed. | ||
57 | * | ||
58 | * Returns the updated current XDR buffer position | ||
59 | * | ||
60 | */ | ||
61 | u32 *xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int nbytes) | ||
62 | { | ||
63 | if (likely(nbytes != 0)) { | ||
64 | unsigned int quadlen = XDR_QUADLEN(nbytes); | ||
65 | unsigned int padding = (quadlen << 2) - nbytes; | ||
66 | |||
67 | if (ptr != NULL) | ||
68 | memcpy(p, ptr, nbytes); | ||
69 | if (padding != 0) | ||
70 | memset((char *)p + nbytes, 0, padding); | ||
71 | p += quadlen; | ||
72 | } | ||
73 | return p; | ||
74 | } | ||
75 | EXPORT_SYMBOL(xdr_encode_opaque_fixed); | ||
76 | |||
77 | /** | ||
78 | * xdr_encode_opaque - Encode variable length opaque data | ||
79 | * @p - pointer to current position in XDR buffer. | ||
80 | * @ptr - pointer to data to encode (or NULL) | ||
81 | * @nbytes - size of data. | ||
82 | * | ||
83 | * Returns the updated current XDR buffer position | ||
84 | */ | ||
85 | u32 *xdr_encode_opaque(u32 *p, const void *ptr, unsigned int nbytes) | ||
86 | { | ||
87 | *p++ = htonl(nbytes); | ||
88 | return xdr_encode_opaque_fixed(p, ptr, nbytes); | ||
89 | } | ||
90 | EXPORT_SYMBOL(xdr_encode_opaque); | ||
91 | |||
92 | u32 * | ||
93 | xdr_encode_string(u32 *p, const char *string) | ||
94 | { | ||
95 | return xdr_encode_array(p, string, strlen(string)); | ||
96 | } | ||
97 | |||
98 | u32 * | ||
99 | xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen) | ||
100 | { | ||
101 | unsigned int len; | ||
102 | char *string; | ||
103 | |||
104 | if ((len = ntohl(*p++)) > maxlen) | ||
105 | return NULL; | ||
106 | if (lenp) | ||
107 | *lenp = len; | ||
108 | if ((len % 4) != 0) { | ||
109 | string = (char *) p; | ||
110 | } else { | ||
111 | string = (char *) (p - 1); | ||
112 | memmove(string, p, len); | ||
113 | } | ||
114 | string[len] = '\0'; | ||
115 | *sp = string; | ||
116 | return p + XDR_QUADLEN(len); | ||
117 | } | ||
118 | |||
119 | u32 * | ||
120 | xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen) | ||
121 | { | ||
122 | unsigned int len; | ||
123 | |||
124 | if ((len = ntohl(*p++)) > maxlen) | ||
125 | return NULL; | ||
126 | *lenp = len; | ||
127 | *sp = (char *) p; | ||
128 | return p + XDR_QUADLEN(len); | ||
129 | } | ||
130 | |||
131 | void | ||
132 | xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, | ||
133 | unsigned int len) | ||
134 | { | ||
135 | struct kvec *tail = xdr->tail; | ||
136 | u32 *p; | ||
137 | |||
138 | xdr->pages = pages; | ||
139 | xdr->page_base = base; | ||
140 | xdr->page_len = len; | ||
141 | |||
142 | p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len); | ||
143 | tail->iov_base = p; | ||
144 | tail->iov_len = 0; | ||
145 | |||
146 | if (len & 3) { | ||
147 | unsigned int pad = 4 - (len & 3); | ||
148 | |||
149 | *p = 0; | ||
150 | tail->iov_base = (char *)p + (len & 3); | ||
151 | tail->iov_len = pad; | ||
152 | len += pad; | ||
153 | } | ||
154 | xdr->buflen += len; | ||
155 | xdr->len += len; | ||
156 | } | ||
157 | |||
158 | void | ||
159 | xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, | ||
160 | struct page **pages, unsigned int base, unsigned int len) | ||
161 | { | ||
162 | struct kvec *head = xdr->head; | ||
163 | struct kvec *tail = xdr->tail; | ||
164 | char *buf = (char *)head->iov_base; | ||
165 | unsigned int buflen = head->iov_len; | ||
166 | |||
167 | head->iov_len = offset; | ||
168 | |||
169 | xdr->pages = pages; | ||
170 | xdr->page_base = base; | ||
171 | xdr->page_len = len; | ||
172 | |||
173 | tail->iov_base = buf + offset; | ||
174 | tail->iov_len = buflen - offset; | ||
175 | |||
176 | xdr->buflen += len; | ||
177 | } | ||
178 | |||
179 | void | ||
180 | xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, | ||
181 | skb_reader_t *desc, | ||
182 | skb_read_actor_t copy_actor) | ||
183 | { | ||
184 | struct page **ppage = xdr->pages; | ||
185 | unsigned int len, pglen = xdr->page_len; | ||
186 | int ret; | ||
187 | |||
188 | len = xdr->head[0].iov_len; | ||
189 | if (base < len) { | ||
190 | len -= base; | ||
191 | ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); | ||
192 | if (ret != len || !desc->count) | ||
193 | return; | ||
194 | base = 0; | ||
195 | } else | ||
196 | base -= len; | ||
197 | |||
198 | if (pglen == 0) | ||
199 | goto copy_tail; | ||
200 | if (base >= pglen) { | ||
201 | base -= pglen; | ||
202 | goto copy_tail; | ||
203 | } | ||
204 | if (base || xdr->page_base) { | ||
205 | pglen -= base; | ||
206 | base += xdr->page_base; | ||
207 | ppage += base >> PAGE_CACHE_SHIFT; | ||
208 | base &= ~PAGE_CACHE_MASK; | ||
209 | } | ||
210 | do { | ||
211 | char *kaddr; | ||
212 | |||
213 | len = PAGE_CACHE_SIZE; | ||
214 | kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); | ||
215 | if (base) { | ||
216 | len -= base; | ||
217 | if (pglen < len) | ||
218 | len = pglen; | ||
219 | ret = copy_actor(desc, kaddr + base, len); | ||
220 | base = 0; | ||
221 | } else { | ||
222 | if (pglen < len) | ||
223 | len = pglen; | ||
224 | ret = copy_actor(desc, kaddr, len); | ||
225 | } | ||
226 | flush_dcache_page(*ppage); | ||
227 | kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); | ||
228 | if (ret != len || !desc->count) | ||
229 | return; | ||
230 | ppage++; | ||
231 | } while ((pglen -= len) != 0); | ||
232 | copy_tail: | ||
233 | len = xdr->tail[0].iov_len; | ||
234 | if (base < len) | ||
235 | copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); | ||
236 | } | ||
237 | |||
238 | |||
239 | int | ||
240 | xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, | ||
241 | struct xdr_buf *xdr, unsigned int base, int msgflags) | ||
242 | { | ||
243 | struct page **ppage = xdr->pages; | ||
244 | unsigned int len, pglen = xdr->page_len; | ||
245 | int err, ret = 0; | ||
246 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
247 | |||
248 | len = xdr->head[0].iov_len; | ||
249 | if (base < len || (addr != NULL && base == 0)) { | ||
250 | struct kvec iov = { | ||
251 | .iov_base = xdr->head[0].iov_base + base, | ||
252 | .iov_len = len - base, | ||
253 | }; | ||
254 | struct msghdr msg = { | ||
255 | .msg_name = addr, | ||
256 | .msg_namelen = addrlen, | ||
257 | .msg_flags = msgflags, | ||
258 | }; | ||
259 | if (xdr->len > len) | ||
260 | msg.msg_flags |= MSG_MORE; | ||
261 | |||
262 | if (iov.iov_len != 0) | ||
263 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
264 | else | ||
265 | err = kernel_sendmsg(sock, &msg, NULL, 0, 0); | ||
266 | if (ret == 0) | ||
267 | ret = err; | ||
268 | else if (err > 0) | ||
269 | ret += err; | ||
270 | if (err != iov.iov_len) | ||
271 | goto out; | ||
272 | base = 0; | ||
273 | } else | ||
274 | base -= len; | ||
275 | |||
276 | if (pglen == 0) | ||
277 | goto copy_tail; | ||
278 | if (base >= pglen) { | ||
279 | base -= pglen; | ||
280 | goto copy_tail; | ||
281 | } | ||
282 | if (base || xdr->page_base) { | ||
283 | pglen -= base; | ||
284 | base += xdr->page_base; | ||
285 | ppage += base >> PAGE_CACHE_SHIFT; | ||
286 | base &= ~PAGE_CACHE_MASK; | ||
287 | } | ||
288 | |||
289 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
290 | do { | ||
291 | int flags = msgflags; | ||
292 | |||
293 | len = PAGE_CACHE_SIZE; | ||
294 | if (base) | ||
295 | len -= base; | ||
296 | if (pglen < len) | ||
297 | len = pglen; | ||
298 | |||
299 | if (pglen != len || xdr->tail[0].iov_len != 0) | ||
300 | flags |= MSG_MORE; | ||
301 | |||
302 | /* Hmm... We might be dealing with highmem pages */ | ||
303 | if (PageHighMem(*ppage)) | ||
304 | sendpage = sock_no_sendpage; | ||
305 | err = sendpage(sock, *ppage, base, len, flags); | ||
306 | if (ret == 0) | ||
307 | ret = err; | ||
308 | else if (err > 0) | ||
309 | ret += err; | ||
310 | if (err != len) | ||
311 | goto out; | ||
312 | base = 0; | ||
313 | ppage++; | ||
314 | } while ((pglen -= len) != 0); | ||
315 | copy_tail: | ||
316 | len = xdr->tail[0].iov_len; | ||
317 | if (base < len) { | ||
318 | struct kvec iov = { | ||
319 | .iov_base = xdr->tail[0].iov_base + base, | ||
320 | .iov_len = len - base, | ||
321 | }; | ||
322 | struct msghdr msg = { | ||
323 | .msg_flags = msgflags, | ||
324 | }; | ||
325 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
326 | if (ret == 0) | ||
327 | ret = err; | ||
328 | else if (err > 0) | ||
329 | ret += err; | ||
330 | } | ||
331 | out: | ||
332 | return ret; | ||
333 | } | ||
334 | |||
335 | |||
336 | /* | ||
337 | * Helper routines for doing 'memmove' like operations on a struct xdr_buf | ||
338 | * | ||
339 | * _shift_data_right_pages | ||
340 | * @pages: vector of pages containing both the source and dest memory area. | ||
341 | * @pgto_base: page vector address of destination | ||
342 | * @pgfrom_base: page vector address of source | ||
343 | * @len: number of bytes to copy | ||
344 | * | ||
345 | * Note: the addresses pgto_base and pgfrom_base are both calculated in | ||
346 | * the same way: | ||
347 | * if a memory area starts at byte 'base' in page 'pages[i]', | ||
348 | * then its address is given as (i << PAGE_CACHE_SHIFT) + base | ||
349 | * Also note: pgfrom_base must be < pgto_base, but the memory areas | ||
350 | * they point to may overlap. | ||
351 | */ | ||
352 | static void | ||
353 | _shift_data_right_pages(struct page **pages, size_t pgto_base, | ||
354 | size_t pgfrom_base, size_t len) | ||
355 | { | ||
356 | struct page **pgfrom, **pgto; | ||
357 | char *vfrom, *vto; | ||
358 | size_t copy; | ||
359 | |||
360 | BUG_ON(pgto_base <= pgfrom_base); | ||
361 | |||
362 | pgto_base += len; | ||
363 | pgfrom_base += len; | ||
364 | |||
365 | pgto = pages + (pgto_base >> PAGE_CACHE_SHIFT); | ||
366 | pgfrom = pages + (pgfrom_base >> PAGE_CACHE_SHIFT); | ||
367 | |||
368 | pgto_base &= ~PAGE_CACHE_MASK; | ||
369 | pgfrom_base &= ~PAGE_CACHE_MASK; | ||
370 | |||
371 | do { | ||
372 | /* Are any pointers crossing a page boundary? */ | ||
373 | if (pgto_base == 0) { | ||
374 | flush_dcache_page(*pgto); | ||
375 | pgto_base = PAGE_CACHE_SIZE; | ||
376 | pgto--; | ||
377 | } | ||
378 | if (pgfrom_base == 0) { | ||
379 | pgfrom_base = PAGE_CACHE_SIZE; | ||
380 | pgfrom--; | ||
381 | } | ||
382 | |||
383 | copy = len; | ||
384 | if (copy > pgto_base) | ||
385 | copy = pgto_base; | ||
386 | if (copy > pgfrom_base) | ||
387 | copy = pgfrom_base; | ||
388 | pgto_base -= copy; | ||
389 | pgfrom_base -= copy; | ||
390 | |||
391 | vto = kmap_atomic(*pgto, KM_USER0); | ||
392 | vfrom = kmap_atomic(*pgfrom, KM_USER1); | ||
393 | memmove(vto + pgto_base, vfrom + pgfrom_base, copy); | ||
394 | kunmap_atomic(vfrom, KM_USER1); | ||
395 | kunmap_atomic(vto, KM_USER0); | ||
396 | |||
397 | } while ((len -= copy) != 0); | ||
398 | flush_dcache_page(*pgto); | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * _copy_to_pages | ||
403 | * @pages: array of pages | ||
404 | * @pgbase: page vector address of destination | ||
405 | * @p: pointer to source data | ||
406 | * @len: length | ||
407 | * | ||
408 | * Copies data from an arbitrary memory location into an array of pages | ||
409 | * The copy is assumed to be non-overlapping. | ||
410 | */ | ||
411 | static void | ||
412 | _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) | ||
413 | { | ||
414 | struct page **pgto; | ||
415 | char *vto; | ||
416 | size_t copy; | ||
417 | |||
418 | pgto = pages + (pgbase >> PAGE_CACHE_SHIFT); | ||
419 | pgbase &= ~PAGE_CACHE_MASK; | ||
420 | |||
421 | do { | ||
422 | copy = PAGE_CACHE_SIZE - pgbase; | ||
423 | if (copy > len) | ||
424 | copy = len; | ||
425 | |||
426 | vto = kmap_atomic(*pgto, KM_USER0); | ||
427 | memcpy(vto + pgbase, p, copy); | ||
428 | kunmap_atomic(vto, KM_USER0); | ||
429 | |||
430 | pgbase += copy; | ||
431 | if (pgbase == PAGE_CACHE_SIZE) { | ||
432 | flush_dcache_page(*pgto); | ||
433 | pgbase = 0; | ||
434 | pgto++; | ||
435 | } | ||
436 | p += copy; | ||
437 | |||
438 | } while ((len -= copy) != 0); | ||
439 | flush_dcache_page(*pgto); | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * _copy_from_pages | ||
444 | * @p: pointer to destination | ||
445 | * @pages: array of pages | ||
446 | * @pgbase: offset of source data | ||
447 | * @len: length | ||
448 | * | ||
449 | * Copies data into an arbitrary memory location from an array of pages | ||
450 | * The copy is assumed to be non-overlapping. | ||
451 | */ | ||
452 | static void | ||
453 | _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) | ||
454 | { | ||
455 | struct page **pgfrom; | ||
456 | char *vfrom; | ||
457 | size_t copy; | ||
458 | |||
459 | pgfrom = pages + (pgbase >> PAGE_CACHE_SHIFT); | ||
460 | pgbase &= ~PAGE_CACHE_MASK; | ||
461 | |||
462 | do { | ||
463 | copy = PAGE_CACHE_SIZE - pgbase; | ||
464 | if (copy > len) | ||
465 | copy = len; | ||
466 | |||
467 | vfrom = kmap_atomic(*pgfrom, KM_USER0); | ||
468 | memcpy(p, vfrom + pgbase, copy); | ||
469 | kunmap_atomic(vfrom, KM_USER0); | ||
470 | |||
471 | pgbase += copy; | ||
472 | if (pgbase == PAGE_CACHE_SIZE) { | ||
473 | pgbase = 0; | ||
474 | pgfrom++; | ||
475 | } | ||
476 | p += copy; | ||
477 | |||
478 | } while ((len -= copy) != 0); | ||
479 | } | ||
480 | |||
481 | /* | ||
482 | * xdr_shrink_bufhead | ||
483 | * @buf: xdr_buf | ||
484 | * @len: bytes to remove from buf->head[0] | ||
485 | * | ||
486 | * Shrinks XDR buffer's header kvec buf->head[0] by | ||
487 | * 'len' bytes. The extra data is not lost, but is instead | ||
488 | * moved into the inlined pages and/or the tail. | ||
489 | */ | ||
490 | static void | ||
491 | xdr_shrink_bufhead(struct xdr_buf *buf, size_t len) | ||
492 | { | ||
493 | struct kvec *head, *tail; | ||
494 | size_t copy, offs; | ||
495 | unsigned int pglen = buf->page_len; | ||
496 | |||
497 | tail = buf->tail; | ||
498 | head = buf->head; | ||
499 | BUG_ON (len > head->iov_len); | ||
500 | |||
501 | /* Shift the tail first */ | ||
502 | if (tail->iov_len != 0) { | ||
503 | if (tail->iov_len > len) { | ||
504 | copy = tail->iov_len - len; | ||
505 | memmove((char *)tail->iov_base + len, | ||
506 | tail->iov_base, copy); | ||
507 | } | ||
508 | /* Copy from the inlined pages into the tail */ | ||
509 | copy = len; | ||
510 | if (copy > pglen) | ||
511 | copy = pglen; | ||
512 | offs = len - copy; | ||
513 | if (offs >= tail->iov_len) | ||
514 | copy = 0; | ||
515 | else if (copy > tail->iov_len - offs) | ||
516 | copy = tail->iov_len - offs; | ||
517 | if (copy != 0) | ||
518 | _copy_from_pages((char *)tail->iov_base + offs, | ||
519 | buf->pages, | ||
520 | buf->page_base + pglen + offs - len, | ||
521 | copy); | ||
522 | /* Do we also need to copy data from the head into the tail ? */ | ||
523 | if (len > pglen) { | ||
524 | offs = copy = len - pglen; | ||
525 | if (copy > tail->iov_len) | ||
526 | copy = tail->iov_len; | ||
527 | memcpy(tail->iov_base, | ||
528 | (char *)head->iov_base + | ||
529 | head->iov_len - offs, | ||
530 | copy); | ||
531 | } | ||
532 | } | ||
533 | /* Now handle pages */ | ||
534 | if (pglen != 0) { | ||
535 | if (pglen > len) | ||
536 | _shift_data_right_pages(buf->pages, | ||
537 | buf->page_base + len, | ||
538 | buf->page_base, | ||
539 | pglen - len); | ||
540 | copy = len; | ||
541 | if (len > pglen) | ||
542 | copy = pglen; | ||
543 | _copy_to_pages(buf->pages, buf->page_base, | ||
544 | (char *)head->iov_base + head->iov_len - len, | ||
545 | copy); | ||
546 | } | ||
547 | head->iov_len -= len; | ||
548 | buf->buflen -= len; | ||
549 | /* Have we truncated the message? */ | ||
550 | if (buf->len > buf->buflen) | ||
551 | buf->len = buf->buflen; | ||
552 | } | ||
553 | |||
554 | /* | ||
555 | * xdr_shrink_pagelen | ||
556 | * @buf: xdr_buf | ||
557 | * @len: bytes to remove from buf->pages | ||
558 | * | ||
559 | * Shrinks XDR buffer's page array buf->pages by | ||
560 | * 'len' bytes. The extra data is not lost, but is instead | ||
561 | * moved into the tail. | ||
562 | */ | ||
563 | static void | ||
564 | xdr_shrink_pagelen(struct xdr_buf *buf, size_t len) | ||
565 | { | ||
566 | struct kvec *tail; | ||
567 | size_t copy; | ||
568 | char *p; | ||
569 | unsigned int pglen = buf->page_len; | ||
570 | |||
571 | tail = buf->tail; | ||
572 | BUG_ON (len > pglen); | ||
573 | |||
574 | /* Shift the tail first */ | ||
575 | if (tail->iov_len != 0) { | ||
576 | p = (char *)tail->iov_base + len; | ||
577 | if (tail->iov_len > len) { | ||
578 | copy = tail->iov_len - len; | ||
579 | memmove(p, tail->iov_base, copy); | ||
580 | } else | ||
581 | buf->buflen -= len; | ||
582 | /* Copy from the inlined pages into the tail */ | ||
583 | copy = len; | ||
584 | if (copy > tail->iov_len) | ||
585 | copy = tail->iov_len; | ||
586 | _copy_from_pages((char *)tail->iov_base, | ||
587 | buf->pages, buf->page_base + pglen - len, | ||
588 | copy); | ||
589 | } | ||
590 | buf->page_len -= len; | ||
591 | buf->buflen -= len; | ||
592 | /* Have we truncated the message? */ | ||
593 | if (buf->len > buf->buflen) | ||
594 | buf->len = buf->buflen; | ||
595 | } | ||
596 | |||
/*
 * xdr_shift_buf
 * @buf: xdr_buf
 * @len: bytes to remove from buf->head[0]
 *
 * Non-static entry point for xdr_shrink_bufhead(): shortens the head
 * kvec by @len bytes.
 */
void
xdr_shift_buf(struct xdr_buf *buf, size_t len)
{
	xdr_shrink_bufhead(buf, len);
}
602 | |||
603 | /** | ||
604 | * xdr_init_encode - Initialize a struct xdr_stream for sending data. | ||
605 | * @xdr: pointer to xdr_stream struct | ||
606 | * @buf: pointer to XDR buffer in which to encode data | ||
607 | * @p: current pointer inside XDR buffer | ||
608 | * | ||
609 | * Note: at the moment the RPC client only passes the length of our | ||
610 | * scratch buffer in the xdr_buf's header kvec. Previously this | ||
611 | * meant we needed to call xdr_adjust_iovec() after encoding the | ||
612 | * data. With the new scheme, the xdr_stream manages the details | ||
613 | * of the buffer length, and takes care of adjusting the kvec | ||
614 | * length for us. | ||
615 | */ | ||
616 | void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) | ||
617 | { | ||
618 | struct kvec *iov = buf->head; | ||
619 | |||
620 | xdr->buf = buf; | ||
621 | xdr->iov = iov; | ||
622 | xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len); | ||
623 | buf->len = iov->iov_len = (char *)p - (char *)iov->iov_base; | ||
624 | xdr->p = p; | ||
625 | } | ||
626 | EXPORT_SYMBOL(xdr_init_encode); | ||
627 | |||
628 | /** | ||
629 | * xdr_reserve_space - Reserve buffer space for sending | ||
630 | * @xdr: pointer to xdr_stream | ||
631 | * @nbytes: number of bytes to reserve | ||
632 | * | ||
633 | * Checks that we have enough buffer space to encode 'nbytes' more | ||
634 | * bytes of data. If so, update the total xdr_buf length, and | ||
635 | * adjust the length of the current kvec. | ||
636 | */ | ||
637 | uint32_t * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes) | ||
638 | { | ||
639 | uint32_t *p = xdr->p; | ||
640 | uint32_t *q; | ||
641 | |||
642 | /* align nbytes on the next 32-bit boundary */ | ||
643 | nbytes += 3; | ||
644 | nbytes &= ~3; | ||
645 | q = p + (nbytes >> 2); | ||
646 | if (unlikely(q > xdr->end || q < p)) | ||
647 | return NULL; | ||
648 | xdr->p = q; | ||
649 | xdr->iov->iov_len += nbytes; | ||
650 | xdr->buf->len += nbytes; | ||
651 | return p; | ||
652 | } | ||
653 | EXPORT_SYMBOL(xdr_reserve_space); | ||
654 | |||
655 | /** | ||
656 | * xdr_write_pages - Insert a list of pages into an XDR buffer for sending | ||
657 | * @xdr: pointer to xdr_stream | ||
658 | * @pages: list of pages | ||
659 | * @base: offset of first byte | ||
660 | * @len: length of data in bytes | ||
661 | * | ||
662 | */ | ||
663 | void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, | ||
664 | unsigned int len) | ||
665 | { | ||
666 | struct xdr_buf *buf = xdr->buf; | ||
667 | struct kvec *iov = buf->tail; | ||
668 | buf->pages = pages; | ||
669 | buf->page_base = base; | ||
670 | buf->page_len = len; | ||
671 | |||
672 | iov->iov_base = (char *)xdr->p; | ||
673 | iov->iov_len = 0; | ||
674 | xdr->iov = iov; | ||
675 | |||
676 | if (len & 3) { | ||
677 | unsigned int pad = 4 - (len & 3); | ||
678 | |||
679 | BUG_ON(xdr->p >= xdr->end); | ||
680 | iov->iov_base = (char *)xdr->p + (len & 3); | ||
681 | iov->iov_len += pad; | ||
682 | len += pad; | ||
683 | *xdr->p++ = 0; | ||
684 | } | ||
685 | buf->buflen += len; | ||
686 | buf->len += len; | ||
687 | } | ||
688 | EXPORT_SYMBOL(xdr_write_pages); | ||
689 | |||
690 | /** | ||
691 | * xdr_init_decode - Initialize an xdr_stream for decoding data. | ||
692 | * @xdr: pointer to xdr_stream struct | ||
693 | * @buf: pointer to XDR buffer from which to decode data | ||
694 | * @p: current pointer inside XDR buffer | ||
695 | */ | ||
696 | void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) | ||
697 | { | ||
698 | struct kvec *iov = buf->head; | ||
699 | unsigned int len = iov->iov_len; | ||
700 | |||
701 | if (len > buf->len) | ||
702 | len = buf->len; | ||
703 | xdr->buf = buf; | ||
704 | xdr->iov = iov; | ||
705 | xdr->p = p; | ||
706 | xdr->end = (uint32_t *)((char *)iov->iov_base + len); | ||
707 | } | ||
708 | EXPORT_SYMBOL(xdr_init_decode); | ||
709 | |||
710 | /** | ||
711 | * xdr_inline_decode - Retrieve non-page XDR data to decode | ||
712 | * @xdr: pointer to xdr_stream struct | ||
713 | * @nbytes: number of bytes of data to decode | ||
714 | * | ||
715 | * Check if the input buffer is long enough to enable us to decode | ||
716 | * 'nbytes' more bytes of data starting at the current position. | ||
717 | * If so return the current pointer, then update the current | ||
718 | * pointer position. | ||
719 | */ | ||
720 | uint32_t * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) | ||
721 | { | ||
722 | uint32_t *p = xdr->p; | ||
723 | uint32_t *q = p + XDR_QUADLEN(nbytes); | ||
724 | |||
725 | if (unlikely(q > xdr->end || q < p)) | ||
726 | return NULL; | ||
727 | xdr->p = q; | ||
728 | return p; | ||
729 | } | ||
730 | EXPORT_SYMBOL(xdr_inline_decode); | ||
731 | |||
732 | /** | ||
733 | * xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position | ||
734 | * @xdr: pointer to xdr_stream struct | ||
735 | * @len: number of bytes of page data | ||
736 | * | ||
737 | * Moves data beyond the current pointer position from the XDR head[] buffer | ||
738 | * into the page list. Any data that lies beyond current position + "len" | ||
739 | * bytes is moved into the XDR tail[]. The current pointer is then | ||
740 | * repositioned at the beginning of the XDR tail. | ||
741 | */ | ||
742 | void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) | ||
743 | { | ||
744 | struct xdr_buf *buf = xdr->buf; | ||
745 | struct kvec *iov; | ||
746 | ssize_t shift; | ||
747 | unsigned int end; | ||
748 | int padding; | ||
749 | |||
750 | /* Realign pages to current pointer position */ | ||
751 | iov = buf->head; | ||
752 | shift = iov->iov_len + (char *)iov->iov_base - (char *)xdr->p; | ||
753 | if (shift > 0) | ||
754 | xdr_shrink_bufhead(buf, shift); | ||
755 | |||
756 | /* Truncate page data and move it into the tail */ | ||
757 | if (buf->page_len > len) | ||
758 | xdr_shrink_pagelen(buf, buf->page_len - len); | ||
759 | padding = (XDR_QUADLEN(len) << 2) - len; | ||
760 | xdr->iov = iov = buf->tail; | ||
761 | /* Compute remaining message length. */ | ||
762 | end = iov->iov_len; | ||
763 | shift = buf->buflen - buf->len; | ||
764 | if (shift < end) | ||
765 | end -= shift; | ||
766 | else if (shift > 0) | ||
767 | end = 0; | ||
768 | /* | ||
769 | * Position current pointer at beginning of tail, and | ||
770 | * set remaining message length. | ||
771 | */ | ||
772 | xdr->p = (uint32_t *)((char *)iov->iov_base + padding); | ||
773 | xdr->end = (uint32_t *)((char *)iov->iov_base + end); | ||
774 | } | ||
775 | EXPORT_SYMBOL(xdr_read_pages); | ||
776 | |||
777 | static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; | ||
778 | |||
779 | void | ||
780 | xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf) | ||
781 | { | ||
782 | buf->head[0] = *iov; | ||
783 | buf->tail[0] = empty_iov; | ||
784 | buf->page_len = 0; | ||
785 | buf->buflen = buf->len = iov->iov_len; | ||
786 | } | ||
787 | |||
788 | /* Sets subiov to the intersection of iov with the buffer of length len | ||
789 | * starting base bytes after iov. Indicates empty intersection by setting | ||
790 | * length of subiov to zero. Decrements len by length of subiov, sets base | ||
791 | * to zero (or decrements it by length of iov if subiov is empty). */ | ||
792 | static void | ||
793 | iov_subsegment(struct kvec *iov, struct kvec *subiov, int *base, int *len) | ||
794 | { | ||
795 | if (*base > iov->iov_len) { | ||
796 | subiov->iov_base = NULL; | ||
797 | subiov->iov_len = 0; | ||
798 | *base -= iov->iov_len; | ||
799 | } else { | ||
800 | subiov->iov_base = iov->iov_base + *base; | ||
801 | subiov->iov_len = min(*len, (int)iov->iov_len - *base); | ||
802 | *base = 0; | ||
803 | } | ||
804 | *len -= subiov->iov_len; | ||
805 | } | ||
806 | |||
807 | /* Sets subbuf to the portion of buf of length len beginning base bytes | ||
808 | * from the start of buf. Returns -1 if base of length are out of bounds. */ | ||
809 | int | ||
810 | xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, | ||
811 | int base, int len) | ||
812 | { | ||
813 | int i; | ||
814 | |||
815 | subbuf->buflen = subbuf->len = len; | ||
816 | iov_subsegment(buf->head, subbuf->head, &base, &len); | ||
817 | |||
818 | if (base < buf->page_len) { | ||
819 | i = (base + buf->page_base) >> PAGE_CACHE_SHIFT; | ||
820 | subbuf->pages = &buf->pages[i]; | ||
821 | subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK; | ||
822 | subbuf->page_len = min((int)buf->page_len - base, len); | ||
823 | len -= subbuf->page_len; | ||
824 | base = 0; | ||
825 | } else { | ||
826 | base -= buf->page_len; | ||
827 | subbuf->page_len = 0; | ||
828 | } | ||
829 | |||
830 | iov_subsegment(buf->tail, subbuf->tail, &base, &len); | ||
831 | if (base || len) | ||
832 | return -1; | ||
833 | return 0; | ||
834 | } | ||
835 | |||
836 | /* obj is assumed to point to allocated memory of size at least len: */ | ||
837 | int | ||
838 | read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) | ||
839 | { | ||
840 | struct xdr_buf subbuf; | ||
841 | int this_len; | ||
842 | int status; | ||
843 | |||
844 | status = xdr_buf_subsegment(buf, &subbuf, base, len); | ||
845 | if (status) | ||
846 | goto out; | ||
847 | this_len = min(len, (int)subbuf.head[0].iov_len); | ||
848 | memcpy(obj, subbuf.head[0].iov_base, this_len); | ||
849 | len -= this_len; | ||
850 | obj += this_len; | ||
851 | this_len = min(len, (int)subbuf.page_len); | ||
852 | if (this_len) | ||
853 | _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len); | ||
854 | len -= this_len; | ||
855 | obj += this_len; | ||
856 | this_len = min(len, (int)subbuf.tail[0].iov_len); | ||
857 | memcpy(obj, subbuf.tail[0].iov_base, this_len); | ||
858 | out: | ||
859 | return status; | ||
860 | } | ||
861 | |||
862 | static int | ||
863 | read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) | ||
864 | { | ||
865 | u32 raw; | ||
866 | int status; | ||
867 | |||
868 | status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); | ||
869 | if (status) | ||
870 | return status; | ||
871 | *obj = ntohl(raw); | ||
872 | return 0; | ||
873 | } | ||
874 | |||
875 | /* If the netobj starting offset bytes from the start of xdr_buf is contained | ||
876 | * entirely in the head or the tail, set object to point to it; otherwise | ||
877 | * try to find space for it at the end of the tail, copy it there, and | ||
878 | * set obj to point to it. */ | ||
879 | int | ||
880 | xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset) | ||
881 | { | ||
882 | u32 tail_offset = buf->head[0].iov_len + buf->page_len; | ||
883 | u32 obj_end_offset; | ||
884 | |||
885 | if (read_u32_from_xdr_buf(buf, offset, &obj->len)) | ||
886 | goto out; | ||
887 | obj_end_offset = offset + 4 + obj->len; | ||
888 | |||
889 | if (obj_end_offset <= buf->head[0].iov_len) { | ||
890 | /* The obj is contained entirely in the head: */ | ||
891 | obj->data = buf->head[0].iov_base + offset + 4; | ||
892 | } else if (offset + 4 >= tail_offset) { | ||
893 | if (obj_end_offset - tail_offset | ||
894 | > buf->tail[0].iov_len) | ||
895 | goto out; | ||
896 | /* The obj is contained entirely in the tail: */ | ||
897 | obj->data = buf->tail[0].iov_base | ||
898 | + offset - tail_offset + 4; | ||
899 | } else { | ||
900 | /* use end of tail as storage for obj: | ||
901 | * (We don't copy to the beginning because then we'd have | ||
902 | * to worry about doing a potentially overlapping copy. | ||
903 | * This assumes the object is at most half the length of the | ||
904 | * tail.) */ | ||
905 | if (obj->len > buf->tail[0].iov_len) | ||
906 | goto out; | ||
907 | obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len - | ||
908 | obj->len; | ||
909 | if (read_bytes_from_xdr_buf(buf, offset + 4, | ||
910 | obj->data, obj->len)) | ||
911 | goto out; | ||
912 | |||
913 | } | ||
914 | return 0; | ||
915 | out: | ||
916 | return -1; | ||
917 | } | ||
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c new file mode 100644 index 000000000000..c74a6bb94074 --- /dev/null +++ b/net/sunrpc/xprt.c | |||
@@ -0,0 +1,1678 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/xprt.c | ||
3 | * | ||
4 | * This is a generic RPC call interface supporting congestion avoidance, | ||
5 | * and asynchronous calls. | ||
6 | * | ||
7 | * The interface works like this: | ||
8 | * | ||
9 | * - When a process places a call, it allocates a request slot if | ||
10 | * one is available. Otherwise, it sleeps on the backlog queue | ||
11 | * (xprt_reserve). | ||
12 | * - Next, the caller puts together the RPC message, stuffs it into | ||
13 | * the request struct, and calls xprt_call(). | ||
14 | * - xprt_call transmits the message and installs the caller on the | ||
15 | * socket's wait list. At the same time, it installs a timer that | ||
16 | * is run after the packet's timeout has expired. | ||
17 | * - When a packet arrives, the data_ready handler walks the list of | ||
18 | * pending requests for that socket. If a matching XID is found, the | ||
19 | * caller is woken up, and the timer removed. | ||
20 | * - When no reply arrives within the timeout interval, the timer is | ||
21 | * fired by the kernel and runs xprt_timer(). It either adjusts the | ||
22 | * timeout values (minor timeout) or wakes up the caller with a status | ||
23 | * of -ETIMEDOUT. | ||
24 | * - When the caller receives a notification from RPC that a reply arrived, | ||
25 | * it should release the RPC slot, and process the reply. | ||
26 | * If the call timed out, it may choose to retry the operation by | ||
27 | * adjusting the initial timeout value, and simply calling rpc_call | ||
28 | * again. | ||
29 | * | ||
30 | * Support for async RPC is done through a set of RPC-specific scheduling | ||
31 | * primitives that `transparently' work for processes as well as async | ||
32 | * tasks that rely on callbacks. | ||
33 | * | ||
34 | * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> | ||
35 | * | ||
36 | * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
37 | * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
38 | * TCP NFS related read + write fixes | ||
39 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
40 | * | ||
41 | * Rewrite of large parts of the code in order to stabilize TCP stuff. | ||
42 | * Fix behaviour when socket buffer is full. | ||
43 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
44 | */ | ||
45 | |||
46 | #include <linux/types.h> | ||
47 | #include <linux/slab.h> | ||
48 | #include <linux/capability.h> | ||
49 | #include <linux/sched.h> | ||
50 | #include <linux/errno.h> | ||
51 | #include <linux/socket.h> | ||
52 | #include <linux/in.h> | ||
53 | #include <linux/net.h> | ||
54 | #include <linux/mm.h> | ||
55 | #include <linux/udp.h> | ||
56 | #include <linux/tcp.h> | ||
57 | #include <linux/sunrpc/clnt.h> | ||
58 | #include <linux/file.h> | ||
59 | #include <linux/workqueue.h> | ||
60 | #include <linux/random.h> | ||
61 | |||
62 | #include <net/sock.h> | ||
63 | #include <net/checksum.h> | ||
64 | #include <net/udp.h> | ||
65 | #include <net/tcp.h> | ||
66 | |||
/*
 * Debugging setup and transport-wide constants
 */

/*
 * With RPC_DEBUG set, RPC_DEBUG_DATA is forced off (so the NOP variant of
 * xprt_pktdump() is the one compiled) and the debug facility used by this
 * file is RPCDBG_XPRT.
 */
#ifdef RPC_DEBUG
# undef RPC_DEBUG_DATA
# define RPCDBG_FACILITY RPCDBG_XPRT
#endif

/* NOTE(review): presumably the cap on retransmit backoff; the use site is
 * not in this part of the file -- confirm. */
#define XPRT_MAX_BACKOFF (8)
/* 5*60*HZ; presumably the idle period before autodisconnect, i.e. five
 * minutes if HZ is ticks per second -- confirm at the use site. */
#define XPRT_IDLE_TIMEOUT (5*60*HZ)
/* NOTE(review): presumably the highest reserved source port tried when
 * binding -- confirm at the use site. */
#define XPRT_MAX_RESVPORT (800)

/*
 * Local functions
 */
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static inline void do_xprt_reserve(struct rpc_task *);
static void xprt_disconnect(struct rpc_xprt *);
static void xprt_connect_status(struct rpc_task *task);
static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
						struct rpc_timeout *to);
static struct socket *xprt_create_socket(struct rpc_xprt *, int, int);
static void xprt_bind_socket(struct rpc_xprt *, struct socket *);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);

static int xprt_clear_backlog(struct rpc_xprt *xprt);
94 | |||
95 | #ifdef RPC_DEBUG_DATA | ||
96 | /* | ||
97 | * Print the buffer contents (first 128 bytes only--just enough for | ||
98 | * diropres return). | ||
99 | */ | ||
100 | static void | ||
101 | xprt_pktdump(char *msg, u32 *packet, unsigned int count) | ||
102 | { | ||
103 | u8 *buf = (u8 *) packet; | ||
104 | int j; | ||
105 | |||
106 | dprintk("RPC: %s\n", msg); | ||
107 | for (j = 0; j < count && j < 128; j += 4) { | ||
108 | if (!(j & 31)) { | ||
109 | if (j) | ||
110 | dprintk("\n"); | ||
111 | dprintk("0x%04x ", j); | ||
112 | } | ||
113 | dprintk("%02x%02x%02x%02x ", | ||
114 | buf[j], buf[j+1], buf[j+2], buf[j+3]); | ||
115 | } | ||
116 | dprintk("\n"); | ||
117 | } | ||
118 | #else | ||
119 | static inline void | ||
120 | xprt_pktdump(char *msg, u32 *packet, unsigned int count) | ||
121 | { | ||
122 | /* NOP */ | ||
123 | } | ||
124 | #endif | ||
125 | |||
126 | /* | ||
127 | * Look up RPC transport given an INET socket | ||
128 | */ | ||
129 | static inline struct rpc_xprt * | ||
130 | xprt_from_sock(struct sock *sk) | ||
131 | { | ||
132 | return (struct rpc_xprt *) sk->sk_user_data; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Serialize write access to sockets, in order to prevent different | ||
137 | * requests from interfering with each other. | ||
138 | * Also prevents TCP socket connects from colliding with writes. | ||
139 | */ | ||
140 | static int | ||
141 | __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
142 | { | ||
143 | struct rpc_rqst *req = task->tk_rqstp; | ||
144 | |||
145 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { | ||
146 | if (task == xprt->snd_task) | ||
147 | return 1; | ||
148 | if (task == NULL) | ||
149 | return 0; | ||
150 | goto out_sleep; | ||
151 | } | ||
152 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | ||
153 | xprt->snd_task = task; | ||
154 | if (req) { | ||
155 | req->rq_bytes_sent = 0; | ||
156 | req->rq_ntrans++; | ||
157 | } | ||
158 | return 1; | ||
159 | } | ||
160 | smp_mb__before_clear_bit(); | ||
161 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | ||
162 | smp_mb__after_clear_bit(); | ||
163 | out_sleep: | ||
164 | dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); | ||
165 | task->tk_timeout = 0; | ||
166 | task->tk_status = -EAGAIN; | ||
167 | if (req && req->rq_ntrans) | ||
168 | rpc_sleep_on(&xprt->resend, task, NULL, NULL); | ||
169 | else | ||
170 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | static inline int | ||
175 | xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
176 | { | ||
177 | int retval; | ||
178 | |||
179 | spin_lock_bh(&xprt->sock_lock); | ||
180 | retval = __xprt_lock_write(xprt, task); | ||
181 | spin_unlock_bh(&xprt->sock_lock); | ||
182 | return retval; | ||
183 | } | ||
184 | |||
185 | |||
186 | static void | ||
187 | __xprt_lock_write_next(struct rpc_xprt *xprt) | ||
188 | { | ||
189 | struct rpc_task *task; | ||
190 | |||
191 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) | ||
192 | return; | ||
193 | if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) | ||
194 | goto out_unlock; | ||
195 | task = rpc_wake_up_next(&xprt->resend); | ||
196 | if (!task) { | ||
197 | task = rpc_wake_up_next(&xprt->sending); | ||
198 | if (!task) | ||
199 | goto out_unlock; | ||
200 | } | ||
201 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | ||
202 | struct rpc_rqst *req = task->tk_rqstp; | ||
203 | xprt->snd_task = task; | ||
204 | if (req) { | ||
205 | req->rq_bytes_sent = 0; | ||
206 | req->rq_ntrans++; | ||
207 | } | ||
208 | return; | ||
209 | } | ||
210 | out_unlock: | ||
211 | smp_mb__before_clear_bit(); | ||
212 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | ||
213 | smp_mb__after_clear_bit(); | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * Releases the socket for use by other requests. | ||
218 | */ | ||
219 | static void | ||
220 | __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
221 | { | ||
222 | if (xprt->snd_task == task) { | ||
223 | xprt->snd_task = NULL; | ||
224 | smp_mb__before_clear_bit(); | ||
225 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | ||
226 | smp_mb__after_clear_bit(); | ||
227 | __xprt_lock_write_next(xprt); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | static inline void | ||
232 | xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
233 | { | ||
234 | spin_lock_bh(&xprt->sock_lock); | ||
235 | __xprt_release_write(xprt, task); | ||
236 | spin_unlock_bh(&xprt->sock_lock); | ||
237 | } | ||
238 | |||
/*
 * Write the unsent part of a request's send buffer to the socket.
 *
 * Returns the non-negative result of xdr_sendpages() on success, or a
 * negative errno.  -ENOTCONN is returned when there is no socket, and on
 * a stream transport it also replaces -ECONNRESET and -EPIPE.  Any error
 * other than -ECONNREFUSED, -EAGAIN, -ECONNRESET, -ENOTCONN and -EPIPE
 * is logged before being returned.
 */
static inline int
xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	struct socket	*sock = xprt->sock;
	struct xdr_buf	*xdr = &req->rq_snd_buf;
	struct sockaddr	*addr;
	int		addrlen;
	unsigned int	skip;
	int		result;

	if (sock == NULL)
		return -ENOTCONN;

	xprt_pktdump("packet data:",
				req->rq_svec->iov_base,
				req->rq_svec->iov_len);

	if (xprt->stream) {
		addr = NULL;
		addrlen = 0;
	} else {
		/* For UDP, we need to provide an address */
		addr = (struct sockaddr *) &xprt->addr;
		addrlen = sizeof(xprt->addr);
	}

	/* Don't repeat bytes that already went out */
	skip = req->rq_bytes_sent;

	clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
	result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);

	dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);

	if (result >= 0)
		return result;

	/*
	 * -ECONNREFUSED: when the server has died, an ICMP port unreachable
	 * message prompts it.  Like -EAGAIN, it is passed up unchanged.
	 */
	if (result == -ECONNREFUSED || result == -EAGAIN)
		return result;

	if (result == -ECONNRESET || result == -ENOTCONN || result == -EPIPE) {
		/* connection broken */
		if (xprt->stream)
			result = -ENOTCONN;
		return result;
	}

	printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
	return result;
}
294 | |||
295 | /* | ||
296 | * Van Jacobson congestion avoidance. Check if the congestion window | ||
297 | * overflowed. Put the task to sleep if this is the case. | ||
298 | */ | ||
299 | static int | ||
300 | __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task) | ||
301 | { | ||
302 | struct rpc_rqst *req = task->tk_rqstp; | ||
303 | |||
304 | if (req->rq_cong) | ||
305 | return 1; | ||
306 | dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n", | ||
307 | task->tk_pid, xprt->cong, xprt->cwnd); | ||
308 | if (RPCXPRT_CONGESTED(xprt)) | ||
309 | return 0; | ||
310 | req->rq_cong = 1; | ||
311 | xprt->cong += RPC_CWNDSCALE; | ||
312 | return 1; | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * Adjust the congestion window, and wake up the next task | ||
317 | * that has been sleeping due to congestion | ||
318 | */ | ||
319 | static void | ||
320 | __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) | ||
321 | { | ||
322 | if (!req->rq_cong) | ||
323 | return; | ||
324 | req->rq_cong = 0; | ||
325 | xprt->cong -= RPC_CWNDSCALE; | ||
326 | __xprt_lock_write_next(xprt); | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * Adjust RPC congestion window | ||
331 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. | ||
332 | */ | ||
333 | static void | ||
334 | xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) | ||
335 | { | ||
336 | unsigned long cwnd; | ||
337 | |||
338 | cwnd = xprt->cwnd; | ||
339 | if (result >= 0 && cwnd <= xprt->cong) { | ||
340 | /* The (cwnd >> 1) term makes sure | ||
341 | * the result gets rounded properly. */ | ||
342 | cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; | ||
343 | if (cwnd > RPC_MAXCWND(xprt)) | ||
344 | cwnd = RPC_MAXCWND(xprt); | ||
345 | __xprt_lock_write_next(xprt); | ||
346 | } else if (result == -ETIMEDOUT) { | ||
347 | cwnd >>= 1; | ||
348 | if (cwnd < RPC_CWNDSCALE) | ||
349 | cwnd = RPC_CWNDSCALE; | ||
350 | } | ||
351 | dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", | ||
352 | xprt->cong, xprt->cwnd, cwnd); | ||
353 | xprt->cwnd = cwnd; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Reset the major timeout value | ||
358 | */ | ||
359 | static void xprt_reset_majortimeo(struct rpc_rqst *req) | ||
360 | { | ||
361 | struct rpc_timeout *to = &req->rq_xprt->timeout; | ||
362 | |||
363 | req->rq_majortimeo = req->rq_timeout; | ||
364 | if (to->to_exponential) | ||
365 | req->rq_majortimeo <<= to->to_retries; | ||
366 | else | ||
367 | req->rq_majortimeo += to->to_increment * to->to_retries; | ||
368 | if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0) | ||
369 | req->rq_majortimeo = to->to_maxval; | ||
370 | req->rq_majortimeo += jiffies; | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * Adjust timeout values etc for next retransmit | ||
375 | */ | ||
376 | int xprt_adjust_timeout(struct rpc_rqst *req) | ||
377 | { | ||
378 | struct rpc_xprt *xprt = req->rq_xprt; | ||
379 | struct rpc_timeout *to = &xprt->timeout; | ||
380 | int status = 0; | ||
381 | |||
382 | if (time_before(jiffies, req->rq_majortimeo)) { | ||
383 | if (to->to_exponential) | ||
384 | req->rq_timeout <<= 1; | ||
385 | else | ||
386 | req->rq_timeout += to->to_increment; | ||
387 | if (to->to_maxval && req->rq_timeout >= to->to_maxval) | ||
388 | req->rq_timeout = to->to_maxval; | ||
389 | req->rq_retries++; | ||
390 | pprintk("RPC: %lu retrans\n", jiffies); | ||
391 | } else { | ||
392 | req->rq_timeout = to->to_initval; | ||
393 | req->rq_retries = 0; | ||
394 | xprt_reset_majortimeo(req); | ||
395 | /* Reset the RTT counters == "slow start" */ | ||
396 | spin_lock_bh(&xprt->sock_lock); | ||
397 | rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); | ||
398 | spin_unlock_bh(&xprt->sock_lock); | ||
399 | pprintk("RPC: %lu timeout\n", jiffies); | ||
400 | status = -ETIMEDOUT; | ||
401 | } | ||
402 | |||
403 | if (req->rq_timeout == 0) { | ||
404 | printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n"); | ||
405 | req->rq_timeout = 5 * HZ; | ||
406 | } | ||
407 | return status; | ||
408 | } | ||
409 | |||
410 | /* | ||
411 | * Close down a transport socket | ||
412 | */ | ||
413 | static void | ||
414 | xprt_close(struct rpc_xprt *xprt) | ||
415 | { | ||
416 | struct socket *sock = xprt->sock; | ||
417 | struct sock *sk = xprt->inet; | ||
418 | |||
419 | if (!sk) | ||
420 | return; | ||
421 | |||
422 | write_lock_bh(&sk->sk_callback_lock); | ||
423 | xprt->inet = NULL; | ||
424 | xprt->sock = NULL; | ||
425 | |||
426 | sk->sk_user_data = NULL; | ||
427 | sk->sk_data_ready = xprt->old_data_ready; | ||
428 | sk->sk_state_change = xprt->old_state_change; | ||
429 | sk->sk_write_space = xprt->old_write_space; | ||
430 | write_unlock_bh(&sk->sk_callback_lock); | ||
431 | |||
432 | sk->sk_no_check = 0; | ||
433 | |||
434 | sock_release(sock); | ||
435 | } | ||
436 | |||
437 | static void | ||
438 | xprt_socket_autoclose(void *args) | ||
439 | { | ||
440 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
441 | |||
442 | xprt_disconnect(xprt); | ||
443 | xprt_close(xprt); | ||
444 | xprt_release_write(xprt, NULL); | ||
445 | } | ||
446 | |||
447 | /* | ||
448 | * Mark a transport as disconnected | ||
449 | */ | ||
450 | static void | ||
451 | xprt_disconnect(struct rpc_xprt *xprt) | ||
452 | { | ||
453 | dprintk("RPC: disconnected transport %p\n", xprt); | ||
454 | spin_lock_bh(&xprt->sock_lock); | ||
455 | xprt_clear_connected(xprt); | ||
456 | rpc_wake_up_status(&xprt->pending, -ENOTCONN); | ||
457 | spin_unlock_bh(&xprt->sock_lock); | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * Used to allow disconnection when we've been idle | ||
462 | */ | ||
463 | static void | ||
464 | xprt_init_autodisconnect(unsigned long data) | ||
465 | { | ||
466 | struct rpc_xprt *xprt = (struct rpc_xprt *)data; | ||
467 | |||
468 | spin_lock(&xprt->sock_lock); | ||
469 | if (!list_empty(&xprt->recv) || xprt->shutdown) | ||
470 | goto out_abort; | ||
471 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) | ||
472 | goto out_abort; | ||
473 | spin_unlock(&xprt->sock_lock); | ||
474 | /* Let keventd close the socket */ | ||
475 | if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) | ||
476 | xprt_release_write(xprt, NULL); | ||
477 | else | ||
478 | schedule_work(&xprt->task_cleanup); | ||
479 | return; | ||
480 | out_abort: | ||
481 | spin_unlock(&xprt->sock_lock); | ||
482 | } | ||
483 | |||
484 | static void xprt_socket_connect(void *args) | ||
485 | { | ||
486 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
487 | struct socket *sock = xprt->sock; | ||
488 | int status = -EIO; | ||
489 | |||
490 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
491 | goto out; | ||
492 | |||
493 | /* | ||
494 | * Start by resetting any existing state | ||
495 | */ | ||
496 | xprt_close(xprt); | ||
497 | sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); | ||
498 | if (sock == NULL) { | ||
499 | /* couldn't create socket or bind to reserved port; | ||
500 | * this is likely a permanent error, so cause an abort */ | ||
501 | goto out; | ||
502 | } | ||
503 | xprt_bind_socket(xprt, sock); | ||
504 | xprt_sock_setbufsize(xprt); | ||
505 | |||
506 | status = 0; | ||
507 | if (!xprt->stream) | ||
508 | goto out; | ||
509 | |||
510 | /* | ||
511 | * Tell the socket layer to start connecting... | ||
512 | */ | ||
513 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | ||
514 | sizeof(xprt->addr), O_NONBLOCK); | ||
515 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | ||
516 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | ||
517 | if (status < 0) { | ||
518 | switch (status) { | ||
519 | case -EINPROGRESS: | ||
520 | case -EALREADY: | ||
521 | goto out_clear; | ||
522 | } | ||
523 | } | ||
524 | out: | ||
525 | if (status < 0) | ||
526 | rpc_wake_up_status(&xprt->pending, status); | ||
527 | else | ||
528 | rpc_wake_up(&xprt->pending); | ||
529 | out_clear: | ||
530 | smp_mb__before_clear_bit(); | ||
531 | clear_bit(XPRT_CONNECTING, &xprt->sockstate); | ||
532 | smp_mb__after_clear_bit(); | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Attempt to connect a TCP socket. | ||
537 | * | ||
538 | */ | ||
539 | void xprt_connect(struct rpc_task *task) | ||
540 | { | ||
541 | struct rpc_xprt *xprt = task->tk_xprt; | ||
542 | |||
543 | dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, | ||
544 | xprt, (xprt_connected(xprt) ? "is" : "is not")); | ||
545 | |||
546 | if (xprt->shutdown) { | ||
547 | task->tk_status = -EIO; | ||
548 | return; | ||
549 | } | ||
550 | if (!xprt->addr.sin_port) { | ||
551 | task->tk_status = -EIO; | ||
552 | return; | ||
553 | } | ||
554 | if (!xprt_lock_write(xprt, task)) | ||
555 | return; | ||
556 | if (xprt_connected(xprt)) | ||
557 | goto out_write; | ||
558 | |||
559 | if (task->tk_rqstp) | ||
560 | task->tk_rqstp->rq_bytes_sent = 0; | ||
561 | |||
562 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | ||
563 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); | ||
564 | if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { | ||
565 | /* Note: if we are here due to a dropped connection | ||
566 | * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ | ||
567 | * seconds | ||
568 | */ | ||
569 | if (xprt->sock != NULL) | ||
570 | schedule_delayed_work(&xprt->sock_connect, | ||
571 | RPC_REESTABLISH_TIMEOUT); | ||
572 | else | ||
573 | schedule_work(&xprt->sock_connect); | ||
574 | } | ||
575 | return; | ||
576 | out_write: | ||
577 | xprt_release_write(xprt, task); | ||
578 | } | ||
579 | |||
/*
 * We arrive here when awoken from waiting on connection establishment.
 *
 * A non-negative tk_status means success and nothing more is done.
 * Soft tasks have any error turned into -EIO.  For -ECONNREFUSED,
 * -ECONNRESET and -ENOTCONN the function returns without releasing the
 * write lock; for every other error the lock is released.
 */
static void
xprt_connect_status(struct rpc_task *task)
{
	struct rpc_xprt	*xprt = task->tk_xprt;
	int		status;

	if (task->tk_status >= 0) {
		dprintk("RPC: %4d xprt_connect_status: connection established\n",
				task->tk_pid);
		return;
	}

	/* if soft mounted, just cause this RPC to fail */
	if (RPC_IS_SOFT(task))
		task->tk_status = -EIO;

	status = task->tk_status;
	if (status == -ECONNREFUSED || status == -ECONNRESET ||
	    status == -ENOTCONN)
		return;

	if (status == -ETIMEDOUT) {
		dprintk("RPC: %4d xprt_connect_status: timed out\n",
				task->tk_pid);
	} else {
		printk(KERN_ERR "RPC: error %d connecting to server %s\n",
				-task->tk_status, task->tk_client->cl_server);
	}
	xprt_release_write(xprt, task);
}
613 | |||
614 | /* | ||
615 | * Look up the RPC request corresponding to a reply, and then lock it. | ||
616 | */ | ||
617 | static inline struct rpc_rqst * | ||
618 | xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) | ||
619 | { | ||
620 | struct list_head *pos; | ||
621 | struct rpc_rqst *req = NULL; | ||
622 | |||
623 | list_for_each(pos, &xprt->recv) { | ||
624 | struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list); | ||
625 | if (entry->rq_xid == xid) { | ||
626 | req = entry; | ||
627 | break; | ||
628 | } | ||
629 | } | ||
630 | return req; | ||
631 | } | ||
632 | |||
633 | /* | ||
634 | * Complete reply received. | ||
635 | * The TCP code relies on us to remove the request from xprt->pending. | ||
636 | */ | ||
637 | static void | ||
638 | xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) | ||
639 | { | ||
640 | struct rpc_task *task = req->rq_task; | ||
641 | struct rpc_clnt *clnt = task->tk_client; | ||
642 | |||
643 | /* Adjust congestion window */ | ||
644 | if (!xprt->nocong) { | ||
645 | unsigned timer = task->tk_msg.rpc_proc->p_timer; | ||
646 | xprt_adjust_cwnd(xprt, copied); | ||
647 | __xprt_put_cong(xprt, req); | ||
648 | if (timer) { | ||
649 | if (req->rq_ntrans == 1) | ||
650 | rpc_update_rtt(clnt->cl_rtt, timer, | ||
651 | (long)jiffies - req->rq_xtime); | ||
652 | rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); | ||
653 | } | ||
654 | } | ||
655 | |||
656 | #ifdef RPC_PROFILE | ||
657 | /* Profile only reads for now */ | ||
658 | if (copied > 1024) { | ||
659 | static unsigned long nextstat; | ||
660 | static unsigned long pkt_rtt, pkt_len, pkt_cnt; | ||
661 | |||
662 | pkt_cnt++; | ||
663 | pkt_len += req->rq_slen + copied; | ||
664 | pkt_rtt += jiffies - req->rq_xtime; | ||
665 | if (time_before(nextstat, jiffies)) { | ||
666 | printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd); | ||
667 | printk("RPC: %ld %ld %ld %ld stat\n", | ||
668 | jiffies, pkt_cnt, pkt_len, pkt_rtt); | ||
669 | pkt_rtt = pkt_len = pkt_cnt = 0; | ||
670 | nextstat = jiffies + 5 * HZ; | ||
671 | } | ||
672 | } | ||
673 | #endif | ||
674 | |||
675 | dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); | ||
676 | list_del_init(&req->rq_list); | ||
677 | req->rq_received = req->rq_private_buf.len = copied; | ||
678 | |||
679 | /* ... and wake up the process. */ | ||
680 | rpc_wake_up_task(task); | ||
681 | return; | ||
682 | } | ||
683 | |||
684 | static size_t | ||
685 | skb_read_bits(skb_reader_t *desc, void *to, size_t len) | ||
686 | { | ||
687 | if (len > desc->count) | ||
688 | len = desc->count; | ||
689 | if (skb_copy_bits(desc->skb, desc->offset, to, len)) | ||
690 | return 0; | ||
691 | desc->count -= len; | ||
692 | desc->offset += len; | ||
693 | return len; | ||
694 | } | ||
695 | |||
696 | static size_t | ||
697 | skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) | ||
698 | { | ||
699 | unsigned int csum2, pos; | ||
700 | |||
701 | if (len > desc->count) | ||
702 | len = desc->count; | ||
703 | pos = desc->offset; | ||
704 | csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); | ||
705 | desc->csum = csum_block_add(desc->csum, csum2, pos); | ||
706 | desc->count -= len; | ||
707 | desc->offset += len; | ||
708 | return len; | ||
709 | } | ||
710 | |||
711 | /* | ||
712 | * We have set things up such that we perform the checksum of the UDP | ||
713 | * packet in parallel with the copies into the RPC client iovec. -DaveM | ||
714 | */ | ||
715 | int | ||
716 | csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | ||
717 | { | ||
718 | skb_reader_t desc; | ||
719 | |||
720 | desc.skb = skb; | ||
721 | desc.offset = sizeof(struct udphdr); | ||
722 | desc.count = skb->len - desc.offset; | ||
723 | |||
724 | if (skb->ip_summed == CHECKSUM_UNNECESSARY) | ||
725 | goto no_checksum; | ||
726 | |||
727 | desc.csum = csum_partial(skb->data, desc.offset, skb->csum); | ||
728 | xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits); | ||
729 | if (desc.offset != skb->len) { | ||
730 | unsigned int csum2; | ||
731 | csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); | ||
732 | desc.csum = csum_block_add(desc.csum, csum2, desc.offset); | ||
733 | } | ||
734 | if (desc.count) | ||
735 | return -1; | ||
736 | if ((unsigned short)csum_fold(desc.csum)) | ||
737 | return -1; | ||
738 | return 0; | ||
739 | no_checksum: | ||
740 | xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits); | ||
741 | if (desc.count) | ||
742 | return -1; | ||
743 | return 0; | ||
744 | } | ||
745 | |||
746 | /* | ||
747 | * Input handler for RPC replies. Called from a bottom half and hence | ||
748 | * atomic. | ||
749 | */ | ||
750 | static void | ||
751 | udp_data_ready(struct sock *sk, int len) | ||
752 | { | ||
753 | struct rpc_task *task; | ||
754 | struct rpc_xprt *xprt; | ||
755 | struct rpc_rqst *rovr; | ||
756 | struct sk_buff *skb; | ||
757 | int err, repsize, copied; | ||
758 | u32 _xid, *xp; | ||
759 | |||
760 | read_lock(&sk->sk_callback_lock); | ||
761 | dprintk("RPC: udp_data_ready...\n"); | ||
762 | if (!(xprt = xprt_from_sock(sk))) { | ||
763 | printk("RPC: udp_data_ready request not found!\n"); | ||
764 | goto out; | ||
765 | } | ||
766 | |||
767 | dprintk("RPC: udp_data_ready client %p\n", xprt); | ||
768 | |||
769 | if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) | ||
770 | goto out; | ||
771 | |||
772 | if (xprt->shutdown) | ||
773 | goto dropit; | ||
774 | |||
775 | repsize = skb->len - sizeof(struct udphdr); | ||
776 | if (repsize < 4) { | ||
777 | printk("RPC: impossible RPC reply size %d!\n", repsize); | ||
778 | goto dropit; | ||
779 | } | ||
780 | |||
781 | /* Copy the XID from the skb... */ | ||
782 | xp = skb_header_pointer(skb, sizeof(struct udphdr), | ||
783 | sizeof(_xid), &_xid); | ||
784 | if (xp == NULL) | ||
785 | goto dropit; | ||
786 | |||
787 | /* Look up and lock the request corresponding to the given XID */ | ||
788 | spin_lock(&xprt->sock_lock); | ||
789 | rovr = xprt_lookup_rqst(xprt, *xp); | ||
790 | if (!rovr) | ||
791 | goto out_unlock; | ||
792 | task = rovr->rq_task; | ||
793 | |||
794 | dprintk("RPC: %4d received reply\n", task->tk_pid); | ||
795 | |||
796 | if ((copied = rovr->rq_private_buf.buflen) > repsize) | ||
797 | copied = repsize; | ||
798 | |||
799 | /* Suck it into the iovec, verify checksum if not done by hw. */ | ||
800 | if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) | ||
801 | goto out_unlock; | ||
802 | |||
803 | /* Something worked... */ | ||
804 | dst_confirm(skb->dst); | ||
805 | |||
806 | xprt_complete_rqst(xprt, rovr, copied); | ||
807 | |||
808 | out_unlock: | ||
809 | spin_unlock(&xprt->sock_lock); | ||
810 | dropit: | ||
811 | skb_free_datagram(sk, skb); | ||
812 | out: | ||
813 | read_unlock(&sk->sk_callback_lock); | ||
814 | } | ||
815 | |||
816 | /* | ||
817 | * Copy from an skb into memory and shrink the skb. | ||
818 | */ | ||
819 | static inline size_t | ||
820 | tcp_copy_data(skb_reader_t *desc, void *p, size_t len) | ||
821 | { | ||
822 | if (len > desc->count) | ||
823 | len = desc->count; | ||
824 | if (skb_copy_bits(desc->skb, desc->offset, p, len)) | ||
825 | return 0; | ||
826 | desc->offset += len; | ||
827 | desc->count -= len; | ||
828 | return len; | ||
829 | } | ||
830 | |||
831 | /* | ||
832 | * TCP read fragment marker | ||
833 | */ | ||
834 | static inline void | ||
835 | tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
836 | { | ||
837 | size_t len, used; | ||
838 | char *p; | ||
839 | |||
840 | p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; | ||
841 | len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; | ||
842 | used = tcp_copy_data(desc, p, len); | ||
843 | xprt->tcp_offset += used; | ||
844 | if (used != len) | ||
845 | return; | ||
846 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | ||
847 | if (xprt->tcp_reclen & 0x80000000) | ||
848 | xprt->tcp_flags |= XPRT_LAST_FRAG; | ||
849 | else | ||
850 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | ||
851 | xprt->tcp_reclen &= 0x7fffffff; | ||
852 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | ||
853 | xprt->tcp_offset = 0; | ||
854 | /* Sanity check of the record length */ | ||
855 | if (xprt->tcp_reclen < 4) { | ||
856 | printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); | ||
857 | xprt_disconnect(xprt); | ||
858 | } | ||
859 | dprintk("RPC: reading TCP record fragment of length %d\n", | ||
860 | xprt->tcp_reclen); | ||
861 | } | ||
862 | |||
863 | static void | ||
864 | tcp_check_recm(struct rpc_xprt *xprt) | ||
865 | { | ||
866 | if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
867 | xprt->tcp_flags |= XPRT_COPY_RECM; | ||
868 | xprt->tcp_offset = 0; | ||
869 | if (xprt->tcp_flags & XPRT_LAST_FRAG) { | ||
870 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
871 | xprt->tcp_flags |= XPRT_COPY_XID; | ||
872 | xprt->tcp_copied = 0; | ||
873 | } | ||
874 | } | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * TCP read xid | ||
879 | */ | ||
880 | static inline void | ||
881 | tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
882 | { | ||
883 | size_t len, used; | ||
884 | char *p; | ||
885 | |||
886 | len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; | ||
887 | dprintk("RPC: reading XID (%Zu bytes)\n", len); | ||
888 | p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; | ||
889 | used = tcp_copy_data(desc, p, len); | ||
890 | xprt->tcp_offset += used; | ||
891 | if (used != len) | ||
892 | return; | ||
893 | xprt->tcp_flags &= ~XPRT_COPY_XID; | ||
894 | xprt->tcp_flags |= XPRT_COPY_DATA; | ||
895 | xprt->tcp_copied = 4; | ||
896 | dprintk("RPC: reading reply for XID %08x\n", | ||
897 | ntohl(xprt->tcp_xid)); | ||
898 | tcp_check_recm(xprt); | ||
899 | } | ||
900 | |||
901 | /* | ||
902 | * TCP read and complete request | ||
903 | */ | ||
904 | static inline void | ||
905 | tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
906 | { | ||
907 | struct rpc_rqst *req; | ||
908 | struct xdr_buf *rcvbuf; | ||
909 | size_t len; | ||
910 | |||
911 | /* Find and lock the request corresponding to this xid */ | ||
912 | spin_lock(&xprt->sock_lock); | ||
913 | req = xprt_lookup_rqst(xprt, xprt->tcp_xid); | ||
914 | if (!req) { | ||
915 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
916 | dprintk("RPC: XID %08x request not found!\n", | ||
917 | ntohl(xprt->tcp_xid)); | ||
918 | spin_unlock(&xprt->sock_lock); | ||
919 | return; | ||
920 | } | ||
921 | |||
922 | rcvbuf = &req->rq_private_buf; | ||
923 | len = desc->count; | ||
924 | if (len > xprt->tcp_reclen - xprt->tcp_offset) { | ||
925 | skb_reader_t my_desc; | ||
926 | |||
927 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
928 | memcpy(&my_desc, desc, sizeof(my_desc)); | ||
929 | my_desc.count = len; | ||
930 | xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
931 | &my_desc, tcp_copy_data); | ||
932 | desc->count -= len; | ||
933 | desc->offset += len; | ||
934 | } else | ||
935 | xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
936 | desc, tcp_copy_data); | ||
937 | xprt->tcp_copied += len; | ||
938 | xprt->tcp_offset += len; | ||
939 | |||
940 | if (xprt->tcp_copied == req->rq_private_buf.buflen) | ||
941 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
942 | else if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
943 | if (xprt->tcp_flags & XPRT_LAST_FRAG) | ||
944 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
945 | } | ||
946 | |||
947 | if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { | ||
948 | dprintk("RPC: %4d received reply complete\n", | ||
949 | req->rq_task->tk_pid); | ||
950 | xprt_complete_rqst(xprt, req, xprt->tcp_copied); | ||
951 | } | ||
952 | spin_unlock(&xprt->sock_lock); | ||
953 | tcp_check_recm(xprt); | ||
954 | } | ||
955 | |||
956 | /* | ||
957 | * TCP discard extra bytes from a short read | ||
958 | */ | ||
959 | static inline void | ||
960 | tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
961 | { | ||
962 | size_t len; | ||
963 | |||
964 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
965 | if (len > desc->count) | ||
966 | len = desc->count; | ||
967 | desc->count -= len; | ||
968 | desc->offset += len; | ||
969 | xprt->tcp_offset += len; | ||
970 | tcp_check_recm(xprt); | ||
971 | } | ||
972 | |||
973 | /* | ||
974 | * TCP record receive routine | ||
975 | * We first have to grab the record marker, then the XID, then the data. | ||
976 | */ | ||
977 | static int | ||
978 | tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, | ||
979 | unsigned int offset, size_t len) | ||
980 | { | ||
981 | struct rpc_xprt *xprt = rd_desc->arg.data; | ||
982 | skb_reader_t desc = { | ||
983 | .skb = skb, | ||
984 | .offset = offset, | ||
985 | .count = len, | ||
986 | .csum = 0 | ||
987 | }; | ||
988 | |||
989 | dprintk("RPC: tcp_data_recv\n"); | ||
990 | do { | ||
991 | /* Read in a new fragment marker if necessary */ | ||
992 | /* Can we ever really expect to get completely empty fragments? */ | ||
993 | if (xprt->tcp_flags & XPRT_COPY_RECM) { | ||
994 | tcp_read_fraghdr(xprt, &desc); | ||
995 | continue; | ||
996 | } | ||
997 | /* Read in the xid if necessary */ | ||
998 | if (xprt->tcp_flags & XPRT_COPY_XID) { | ||
999 | tcp_read_xid(xprt, &desc); | ||
1000 | continue; | ||
1001 | } | ||
1002 | /* Read in the request data */ | ||
1003 | if (xprt->tcp_flags & XPRT_COPY_DATA) { | ||
1004 | tcp_read_request(xprt, &desc); | ||
1005 | continue; | ||
1006 | } | ||
1007 | /* Skip over any trailing bytes on short reads */ | ||
1008 | tcp_read_discard(xprt, &desc); | ||
1009 | } while (desc.count); | ||
1010 | dprintk("RPC: tcp_data_recv done\n"); | ||
1011 | return len - desc.count; | ||
1012 | } | ||
1013 | |||
1014 | static void tcp_data_ready(struct sock *sk, int bytes) | ||
1015 | { | ||
1016 | struct rpc_xprt *xprt; | ||
1017 | read_descriptor_t rd_desc; | ||
1018 | |||
1019 | read_lock(&sk->sk_callback_lock); | ||
1020 | dprintk("RPC: tcp_data_ready...\n"); | ||
1021 | if (!(xprt = xprt_from_sock(sk))) { | ||
1022 | printk("RPC: tcp_data_ready socket info not found!\n"); | ||
1023 | goto out; | ||
1024 | } | ||
1025 | if (xprt->shutdown) | ||
1026 | goto out; | ||
1027 | |||
1028 | /* We use rd_desc to pass struct xprt to tcp_data_recv */ | ||
1029 | rd_desc.arg.data = xprt; | ||
1030 | rd_desc.count = 65536; | ||
1031 | tcp_read_sock(sk, &rd_desc, tcp_data_recv); | ||
1032 | out: | ||
1033 | read_unlock(&sk->sk_callback_lock); | ||
1034 | } | ||
1035 | |||
1036 | static void | ||
1037 | tcp_state_change(struct sock *sk) | ||
1038 | { | ||
1039 | struct rpc_xprt *xprt; | ||
1040 | |||
1041 | read_lock(&sk->sk_callback_lock); | ||
1042 | if (!(xprt = xprt_from_sock(sk))) | ||
1043 | goto out; | ||
1044 | dprintk("RPC: tcp_state_change client %p...\n", xprt); | ||
1045 | dprintk("RPC: state %x conn %d dead %d zapped %d\n", | ||
1046 | sk->sk_state, xprt_connected(xprt), | ||
1047 | sock_flag(sk, SOCK_DEAD), | ||
1048 | sock_flag(sk, SOCK_ZAPPED)); | ||
1049 | |||
1050 | switch (sk->sk_state) { | ||
1051 | case TCP_ESTABLISHED: | ||
1052 | spin_lock_bh(&xprt->sock_lock); | ||
1053 | if (!xprt_test_and_set_connected(xprt)) { | ||
1054 | /* Reset TCP record info */ | ||
1055 | xprt->tcp_offset = 0; | ||
1056 | xprt->tcp_reclen = 0; | ||
1057 | xprt->tcp_copied = 0; | ||
1058 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | ||
1059 | rpc_wake_up(&xprt->pending); | ||
1060 | } | ||
1061 | spin_unlock_bh(&xprt->sock_lock); | ||
1062 | break; | ||
1063 | case TCP_SYN_SENT: | ||
1064 | case TCP_SYN_RECV: | ||
1065 | break; | ||
1066 | default: | ||
1067 | if (xprt_test_and_clear_connected(xprt)) | ||
1068 | rpc_wake_up_status(&xprt->pending, -ENOTCONN); | ||
1069 | break; | ||
1070 | } | ||
1071 | out: | ||
1072 | read_unlock(&sk->sk_callback_lock); | ||
1073 | } | ||
1074 | |||
1075 | /* | ||
1076 | * Called when more output buffer space is available for this socket. | ||
1077 | * We try not to wake our writers until they can make "significant" | ||
1078 | * progress, otherwise we'll waste resources thrashing sock_sendmsg | ||
1079 | * with a bunch of small requests. | ||
1080 | */ | ||
1081 | static void | ||
1082 | xprt_write_space(struct sock *sk) | ||
1083 | { | ||
1084 | struct rpc_xprt *xprt; | ||
1085 | struct socket *sock; | ||
1086 | |||
1087 | read_lock(&sk->sk_callback_lock); | ||
1088 | if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) | ||
1089 | goto out; | ||
1090 | if (xprt->shutdown) | ||
1091 | goto out; | ||
1092 | |||
1093 | /* Wait until we have enough socket memory */ | ||
1094 | if (xprt->stream) { | ||
1095 | /* from net/core/stream.c:sk_stream_write_space */ | ||
1096 | if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) | ||
1097 | goto out; | ||
1098 | } else { | ||
1099 | /* from net/core/sock.c:sock_def_write_space */ | ||
1100 | if (!sock_writeable(sk)) | ||
1101 | goto out; | ||
1102 | } | ||
1103 | |||
1104 | if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) | ||
1105 | goto out; | ||
1106 | |||
1107 | spin_lock_bh(&xprt->sock_lock); | ||
1108 | if (xprt->snd_task) | ||
1109 | rpc_wake_up_task(xprt->snd_task); | ||
1110 | spin_unlock_bh(&xprt->sock_lock); | ||
1111 | out: | ||
1112 | read_unlock(&sk->sk_callback_lock); | ||
1113 | } | ||
1114 | |||
1115 | /* | ||
1116 | * RPC receive timeout handler. | ||
1117 | */ | ||
1118 | static void | ||
1119 | xprt_timer(struct rpc_task *task) | ||
1120 | { | ||
1121 | struct rpc_rqst *req = task->tk_rqstp; | ||
1122 | struct rpc_xprt *xprt = req->rq_xprt; | ||
1123 | |||
1124 | spin_lock(&xprt->sock_lock); | ||
1125 | if (req->rq_received) | ||
1126 | goto out; | ||
1127 | |||
1128 | xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); | ||
1129 | __xprt_put_cong(xprt, req); | ||
1130 | |||
1131 | dprintk("RPC: %4d xprt_timer (%s request)\n", | ||
1132 | task->tk_pid, req ? "pending" : "backlogged"); | ||
1133 | |||
1134 | task->tk_status = -ETIMEDOUT; | ||
1135 | out: | ||
1136 | task->tk_timeout = 0; | ||
1137 | rpc_wake_up_task(task); | ||
1138 | spin_unlock(&xprt->sock_lock); | ||
1139 | } | ||
1140 | |||
1141 | /* | ||
1142 | * Place the actual RPC call. | ||
1143 | * We have to copy the iovec because sendmsg fiddles with its contents. | ||
1144 | */ | ||
1145 | int | ||
1146 | xprt_prepare_transmit(struct rpc_task *task) | ||
1147 | { | ||
1148 | struct rpc_rqst *req = task->tk_rqstp; | ||
1149 | struct rpc_xprt *xprt = req->rq_xprt; | ||
1150 | int err = 0; | ||
1151 | |||
1152 | dprintk("RPC: %4d xprt_prepare_transmit\n", task->tk_pid); | ||
1153 | |||
1154 | if (xprt->shutdown) | ||
1155 | return -EIO; | ||
1156 | |||
1157 | spin_lock_bh(&xprt->sock_lock); | ||
1158 | if (req->rq_received && !req->rq_bytes_sent) { | ||
1159 | err = req->rq_received; | ||
1160 | goto out_unlock; | ||
1161 | } | ||
1162 | if (!__xprt_lock_write(xprt, task)) { | ||
1163 | err = -EAGAIN; | ||
1164 | goto out_unlock; | ||
1165 | } | ||
1166 | |||
1167 | if (!xprt_connected(xprt)) { | ||
1168 | err = -ENOTCONN; | ||
1169 | goto out_unlock; | ||
1170 | } | ||
1171 | out_unlock: | ||
1172 | spin_unlock_bh(&xprt->sock_lock); | ||
1173 | return err; | ||
1174 | } | ||
1175 | |||
1176 | void | ||
1177 | xprt_transmit(struct rpc_task *task) | ||
1178 | { | ||
1179 | struct rpc_clnt *clnt = task->tk_client; | ||
1180 | struct rpc_rqst *req = task->tk_rqstp; | ||
1181 | struct rpc_xprt *xprt = req->rq_xprt; | ||
1182 | int status, retry = 0; | ||
1183 | |||
1184 | |||
1185 | dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); | ||
1186 | |||
1187 | /* set up everything as needed. */ | ||
1188 | /* Write the record marker */ | ||
1189 | if (xprt->stream) { | ||
1190 | u32 *marker = req->rq_svec[0].iov_base; | ||
1191 | |||
1192 | *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); | ||
1193 | } | ||
1194 | |||
1195 | smp_rmb(); | ||
1196 | if (!req->rq_received) { | ||
1197 | if (list_empty(&req->rq_list)) { | ||
1198 | spin_lock_bh(&xprt->sock_lock); | ||
1199 | /* Update the softirq receive buffer */ | ||
1200 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, | ||
1201 | sizeof(req->rq_private_buf)); | ||
1202 | /* Add request to the receive list */ | ||
1203 | list_add_tail(&req->rq_list, &xprt->recv); | ||
1204 | spin_unlock_bh(&xprt->sock_lock); | ||
1205 | xprt_reset_majortimeo(req); | ||
1206 | } | ||
1207 | } else if (!req->rq_bytes_sent) | ||
1208 | return; | ||
1209 | |||
1210 | /* Continue transmitting the packet/record. We must be careful | ||
1211 | * to cope with writespace callbacks arriving _after_ we have | ||
1212 | * called xprt_sendmsg(). | ||
1213 | */ | ||
1214 | while (1) { | ||
1215 | req->rq_xtime = jiffies; | ||
1216 | status = xprt_sendmsg(xprt, req); | ||
1217 | |||
1218 | if (status < 0) | ||
1219 | break; | ||
1220 | |||
1221 | if (xprt->stream) { | ||
1222 | req->rq_bytes_sent += status; | ||
1223 | |||
1224 | /* If we've sent the entire packet, immediately | ||
1225 | * reset the count of bytes sent. */ | ||
1226 | if (req->rq_bytes_sent >= req->rq_slen) { | ||
1227 | req->rq_bytes_sent = 0; | ||
1228 | goto out_receive; | ||
1229 | } | ||
1230 | } else { | ||
1231 | if (status >= req->rq_slen) | ||
1232 | goto out_receive; | ||
1233 | status = -EAGAIN; | ||
1234 | break; | ||
1235 | } | ||
1236 | |||
1237 | dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", | ||
1238 | task->tk_pid, req->rq_slen - req->rq_bytes_sent, | ||
1239 | req->rq_slen); | ||
1240 | |||
1241 | status = -EAGAIN; | ||
1242 | if (retry++ > 50) | ||
1243 | break; | ||
1244 | } | ||
1245 | |||
1246 | /* Note: at this point, task->tk_sleeping has not yet been set, | ||
1247 | * hence there is no danger of the waking up task being put on | ||
1248 | * schedq, and being picked up by a parallel run of rpciod(). | ||
1249 | */ | ||
1250 | task->tk_status = status; | ||
1251 | |||
1252 | switch (status) { | ||
1253 | case -EAGAIN: | ||
1254 | if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { | ||
1255 | /* Protect against races with xprt_write_space */ | ||
1256 | spin_lock_bh(&xprt->sock_lock); | ||
1257 | /* Don't race with disconnect */ | ||
1258 | if (!xprt_connected(xprt)) | ||
1259 | task->tk_status = -ENOTCONN; | ||
1260 | else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { | ||
1261 | task->tk_timeout = req->rq_timeout; | ||
1262 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); | ||
1263 | } | ||
1264 | spin_unlock_bh(&xprt->sock_lock); | ||
1265 | return; | ||
1266 | } | ||
1267 | /* Keep holding the socket if it is blocked */ | ||
1268 | rpc_delay(task, HZ>>4); | ||
1269 | return; | ||
1270 | case -ECONNREFUSED: | ||
1271 | task->tk_timeout = RPC_REESTABLISH_TIMEOUT; | ||
1272 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | ||
1273 | case -ENOTCONN: | ||
1274 | return; | ||
1275 | default: | ||
1276 | if (xprt->stream) | ||
1277 | xprt_disconnect(xprt); | ||
1278 | } | ||
1279 | xprt_release_write(xprt, task); | ||
1280 | return; | ||
1281 | out_receive: | ||
1282 | dprintk("RPC: %4d xmit complete\n", task->tk_pid); | ||
1283 | /* Set the task's receive timeout value */ | ||
1284 | spin_lock_bh(&xprt->sock_lock); | ||
1285 | if (!xprt->nocong) { | ||
1286 | int timer = task->tk_msg.rpc_proc->p_timer; | ||
1287 | task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); | ||
1288 | task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries; | ||
1289 | if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0) | ||
1290 | task->tk_timeout = xprt->timeout.to_maxval; | ||
1291 | } else | ||
1292 | task->tk_timeout = req->rq_timeout; | ||
1293 | /* Don't race with disconnect */ | ||
1294 | if (!xprt_connected(xprt)) | ||
1295 | task->tk_status = -ENOTCONN; | ||
1296 | else if (!req->rq_received) | ||
1297 | rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); | ||
1298 | __xprt_release_write(xprt, task); | ||
1299 | spin_unlock_bh(&xprt->sock_lock); | ||
1300 | } | ||
1301 | |||
1302 | /* | ||
1303 | * Reserve an RPC call slot. | ||
1304 | */ | ||
1305 | static inline void | ||
1306 | do_xprt_reserve(struct rpc_task *task) | ||
1307 | { | ||
1308 | struct rpc_xprt *xprt = task->tk_xprt; | ||
1309 | |||
1310 | task->tk_status = 0; | ||
1311 | if (task->tk_rqstp) | ||
1312 | return; | ||
1313 | if (!list_empty(&xprt->free)) { | ||
1314 | struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); | ||
1315 | list_del_init(&req->rq_list); | ||
1316 | task->tk_rqstp = req; | ||
1317 | xprt_request_init(task, xprt); | ||
1318 | return; | ||
1319 | } | ||
1320 | dprintk("RPC: waiting for request slot\n"); | ||
1321 | task->tk_status = -EAGAIN; | ||
1322 | task->tk_timeout = 0; | ||
1323 | rpc_sleep_on(&xprt->backlog, task, NULL, NULL); | ||
1324 | } | ||
1325 | |||
1326 | void | ||
1327 | xprt_reserve(struct rpc_task *task) | ||
1328 | { | ||
1329 | struct rpc_xprt *xprt = task->tk_xprt; | ||
1330 | |||
1331 | task->tk_status = -EIO; | ||
1332 | if (!xprt->shutdown) { | ||
1333 | spin_lock(&xprt->xprt_lock); | ||
1334 | do_xprt_reserve(task); | ||
1335 | spin_unlock(&xprt->xprt_lock); | ||
1336 | if (task->tk_rqstp) | ||
1337 | del_timer_sync(&xprt->timer); | ||
1338 | } | ||
1339 | } | ||
1340 | |||
1341 | /* | ||
1342 | * Allocate a 'unique' XID | ||
1343 | */ | ||
1344 | static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) | ||
1345 | { | ||
1346 | return xprt->xid++; | ||
1347 | } | ||
1348 | |||
1349 | static inline void xprt_init_xid(struct rpc_xprt *xprt) | ||
1350 | { | ||
1351 | get_random_bytes(&xprt->xid, sizeof(xprt->xid)); | ||
1352 | } | ||
1353 | |||
1354 | /* | ||
1355 | * Initialize RPC request | ||
1356 | */ | ||
1357 | static void | ||
1358 | xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | ||
1359 | { | ||
1360 | struct rpc_rqst *req = task->tk_rqstp; | ||
1361 | |||
1362 | req->rq_timeout = xprt->timeout.to_initval; | ||
1363 | req->rq_task = task; | ||
1364 | req->rq_xprt = xprt; | ||
1365 | req->rq_xid = xprt_alloc_xid(xprt); | ||
1366 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, | ||
1367 | req, ntohl(req->rq_xid)); | ||
1368 | } | ||
1369 | |||
1370 | /* | ||
1371 | * Release an RPC call slot | ||
1372 | */ | ||
1373 | void | ||
1374 | xprt_release(struct rpc_task *task) | ||
1375 | { | ||
1376 | struct rpc_xprt *xprt = task->tk_xprt; | ||
1377 | struct rpc_rqst *req; | ||
1378 | |||
1379 | if (!(req = task->tk_rqstp)) | ||
1380 | return; | ||
1381 | spin_lock_bh(&xprt->sock_lock); | ||
1382 | __xprt_release_write(xprt, task); | ||
1383 | __xprt_put_cong(xprt, req); | ||
1384 | if (!list_empty(&req->rq_list)) | ||
1385 | list_del(&req->rq_list); | ||
1386 | xprt->last_used = jiffies; | ||
1387 | if (list_empty(&xprt->recv) && !xprt->shutdown) | ||
1388 | mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); | ||
1389 | spin_unlock_bh(&xprt->sock_lock); | ||
1390 | task->tk_rqstp = NULL; | ||
1391 | memset(req, 0, sizeof(*req)); /* mark unused */ | ||
1392 | |||
1393 | dprintk("RPC: %4d release request %p\n", task->tk_pid, req); | ||
1394 | |||
1395 | spin_lock(&xprt->xprt_lock); | ||
1396 | list_add(&req->rq_list, &xprt->free); | ||
1397 | xprt_clear_backlog(xprt); | ||
1398 | spin_unlock(&xprt->xprt_lock); | ||
1399 | } | ||
1400 | |||
1401 | /* | ||
1402 | * Set default timeout parameters | ||
1403 | */ | ||
1404 | static void | ||
1405 | xprt_default_timeout(struct rpc_timeout *to, int proto) | ||
1406 | { | ||
1407 | if (proto == IPPROTO_UDP) | ||
1408 | xprt_set_timeout(to, 5, 5 * HZ); | ||
1409 | else | ||
1410 | xprt_set_timeout(to, 5, 60 * HZ); | ||
1411 | } | ||
1412 | |||
1413 | /* | ||
1414 | * Set constant timeout | ||
1415 | */ | ||
1416 | void | ||
1417 | xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) | ||
1418 | { | ||
1419 | to->to_initval = | ||
1420 | to->to_increment = incr; | ||
1421 | to->to_maxval = incr * retr; | ||
1422 | to->to_retries = retr; | ||
1423 | to->to_exponential = 0; | ||
1424 | } | ||
1425 | |||
1426 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
1427 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
1428 | |||
1429 | /* | ||
1430 | * Initialize an RPC client | ||
1431 | */ | ||
1432 | static struct rpc_xprt * | ||
1433 | xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) | ||
1434 | { | ||
1435 | struct rpc_xprt *xprt; | ||
1436 | unsigned int entries; | ||
1437 | size_t slot_table_size; | ||
1438 | struct rpc_rqst *req; | ||
1439 | |||
1440 | dprintk("RPC: setting up %s transport...\n", | ||
1441 | proto == IPPROTO_UDP? "UDP" : "TCP"); | ||
1442 | |||
1443 | entries = (proto == IPPROTO_TCP)? | ||
1444 | xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; | ||
1445 | |||
1446 | if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) | ||
1447 | return ERR_PTR(-ENOMEM); | ||
1448 | memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ | ||
1449 | xprt->max_reqs = entries; | ||
1450 | slot_table_size = entries * sizeof(xprt->slot[0]); | ||
1451 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1452 | if (xprt->slot == NULL) { | ||
1453 | kfree(xprt); | ||
1454 | return ERR_PTR(-ENOMEM); | ||
1455 | } | ||
1456 | memset(xprt->slot, 0, slot_table_size); | ||
1457 | |||
1458 | xprt->addr = *ap; | ||
1459 | xprt->prot = proto; | ||
1460 | xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; | ||
1461 | if (xprt->stream) { | ||
1462 | xprt->cwnd = RPC_MAXCWND(xprt); | ||
1463 | xprt->nocong = 1; | ||
1464 | xprt->max_payload = (1U << 31) - 1; | ||
1465 | } else { | ||
1466 | xprt->cwnd = RPC_INITCWND; | ||
1467 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | ||
1468 | } | ||
1469 | spin_lock_init(&xprt->sock_lock); | ||
1470 | spin_lock_init(&xprt->xprt_lock); | ||
1471 | init_waitqueue_head(&xprt->cong_wait); | ||
1472 | |||
1473 | INIT_LIST_HEAD(&xprt->free); | ||
1474 | INIT_LIST_HEAD(&xprt->recv); | ||
1475 | INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); | ||
1476 | INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); | ||
1477 | init_timer(&xprt->timer); | ||
1478 | xprt->timer.function = xprt_init_autodisconnect; | ||
1479 | xprt->timer.data = (unsigned long) xprt; | ||
1480 | xprt->last_used = jiffies; | ||
1481 | xprt->port = XPRT_MAX_RESVPORT; | ||
1482 | |||
1483 | /* Set timeout parameters */ | ||
1484 | if (to) { | ||
1485 | xprt->timeout = *to; | ||
1486 | } else | ||
1487 | xprt_default_timeout(&xprt->timeout, xprt->prot); | ||
1488 | |||
1489 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); | ||
1490 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); | ||
1491 | rpc_init_wait_queue(&xprt->resend, "xprt_resend"); | ||
1492 | rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); | ||
1493 | |||
1494 | /* initialize free list */ | ||
1495 | for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) | ||
1496 | list_add(&req->rq_list, &xprt->free); | ||
1497 | |||
1498 | xprt_init_xid(xprt); | ||
1499 | |||
1500 | /* Check whether we want to use a reserved port */ | ||
1501 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1502 | |||
1503 | dprintk("RPC: created transport %p with %u slots\n", xprt, | ||
1504 | xprt->max_reqs); | ||
1505 | |||
1506 | return xprt; | ||
1507 | } | ||
1508 | |||
1509 | /* | ||
1510 | * Bind to a reserved port | ||
1511 | */ | ||
1512 | static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | ||
1513 | { | ||
1514 | struct sockaddr_in myaddr = { | ||
1515 | .sin_family = AF_INET, | ||
1516 | }; | ||
1517 | int err, port; | ||
1518 | |||
1519 | /* Were we already bound to a given port? Try to reuse it */ | ||
1520 | port = xprt->port; | ||
1521 | do { | ||
1522 | myaddr.sin_port = htons(port); | ||
1523 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | ||
1524 | sizeof(myaddr)); | ||
1525 | if (err == 0) { | ||
1526 | xprt->port = port; | ||
1527 | return 0; | ||
1528 | } | ||
1529 | if (--port == 0) | ||
1530 | port = XPRT_MAX_RESVPORT; | ||
1531 | } while (err == -EADDRINUSE && port != xprt->port); | ||
1532 | |||
1533 | printk("RPC: Can't bind to reserved port (%d).\n", -err); | ||
1534 | return err; | ||
1535 | } | ||
1536 | |||
1537 | static void | ||
1538 | xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) | ||
1539 | { | ||
1540 | struct sock *sk = sock->sk; | ||
1541 | |||
1542 | if (xprt->inet) | ||
1543 | return; | ||
1544 | |||
1545 | write_lock_bh(&sk->sk_callback_lock); | ||
1546 | sk->sk_user_data = xprt; | ||
1547 | xprt->old_data_ready = sk->sk_data_ready; | ||
1548 | xprt->old_state_change = sk->sk_state_change; | ||
1549 | xprt->old_write_space = sk->sk_write_space; | ||
1550 | if (xprt->prot == IPPROTO_UDP) { | ||
1551 | sk->sk_data_ready = udp_data_ready; | ||
1552 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
1553 | xprt_set_connected(xprt); | ||
1554 | } else { | ||
1555 | tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ | ||
1556 | sk->sk_data_ready = tcp_data_ready; | ||
1557 | sk->sk_state_change = tcp_state_change; | ||
1558 | xprt_clear_connected(xprt); | ||
1559 | } | ||
1560 | sk->sk_write_space = xprt_write_space; | ||
1561 | |||
1562 | /* Reset to new socket */ | ||
1563 | xprt->sock = sock; | ||
1564 | xprt->inet = sk; | ||
1565 | write_unlock_bh(&sk->sk_callback_lock); | ||
1566 | |||
1567 | return; | ||
1568 | } | ||
1569 | |||
1570 | /* | ||
1571 | * Set socket buffer length | ||
1572 | */ | ||
1573 | void | ||
1574 | xprt_sock_setbufsize(struct rpc_xprt *xprt) | ||
1575 | { | ||
1576 | struct sock *sk = xprt->inet; | ||
1577 | |||
1578 | if (xprt->stream) | ||
1579 | return; | ||
1580 | if (xprt->rcvsize) { | ||
1581 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
1582 | sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; | ||
1583 | } | ||
1584 | if (xprt->sndsize) { | ||
1585 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
1586 | sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; | ||
1587 | sk->sk_write_space(sk); | ||
1588 | } | ||
1589 | } | ||
1590 | |||
1591 | /* | ||
1592 | * Datastream sockets are created here, but xprt_connect will create | ||
1593 | * and connect stream sockets. | ||
1594 | */ | ||
1595 | static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) | ||
1596 | { | ||
1597 | struct socket *sock; | ||
1598 | int type, err; | ||
1599 | |||
1600 | dprintk("RPC: xprt_create_socket(%s %d)\n", | ||
1601 | (proto == IPPROTO_UDP)? "udp" : "tcp", proto); | ||
1602 | |||
1603 | type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | ||
1604 | |||
1605 | if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { | ||
1606 | printk("RPC: can't create socket (%d).\n", -err); | ||
1607 | return NULL; | ||
1608 | } | ||
1609 | |||
1610 | /* If the caller has the capability, bind to a reserved port */ | ||
1611 | if (resvport && xprt_bindresvport(xprt, sock) < 0) { | ||
1612 | printk("RPC: can't bind to reserved port.\n"); | ||
1613 | goto failed; | ||
1614 | } | ||
1615 | |||
1616 | return sock; | ||
1617 | |||
1618 | failed: | ||
1619 | sock_release(sock); | ||
1620 | return NULL; | ||
1621 | } | ||
1622 | |||
/*
 * Create an RPC client transport for the given protocol and peer
 * address.  Thin wrapper around xprt_setup() that logs the outcome;
 * the result (possibly an ERR_PTR) is passed through unchanged.
 */
struct rpc_xprt *
xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to)
{
	struct rpc_xprt	*xprt = xprt_setup(proto, sap, to);

	if (!IS_ERR(xprt))
		dprintk("RPC: xprt_create_proto created xprt %p\n", xprt);
	else
		dprintk("RPC: xprt_create_proto failed\n");

	return xprt;
}
1638 | |||
1639 | /* | ||
1640 | * Prepare for transport shutdown. | ||
1641 | */ | ||
1642 | static void | ||
1643 | xprt_shutdown(struct rpc_xprt *xprt) | ||
1644 | { | ||
1645 | xprt->shutdown = 1; | ||
1646 | rpc_wake_up(&xprt->sending); | ||
1647 | rpc_wake_up(&xprt->resend); | ||
1648 | rpc_wake_up(&xprt->pending); | ||
1649 | rpc_wake_up(&xprt->backlog); | ||
1650 | wake_up(&xprt->cong_wait); | ||
1651 | del_timer_sync(&xprt->timer); | ||
1652 | } | ||
1653 | |||
1654 | /* | ||
1655 | * Clear the xprt backlog queue | ||
1656 | */ | ||
1657 | static int | ||
1658 | xprt_clear_backlog(struct rpc_xprt *xprt) { | ||
1659 | rpc_wake_up_next(&xprt->backlog); | ||
1660 | wake_up(&xprt->cong_wait); | ||
1661 | return 1; | ||
1662 | } | ||
1663 | |||
1664 | /* | ||
1665 | * Destroy an RPC transport, killing off all requests. | ||
1666 | */ | ||
1667 | int | ||
1668 | xprt_destroy(struct rpc_xprt *xprt) | ||
1669 | { | ||
1670 | dprintk("RPC: destroying transport %p\n", xprt); | ||
1671 | xprt_shutdown(xprt); | ||
1672 | xprt_disconnect(xprt); | ||
1673 | xprt_close(xprt); | ||
1674 | kfree(xprt->slot); | ||
1675 | kfree(xprt); | ||
1676 | |||
1677 | return 0; | ||
1678 | } | ||